Fmunu made real

parent ec3f994c
...@@ -247,7 +247,12 @@ function clover(g1::SU3{T},g2::SU3{T},g3::SU3{T},g4::SU3{T}) where {T} ...@@ -247,7 +247,12 @@ function clover(g1::SU3{T},g2::SU3{T},g3::SU3{T},g4::SU3{T}) where {T}
return -0.125*im*M3x3{T}(u11-conj(u11),u12-conj(u21),u13-conj(u31),u21-conj(u12),u22-conj(u22),u23-conj(u32),u31-conj(u13),u32-conj(u23),u33-conj(u33)) return -0.125*im*M3x3{T}(u11-conj(u11),u12-conj(u21),u13-conj(u31),u21-conj(u12),u22-conj(u22),u23-conj(u32),u31-conj(u13),u32-conj(u23),u33-conj(u33))
end end
"""
function Csw!(dws, U, gp, lp::SpaceParm)
Computes the clover and stores it in dws.csw. The twist is NOT implemented (since dws.csw diagonal has to be real) and its not implemented either in Dw!
"""
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D} function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
@timeit "Csw computation" begin @timeit "Csw computation" begin
...@@ -255,7 +260,7 @@ function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D} ...@@ -255,7 +260,7 @@ function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
for i in 1:Int(lp.npls) for i in 1:Int(lp.npls)
z = exp(2im * pi * lp.ntw[i]/gp.ng) z = exp(2im * pi * lp.ntw[i]/gp.ng)
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, i, z, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, i, lp)
end end
end end
end end
...@@ -266,11 +271,24 @@ end ...@@ -266,11 +271,24 @@ end
function krnl_Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D} function krnl_Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin @inbounds begin
so[b,r] += 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[4]
id1, id2 = lp.plidx[ipl]
TWP = ((I[id1]==1)&&(I[id2]==1))
if TWP
so[b,r] += 0.5*csw*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])) -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
else
so[b,r] += 0.5*csw*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
end
end end
return nothing return nothing
...@@ -278,12 +296,25 @@ end ...@@ -278,12 +296,25 @@ end
function krnl_g5Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D} function krnl_g5Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin @inbounds begin
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[4]
id1, id2 = lp.plidx[ipl]
TWP = ((I[id1]==1)&&(I[id2]==1))
if TWP
so[b,r] += 0.5*csw*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
else
so[b,r] += 0.5*csw*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))) -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
end end
end
return nothing return nothing
end end
...@@ -362,7 +393,7 @@ function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D} ...@@ -362,7 +393,7 @@ function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
return nothing return nothing
end end
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, ztw, lp::SpaceParm{4,M,B,D}) where {T,M,B,D} function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
@inbounds begin @inbounds begin
b = Int64(CUDA.threadIdx().x) b = Int64(CUDA.threadIdx().x)
...@@ -372,7 +403,6 @@ function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, ztw, lp::SpaceParm{4,M,B ...@@ -372,7 +403,6 @@ function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, ztw, lp::SpaceParm{4,M,B
id1, id2 = lp.plidx[ipl] id1, id2 = lp.plidx[ipl]
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4) SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
TWP = ((I[id1]==1)&&(I[id2]==1))
bu1, ru1 = up((b, r), id1, lp) bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp) bu2, ru2 = up((b, r), id2, lp)
...@@ -397,11 +427,7 @@ function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, ztw, lp::SpaceParm{4,M,B ...@@ -397,11 +427,7 @@ function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, ztw, lp::SpaceParm{4,M,B
if !(SFBC && (it == 1)) if !(SFBC && (it == 1))
if TWP csw[b,ipl,r] = 0.125*(antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4))
csw[b,ipl,r] = -0.125*im*ztw*(antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4))
else
csw[b,ipl,r] = -0.125*im*antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4)
end
end end
end end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment