Fmunu made real

parent ec3f994c
......@@ -247,7 +247,12 @@ function clover(g1::SU3{T},g2::SU3{T},g3::SU3{T},g4::SU3{T}) where {T}
return -0.125*im*M3x3{T}(u11-conj(u11),u12-conj(u21),u13-conj(u31),u21-conj(u12),u22-conj(u22),u23-conj(u32),u31-conj(u13),u32-conj(u23),u33-conj(u33))
end
"""
function Csw!(dws, U, gp, lp::SpaceParm)
Computes the clover and stores it in dws.csw. The twist is NOT implemented (since dws.csw diagonal has to be real) and its not implemented either in Dw!
"""
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
@timeit "Csw computation" begin
......@@ -255,7 +260,7 @@ function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
for i in 1:Int(lp.npls)
z = exp(2im * pi * lp.ntw[i]/gp.ng)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, i, z, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, i, lp)
end
end
end
......@@ -266,23 +271,49 @@ end
function krnl_Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
so[b,r] += 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[4]
id1, id2 = lp.plidx[ipl]
TWP = ((I[id1]==1)&&(I[id2]==1))
if TWP
so[b,r] += 0.5*csw*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
else
so[b,r] += 0.5*csw*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
end
end
end
return nothing
end
function krnl_g5Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[4]
id1, id2 = lp.plidx[ipl]
TWP = ((I[id1]==1)&&(I[id2]==1))
if TWP
so[b,r] += 0.5*csw*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
else
so[b,r] += 0.5*csw*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
end
end
return nothing
......@@ -362,7 +393,7 @@ function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
return nothing
end
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, ztw, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
......@@ -372,7 +403,6 @@ function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, ztw, lp::SpaceParm{4,M,B
id1, id2 = lp.plidx[ipl]
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
TWP = ((I[id1]==1)&&(I[id2]==1))
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
......@@ -397,11 +427,7 @@ function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, ztw, lp::SpaceParm{4,M,B
if !(SFBC && (it == 1))
if TWP
csw[b,ipl,r] = -0.125*im*ztw*(antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4))
else
csw[b,ipl,r] = -0.125*im*antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4)
end
csw[b,ipl,r] = 0.125*(antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4))
end
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment