csw term added in DwdagDw and sfbndfix fixed to include the gamma5 case

parent e16e24cf
......@@ -49,7 +49,7 @@ function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws",
if B == BC_SF_AFWB || B == BC_SF_ORBI
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,Gamma{16},lp)
end
@timeit "Dw" begin
CUDA.@sync begin
......@@ -57,7 +57,7 @@ function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws",
end
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,si,dpar.ct,lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,si,dpar.ct,Gamma{16},lp)
end
else
......@@ -102,15 +102,51 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(st,si,dpar.ct,lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(st,si,dpar.ct,Gamma{5},lp)
end
if abs(dpar.csw) > 1.0E-10
if ymws == "ymws" || gp == "gp"
error("YM workspace and Gauge parameters must be included for the improved dirac operator.")
else
@timeit "Dw_improvement" begin
for i in 1:Int(lp.npls/2)
z1 = exp(2im * pi * lp.ntw[i]/gp.ng)
z2 = exp(2im * pi * lp.ntw[lp.npls-i]/gp.ng)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, i, lp.npls-i, z1, z2, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.frc1, ymws.frc2, Gamma{i+9}, Gamma{i+12}, si, dpar.csw, lp)
end
end
end
end
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, dpar.m0, dpar.th, lp)
end
if abs(dpar.csw) > 1.0E-10
if ymws == "ymws" || gp == "gp"
error("YM workspace and Gauge parameters must be included for the improved dirac operator.")
else
@timeit "Dw_improvement" begin
for i in 1:Int(lp.npls/2)
z1 = exp(2im * pi * lp.ntw[i]/gp.ng)
z2 = exp(2im * pi * lp.ntw[lp.npls-i]/gp.ng)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, i, lp.npls-i, z1, z2, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.frc1, ymws.frc2, Gamma{i+9}, Gamma{i+12}, si, dpar.csw, lp)
end
end
end
end
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,st,dpar.ct,lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,st,dpar.ct,Gamma{5},lp)
end
end
......@@ -133,7 +169,7 @@ function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp"
if B == BC_SF_AFWB || B == BC_SF_ORBI
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,Gamma{16},lp)
end
@timeit "Dw" begin
CUDA.@sync begin
......@@ -141,7 +177,7 @@ function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp"
end
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,si,dpar.ct,lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,si,dpar.ct,Gamma{5},lp)
end
else
......@@ -173,7 +209,7 @@ function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp"
return nothing
end
function krnl_sfbndfix!(so,si,ct,lp::SpaceParm)
function krnl_sfbndfix!(so,si,ct,gamma,lp::SpaceParm)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
......@@ -181,7 +217,7 @@ function krnl_sfbndfix!(so,si,ct,lp::SpaceParm)
so[b,r] = 0.0*so[b,r]
elseif (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)si[b,r]
so[b,r] += (ct-1.0)*dmul(gamma,si[b,r])
end
return nothing
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment