csw term added in DwdagDw and sfbndfix fixed to include the gamma5 case

parent e16e24cf
...@@ -49,7 +49,7 @@ function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws", ...@@ -49,7 +49,7 @@ function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws",
if B == BC_SF_AFWB || B == BC_SF_ORBI if B == BC_SF_AFWB || B == BC_SF_ORBI
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,Gamma{16},lp)
end end
@timeit "Dw" begin @timeit "Dw" begin
CUDA.@sync begin CUDA.@sync begin
...@@ -57,7 +57,7 @@ function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws", ...@@ -57,7 +57,7 @@ function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws",
end end
end end
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,si,dpar.ct,lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,si,dpar.ct,Gamma{16},lp)
end end
else else
...@@ -102,15 +102,51 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws ...@@ -102,15 +102,51 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws
end end
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(st,si,dpar.ct,lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(st,si,dpar.ct,Gamma{5},lp)
end
if abs(dpar.csw) > 1.0E-10
if ymws == "ymws" || gp == "gp"
error("YM workspace and Gauge parameters must be included for the improved dirac operator.")
else
@timeit "Dw_improvement" begin
for i in 1:Int(lp.npls/2)
z1 = exp(2im * pi * lp.ntw[i]/gp.ng)
z2 = exp(2im * pi * lp.ntw[lp.npls-i]/gp.ng)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, i, lp.npls-i, z1, z2, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.frc1, ymws.frc2, Gamma{i+9}, Gamma{i+12}, si, dpar.csw, lp)
end
end
end
end
end end
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, dpar.m0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, dpar.m0, dpar.th, lp)
end end
if abs(dpar.csw) > 1.0E-10
if ymws == "ymws" || gp == "gp"
error("YM workspace and Gauge parameters must be included for the improved dirac operator.")
else
@timeit "Dw_improvement" begin
for i in 1:Int(lp.npls/2)
z1 = exp(2im * pi * lp.ntw[i]/gp.ng)
z2 = exp(2im * pi * lp.ntw[lp.npls-i]/gp.ng)
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,st,dpar.ct,lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, i, lp.npls-i, z1, z2, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.frc1, ymws.frc2, Gamma{i+9}, Gamma{i+12}, si, dpar.csw, lp)
end
end
end
end
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,st,dpar.ct,Gamma{5},lp)
end end
end end
...@@ -133,7 +169,7 @@ function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp" ...@@ -133,7 +169,7 @@ function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp"
if B == BC_SF_AFWB || B == BC_SF_ORBI if B == BC_SF_AFWB || B == BC_SF_ORBI
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,Gamma{16},lp)
end end
@timeit "Dw" begin @timeit "Dw" begin
CUDA.@sync begin CUDA.@sync begin
...@@ -141,7 +177,7 @@ function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp" ...@@ -141,7 +177,7 @@ function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp"
end end
end end
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,si,dpar.ct,lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,si,dpar.ct,Gamma{5},lp)
end end
else else
...@@ -173,7 +209,7 @@ function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp" ...@@ -173,7 +209,7 @@ function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp"
return nothing return nothing
end end
function krnl_sfbndfix!(so,si,ct,lp::SpaceParm) function krnl_sfbndfix!(so,si,ct,gamma,lp::SpaceParm)
b=Int64(CUDA.threadIdx().x) b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x) r=Int64(CUDA.blockIdx().x)
...@@ -181,7 +217,7 @@ function krnl_sfbndfix!(so,si,ct,lp::SpaceParm) ...@@ -181,7 +217,7 @@ function krnl_sfbndfix!(so,si,ct,lp::SpaceParm)
so[b,r] = 0.0*so[b,r] so[b,r] = 0.0*so[b,r]
elseif (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4]) elseif (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)si[b,r] so[b,r] += (ct-1.0)*dmul(gamma,si[b,r])
end end
return nothing return nothing
end end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment