test for flw with D slash squared

parent ae47e6ce
...@@ -4,7 +4,8 @@ import ..YM.flw, ..YM.force_gauge ...@@ -4,7 +4,8 @@ import ..YM.flw, ..YM.force_gauge
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if B == BC_SF_AFWB || B == BC_SF_ORBI if B == BC_SF_AFWB || B == BC_SF_ORBI
@timeit "DwdagDw" begin @timeit "DwdagDw" begin
...@@ -17,20 +18,20 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space ...@@ -17,20 +18,20 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space
@timeit "g5Dw" begin @timeit "g5Dw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(st, U, si, dpar.m0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si,0.0, dpar.th, lp)
end end
end end
@timeit "SF fix" begin @timeit "SF fix" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(st,si,dpar.ct,Gamma{5},lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(dws.st,si,dpar.ct,Gamma{5},lp)
end end
end end
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin @timeit "Dw_improvement" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(st, dws.csw, dpar.csw, si, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(dws.st, dws.csw, dpar.csw, si, lp)
end end
end end
end end
...@@ -38,21 +39,21 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space ...@@ -38,21 +39,21 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space
@timeit "g5Dw" begin @timeit "g5Dw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, dpar.m0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, 0.0, dpar.th, lp)
end end
end end
@timeit "SF fix" begin @timeit "SF fix" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,st,dpar.ct,Gamma{5},lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,dws.st,dpar.ct,Gamma{5},lp)
end end
end end
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin @timeit "Dw_improvement" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, dws.csw, dpar.csw, st, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, dws.csw, dpar.csw, dws.st, lp)
end end
end end
end end
...@@ -63,25 +64,25 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space ...@@ -63,25 +64,25 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space
else else
@timeit "DwdagDw" begin @timeit "DwdagDw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(st, U, si, dpar.m0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, 0.0, dpar.th, lp)
end end
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin @timeit "Dw_improvement" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(st, dws.csw, dpar.csw, si, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(dws.st, dws.csw, dpar.csw, si, lp)
end end
end end
end end
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, dpar.m0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, 0.0, dpar.th, lp)
end end
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin @timeit "Dw_improvement" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, dws.csw, dpar.csw, st, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, dws.csw, dpar.csw, dws.st, lp)
end end
end end
end end
...@@ -93,32 +94,37 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space ...@@ -93,32 +94,37 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space
end end
function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
@timeit "Integrating flow equations" begin @timeit "Integrating flow equations" begin
for i in 1:ns for i in 1:ns
force_gauge(ymws, U, int.c0, 1, gp, lp) force_gauge(ymws, U, int.c0, 1, gp, lp)
DwdagDw!(dws.sAp, U, psi, dpar, dws, dws.st, lp)
if int.add_zth if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp) add_zth_term(ymws::YMworkspace, U, lp)
end end
psi .= psi + 0.5*int.r*eps*dws.sAp
ymws.mom .= ymws.frc1 ymws.mom .= ymws.frc1
U .= expm.(U, ymws.mom, 2*eps*int.r) U .= expm.(U, ymws.mom, 2*eps*int.r)
for k in 1:NI Dslash_sq!(dws.sAp, U, psi, dpar, dws, lp)
dws.sp .= dws.sAp
psi .= psi + 2*int.r*eps*dws.sp
for k in 1:NI
force_gauge(ymws, U, int.c0, 1, gp, lp) force_gauge(ymws, U, int.c0, 1, gp, lp)
DwdagDw!(dws.sp, U, psi, dpar, dws, dws.st, lp)
if int.add_zth if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp) add_zth_term(ymws::YMworkspace, U, lp)
end end
psi .= psi + 2*eps*int.e0[k].*dws.sAp .+ 2*eps*int.e1[k].*dws.sp
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1 ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
U .= expm.(U, ymws.mom, 2*eps) U .= expm.(U, ymws.mom, 2*eps)
DwdagDw!(dws.sAp, U, psi, dpar, dws, dws.st, lp)
psi .= psi + 2*eps*int.e0[k].*dws.sp .+ 2*eps*int.e1[k].*dws.sAp
dws.sAp .= int.e0[k].*dws.sAp .+ int.e1[k].*dws.sp
end end
end end
end end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment