Secong try

parent 22707d80
......@@ -4,6 +4,95 @@ import ..YM.flw, ..YM.force_gauge
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} # TO BE CONTINUED
if B == BC_SF_AFWB || B == BC_SF_ORBI
@timeit "DwdagDw" begin
@timeit "SF fix" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,Gamma{16},lp)
end
end
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(st, U, si, dpar.m0, dpar.th, lp)
end
end
@timeit "SF fix" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(st,si,dpar.ct,Gamma{5},lp)
end
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(st, dws.csw, dpar.csw, si, lp)
end
end
end
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, st, dpar.m0, dpar.th, lp)
end
end
@timeit "SF fix" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,st,dpar.ct,Gamma{5},lp)
end
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(so, dws.csw, dpar.csw, st, lp)
end
end
end
end
else
@timeit "DwdagDw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(st, U, si, dpar.m0, dpar.th, lp)
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(st, dws.csw, dpar.csw, si, lp)
end
end
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, st, dpar.m0, dpar.th, lp)
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(so, dws.csw, dpar.csw, st, lp)
end
end
end
end
end
return nothing
end
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
......@@ -18,7 +107,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si,0.0, dpar.th, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si,-4.0, dpar.th, lp)
end
end
......@@ -39,7 +128,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, 0.0, dpar.th, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, -4.0, dpar.th, lp)
end
end
......@@ -64,7 +153,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
else
@timeit "DwdagDw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, 0.0, dpar.th, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, -4.0, dpar.th, lp)
end
if abs(dpar.csw) > 1.0E-10
......@@ -76,7 +165,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, 0.0, dpar.th, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, -4.0, dpar.th, lp)
end
if abs(dpar.csw) > 1.0E-10
......@@ -121,7 +210,7 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
U .= expm.(U, ymws.mom, 2*eps)
DwdagDw!(dws.sAp, U, psi, dpar, dws, dws.st, lp)
Dslash_sq!(dws.sAp, U, psi, dpar, dws, lp)
psi .= psi + 2*eps*int.e0[k].*dws.sp .+ 2*eps*int.e1[k].*dws.sAp
dws.sAp .= int.e0[k].*dws.sAp .+ int.e1[k].*dws.sp
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment