Should kinda work?

parent bfaacc5b
...@@ -136,8 +136,6 @@ flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp: ...@@ -136,8 +136,6 @@ flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp:
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if B == BC_SF_AFWB || B == BC_SF_ORBI if B == BC_SF_AFWB || B == BC_SF_ORBI
...@@ -151,7 +149,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP ...@@ -151,7 +149,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
@timeit "g5Dslsh" begin @timeit "g5Dslsh" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si,-4.0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
end end
end end
...@@ -172,7 +170,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP ...@@ -172,7 +170,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
@timeit "g5Dslsh" begin @timeit "g5Dslsh" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, -4.0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
end end
end end
...@@ -197,7 +195,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP ...@@ -197,7 +195,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
else else
@timeit "DwdagDw" begin @timeit "DwdagDw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, -4.0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
end end
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
...@@ -209,7 +207,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP ...@@ -209,7 +207,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
end end
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, -4.0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
end end
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
...@@ -227,7 +225,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP ...@@ -227,7 +225,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
end end
function krnl_g5Dslsh!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D} function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment