Test for Dslash

parent 195c4c31
...@@ -93,6 +93,33 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space ...@@ -93,6 +93,33 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space
end end
"""
    krnl_g5Dslsh!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D})

CUDA kernel. For the entry addressed by the thread index `b` and the block index `r`,
store in `so[b,r]` the result of applying `Gamma{5}` to

    -0.5 * sum over mu = 1..4 of
        ( th[mu]       * U[b,mu,r]           * Gamma{mu} * si[bu_mu, ru_mu]
        + conj(th[mu]) * dag(U[bd_mu,mu,rd_mu]) * Gamma{mu} * si[bd_mu, rd_mu] )

where `(bu_mu, ru_mu) = up((b,r), mu, lp)` and `(bd_mu, rd_mu) = dw((b,r), mu, lp)`.

# Arguments
- `so`: output field, written at `[b,r]`.
- `U`: field indexed as `U[b, mu, r]`.
- `si`: input field, read at the index pairs returned by `up` and `dw`.
- `m0`: not used by this kernel; kept so the call signature is unchanged.
- `th`: indexable with `1:4`; `th[mu]` multiplies the `up` term and `conj(th[mu])`
  the `dw` term.
- `lp`: space parameters forwarded to `up` and `dw`.

Returns `nothing`.
"""
function krnl_g5Dslsh!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}

    b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)

    @inbounds begin

        # Index pairs returned by `up` / `dw` for each of the four directions.
        bu1, ru1 = up((b,r), 1, lp)
        bd1, rd1 = dw((b,r), 1, lp)
        bu2, ru2 = up((b,r), 2, lp)
        bd2, rd2 = dw((b,r), 2, lp)
        bu3, ru3 = up((b,r), 3, lp)
        bd3, rd3 = dw((b,r), 3, lp)
        bu4, ru4 = up((b,r), 4, lp)
        bd4, rd4 = dw((b,r), 4, lp)

        # Each direction mu must use its own (bu_mu, ru_mu) / (bd_mu, rd_mu) pair.
        # Previously directions 2-4 reused the direction-1 indices, leaving
        # bu2..bd4 computed but unused.
        # NOTE(review): both terms of every direction enter with the same sign;
        # confirm this is intended rather than a relative minus sign on the `dw` term.
        so[b,r] = -0.5*( th[1]*U[b,1,r]*dmul(Gamma{1},si[bu1,ru1]) + conj(th[1])*dag(U[bd1,1,rd1])*dmul(Gamma{1},si[bd1,rd1]) +
                         th[2]*U[b,2,r]*dmul(Gamma{2},si[bu2,ru2]) + conj(th[2])*dag(U[bd2,2,rd2])*dmul(Gamma{2},si[bd2,rd2]) +
                         th[3]*U[b,3,r]*dmul(Gamma{3},si[bu3,ru3]) + conj(th[3])*dag(U[bd3,3,rd3])*dmul(Gamma{3},si[bd3,rd3]) +
                         th[4]*U[b,4,r]*dmul(Gamma{4},si[bu4,ru4]) + conj(th[4])*dag(U[bd4,4,rd4])*dmul(Gamma{4},si[bd4,rd4]) )

        # Final multiplication by Gamma{5}, done in place on the value just stored.
        so[b,r] = dmul(Gamma{5}, so[b,r])
    end

    return nothing
end
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
...@@ -105,9 +132,9 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP ...@@ -105,9 +132,9 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
end end
end end
@timeit "g5Dw" begin @timeit "g5Dslsh" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si,-4.0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si,-4.0, dpar.th, lp)
end end
end end
...@@ -120,15 +147,15 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP ...@@ -120,15 +147,15 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin @timeit "Dw_improvement" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(dws.st, dws.csw, dpar.csw, si, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
end end
end end
end end
@timeit "g5Dw" begin @timeit "g5Dslsh" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, -4.0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, -4.0, dpar.th, lp)
end end
end end
...@@ -142,7 +169,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP ...@@ -142,7 +169,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin @timeit "Dw_improvement" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, dws.csw, dpar.csw, dws.st, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
end end
end end
end end
...@@ -153,25 +180,25 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP ...@@ -153,25 +180,25 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
else else
@timeit "DwdagDw" begin @timeit "DwdagDw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, -4.0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, -4.0, dpar.th, lp)
end end
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin @timeit "Dw_improvement" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(dws.st, dws.csw, dpar.csw, si, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
end end
end end
end end
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, -4.0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, -4.0, dpar.th, lp)
end end
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin @timeit "Dw_improvement" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, dws.csw, dpar.csw, dws.st, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
end end
end end
end end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment