First try to properly add improvement

parent 2eb07322
......@@ -53,7 +53,7 @@ struct DiracWorkspace{T}
end
export DiracWorkspace, DiracParam
function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp") where {B,D}
function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws::YMWorkspace) where {B,D}
if B == BC_SF_AFWB || B == BC_SF_ORBI
......@@ -83,19 +83,10 @@ function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws",
end
end
@timeit "Dw_improvement" begin
if abs(dpar.csw) > 1.0E-10
if ymws == "ymws" || gp == "gp"
error("YM workspace and Gauge parameters must be included for the improved dirac operator.")
else
for i in 1:Int(lp.npls/2)
z1 = exp(2im * pi * lp.ntw[i]/gp.ng)
z2 = exp(2im * pi * lp.ntw[lp.npls-i+1]/gp.ng)
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, i, lp.npls-i+1, z1, z2, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(so, ymws.frc1, ymws.frc2, Gamma{i+9}, Gamma{i+12}, si, dpar.csw, lp)
end
end
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(so, ymws.csw, dpar.csw, si, lp)
end
end
end
......@@ -103,9 +94,7 @@ function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws",
return nothing
end
function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp") where {B,D}
function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws::YMWorkspace) where {B,D}
if B == BC_SF_AFWB || B == BC_SF_ORBI
@timeit "DwdagDw" begin
......@@ -128,19 +117,10 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws
end
end
@timeit "Dw_improvement" begin
if abs(dpar.csw) > 1.0E-10
if ymws == "ymws" || gp == "gp"
error("YM workspace and Gauge parameters must be included for the improved dirac operator.")
else
for i in 1:Int(lp.npls/2)
z1 = exp(2im * pi * lp.ntw[i]/gp.ng)
z2 = exp(2im * pi * lp.ntw[lp.npls-i+1]/gp.ng)
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, i, lp.npls-i+1, z1, z2, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(st, ymws.frc1, ymws.frc2, Gamma{i+9}, Gamma{i+12}, si, dpar.csw, lp)
end
end
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.csw, dpar.csw, si, lp)
end
end
end
......@@ -159,19 +139,10 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws
end
end
@timeit "Dw_improvement" begin
if abs(dpar.csw) > 1.0E-10
if ymws == "ymws" || gp == "gp"
error("YM workspace and Gauge parameters must be included for the improved dirac operator.")
else
for i in 1:Int(lp.npls/2)
z1 = exp(2im * pi * lp.ntw[i]/gp.ng)
z2 = exp(2im * pi * lp.ntw[lp.npls-i+1]/gp.ng)
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, i, lp.npls-i+1, z1, z2, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.frc1, ymws.frc2, Gamma{i+9}, Gamma{i+12}, st, dpar.csw, lp)
end
end
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.csw, dpar.csw, si, lp)
end
end
end
......@@ -184,9 +155,27 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(st, U, si, dpar.m0, dpar.th, lp)
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.csw, dpar.csw, si, lp)
end
end
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, dpar.m0, dpar.th, lp)
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.csw, dpar.csw, si, lp)
end
end
end
end
end
......@@ -194,7 +183,7 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws
end
function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp") where {B,D}
function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws::YMWorkspace) where {B,D}
if B == BC_SF_AFWB || B == BC_SF_ORBI
@timeit "SF fix" begin
......@@ -221,26 +210,60 @@ function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp"
end
@timeit "Dw_improvement" begin
if abs(dpar.csw) > 1.0E-10
if ymws == "ymws" || gp == "gp"
error("YM workspace and Gauge parameters must be included for the improved dirac operator.")
else
for i in 1:Int(lp.npls/2)
z1 = exp(2im * pi * lp.ntw[i]/gp.ng)
z2 = exp(2im * pi * lp.ntw[lp.npls-i+1]/gp.ng)
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, i, lp.npls-i+1, z1, z2, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.frc1, ymws.frc2, Gamma{i+9}, Gamma{i+12}, si, dpar.csw, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.csw, dpar.csw, si, lp)
end
end
end
return nothing
end
function Csw!(ymws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
@timeit "Csw computation" begin
for i in 1:Int(lp.npls)
z = exp(2im * pi * lp.ntw[i]/gp.ng)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(ymws.csw, U, gp.Ubnd, i, z, lp)
end
end
end
return nothing
end
function krnl_Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
so[b,r] += 0.25*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
end
return nothing
end
function krnl_g5Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
so[b,r] += 0.25*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
end
return nothing
end
function krnl_sfbndfix!(so,si,ct,gamma,lp::SpaceParm)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
......@@ -314,27 +337,46 @@ function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
return nothing
end
function krnl_Dw_impr!(so, F1, F2, Gid1, Gid2, si, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, ztw, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
@inbounds begin
so[b,r] += 0.25*csw*im*( (F1[b,1,r]+F1[b,2,r]+F1[b,3,r]+F1[b,4,r])*dmul(Gid1,si[b,r]) - (F2[b,1,r]+F2[b,2,r]+F2[b,3,r]+F2[b,4,r])*dmul(Gid2,si[b,r]))
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[4]
id1, id2 = lp.plidx[ipl]
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
TWP = ((I[id1]==1)&&(I[id2]==1))
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
bd1, rd1 = dw((b, r), id1, lp)
bd2, rd2 = dw((b, r), id2, lp)
bdd, rdd = dw((bd1, rd1), id2, lp)
bud, rud = dw((bu1, ru1), id2, lp)
bdu, rdu = up((bd1, rd1), id2, lp)
if SFBC && (it == lp.iL[end])
gt1 = Ubnd[id2]
gt2 = Ubnd[id2]
else
gt1 = U[bu1,id2,ru1]
gt2 = U[bud,id2,rud]
end
return nothing
end
function krnl_g5Dw_impr!(so, F1, F2, Gid1, Gid2, si, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
if SFBC && (it == 1)
csw[b,ipl,r] = projalg(U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])) + projalg((U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r])
so[b,r] += 0.25*csw*im*dmul(Gamma{5},( (F1[b,1,r]+F1[b,2,r]+F1[b,3,r]+F1[b,4,r])*dmul(Gid1,si[b,r]) - (F2[b,1,r]+F2[b,2,r]+F2[b,3,r]+F2[b,4,r])*dmul(Gid2,si[b,r])))
else
if TWP
csw[b,ipl,r] = projalg(ztw,U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])) + projalg(ztw,(U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]) +
projalg(ztw,(U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])) + projalg(ztw,(U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1])
else
csw[b,ipl,r] = projalg(U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])) + projalg((U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]) +
projalg((U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])) + projalg((U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1])
end
end
end
......
......@@ -18,6 +18,7 @@ using ..Space
vector_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, lp.ndim, lp.rsz)
scalar_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 2}(undef, lp.bsz, lp.rsz)
nscalar_field(::Type{T}, n, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, n, lp.rsz)
tensor_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, lp.npls, lp.rsz)
scalar_field_point(::Type{T}, lp::SpaceParm{N,M,D}) where {T,N,M,D} = CuArray{T, N}(undef, lp.iL...)
......
......@@ -34,7 +34,7 @@ return sum(sumf)
end
function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, ymws = "ymws", gp = "gp", maxiter::Int64 = 10, tol=1.0) where {T}
function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, ymws::YMWorkspace, maxiter::Int64 = 10, tol=1.0) where {T}
dws.sr .= si
dws.sp .= si
......
......@@ -83,6 +83,7 @@ struct YMworkspace{T}
f2 = vector_field(SU2alg{T}, lp)
mm = vector_field(SU2alg{T}, lp)
u1 = vector_field(SU2{T}, lp)
csw = tensor_field(SU2alg{T},lp)
end
if (G == SU3)
......@@ -92,6 +93,7 @@ struct YMworkspace{T}
f2 = vector_field(SU3alg{T}, lp)
mm = vector_field(SU3alg{T}, lp)
u1 = vector_field(SU3{T}, lp)
csw = tensor_field(SU3alg{T},lp)
end
cs = scalar_field_point(Complex{T}, lp)
rs = scalar_field_point(T, lp)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment