First try to properly add improvement

parent 2eb07322
...@@ -53,7 +53,7 @@ struct DiracWorkspace{T} ...@@ -53,7 +53,7 @@ struct DiracWorkspace{T}
end end
export DiracWorkspace, DiracParam export DiracWorkspace, DiracParam
function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp") where {B,D} function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws::YMWorkspace) where {B,D}
if B == BC_SF_AFWB || B == BC_SF_ORBI if B == BC_SF_AFWB || B == BC_SF_ORBI
...@@ -83,19 +83,10 @@ function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws", ...@@ -83,19 +83,10 @@ function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws",
end end
end end
@timeit "Dw_improvement" begin
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
if ymws == "ymws" || gp == "gp" @timeit "Dw_improvement" begin
error("YM workspace and Gauge parameters must be included for the improved dirac operator.")
else
for i in 1:Int(lp.npls/2)
z1 = exp(2im * pi * lp.ntw[i]/gp.ng)
z2 = exp(2im * pi * lp.ntw[lp.npls-i+1]/gp.ng)
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, i, lp.npls-i+1, z1, z2, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(so, ymws.csw, dpar.csw, si, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(so, ymws.frc1, ymws.frc2, Gamma{i+9}, Gamma{i+12}, si, dpar.csw, lp)
end
end
end end
end end
end end
...@@ -103,9 +94,7 @@ function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws", ...@@ -103,9 +94,7 @@ function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}, ymws = "ymws",
return nothing return nothing
end end
function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp") where {B,D} function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws::YMWorkspace) where {B,D}
if B == BC_SF_AFWB || B == BC_SF_ORBI if B == BC_SF_AFWB || B == BC_SF_ORBI
@timeit "DwdagDw" begin @timeit "DwdagDw" begin
...@@ -128,19 +117,10 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws ...@@ -128,19 +117,10 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws
end end
end end
@timeit "Dw_improvement" begin
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
if ymws == "ymws" || gp == "gp" @timeit "Dw_improvement" begin
error("YM workspace and Gauge parameters must be included for the improved dirac operator.")
else
for i in 1:Int(lp.npls/2)
z1 = exp(2im * pi * lp.ntw[i]/gp.ng)
z2 = exp(2im * pi * lp.ntw[lp.npls-i+1]/gp.ng)
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, i, lp.npls-i+1, z1, z2, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.csw, dpar.csw, si, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(st, ymws.frc1, ymws.frc2, Gamma{i+9}, Gamma{i+12}, si, dpar.csw, lp)
end
end
end end
end end
end end
...@@ -159,19 +139,10 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws ...@@ -159,19 +139,10 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws
end end
end end
@timeit "Dw_improvement" begin
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
if ymws == "ymws" || gp == "gp" @timeit "Dw_improvement" begin
error("YM workspace and Gauge parameters must be included for the improved dirac operator.")
else
for i in 1:Int(lp.npls/2)
z1 = exp(2im * pi * lp.ntw[i]/gp.ng)
z2 = exp(2im * pi * lp.ntw[lp.npls-i+1]/gp.ng)
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, i, lp.npls-i+1, z1, z2, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.csw, dpar.csw, si, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.frc1, ymws.frc2, Gamma{i+9}, Gamma{i+12}, st, dpar.csw, lp)
end
end
end end
end end
end end
...@@ -184,9 +155,27 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws ...@@ -184,9 +155,27 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(st, U, si, dpar.m0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(st, U, si, dpar.m0, dpar.th, lp)
end end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.csw, dpar.csw, si, lp)
end
end
end
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, dpar.m0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, dpar.m0, dpar.th, lp)
end end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.csw, dpar.csw, si, lp)
end
end
end
end end
end end
...@@ -194,7 +183,7 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws ...@@ -194,7 +183,7 @@ function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}, ymws
end end
function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp") where {B,D} function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws::YMWorkspace) where {B,D}
if B == BC_SF_AFWB || B == BC_SF_ORBI if B == BC_SF_AFWB || B == BC_SF_ORBI
@timeit "SF fix" begin @timeit "SF fix" begin
...@@ -221,26 +210,60 @@ function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp" ...@@ -221,26 +210,60 @@ function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}, ymws = "ymws", gp = "gp"
end end
@timeit "Dw_improvement" begin
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
if ymws == "ymws" || gp == "gp" @timeit "Dw_improvement" begin
error("YM workspace and Gauge parameters must be included for the improved dirac operator.")
else
for i in 1:Int(lp.npls/2)
z1 = exp(2im * pi * lp.ntw[i]/gp.ng)
z2 = exp(2im * pi * lp.ntw[lp.npls-i+1]/gp.ng)
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, i, lp.npls-i+1, z1, z2, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.csw, dpar.csw, si, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, ymws.frc1, ymws.frc2, Gamma{i+9}, Gamma{i+12}, si, dpar.csw, lp)
end end
end end
end end
return nothing
end
function Csw!(ymws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
@timeit "Csw computation" begin
for i in 1:Int(lp.npls)
z = exp(2im * pi * lp.ntw[i]/gp.ng)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(ymws.csw, U, gp.Ubnd, i, z, lp)
end
end end
end end
return nothing return nothing
end end
function krnl_Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
so[b,r] += 0.25*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
end
return nothing
end
function krnl_g5Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
so[b,r] += 0.25*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
end
return nothing
end
function krnl_sfbndfix!(so,si,ct,gamma,lp::SpaceParm) function krnl_sfbndfix!(so,si,ct,gamma,lp::SpaceParm)
b=Int64(CUDA.threadIdx().x) b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x) r=Int64(CUDA.blockIdx().x)
...@@ -314,27 +337,46 @@ function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D} ...@@ -314,27 +337,46 @@ function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
return nothing return nothing
end end
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, ztw, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
function krnl_Dw_impr!(so, F1, F2, Gid1, Gid2, si, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin @inbounds begin
b = Int64(CUDA.threadIdx().x)
so[b,r] += 0.25*csw*im*( (F1[b,1,r]+F1[b,2,r]+F1[b,3,r]+F1[b,4,r])*dmul(Gid1,si[b,r]) - (F2[b,1,r]+F2[b,2,r]+F2[b,3,r]+F2[b,4,r])*dmul(Gid2,si[b,r])) r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[4]
id1, id2 = lp.plidx[ipl]
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
TWP = ((I[id1]==1)&&(I[id2]==1))
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
bd1, rd1 = dw((b, r), id1, lp)
bd2, rd2 = dw((b, r), id2, lp)
bdd, rdd = dw((bd1, rd1), id2, lp)
bud, rud = dw((bu1, ru1), id2, lp)
bdu, rdu = up((bd1, rd1), id2, lp)
if SFBC && (it == lp.iL[end])
gt1 = Ubnd[id2]
gt2 = Ubnd[id2]
else
gt1 = U[bu1,id2,ru1]
gt2 = U[bud,id2,rud]
end end
return nothing if SFBC && (it == 1)
end csw[b,ipl,r] = projalg(U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])) + projalg((U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r])
function krnl_g5Dw_impr!(so, F1, F2, Gid1, Gid2, si, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
so[b,r] += 0.25*csw*im*dmul(Gamma{5},( (F1[b,1,r]+F1[b,2,r]+F1[b,3,r]+F1[b,4,r])*dmul(Gid1,si[b,r]) - (F2[b,1,r]+F2[b,2,r]+F2[b,3,r]+F2[b,4,r])*dmul(Gid2,si[b,r]))) else
if TWP
csw[b,ipl,r] = projalg(ztw,U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])) + projalg(ztw,(U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]) +
projalg(ztw,(U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])) + projalg(ztw,(U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1])
else
csw[b,ipl,r] = projalg(U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])) + projalg((U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]) +
projalg((U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])) + projalg((U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1])
end
end
end end
......
...@@ -18,6 +18,7 @@ using ..Space ...@@ -18,6 +18,7 @@ using ..Space
vector_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, lp.ndim, lp.rsz) vector_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, lp.ndim, lp.rsz)
scalar_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 2}(undef, lp.bsz, lp.rsz) scalar_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 2}(undef, lp.bsz, lp.rsz)
nscalar_field(::Type{T}, n, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, n, lp.rsz) nscalar_field(::Type{T}, n, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, n, lp.rsz)
tensor_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, lp.npls, lp.rsz)
scalar_field_point(::Type{T}, lp::SpaceParm{N,M,D}) where {T,N,M,D} = CuArray{T, N}(undef, lp.iL...) scalar_field_point(::Type{T}, lp::SpaceParm{N,M,D}) where {T,N,M,D} = CuArray{T, N}(undef, lp.iL...)
......
...@@ -34,7 +34,7 @@ return sum(sumf) ...@@ -34,7 +34,7 @@ return sum(sumf)
end end
function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, ymws = "ymws", gp = "gp", maxiter::Int64 = 10, tol=1.0) where {T} function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, ymws::YMWorkspace, maxiter::Int64 = 10, tol=1.0) where {T}
dws.sr .= si dws.sr .= si
dws.sp .= si dws.sp .= si
......
...@@ -83,6 +83,7 @@ struct YMworkspace{T} ...@@ -83,6 +83,7 @@ struct YMworkspace{T}
f2 = vector_field(SU2alg{T}, lp) f2 = vector_field(SU2alg{T}, lp)
mm = vector_field(SU2alg{T}, lp) mm = vector_field(SU2alg{T}, lp)
u1 = vector_field(SU2{T}, lp) u1 = vector_field(SU2{T}, lp)
csw = tensor_field(SU2alg{T},lp)
end end
if (G == SU3) if (G == SU3)
...@@ -92,6 +93,7 @@ struct YMworkspace{T} ...@@ -92,6 +93,7 @@ struct YMworkspace{T}
f2 = vector_field(SU3alg{T}, lp) f2 = vector_field(SU3alg{T}, lp)
mm = vector_field(SU3alg{T}, lp) mm = vector_field(SU3alg{T}, lp)
u1 = vector_field(SU3{T}, lp) u1 = vector_field(SU3{T}, lp)
csw = tensor_field(SU3alg{T},lp)
end end
cs = scalar_field_point(Complex{T}, lp) cs = scalar_field_point(Complex{T}, lp)
rs = scalar_field_point(T, lp) rs = scalar_field_point(T, lp)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment