Ahora si venga

parent b5f5f43a
......@@ -4,86 +4,74 @@ import ..YM.flw, ..YM.force_gauge
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} # TO BE CONTINUED
if B == BC_SF_AFWB || B == BC_SF_ORBI
@timeit "DwdagDw" begin
function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
@timeit "Integrating flow equations" begin
for i in 1:ns
force_gauge(ymws, U, int.c0, 1, gp, lp)
@timeit "SF fix" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,Gamma{16},lp)
end
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(st, U, si, dpar.m0, dpar.th, lp)
end
end
ymws.mom .= ymws.frc1
U .= expm.(U, ymws.mom, 2*eps*int.r)
@timeit "SF fix" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(st,si,dpar.ct,Gamma{5},lp)
end
Nablanabla!(dws.sAp, U, psi, dpar, dws, lp)
dws.sp .= dws.sAp
psi .= psi + 2*int.r*eps*dws.sp
for k in 1:NI
force_gauge(ymws, U, int.c0, 1, gp, lp)
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(st, dws.csw, dpar.csw, si, lp)
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
U .= expm.(U, ymws.mom, 2*eps)
Nablanabla!(dws.sAp, U, psi, dpar, dws, lp)
dws.sAp .= int.e0[k].*dws.sAp .+ int.e1[k].*dws.sp
psi .= psi + 2*eps*int.e0[k].*dws.sp .+ 2*eps*int.e1[k].*dws.sAp
end
end
end
return nothing
end
flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, int.eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, st, dpar.m0, dpar.th, lp)
end
end
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} # TO BE CONTINUED
if B == BC_SF_AFWB || B == BC_SF_ORBI
@timeit "Nablanabla" begin
@timeit "SF fix" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,st,dpar.ct,Gamma{5},lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,Gamma{16},lp)
end
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(so, dws.csw, dpar.csw, st, lp)
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla!(st, U, si, dpar.m0, dpar.th, lp)
end
end
end
end
else
@timeit "DwdagDw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(st, U, si, dpar.m0, dpar.th, lp)
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
@timeit "SF fix" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(st, dws.csw, dpar.csw, si, lp)
end
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(st,si,dpar.ct,Gamma{5},lp)
end
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, st, dpar.m0, dpar.th, lp)
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
else
@timeit "DwdagDw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(so, dws.csw, dpar.csw, st, lp)
end
end
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla!(st, U, si, dpar.m0, dpar.th, lp)
end
end
......@@ -93,46 +81,33 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space
end
function krnl_Nablanabla!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
@timeit "Integrating flow equations" begin
for i in 1:ns
force_gauge(ymws, U, int.c0, 1, gp, lp)
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
ymws.mom .= ymws.frc1
U .= expm.(U, ymws.mom, 2*eps*int.r)
Dslash_sq!(dws.sAp, U, psi, dpar, dws, lp)
dws.sp .= dws.sAp
psi .= psi + 2*int.r*eps*dws.sp
@inbounds begin
for k in 1:NI
force_gauge(ymws, U, int.c0, 1, gp, lp)
so[b,r] = -8*si[b,r]
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
U .= expm.(U, ymws.mom, 2*eps)
Dslash_sq!(dws.sAp, U, psi, dpar, dws, lp)
so[b,r] += (th[1] * U[b,1,r]*si[bu1,ru1] +conj(th[1]) * U[bd1,1,rd1]*si[bd1,rd1] +
th[2] * U[b,2,r]*si[bu2,ru2] +conj(th[2]) * U[bd2,2,rd2]*si[bd2,rd2] +
th[3] * U[b,3,r]*si[bu3,ru3] +conj(th[3]) * U[bd3,3,rd3]*si[bd3,rd3] +
th[4] * U[b,4,r]*si[bu4,ru4] +conj(th[4]) * U[bd4,4,rd4]*si[bd4,rd4] )
psi .= psi + 2*eps*int.e0[k].*dws.sp .+ 2*eps*int.e1[k].*dws.sAp
dws.sAp .= int.e0[k].*dws.sAp .+ int.e1[k].*dws.sp
end
end
end
return nothing
end
flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, int.eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
......@@ -231,6 +206,8 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
@inbounds begin
so[b,r] = 4*si[b,r]
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
......@@ -241,11 +218,10 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
bd4, rd4 = dw((b,r), 4, lp)
so[b,r] = -0.5*( th[1]*dmul(Gamma{1},U[b,1,r]*si[bu1,ru1]) +conj(th[1])*dmul(Gamma{1},dag(U[bd1,1,rd1])*si[bd1,rd1]) +
th[2]*dmul(Gamma{2},U[b,2,r]*si[bu1,ru1]) +conj(th[2])*dmul(Gamma{2},dag(U[bd1,2,rd1])*si[bd1,rd1]) +
th[3]*dmul(Gamma{3},U[b,3,r]*si[bu1,ru1]) +conj(th[3])*dmul(Gamma{3},dag(U[bd1,3,rd1])*si[bd1,rd1]) +
th[4]*dmul(Gamma{4},U[b,4,r]*si[bu1,ru1]) +conj(th[4])*dmul(Gamma{4},dag(U[bd1,4,rd1])*si[bd1,rd1]) )
so[b,r] -= 0.5*( th[1] * U[b,1,r]*si[bu1,ru1] +conj(th[1]) * U[bd1,1,rd1]*si[bd1,rd1] +
th[2] * U[b,2,r]*si[bu2,ru2] +conj(th[2]) * U[bd2,2,rd2]*si[bd2,rd2] +
th[3] * U[b,3,r]*si[bu3,ru3] +conj(th[3]) * U[bd3,3,rd3]*si[bd3,rd3] +
th[4] * U[b,4,r]*si[bu4,ru4] +conj(th[4]) * U[bd4,4,rd4]*si[bd4,rd4] )
so[b,r] = dmul(Gamma{5}, so[b,r])
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment