Ahora si venga

parent b5f5f43a
...@@ -4,10 +4,50 @@ import ..YM.flw, ..YM.force_gauge ...@@ -4,10 +4,50 @@ import ..YM.flw, ..YM.force_gauge
function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
@timeit "Integrating flow equations" begin
for i in 1:ns
force_gauge(ymws, U, int.c0, 1, gp, lp)
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
ymws.mom .= ymws.frc1
U .= expm.(U, ymws.mom, 2*eps*int.r)
Nablanabla!(dws.sAp, U, psi, dpar, dws, lp)
dws.sp .= dws.sAp
psi .= psi + 2*int.r*eps*dws.sp
for k in 1:NI
force_gauge(ymws, U, int.c0, 1, gp, lp)
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
U .= expm.(U, ymws.mom, 2*eps)
Nablanabla!(dws.sAp, U, psi, dpar, dws, lp)
dws.sAp .= int.e0[k].*dws.sAp .+ int.e1[k].*dws.sp
psi .= psi + 2*eps*int.e0[k].*dws.sp .+ 2*eps*int.e1[k].*dws.sAp
end
end
end
return nothing
end
flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, int.eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} # TO BE CONTINUED function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} # TO BE CONTINUED
if B == BC_SF_AFWB || B == BC_SF_ORBI if B == BC_SF_AFWB || B == BC_SF_ORBI
@timeit "DwdagDw" begin @timeit "Nablanabla" begin
@timeit "SF fix" begin @timeit "SF fix" begin
CUDA.@sync begin CUDA.@sync begin
...@@ -17,7 +57,7 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space ...@@ -17,7 +57,7 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space
@timeit "Dw" begin @timeit "Dw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(st, U, si, dpar.m0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla!(st, U, si, dpar.m0, dpar.th, lp)
end end
end end
...@@ -27,65 +67,13 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space ...@@ -27,65 +67,13 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space
end end
end end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(st, dws.csw, dpar.csw, si, lp)
end
end
end
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, st, dpar.m0, dpar.th, lp)
end
end
@timeit "SF fix" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,st,dpar.ct,Gamma{5},lp)
end
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(so, dws.csw, dpar.csw, st, lp)
end
end
end
end
else else
@timeit "DwdagDw" begin @timeit "DwdagDw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(st, U, si, dpar.m0, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla!(st, U, si, dpar.m0, dpar.th, lp)
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(st, dws.csw, dpar.csw, si, lp)
end
end
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, st, dpar.m0, dpar.th, lp)
end end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(so, dws.csw, dpar.csw, st, lp)
end
end
end
end end
end end
...@@ -93,46 +81,33 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space ...@@ -93,46 +81,33 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space
end end
function krnl_Nablanabla!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} @inbounds begin
@timeit "Integrating flow equations" begin
for i in 1:ns
force_gauge(ymws, U, int.c0, 1, gp, lp)
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
ymws.mom .= ymws.frc1
U .= expm.(U, ymws.mom, 2*eps*int.r)
Dslash_sq!(dws.sAp, U, psi, dpar, dws, lp)
dws.sp .= dws.sAp
psi .= psi + 2*int.r*eps*dws.sp
for k in 1:NI so[b,r] = -8*si[b,r]
force_gauge(ymws, U, int.c0, 1, gp, lp)
if int.add_zth bu1, ru1 = up((b,r), 1, lp)
add_zth_term(ymws::YMworkspace, U, lp) bd1, rd1 = dw((b,r), 1, lp)
end bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
U .= expm.(U, ymws.mom, 2*eps)
Dslash_sq!(dws.sAp, U, psi, dpar, dws, lp) so[b,r] += (th[1] * U[b,1,r]*si[bu1,ru1] +conj(th[1]) * U[bd1,1,rd1]*si[bd1,rd1] +
th[2] * U[b,2,r]*si[bu2,ru2] +conj(th[2]) * U[bd2,2,rd2]*si[bd2,rd2] +
th[3] * U[b,3,r]*si[bu3,ru3] +conj(th[3]) * U[bd3,3,rd3]*si[bd3,rd3] +
th[4] * U[b,4,r]*si[bu4,ru4] +conj(th[4]) * U[bd4,4,rd4]*si[bd4,rd4] )
psi .= psi + 2*eps*int.e0[k].*dws.sp .+ 2*eps*int.e1[k].*dws.sAp
dws.sAp .= int.e0[k].*dws.sAp .+ int.e1[k].*dws.sp
end
end
end end
return nothing return nothing
end end
flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, int.eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
...@@ -170,7 +145,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP ...@@ -170,7 +145,7 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
@timeit "g5Dslsh" begin @timeit "g5Dslsh" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
end end
end end
...@@ -230,7 +205,9 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D} ...@@ -230,7 +205,9 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin @inbounds begin
so[b,r] = 4*si[b,r]
bu1, ru1 = up((b,r), 1, lp) bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp) bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp) bu2, ru2 = up((b,r), 2, lp)
...@@ -241,11 +218,10 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D} ...@@ -241,11 +218,10 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
bd4, rd4 = dw((b,r), 4, lp) bd4, rd4 = dw((b,r), 4, lp)
so[b,r] = -0.5*( th[1]*dmul(Gamma{1},U[b,1,r]*si[bu1,ru1]) +conj(th[1])*dmul(Gamma{1},dag(U[bd1,1,rd1])*si[bd1,rd1]) + so[b,r] -= 0.5*( th[1] * U[b,1,r]*si[bu1,ru1] +conj(th[1]) * U[bd1,1,rd1]*si[bd1,rd1] +
th[2]*dmul(Gamma{2},U[b,2,r]*si[bu1,ru1]) +conj(th[2])*dmul(Gamma{2},dag(U[bd1,2,rd1])*si[bd1,rd1]) + th[2] * U[b,2,r]*si[bu2,ru2] +conj(th[2]) * U[bd2,2,rd2]*si[bd2,rd2] +
th[3]*dmul(Gamma{3},U[b,3,r]*si[bu1,ru1]) +conj(th[3])*dmul(Gamma{3},dag(U[bd1,3,rd1])*si[bd1,rd1]) + th[3] * U[b,3,r]*si[bu3,ru3] +conj(th[3]) * U[bd3,3,rd3]*si[bd3,rd3] +
th[4]*dmul(Gamma{4},U[b,4,r]*si[bu1,ru1]) +conj(th[4])*dmul(Gamma{4},dag(U[bd1,4,rd1])*si[bd1,rd1]) ) th[4] * U[b,4,r]*si[bu4,ru4] +conj(th[4]) * U[bd4,4,rd4]*si[bd4,rd4] )
so[b,r] = dmul(Gamma{5}, so[b,r]) so[b,r] = dmul(Gamma{5}, so[b,r])
end end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment