Small changes and two big ones: SU2fund and new merged Dirac kernel (not g5Dw yet)

parent f110024a
...@@ -65,6 +65,21 @@ struct DiracWorkspace{T} ...@@ -65,6 +65,21 @@ struct DiracWorkspace{T}
return new{T}(sr,sp,sAp,st,csw,cs) return new{T}(sr,sp,sAp,st,csw,cs)
end end
function DiracWorkspace(::Type{SU2}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D}
sr = scalar_field(Spinor{4,G}, lp)
sp = scalar_field(Spinor{4,G}, lp)
sAp = scalar_field(Spinor{4,G}, lp)
st = scalar_field(Spinor{4,G}, lp)
csw = tensor_field(U2alg,lp)
cs = scalar_field_point(Complex{Float64}, lp)
return new{T}(sr,sp,sAp,st,csw,cs)
end
end end
export DiracWorkspace, DiracParam export DiracWorkspace, DiracParam
...@@ -279,8 +294,6 @@ function krnl_Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D} ...@@ -279,8 +294,6 @@ function krnl_Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x) r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
TWP = false TWP = false
if TWP if TWP
...@@ -302,7 +315,6 @@ function krnl_g5Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D} ...@@ -302,7 +315,6 @@ function krnl_g5Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x) r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
TWP = false TWP = false
...@@ -435,8 +447,190 @@ function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) ...@@ -435,8 +447,190 @@ function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D})
end end
############################### HMC for fermions ################################### function Dw_new!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_new" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr_new!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "Dw_new" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_new!(so, U, si, dpar.m0, dpar.th, lp)
end
end
end
return nothing
end
function krnl_Dwimpr_new!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
end
return nothing
end
function krnl_Dw_new!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
end
return nothing
end
function Dw_new!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_SF_ORBI,D}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_new" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr_new!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "Dw_new" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_new!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
end
end
end
return nothing
end
function krnl_Dwimpr_new!(so, U, si, Fcsw, m0, th, csw, ct, lp::SpaceParm{4,6,BC_SF_ORBI,D}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
function krnl_Dw_new!(so, U, si, m0, th, ct, lp::SpaceParm{4,6,BC_SF_ORBI,D}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
function SF_bndfix!(sp, lp::SpaceParm{4,6,BC_SF_ORBI,D}) where {D}
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix_new!(sp, lp)
end
return nothing
end
function krnl_sfbndfix_new!(sp,lp::SpaceParm)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) == 1)
so[b,r] = 0.0*so[b,r]
end
return nothing
end
############################### HMC for fermions ###################################
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}},lp::SpaceParm,t::Int64=0) where {T} function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}},lp::SpaceParm,t::Int64=0) where {T}
...@@ -474,7 +668,7 @@ function krnl_assign_pf!(f::AbstractArray, p ,lp::SpaceParm, t::Int64) ...@@ -474,7 +668,7 @@ function krnl_assign_pf!(f::AbstractArray, p ,lp::SpaceParm, t::Int64)
end end
export Dw!, DwdagDw!, g5Dw!, pfrandomize!, Csw! export Dw!, DwdagDw!, g5Dw!, pfrandomize!, Csw!, Dw_new!, SF_bndfix!
include("DiracIO.jl") include("DiracIO.jl")
......
...@@ -110,8 +110,6 @@ function krnl_Nablanabla!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D} ...@@ -110,8 +110,6 @@ function krnl_Nablanabla!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
return nothing return nothing
end end
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if B == BC_SF_AFWB || B == BC_SF_ORBI if B == BC_SF_AFWB || B == BC_SF_ORBI
......
struct U2alg{T} <: Algebra
u11::T
u22::T
u12::Complex{T}
end
function antsym(a::SU2{T}) where T <: AbstractFloat
return U2alg{T}(2.0*imag(a.t1),-2.0*imag(a.t1),2.0*a.t2)
end
Base.:*(a::U2alg{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(im*a.u11*b.t1 + a.u12*b.t2,
-conj(a.u12)*b.t1 + im*a.u22*b.t2)
Base.:+(a::U2alg{T},b::U2alg{T}) where T <: AbstractFloat = U2alg{T}(a.u11 + b.u11, a.u22 + b.u22, a.u12 + b.u12)
Base.:*(r::Number, a::U2alg{T}) where T <: AbstractFloat = U2alg{T}(r*a.u11, r*a.u22, r*a.u12)
struct SU2fund{T}
t1::Complex{T}
t2::Complex{T}
end
Base.zero(::Type{SU2fund{T}}) where T <: AbstractFloat = SU2fund{T}(zero(T),zero(T))
Random.rand(rng::AbstractRNG, ::Random.SamplerType{SU2fund{T}}) where T <: AbstractFloat = SU2fund{T}(complex(randn(rng,T),randn(rng,T)),
complex(randn(rng,T),randn(rng,T)))
SU2fund(a::T, b::T) where T <: AbstractFloat = SU2fund{T}(complex(a), complex(b))
"""
dag(a::SU2fund{T})
Returns the conjugate of a fundamental element.
"""
dag(a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(conj(a.t1), conj(a.t2))
"""
norm(a::SU2fund{T})
Returns the norm of a fundamental element. Same result as dot(a,a).
"""
norm(a::SU2fund{T}) where T <: AbstractFloat = sqrt((abs2(a.t1) + abs2(a.t2)))
"""
norm(a::SU2fund{T})
Returns the norm of a fundamental element. Same result as sqrt(dot(a,a)).
"""
norm2(a::SU2fund{T}) where T <: AbstractFloat = (abs2(a.t1) + abs2(a.t2))
"""
dot(a::SU2fund{T},b::SU2fund{T})
Returns the scalar product of two fundamental elements. The convention is for the product to the linear in the second argument, and anti-linear in the first argument.
"""
dot(g1::SU2fund{T},g2::SU2fund{T}) where T <: AbstractFloat = conj(g1.t1)*g2.t1+conj(g1.t2)*g2.t2
"""
*(g::SU2{T},b::SU2fund{T})
Returns ga
"""
Base.:*(g::SU2{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(g.t1*b.t1 + g.t2*b.t2,
-conj(g.t2)*b.t1 + conj(g.t1)*b.t2)
"""
\(g::SU2{T},b::SU2fund{T})
Returns g^dag b
"""
Base.:\(g::SU2{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(conj(g.t1)*b.t1 - g.t2*b.t2,
conj(g.t2)*b.t1 + g.t1*b.t2)
"""
*(a::SU2alg{T},b::SU2fund{T})
Returns a*b
"""
Base.:*(a::SU2alg{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(complex(0.0, a.t3)*b.t1/2 + complex(a.t2,a.t1)*b.t2/2,
complex(-a.t2,a.t1)*b.t1/2 + complex(0.0, -a.t3)*b.t1/2)
Base.:+(a::SU2fund{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.t1+b.t1,a.t2+b.t2)
Base.:-(a::SU2fund{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.t1-b.t1,a.t2-b.t2)
Base.:+(a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.t1,a.t2)
Base.:-(a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(-a.t1,-a.t2)
imm(a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(complex(-imag(a.t1),real(a.t1)),
complex(-imag(a.t2),real(a.t2)))
mimm(a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(complex(imag(a.t1),-real(a.t1)),
complex(imag(a.t2),-real(a.t2)))
# Operations with numbers
Base.:*(a::SU2fund{T},b::Number) where T <: AbstractFloat = SU2fund{T}(b*a.t1,b*a.t2)
Base.:*(b::Number,a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(b*a.t1,b*a.t2)
Base.:/(a::SU2fund{T},b::Number) where T <: AbstractFloat = SU2fund{T}(a.t1/b,a.t2/b)
Base.:*(a::M2x2{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.u11*b.t1 + a.u12*b.t2,
a.u21*b.t1 + a.u22*b.t2)
\ No newline at end of file
...@@ -16,7 +16,7 @@ include("Groups/Groups.jl") ...@@ -16,7 +16,7 @@ include("Groups/Groups.jl")
using .Groups using .Groups
export Group, Algebra, GMatrix export Group, Algebra, GMatrix
export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund, U3alg export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund, U3alg, SU2fund, U2alg
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym
include("Space/Space.jl") include("Space/Space.jl")
...@@ -57,7 +57,7 @@ export pmul, gpmul, gdagpmul, dmul ...@@ -57,7 +57,7 @@ export pmul, gpmul, gdagpmul, dmul
include("Dirac/Dirac.jl") include("Dirac/Dirac.jl")
using .Dirac using .Dirac
export DiracWorkspace, DiracParam export DiracWorkspace, DiracParam
export Dw!, DwdagDw!, pfrandomize!, g5Dw!, Csw! export Dw!, DwdagDw!, pfrandomize!, g5Dw!, Csw!, Dw_new!, SF_bndfix!
export read_prop, save_prop, read_dpar export read_prop, save_prop, read_dpar
export flw export flw
......
...@@ -59,9 +59,9 @@ end ...@@ -59,9 +59,9 @@ end
""" """
function bndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) function bndpropagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
Returns the propagator from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not. Saves the propagator in from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
""" """
......
...@@ -74,7 +74,7 @@ function read_cnfg(fname::String) ...@@ -74,7 +74,7 @@ function read_cnfg(fname::String)
end end
end end
BDIO_close!(fb)
return CuArray(Ucpu) return CuArray(Ucpu)
end end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment