Small changes and two big ones: SU2fund and new merged Dirac kernel (not g5Dw yet)

parent f110024a
......@@ -65,6 +65,21 @@ struct DiracWorkspace{T}
return new{T}(sr,sp,sAp,st,csw,cs)
end
function DiracWorkspace(::Type{SU2}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D}
sr = scalar_field(Spinor{4,G}, lp)
sp = scalar_field(Spinor{4,G}, lp)
sAp = scalar_field(Spinor{4,G}, lp)
st = scalar_field(Spinor{4,G}, lp)
csw = tensor_field(U2alg,lp)
cs = scalar_field_point(Complex{Float64}, lp)
return new{T}(sr,sp,sAp,st,csw,cs)
end
end
export DiracWorkspace, DiracParam
......@@ -279,8 +294,6 @@ function krnl_Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
TWP = false
if TWP
......@@ -302,7 +315,6 @@ function krnl_g5Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
TWP = false
......@@ -435,8 +447,190 @@ function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D})
end
############################### HMC for fermions ###################################
function Dw_new!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_new" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr_new!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "Dw_new" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_new!(so, U, si, dpar.m0, dpar.th, lp)
end
end
end
return nothing
end
function krnl_Dwimpr_new!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
end
return nothing
end
function krnl_Dw_new!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
end
return nothing
end
function Dw_new!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_SF_ORBI,D}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_new" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr_new!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "Dw_new" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_new!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
end
end
end
return nothing
end
function krnl_Dwimpr_new!(so, U, si, Fcsw, m0, th, csw, ct, lp::SpaceParm{4,6,BC_SF_ORBI,D}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
function krnl_Dw_new!(so, U, si, m0, th, ct, lp::SpaceParm{4,6,BC_SF_ORBI,D}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
function SF_bndfix!(sp, lp::SpaceParm{4,6,BC_SF_ORBI,D}) where {D}
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix_new!(sp, lp)
end
return nothing
end
function krnl_sfbndfix_new!(sp,lp::SpaceParm)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) == 1)
so[b,r] = 0.0*so[b,r]
end
return nothing
end
############################### HMC for fermions ###################################
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}},lp::SpaceParm,t::Int64=0) where {T}
......@@ -474,7 +668,7 @@ function krnl_assign_pf!(f::AbstractArray, p ,lp::SpaceParm, t::Int64)
end
export Dw!, DwdagDw!, g5Dw!, pfrandomize!, Csw!
export Dw!, DwdagDw!, g5Dw!, pfrandomize!, Csw!, Dw_new!, SF_bndfix!
include("DiracIO.jl")
......
......@@ -110,8 +110,6 @@ function krnl_Nablanabla!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
return nothing
end
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if B == BC_SF_AFWB || B == BC_SF_ORBI
......
struct U2alg{T} <: Algebra
u11::T
u22::T
u12::Complex{T}
end
function antsym(a::SU2{T}) where T <: AbstractFloat
return U2alg{T}(2.0*imag(a.t1),-2.0*imag(a.t1),2.0*a.t2)
end
Base.:*(a::U2alg{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(im*a.u11*b.t1 + a.u12*b.t2,
-conj(a.u12)*b.t1 + im*a.u22*b.t2)
Base.:+(a::U2alg{T},b::U2alg{T}) where T <: AbstractFloat = U2alg{T}(a.u11 + b.u11, a.u22 + b.u22, a.u12 + b.u12)
Base.:*(r::Number, a::U2alg{T}) where T <: AbstractFloat = U2alg{T}(r*a.u11, r*a.u22, r*a.u12)
struct SU2fund{T}
t1::Complex{T}
t2::Complex{T}
end
Base.zero(::Type{SU2fund{T}}) where T <: AbstractFloat = SU2fund{T}(zero(T),zero(T))
Random.rand(rng::AbstractRNG, ::Random.SamplerType{SU2fund{T}}) where T <: AbstractFloat = SU2fund{T}(complex(randn(rng,T),randn(rng,T)),
complex(randn(rng,T),randn(rng,T)))
SU2fund(a::T, b::T) where T <: AbstractFloat = SU2fund{T}(complex(a), complex(b))
"""
dag(a::SU2fund{T})
Returns the conjugate of a fundamental element.
"""
dag(a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(conj(a.t1), conj(a.t2))
"""
norm(a::SU2fund{T})
Returns the norm of a fundamental element. Same result as dot(a,a).
"""
norm(a::SU2fund{T}) where T <: AbstractFloat = sqrt((abs2(a.t1) + abs2(a.t2)))
"""
norm(a::SU2fund{T})
Returns the norm of a fundamental element. Same result as sqrt(dot(a,a)).
"""
norm2(a::SU2fund{T}) where T <: AbstractFloat = (abs2(a.t1) + abs2(a.t2))
"""
dot(a::SU2fund{T},b::SU2fund{T})
Returns the scalar product of two fundamental elements. The convention is for the product to the linear in the second argument, and anti-linear in the first argument.
"""
dot(g1::SU2fund{T},g2::SU2fund{T}) where T <: AbstractFloat = conj(g1.t1)*g2.t1+conj(g1.t2)*g2.t2
"""
*(g::SU2{T},b::SU2fund{T})
Returns ga
"""
Base.:*(g::SU2{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(g.t1*b.t1 + g.t2*b.t2,
-conj(g.t2)*b.t1 + conj(g.t1)*b.t2)
"""
\(g::SU2{T},b::SU2fund{T})
Returns g^dag b
"""
Base.:\(g::SU2{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(conj(g.t1)*b.t1 - g.t2*b.t2,
conj(g.t2)*b.t1 + g.t1*b.t2)
"""
*(a::SU2alg{T},b::SU2fund{T})
Returns a*b
"""
Base.:*(a::SU2alg{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(complex(0.0, a.t3)*b.t1/2 + complex(a.t2,a.t1)*b.t2/2,
complex(-a.t2,a.t1)*b.t1/2 + complex(0.0, -a.t3)*b.t1/2)
Base.:+(a::SU2fund{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.t1+b.t1,a.t2+b.t2)
Base.:-(a::SU2fund{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.t1-b.t1,a.t2-b.t2)
Base.:+(a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.t1,a.t2)
Base.:-(a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(-a.t1,-a.t2)
imm(a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(complex(-imag(a.t1),real(a.t1)),
complex(-imag(a.t2),real(a.t2)))
mimm(a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(complex(imag(a.t1),-real(a.t1)),
complex(imag(a.t2),-real(a.t2)))
# Operations with numbers
Base.:*(a::SU2fund{T},b::Number) where T <: AbstractFloat = SU2fund{T}(b*a.t1,b*a.t2)
Base.:*(b::Number,a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(b*a.t1,b*a.t2)
Base.:/(a::SU2fund{T},b::Number) where T <: AbstractFloat = SU2fund{T}(a.t1/b,a.t2/b)
Base.:*(a::M2x2{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.u11*b.t1 + a.u12*b.t2,
a.u21*b.t1 + a.u22*b.t2)
\ No newline at end of file
......@@ -16,7 +16,7 @@ include("Groups/Groups.jl")
using .Groups
export Group, Algebra, GMatrix
export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund, U3alg
export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund, U3alg, SU2fund, U2alg
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym
include("Space/Space.jl")
......@@ -57,7 +57,7 @@ export pmul, gpmul, gdagpmul, dmul
include("Dirac/Dirac.jl")
using .Dirac
export DiracWorkspace, DiracParam
export Dw!, DwdagDw!, pfrandomize!, g5Dw!, Csw!
export Dw!, DwdagDw!, pfrandomize!, g5Dw!, Csw!, Dw_new!, SF_bndfix!
export read_prop, save_prop, read_dpar
export flw
......
......@@ -59,9 +59,9 @@ end
"""
function bndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
function bndpropagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
Returns the propagator from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
Saves the propagator in from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
"""
......
......@@ -74,7 +74,7 @@ function read_cnfg(fname::String)
end
end
BDIO_close!(fb)
return CuArray(Ucpu)
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment