Small changes and two big ones: SU2fund and new merged Dirac kernel (not g5Dw yet)

27ff9818 · Fernando Pérez Panadero · f110024a · 27ff9818 · 27ff9818 · 27ff9818
Commit 27ff9818 authored Nov 17, 2023 by Fernando Pérez Panadero
7 changed files
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -65,6 +65,21 @@ struct DiracWorkspace{T}
        return new{T}(sr,sp,sAp,st,csw,cs)
    end
+    function DiracWorkspace(::Type{SU2}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D}
+        sr  = scalar_field(Spinor{4,G}, lp)
+        sp  = scalar_field(Spinor{4,G}, lp)
+        sAp = scalar_field(Spinor{4,G}, lp)
+        st  = scalar_field(Spinor{4,G}, lp)
+        csw = tensor_field(U2alg,lp)
+        cs = scalar_field_point(Complex{Float64}, lp)
+        return new{T}(sr,sp,sAp,st,csw,cs)
+    end
 end
 export DiracWorkspace, DiracParam
@@ -279,8 +294,6 @@ function krnl_Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
        b = Int64(CUDA.threadIdx().x);  
        r = Int64(CUDA.blockIdx().x)
-        I = point_coord((b,r), lp)
        TWP  = false
            if TWP
@@ -302,7 +315,6 @@ function krnl_g5Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
    b = Int64(CUDA.threadIdx().x);  
    r = Int64(CUDA.blockIdx().x)
-    I = point_coord((b,r), lp)
    TWP  = false
@@ -435,8 +447,190 @@ function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D})
 end
-###############################   HMC for fermions   ###################################
+function Dw_new!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
+    if abs(dpar.csw) > 1.0E-10
+        @timeit "Dw_new" begin
+            CUDA.@sync begin
+               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr_new!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
+            end
+        end
+    else
+        @timeit "Dw_new" begin
+            CUDA.@sync begin
+               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_new!(so, U, si, dpar.m0, dpar.th, lp)
+            end
+        end
+    end
+    return nothing
+end
+function krnl_Dwimpr_new!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
+    b = Int64(CUDA.threadIdx().x);  r = Int64(CUDA.blockIdx().x)
+    bu1, ru1 = up((b,r), 1, lp)
+    bd1, rd1 = dw((b,r), 1, lp)
+    bu2, ru2 = up((b,r), 2, lp)
+    bd2, rd2 = dw((b,r), 2, lp)
+    bu3, ru3 = up((b,r), 3, lp)
+    bd3, rd3 = dw((b,r), 3, lp)
+    bu4, ru4 = up((b,r), 4, lp)
+    bd4, rd4 = dw((b,r), 4, lp)
+    @inbounds begin 
+        so[b,r] = (4+m0)*si[b,r]  + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+                                                -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))          
+        so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
+                        th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
+                        th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
+                        th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
+    end
+    return nothing
+end
+function krnl_Dw_new!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
+    b = Int64(CUDA.threadIdx().x);  r = Int64(CUDA.blockIdx().x)
+    bu1, ru1 = up((b,r), 1, lp)
+    bd1, rd1 = dw((b,r), 1, lp)
+    bu2, ru2 = up((b,r), 2, lp)
+    bd2, rd2 = dw((b,r), 2, lp)
+    bu3, ru3 = up((b,r), 3, lp)
+    bd3, rd3 = dw((b,r), 3, lp)
+    bu4, ru4 = up((b,r), 4, lp)
+    bd4, rd4 = dw((b,r), 4, lp)
+    @inbounds begin 
+        so[b,r] = (4+m0)*si[b,r]
+        so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
+                        th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
+                        th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
+                        th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
+    end
+    return nothing
+end
+function Dw_new!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_SF_ORBI,D}) where {D}
+    if abs(dpar.csw) > 1.0E-10
+        @timeit "Dw_new" begin
+            CUDA.@sync begin
+               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr_new!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
+            end
+        end
+    else
+        @timeit "Dw_new" begin
+            CUDA.@sync begin
+               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_new!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
+            end
+        end
+    end
+    return nothing
+end
+function krnl_Dwimpr_new!(so, U, si, Fcsw, m0, th, csw, ct, lp::SpaceParm{4,6,BC_SF_ORBI,D}) where {D}
+    # The field si is assumed to be zero at t = 0
+    b = Int64(CUDA.threadIdx().x);  r = Int64(CUDA.blockIdx().x)
+    if (point_time((b,r),lp) != 1)
+        bu1, ru1 = up((b,r), 1, lp)
+        bd1, rd1 = dw((b,r), 1, lp)
+        bu2, ru2 = up((b,r), 2, lp)
+        bd2, rd2 = dw((b,r), 2, lp)
+        bu3, ru3 = up((b,r), 3, lp)
+        bd3, rd3 = dw((b,r), 3, lp)
+        bu4, ru4 = up((b,r), 4, lp)
+        bd4, rd4 = dw((b,r), 4, lp)
+        @inbounds begin 
+            so[b,r] = (4+m0)*si[b,r]  + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+                                                    -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))          
+            so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
+                            th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
+                            th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
+                            th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
+                if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
+                    so[b,r] += (ct-1.0)*si[b,r]
+                end
+        end
+    end
+    return nothing
+end
+function krnl_Dw_new!(so, U, si, m0, th, ct, lp::SpaceParm{4,6,BC_SF_ORBI,D}) where {D}
+    # The field si is assumed to be zero at t = 0
+    b = Int64(CUDA.threadIdx().x);  r = Int64(CUDA.blockIdx().x)
+    if (point_time((b,r),lp) != 1)
+        bu1, ru1 = up((b,r), 1, lp)
+        bd1, rd1 = dw((b,r), 1, lp)
+        bu2, ru2 = up((b,r), 2, lp)
+        bd2, rd2 = dw((b,r), 2, lp)
+        bu3, ru3 = up((b,r), 3, lp)
+        bd3, rd3 = dw((b,r), 3, lp)
+        bu4, ru4 = up((b,r), 4, lp)
+        bd4, rd4 = dw((b,r), 4, lp)
+        @inbounds begin 
+            so[b,r] = (4+m0)*si[b,r]
+            so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
+                            th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
+                            th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
+                            th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
+            if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
+                so[b,r] += (ct-1.0)*si[b,r]
+            end
+        end
+    end
+    return nothing
+end
+function SF_bndfix!(sp, lp::SpaceParm{4,6,BC_SF_ORBI,D}) where {D}
+    CUDA.@sync begin
+        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix_new!(sp, lp)
+    end
+    return nothing
+end
+function krnl_sfbndfix_new!(sp,lp::SpaceParm)
+    b=Int64(CUDA.threadIdx().x)
+    r=Int64(CUDA.blockIdx().x)
+    if (point_time((b,r),lp) == 1)
+        so[b,r] = 0.0*so[b,r]
+    end
+    return nothing
+end
+###############################   HMC for fermions   ###################################
 function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}},lp::SpaceParm,t::Int64=0) where {T} 
@@ -474,7 +668,7 @@ function krnl_assign_pf!(f::AbstractArray, p ,lp::SpaceParm, t::Int64)
 end
-export Dw!, DwdagDw!, g5Dw!, pfrandomize!, Csw!
+export Dw!, DwdagDw!, g5Dw!, pfrandomize!, Csw!, Dw_new!, SF_bndfix!
 include("DiracIO.jl")

--- a/src/Dirac/Diracflw.jl
+++ b/src/Dirac/Diracflw.jl
@@ -110,8 +110,6 @@ function krnl_Nablanabla!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
    return nothing
 end
 function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
    if B == BC_SF_AFWB || B == BC_SF_ORBI

--- a/src/Groups/AlgebraU2.jl
+++ b/src/Groups/AlgebraU2.jl
+struct U2alg{T} <: Algebra
+    u11::T
+    u22::T
+    u12::Complex{T}
+end
+function antsym(a::SU2{T}) where T <: AbstractFloat
+    return U2alg{T}(2.0*imag(a.t1),-2.0*imag(a.t1),2.0*a.t2)
+end
+Base.:*(a::U2alg{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(im*a.u11*b.t1 + a.u12*b.t2,
+                                                                        -conj(a.u12)*b.t1 + im*a.u22*b.t2)
+Base.:+(a::U2alg{T},b::U2alg{T}) where T <: AbstractFloat = U2alg{T}(a.u11 + b.u11, a.u22 + b.u22, a.u12 + b.u12)
+Base.:*(r::Number, a::U2alg{T}) where T <: AbstractFloat = U2alg{T}(r*a.u11, r*a.u22, r*a.u12)
--- a/src/Groups/FundamentalSU2.jl
+++ b/src/Groups/FundamentalSU2.jl
+struct SU2fund{T}
+    t1::Complex{T}
+    t2::Complex{T}
+end
+Base.zero(::Type{SU2fund{T}}) where T <: AbstractFloat = SU2fund{T}(zero(T),zero(T))
+Random.rand(rng::AbstractRNG, ::Random.SamplerType{SU2fund{T}}) where T <: AbstractFloat = SU2fund{T}(complex(randn(rng,T),randn(rng,T)),
+                                                                                                      complex(randn(rng,T),randn(rng,T)))
+SU2fund(a::T, b::T)          where T <: AbstractFloat = SU2fund{T}(complex(a), complex(b))
+"""
+    dag(a::SU2fund{T})
+Returns the conjugate of a fundamental element. 
+"""
+dag(a::SU2fund{T})                 where T <: AbstractFloat = SU2fund{T}(conj(a.t1), conj(a.t2))
+"""
+    norm(a::SU2fund{T})
+Returns the norm of a fundamental element. Same result as dot(a,a).
+"""
+norm(a::SU2fund{T})                where T <: AbstractFloat = sqrt((abs2(a.t1) + abs2(a.t2)))
+"""
+    norm(a::SU2fund{T})
+Returns the norm of a fundamental element. Same result as sqrt(dot(a,a)).
+"""
+norm2(a::SU2fund{T})               where T <: AbstractFloat = (abs2(a.t1) + abs2(a.t2))
+"""
+    dot(a::SU2fund{T},b::SU2fund{T})
+Returns the scalar product of two fundamental elements. The convention is for the product to the linear in the second argument, and anti-linear in the first argument. 
+"""
+dot(g1::SU2fund{T},g2::SU2fund{T}) where T <: AbstractFloat = conj(g1.t1)*g2.t1+conj(g1.t2)*g2.t2
+"""
+    *(g::SU2{T},b::SU2fund{T})
+Returns ga
+"""
+Base.:*(g::SU2{T},b::SU2fund{T})     where T <: AbstractFloat = SU2fund{T}(g.t1*b.t1 + g.t2*b.t2,
+                                                                           -conj(g.t2)*b.t1 + conj(g.t1)*b.t2)
+"""
+    \(g::SU2{T},b::SU2fund{T})
+Returns g^dag b
+"""
+Base.:\(g::SU2{T},b::SU2fund{T})     where T <: AbstractFloat = SU2fund{T}(conj(g.t1)*b.t1 - g.t2*b.t2,
+                                                                           conj(g.t2)*b.t1 + g.t1*b.t2)
+"""
+    *(a::SU2alg{T},b::SU2fund{T})
+Returns a*b
+"""
+Base.:*(a::SU2alg{T},b::SU2fund{T})     where T <: AbstractFloat = SU2fund{T}(complex(0.0, a.t3)*b.t1/2 +  complex(a.t2,a.t1)*b.t2/2,
+                                                                              complex(-a.t2,a.t1)*b.t1/2 + complex(0.0, -a.t3)*b.t1/2)
+Base.:+(a::SU2fund{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.t1+b.t1,a.t2+b.t2)
+Base.:-(a::SU2fund{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.t1-b.t1,a.t2-b.t2)
+Base.:+(a::SU2fund{T})               where T <: AbstractFloat = SU2fund{T}(a.t1,a.t2)
+Base.:-(a::SU2fund{T})               where T <: AbstractFloat = SU2fund{T}(-a.t1,-a.t2)
+imm(a::SU2fund{T})                   where T <: AbstractFloat = SU2fund{T}(complex(-imag(a.t1),real(a.t1)),
+                                                                           complex(-imag(a.t2),real(a.t2)))
+mimm(a::SU2fund{T})                  where T <: AbstractFloat = SU2fund{T}(complex(imag(a.t1),-real(a.t1)),
+                                                                           complex(imag(a.t2),-real(a.t2)))
+# Operations with numbers
+Base.:*(a::SU2fund{T},b::Number) where T <: AbstractFloat = SU2fund{T}(b*a.t1,b*a.t2)
+Base.:*(b::Number,a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(b*a.t1,b*a.t2)
+Base.:/(a::SU2fund{T},b::Number) where T <: AbstractFloat = SU2fund{T}(a.t1/b,a.t2/b)
+Base.:*(a::M2x2{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.u11*b.t1 + a.u12*b.t2,
+                                                                        a.u21*b.t1 + a.u22*b.t2)
\ No newline at end of file
--- a/src/LatticeGPU.jl
+++ b/src/LatticeGPU.jl
@@ -16,7 +16,7 @@ include("Groups/Groups.jl")
 using .Groups
 export Group, Algebra, GMatrix
-export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund, U3alg
+export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund, U3alg, SU2fund, U2alg
 export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym
 include("Space/Space.jl")
@@ -57,7 +57,7 @@ export pmul, gpmul, gdagpmul, dmul
 include("Dirac/Dirac.jl")
 using .Dirac
 export DiracWorkspace, DiracParam
-export Dw!, DwdagDw!, pfrandomize!, g5Dw!, Csw!
+export Dw!, DwdagDw!, pfrandomize!, g5Dw!, Csw!, Dw_new!, SF_bndfix!
 export read_prop, save_prop, read_dpar
 export flw

--- a/src/Solvers/Propagators.jl
+++ b/src/Solvers/Propagators.jl
@@ -59,9 +59,9 @@ end
 """
-    function bndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
+    function bndpropagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
-Returns the propagator from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
+Saves the propagator in from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
 For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
 """

--- a/src/YM/YMio.jl
+++ b/src/YM/YMio.jl
@@ -74,7 +74,7 @@ function read_cnfg(fname::String)
        end
    end
+    BDIO_close!(fb)
    return CuArray(Ucpu)   
 end