Dirac.jl 7.81 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
###
### "THE BEER-WARE LICENSE":
### Alberto Ramos and Carlos Pena wrote this file. As long as you retain this  
### notice you can do whatever you want with this stuff. If we meet some 
### day, and you think this stuff is worth it, you can buy us a beer in 
### return. <alberto.ramos@cern.ch> <carlos.pena@uam.es>
###
### file:    Dirac.jl
### created: Thu Nov 18 17:20:24 2021
###                               


module Dirac

using CUDA, TimerOutputs
using ..Space
using ..Groups
using ..Fields
using ..YM
using ..Spinors

22 23 24 25 26
struct DiracParam{T}
    rep
    m0::T
    csw::T
    th::NTuple{4,T}
27
    ct::T
28 29 30
end


31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
struct DiracWorkspace{T}
    sr
    sp
    sAp
    st
    
    function DiracWorkspace(::Type{G}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D}

        sr  = scalar_field(Spinor{4,G}, lp)
        sp  = scalar_field(Spinor{4,G}, lp)
        sAp = scalar_field(Spinor{4,G}, lp)
        st  = scalar_field(Spinor{4,G}, lp)
        return new{T}(sr,sp,sAp,st)
    end
end
46
export DiracWorkspace, DiracParam
47

48
function Dw!(so, U, si, dpar::DiracParam, lp::SpaceParm{4,6,B,D}) where {B,D}
49

50
    if B == BC_SF_AFWB || B == BC_SF_ORBI
51
        CUDA.@sync begin
52
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,lp)
53
        end
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
        @timeit "Dw" begin
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, lp)
            end
        end
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,si,dpar.ct,lp)
        end 

    else
        @timeit "Dw" begin
            CUDA.@sync begin
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, lp)
            end
            end
    end 
70 71 72 73
    
    return nothing
end

74
function DwdagDw!(so, U, si, dpar::DiracParam, st, lp::SpaceParm{4,6,B,D}) where {B,D}
75

76 77 78
    

    if B == BC_SF_AFWB || B == BC_SF_ORBI
79
        CUDA.@sync begin
80
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,lp)
81 82 83 84 85 86 87
        end
        @timeit "DwdagDw" begin
            CUDA.@sync begin
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(st, U, si, dpar.m0, dpar.th, lp)
            end

            CUDA.@sync begin
88
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(st,si,dpar.ct,lp)
89 90 91 92 93 94 95
            end

            CUDA.@sync begin
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, dpar.m0, dpar.th, lp)
            end

            CUDA.@sync begin
96
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,st,dpar.ct,lp)
97
            end
98
            end
99 100 101 102 103 104 105 106 107

    else
        @timeit "DwdagDw" begin
            CUDA.@sync begin
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(st, U, si, dpar.m0, dpar.th, lp)
            end
            CUDA.@sync begin
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, dpar.m0, dpar.th, lp)
            end
108
        end
109
    end 
110 111 112 113 114
    
    return nothing
end


115
function g5Dw!(so, U, si, dpar, lp::SpaceParm{4,6,B,D}) where {B,D}
116

117
    if B == BC_SF_AFWB || B == BC_SF_ORBI
118
        CUDA.@sync begin
119
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,lp)
120
        end
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
        @timeit "Dw" begin
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th , lp)
            end
        end
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,si,dpar.ct,lp)
        end 
        
    else
        @timeit "Dw" begin
            CUDA.@sync begin
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th , lp)
                end
            end
    end 
137 138 139 140
    
    return nothing
end

141 142 143 144 145 146 147 148 149 150 151 152 153 154
function krnl_sfbndfix!(so,si,ct,lp::SpaceParm)
    b=Int64(CUDA.threadIdx().x)
    r=Int64(CUDA.blockIdx().x)

    if (point_time((b,r),lp) == 1)
    so[b,r] = 0.0*so[b,r]

    elseif (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
    so[b,r] += (ct-1.0)si[b,r]
    end
    return nothing
end


155 156 157 158 159 160 161 162 163
function krnl_Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}

    b = Int64(CUDA.threadIdx().x);  r = Int64(CUDA.blockIdx().x)

    # For SF:
    #  - cttilde affects mass term at x0 = a, T-a
    #  - Spinor can be periodic as long as 0 at x_0=0
    @inbounds begin 
        so[b,r] = (4+m0)*si[b,r]
164 165 166 167 168 169 170 171 172
        
            bu1, ru1 = up((b,r), 1, lp)
            bd1, rd1 = dw((b,r), 1, lp)
            bu2, ru2 = up((b,r), 2, lp)
            bd2, rd2 = dw((b,r), 2, lp)
            bu3, ru3 = up((b,r), 3, lp)
            bd3, rd3 = dw((b,r), 3, lp)
            bu4, ru4 = up((b,r), 4, lp)
            bd4, rd4 = dw((b,r), 4, lp)
173
            
174 175 176 177 178 179

        so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                     th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                     th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                     th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4])  )
    
180 181 182 183 184 185 186 187 188 189 190
    end

    return nothing
end

function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}

    b = Int64(CUDA.threadIdx().x);  r = Int64(CUDA.blockIdx().x)

    @inbounds begin 
        so[b,r] = (4+m0)*si[b,r]
191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257

	    bu1, ru1 = up((b,r), 1, lp)
            bd1, rd1 = dw((b,r), 1, lp)
            bu2, ru2 = up((b,r), 2, lp)
            bd2, rd2 = dw((b,r), 2, lp)
            bu3, ru3 = up((b,r), 3, lp)
            bd3, rd3 = dw((b,r), 3, lp)
            bu4, ru4 = up((b,r), 4, lp)
            bd4, rd4 = dw((b,r), 4, lp)


        so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                     th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                     th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                     th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4])  )


        so[b,r] = dmul(Gamma{5}, so[b,r])
    end
        
    return nothing
end



###############################   HMC for fermions   ###################################



function pfrandomize!(f,lp::SpaceParm,dpar::DiracParam,t::Int64=0) #DiracParam to apply dirac operator later?

    if dpar.rep == SU3fund && lp.ndim == 4
        @timeit "Randomize pseudofermion field" begin
            p = ntuple(i->CUDA.randn(Float64, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
            CUDA.@sync begin
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf!(f,p,lp,t)
            end
        end
    end

    return nothing

end

function krnl_assign_pf!(f::AbstractArray{T}, p ,lp::SpaceParm, t::Int64) where {T} #only valid for SU3fund for now. Check performance and maybe change it for the tuple gen in the krnl

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)

            if t == 0
            f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
                                           x[b,2,r,1] + im* x[b,2,r,2],
                                           x[b,3,r,1] + im* x[b,3,r,2]),p))
            elseif point_time((b,r),lp) == t
            f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
                                           x[b,2,r,1] + im* x[b,2,r,2],
                                           x[b,3,r,1] + im* x[b,3,r,2]),p))
            end
            
    end

    return nothing

end

export Dw!, DwdagDw!, g5Dw!, pfrandomize!
258 259

end