### ### "THE BEER-WARE LICENSE": ### Alberto Ramos and Carlos Pena wrote this file. As long as you retain this ### notice you can do whatever you want with this stuff. If we meet some ### day, and you think this stuff is worth it, you can buy us a beer in ### return. ### ### file: Dirac.jl ### created: Thu Nov 18 17:20:24 2021 ### module Dirac using CUDA, TimerOutputs using ..Space using ..Groups using ..Fields using ..YM using ..Spinors struct DiracParam{T,R} m0::T csw::T th::NTuple{4,Complex{T}} ct::T function DiracParam{T}(::Type{R},m0,csw,th,ct) where {T,R} return new{T,R}(m0,csw,th,ct) end function DiracParam{T}(m0,csw,th,ct) where {T} return new{T,SU3fund}(m0,csw,th,ct) end end function Base.show(io::IO, dpar::DiracParam{T,R}) where {T,R} println(io, "Wilson fermions in the: ", R, " representation") println(io, " - Bare mass: ", dpar.m0," // Kappa = ",0.5/(dpar.m0+4)) println(io, " - Csw : ", dpar.csw) println(io, " - c_t: ", dpar.ct) println(io, " - Theta: ", dpar.th) return nothing end struct DiracWorkspace{T} sr sp sAp st csw cs function DiracWorkspace(::Type{G}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D} sr = scalar_field(Spinor{4,G}, lp) sp = scalar_field(Spinor{4,G}, lp) sAp = scalar_field(Spinor{4,G}, lp) st = scalar_field(Spinor{4,G}, lp) csw = tensor_field(U3alg{T},lp) cs = scalar_field_point(Complex{Float64}, lp) return new{T}(sr,sp,sAp,st,csw) end end export DiracWorkspace, DiracParam function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} if B == BC_SF_AFWB || B == BC_SF_ORBI @timeit "SF fix" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,Gamma{16},lp) end end @timeit "Dw" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, lp) end end @timeit "SF fix" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,si,dpar.ct,Gamma{16},lp) end end else @timeit "Dw" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, lp) end end end if abs(dpar.csw) > 1.0E-10 @timeit "Dw_improvement" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw_impr!(so, dws.csw, dpar.csw, si, lp) end end end return nothing end function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, st, lp::SpaceParm{4,6,B,D}) where {B,D} if B == BC_SF_AFWB || B == BC_SF_ORBI @timeit "DwdagDw" begin @timeit "SF fix" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,Gamma{16},lp) end end @timeit "g5Dw" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(st, U, si, dpar.m0, dpar.th, lp) end end @timeit "SF fix" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(st,si,dpar.ct,Gamma{5},lp) end end if abs(dpar.csw) > 1.0E-10 @timeit "Dw_improvement" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(st, dws.csw, dpar.csw, si, lp) end end end @timeit "g5Dw" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, dpar.m0, dpar.th, lp) end end @timeit "SF fix" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,st,dpar.ct,Gamma{5},lp) end end if abs(dpar.csw) > 1.0E-10 @timeit "Dw_improvement" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, dws.csw, dpar.csw, st, lp) end end end end else @timeit "DwdagDw" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(st, U, si, dpar.m0, dpar.th, lp) end if abs(dpar.csw) > 1.0E-10 @timeit "Dw_improvement" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(st, dws.csw, dpar.csw, si, lp) end end end CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, dpar.m0, dpar.th, lp) end if abs(dpar.csw) > 1.0E-10 @timeit "Dw_improvement" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, dws.csw, dpar.csw, st, lp) end end end end end return nothing end function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} if B == BC_SF_AFWB || B == BC_SF_ORBI @timeit "SF fix" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(si,si,1.0,Gamma{16},lp) end end @timeit "g5Dw" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th , lp) end end @timeit "SF fix" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(so,si,dpar.ct,Gamma{5},lp) end end else @timeit "g5Dw" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th , lp) end end end if abs(dpar.csw) > 1.0E-10 @timeit "Dw_improvement" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw_impr!(so, dws.csw, dpar.csw, si, lp) end end end return nothing end function clover(g1::SU3{T},g2::SU3{T},g3::SU3{T},g4::SU3{T}) where {T} u11 = g1.u11 + g2.u11 + g3.u11 + g4.u11 u12 = g1.u12 + g2.u12 + g3.u12 + g4.u12 u13 = g1.u13 + g2.u13 + g3.u13 + g4.u13 u21 = g1.u21 + g2.u21 + g3.u21 + g4.u21 u22 = g1.u22 + g2.u22 + g3.u22 + g4.u22 u23 = g1.u23 + g2.u23 + g3.u23 + g4.u23 u31 = conj(g1.u12*g1.u23 - g1.u13*g1.u22) + conj(g2.u12*g2.u23 - g2.u13*g2.u22) + conj(g3.u12*g3.u23 - g3.u13*g3.u22) + conj(g4.u12*g4.u23 - g4.u13*g4.u22) u32 = conj(g1.u13*g1.u21 - g1.u11*g1.u23) + conj(g2.u13*g2.u21 - g2.u11*g2.u23) + conj(g3.u13*g3.u21 - g3.u11*g3.u23) + conj(g4.u13*g4.u21 - g4.u11*g4.u23) u33 = conj(g1.u11*g1.u22 - g1.u12*g1.u21) + conj(g2.u11*g2.u22 - g2.u12*g2.u21) + conj(g3.u11*g3.u22 - g3.u12*g3.u21) + conj(g4.u11*g4.u22 - g4.u12*g4.u21) return -0.125*im*M3x3{T}(u11-conj(u11),u12-conj(u21),u13-conj(u31),u21-conj(u12),u22-conj(u22),u23-conj(u32),u31-conj(u13),u32-conj(u23),u33-conj(u33)) end """ function Csw!(dws, U, gp, lp::SpaceParm) Computes the clover and stores it in dws.csw. The twist is NOT implemented (since dws.csw diagonal has to be real) and its not implemented either in Dw! """ function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D} @timeit "Csw computation" begin for i in 1:Int(lp.npls) z = exp(2im * pi * lp.ntw[i]/gp.ng) CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, i, lp) end end end return nothing end function krnl_Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D} @inbounds begin b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) I = point_coord((b,r), lp) TWP = false if TWP so[b,r] += 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])) else so[b,r] += 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])) end end return nothing end function krnl_g5Dw_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D} @inbounds begin b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) I = point_coord((b,r), lp) TWP = false if TWP so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))) else so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))) end end return nothing end function krnl_sfbndfix!(so,si,ct,gamma,lp::SpaceParm) b=Int64(CUDA.threadIdx().x) r=Int64(CUDA.blockIdx().x) if (point_time((b,r),lp) == 1) so[b,r] = 0.0*so[b,r] elseif (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4]) so[b,r] += (ct-1.0)*dmul(gamma,si[b,r]) end return nothing end function krnl_Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D} b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) # For SF: # - cttilde affects mass term at x0 = a, T-a # - Spinor can be periodic as long as 0 at x_0=0 @inbounds begin so[b,r] = (4+m0)*si[b,r] bu1, ru1 = up((b,r), 1, lp) bd1, rd1 = dw((b,r), 1, lp) bu2, ru2 = up((b,r), 2, lp) bd2, rd2 = dw((b,r), 2, lp) bu3, ru3 = up((b,r), 3, lp) bd3, rd3 = dw((b,r), 3, lp) bu4, ru4 = up((b,r), 4, lp) bd4, rd4 = dw((b,r), 4, lp) so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) end return nothing end function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D} b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) @inbounds begin so[b,r] = (4+m0)*si[b,r] bu1, ru1 = up((b,r), 1, lp) bd1, rd1 = dw((b,r), 1, lp) bu2, ru2 = up((b,r), 2, lp) bd2, rd2 = dw((b,r), 2, lp) bu3, ru3 = up((b,r), 3, lp) bd3, rd3 = dw((b,r), 3, lp) bu4, ru4 = up((b,r), 4, lp) bd4, rd4 = dw((b,r), 4, lp) so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) so[b,r] = dmul(Gamma{5}, so[b,r]) end return nothing end function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D} @inbounds begin b = Int64(CUDA.threadIdx().x) r = Int64(CUDA.blockIdx().x) I = point_coord((b,r), lp) it = I[4] id1, id2 = lp.plidx[ipl] SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4) bu1, ru1 = up((b, r), id1, lp) bu2, ru2 = up((b, r), id2, lp) bd1, rd1 = dw((b, r), id1, lp) bd2, rd2 = dw((b, r), id2, lp) bdd, rdd = dw((bd1, rd1), id2, lp) bud, rud = dw((bu1, ru1), id2, lp) bdu, rdu = up((bd1, rd1), id2, lp) if SFBC && (it == lp.iL[end]) gt1 = Ubnd[id2] gt2 = Ubnd[id2] else gt1 = U[bu1,id2,ru1] gt2 = U[bud,id2,rud] end M1 = U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2]) M2 = (U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r] M3 = (U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2]) M4 = (U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1] if !(SFBC && (it == 1)) csw[b,ipl,r] = 0.125*(antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4)) end end return nothing end ############################### HMC for fermions ################################### function pfrandomize!(f,lp::SpaceParm,t::Int64=0) where {T} @timeit "Randomize pseudofermion field" begin p = ntuple(i->CUDA.randn(Float64, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4 CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf!(f,p,lp,t) end end return nothing end function krnl_assign_pf!(f::AbstractArray{T}, p ,lp::SpaceParm, t::Int64) where {T} @inbounds begin b = Int64(CUDA.threadIdx().x) r = Int64(CUDA.blockIdx().x) if t == 0 f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2], x[b,2,r,1] + im* x[b,2,r,2], x[b,3,r,1] + im* x[b,3,r,2]),p)) elseif point_time((b,r),lp) == t f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2], x[b,2,r,1] + im* x[b,2,r,2], x[b,3,r,1] + im* x[b,3,r,2]),p)) end end return nothing end export Dw!, DwdagDw!, g5Dw!, pfrandomize!, Csw! end