Some nice changes, should be minor

7fa4bc16 · Fernando Pérez Panadero · e16780b6 · 7fa4bc16 · 7fa4bc16 · 7fa4bc16
Commit 7fa4bc16 authored Oct 19, 2023 by Fernando Pérez Panadero
Showing with 67 additions and 37 deletions

src/LatticeGPU.jl src/LatticeGPU.jl +1 -1

src/Solvers/Propagators.jl src/Solvers/Propagators.jl +57 -29

src/Solvers/Solvers.jl src/Solvers/Solvers.jl +1 -1

src/Solvers/sfcf.jl src/Solvers/sfcf.jl +8 -6

No files found.
--- a/src/LatticeGPU.jl
+++ b/src/LatticeGPU.jl
@@ -63,7 +63,7 @@ export Dw!, DwdagDw!, pfrandomize!, g5Dw!, Csw!
 include("Solvers/Solvers.jl")
 using .Solvers
 export CG!
-export propagator, bndpropagator, Tbndpropagator
+export propagator!, bndpropagator!, Tbndpropagator!, bndtobnd
 export fP, fA, kV, kT

 end # module
--- a/src/Solvers/Propagators.jl
+++ b/src/Solvers/Propagators.jl
@@ -2,13 +2,13 @@


 """
-    function propagator(U, dpar::DiracParam{T}, dws::DiracWorkspace,  lp::SpaceParm, maxiter::Int64, tol::Float64, y::NTuple{4,Int64}, c::Int64, s::Int64)
+    function propagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace,  lp::SpaceParm, maxiter::Int64, tol::Float64, y::NTuple{4,Int64}, c::Int64, s::Int64)

-Returns the fermionic progapator  for a source at point `y` with color `c` and spin `s`. If the last three arguments are replaced by `time::Int64`, the source is replaced
+Saves the fermionic progapator in pro for a source at point `y` with color `c` and spin `s`. If the last three arguments are replaced by `time::Int64`, the source is replaced
 by a random source in spin and color at t = `time`.

 """
-function propagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, y::NTuple{4,Int64}, c::Int64, s::Int64) where {T}
+function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, y::NTuple{4,Int64}, c::Int64, s::Int64) where {T}

    function krnlg5!(src)
        b=Int64(CUDA.threadIdx().x)
@@ -17,23 +17,22 @@ function propagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm,
        return nothing
    end
  
-    pro = scalar_field(Spinor{4,SU3fund{Float64}},lp)
-    fill!(dws.sr,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
+    fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
    
    #src = fill!(scalar_field(Spinor{4,SU3fund{Float64}},lp),zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
-    CUDA.@allowscalar dws.sr[point_index(CartesianIndex{lp.ndim}(y),lp)...] = Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4))
+    CUDA.@allowscalar dws.sp[point_index(CartesianIndex{lp.ndim}(y),lp)...] = Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4))
       
    CUDA.@sync begin
-            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sr)
+            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
    end
       
-    g5Dw!(pro,U,dws.sr,dpar,dws,lp)
+    g5Dw!(pro,U,dws.sp,dpar,dws,lp)
      
    CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
-    return pro
+    return nothing
 end

-function propagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, time::Int64) where {T}
+function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, time::Int64) where {T}

    function krnlg5!(src)
        b=Int64(CUDA.threadIdx().x)
@@ -42,17 +41,16 @@ function propagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm,
        return nothing
    end
  
-    pro = scalar_field(Spinor{4,SU3fund{Float64}},lp)
-    pfrandomize!(dws.sr,lp,dpar,time)
+    pfrandomize!(dws.sp,lp,dpar,time)
    
    CUDA.@sync begin
-            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sr)
+            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
    end
       
-    g5Dw!(pro,U,dws.sr,dpar,dws,lp)
+    g5Dw!(pro,U,dws.sp,dpar,dws,lp)

    CUDA.@sync begin
-        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sr)
+        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
    end
      
    CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
@@ -67,7 +65,7 @@ Returns the propagator from the t=0 boundary to the bulk for the SF boundary con
 For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)

 """
-function bndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) where {T,D}
+function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) where {T,D}

    function krnlg5!(src)
        b=Int64(CUDA.threadIdx().x)
@@ -87,20 +85,18 @@ function bndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpacePar

        return nothing
    end
-  
-    pro = scalar_field(Spinor{4,SU3fund{Float64}},lp)

-    fill!(dws.sr,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
+    fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
    
    CUDA.@sync begin
-        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sr, U, lp, c, s)
+        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s)
    end

    CUDA.@sync begin
-            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sr)
+            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
    end
       
-    g5Dw!(pro,U,dpar.ct*dws.sr,dpar,dws,lp)
+    g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
    
    CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
    return pro
@@ -111,10 +107,10 @@ end
    function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
        
 Returns the propagator from the t=T boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
-For the propagator from t=00 to the bulk, use the function bndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
+For the propagator from t=0 to the bulk, use the function bndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)

 """
-function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) where {T,D}
+function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) where {T,D}

    function krnlg5!(src)
        b=Int64(CUDA.threadIdx().x)
@@ -134,19 +130,51 @@ function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpacePa
        return nothing
    end
  
-    pro = scalar_field(Spinor{4,SU3fund{Float64}},lp)
-    fill!(dws.sr,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
+    fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
    
    CUDA.@sync begin
-        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sr, U, lp, c, s)
+        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s)
    end

    CUDA.@sync begin
-            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sr)
+            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
    end
       
-    g5Dw!(pro,U,dpar.ct*dws.sr,dpar,dws,lp)
+    g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
    
    CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
    return pro
 end
+
+
+"""
+    function bndtobnd(bndpro, U, dpar, dws, lp)
+
+Returns the boundary to boundary propagator of the Schrodinger functional given that bnd pro is the propagator from t = 0 to the bulk, given by the function bndpropagator.
+
+"""
+function bndtobnd(bndpro::AbstractArray, U::AbstractArray, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}) where {D}
+
+    function krnl_bndtobnd!(psi::AbstractArray, bndp::AbstractArray, U::AbstractArray, lp)
+        b=Int64(CUDA.threadIdx().x)
+        r=Int64(CUDA.blockIdx().x)
+
+        it = point_time((b, r), lp)
+
+        if it = lp.il[end]
+            psi[b,r] = gdagpmul(Pgamma{4,1},U[b,4,r],bndpro[b,r])
+        else
+            psi[b,r] = 0.0 * bndpro[b,r]
+        end
+
+        return nothing
+    end
+
+    CUDA.@sync begin
+        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_bndtobnd!(dws.sp, bndpro ,U, lp)
+    end
+
+    res = -dpar.ct*CUDA.mapreduce(x -> x, +, dws.sp)./(lp.iL[1]*lp.iL[2]*lp.iL[3])
+
+    return res
+end
--- a/src/Solvers/Solvers.jl
+++ b/src/Solvers/Solvers.jl
@@ -23,7 +23,7 @@ include("CG.jl")
 export CG!

 include("Propagators.jl")
-export propagator, bndpropagator, Tbndpropagator
+export propagator!, bndpropagator!, Tbndpropagator!, bndtobnd

 include("sfcf.jl")
 export fP, fA, kV, kT

--- a/src/Solvers/sfcf.jl
+++ b/src/Solvers/sfcf.jl
@@ -16,7 +16,7 @@ function fP(prop::AbstractArray,dws::DiracWorkspace, lp::SpaceParm)
        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_fP!(dws.cs,prop)
    end

-    return reshape(Array(CUDA.reduce(+, dws.cs; dims=(1,2,3))),lp.iL[end])
+    return reshape(Array(CUDA.reduce(+, dws.cs; dims=(1,2,3))),lp.iL[end])./(lp.iL[1]*lp.iL[2]*lp.iL[3])
 end

 function fA(prop::AbstractArray,dws::DiracWorkspace, lp::SpaceParm)
@@ -34,7 +34,7 @@ function fA(prop::AbstractArray,dws::DiracWorkspace, lp::SpaceParm)
        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_fA!(dws.cs,prop)
    end
    
-    return reshape(Array(CUDA.reduce(+, dws.cs; dims=(1,2,3))),lp.iL[end])
+    return reshape(Array(CUDA.reduce(+, dws.cs; dims=(1,2,3))),lp.iL[end])./(lp.iL[1]*lp.iL[2]*lp.iL[3])
 end

 function kV(prop::AbstractArray,dws::DiracWorkspace, lp::SpaceParm)
@@ -44,7 +44,7 @@ function kV(prop::AbstractArray,dws::DiracWorkspace, lp::SpaceParm)
            r=Int64(CUDA.blockIdx().x)    
            I = point_coord((b,r), lp)

-            sum[I] = -norm2(dmul(Gamma{6},pro[b,r]) + dmul(Gamma{7},pro[b,r]) + dmul(Gamma{8},pro[b,r]))/6
+            sum[I] = -(norm2(dmul(Gamma{6},pro[b,r])) + norm2(dmul(Gamma{7},pro[b,r])) + norm2(dmul(Gamma{8},pro[b,r])))/6
        return nothing
    end
    
@@ -52,7 +52,7 @@ function kV(prop::AbstractArray,dws::DiracWorkspace, lp::SpaceParm)
        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_kV!(dws.cs,prop)
    end
    
-    return reshape(Array(CUDA.reduce(+, dws.cs; dims=(1,2,3))),lp.iL[end])
+    return reshape(Array(CUDA.reduce(+, dws.cs; dims=(1,2,3))),lp.iL[end])./(lp.iL[1]*lp.iL[2]*lp.iL[3])
 end

 function kT(prop::AbstractArray,dws::DiracWorkspace, lp::SpaceParm)
@@ -62,7 +62,9 @@ function kT(prop::AbstractArray,dws::DiracWorkspace, lp::SpaceParm)
            r=Int64(CUDA.blockIdx().x)    
            I = point_coord((b,r), lp)

-            sum[I] = -im*dot(pro[b,r],dmul(Gamma{1},dmul(Gamma{10},pro[b,r])) + dmul(Gamma{2},dmul(Gamma{11},pro[b,r])) + dmul(Gamma{3},dmul(Gamma{12},pro[b,r]))) /6
+            sum[I] = im*(dot(dmul(Gamma{6},pro[b,r]),dmul(Gamma{5},dmul(Gamma{10},pro[b,r])))
+                       + dot(dmul(Gamma{7},pro[b,r]),dmul(Gamma{5},dmul(Gamma{11},pro[b,r])))
+                       + dot(dmul(Gamma{8},pro[b,r]),dmul(Gamma{5},dmul(Gamma{12},pro[b,r]))))/6
        return nothing
    end
    
@@ -70,5 +72,5 @@ function kT(prop::AbstractArray,dws::DiracWorkspace, lp::SpaceParm)
        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_kT!(dws.cs,prop)
    end
    
-    return reshape(Array(CUDA.reduce(+, dws.cs; dims=(1,2,3))),lp.iL[end])
+    return reshape(Array(CUDA.reduce(+, dws.cs; dims=(1,2,3))),lp.iL[end])./(lp.iL[1]*lp.iL[2]*lp.iL[3])
 end