Bug fixes. Stable io version.

7b2d14d5 · Fernando Pérez Panadero · 78447fa4 · 7b2d14d5 · 7b2d14d5
Commit 7b2d14d5 authored Feb 12, 2025 by Fernando Pérez Panadero
Expand all Show whitespace changes
Inline Side-by-side

Showing with 157 additions and 129 deletions

src/meas.jl src/meas.jl +19 -3

src/utils.jl src/utils.jl +138 -126

No files found.
--- a/src/meas.jl
+++ b/src/meas.jl
@@ -316,7 +316,7 @@ function Two_pt_lagrange() # Will be 2pt lagrange mult
            pptilde_t0[:,noi,f] .= zero(ComplexF64)
            @timeit "Volume sum" begin
                pphat_t0[:,noi,f] .= -scalar_contraction(psi)
-                pptilde_t0[:,noi,f] .= gammazero_contraction(psi)
+                pptilde_t0[:,noi,f] .= dot_contraction(dws.st,psi)
            end
            _,epslist = flw_adapt(U, psi, int, params["Frontflow"]["t_zero"], gp, dpar[f], lp, ymws, dws)
@@ -337,7 +337,7 @@ function Two_pt_lagrange() # Will be 2pt lagrange mult
                pptilde_t[:,noi,fl,f] .= zero(ComplexF64)
                @timeit "Volume sum" begin
                    pphat_t[:,noi,fl,f] .= -scalar_contraction(psi)
-                    pptilde_t[:,noi,fl,f] .= gammazero_contraction(psi)
+                    pptilde_t[:,noi,fl,f] .= dot_contraction(dws.st,psi)
                end
                ymws.U1 .= U
                flw(U, psi, int, 1, params["Frontflow"]["epsilon"], gp, dpar[f], lp, ymws, dws)
@@ -349,7 +349,7 @@ function Two_pt_lagrange() # Will be 2pt lagrange mult
            pptilde_t[:,noi,end,f] .= zero(ComplexF64)
            @timeit "Volume sum" begin
                pphat_t[:,noi,end,f] .= -scalar_contraction(psi)
-                pptilde_t[:,noi,end,f] .= gammazero_contraction(psi)
+                pptilde_t[:,noi,end,f] .= dot_contraction(dws.st,psi)
            end
            @timeit "CPU to GPU" copyto!(U,U_CPU)
@@ -387,6 +387,22 @@ function scalar_contraction(psi)
    return reshape(Array(CUDA.reduce(+, ymws.rm; dims=(1,2,3))),lp.iL[end])
 end
+function krnl_dot_contraction!(cm,st, psi, lp) # CUDA kernel: each thread writes dot(st[b,r], psi[b,r]) into cm at its own coordinate
+    @inbounds begin # bounds checks are disabled for the whole kernel body
+        b = Int64(CUDA.threadIdx().x) # thread index within the block
+        r = Int64(CUDA.blockIdx().x) # block index within the grid
+        I = point_coord((b,r), lp) # map the (thread, block) pair to an index into cm; point_coord is defined elsewhere
+        cm[I] = dot(st[b,r],psi[b,r]) # one dot product per thread; NOTE(review): presumably one lattice site per thread - confirm
+    end
+    return nothing # no return value; the result is left in cm
+end
+function dot_contraction(st,psi) # Run krnl_dot_contraction! on (st, psi) and return the result summed over dims 1-3
+    CUDA.@sync begin # block until the kernel launch below has completed
+        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_dot_contraction!(ymws.cm,st, psi, lp) # lp and ymws are not arguments: they must come from an outer/global scope; ymws.cm is overwritten
+    end
+    return reshape(Array(CUDA.reduce(+, ymws.cm; dims=(1,2,3))),lp.iL[end]) # sum over the first three dims, convert to an Array (presumably a GPU-to-host copy), reshape to a vector of length lp.iL[end]
+end
 function krnl_gammazero_contraction!(cm, psi, lp)
    @inbounds begin
        b = Int64(CUDA.threadIdx().x)

--- a/src/utils.jl
+++ b/src/utils.jl