Volume sum improvement attempt.

parent f217d551
......@@ -87,15 +87,12 @@ function Frontflow_pt() # Will be Frontflow
Eoft[tstep+1,:] .= sum(Eofpla,dims = 2)
end
pp_density .= Array(norm2.(psi))
ap_density .= Array(dot.(psi,dmul.(Gamma{4},psi)))
pp_corr_t[:,noi,tstep] .= zero(Float64)
ap_corr_t[:,noi,tstep] .= zero(ComplexF64)
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp)
pp_corr_t[t,noi,tstep] += pp_density[b,r]
ap_corr_t[t,noi,tstep] += ap_density[b,r]
end end
@timeit "Volume sum performance test" begin
pp_corr_t[:,noi,tstep] .= scalar_contraction(psi)
ap_corr_t[:,noi,tstep] .= gammazero_contraction(psi)
end
flw(U, psi, int, 1, params["Frontflow"]["epsilon"], gp, dpar, lp, ymws, dws)
end
......@@ -349,3 +346,37 @@ function Two_pt_lagrange() # Will be 2pt lagrange mult
return nothing
end
"""
    krnl_scalar_contraction!(rm, psi, lp)

CUDA kernel body. For the calling thread, take `b` from the thread index and `r`
from the block index, and store `norm2(psi[b, r])` into `rm` at the index
returned by `point_coord((b, r), lp)`.

# Arguments
- `rm`: output buffer, indexed by whatever `point_coord` returns
  (presumably a lattice-shaped device array; confirm against the caller).
- `psi`: input field, indexed as `psi[b, r]`.
- `lp`: lattice parameters forwarded to `point_coord`.

Returns `nothing`, as the kernel only writes into `rm`.
"""
function krnl_scalar_contraction!(rm, psi, lp)
    # Thread index within the block and block index within the grid.
    ithread = Int64(CUDA.threadIdx().x)
    iblock = Int64(CUDA.blockIdx().x)

    @inbounds begin
        site = point_coord((ithread, iblock), lp)
        density = norm2(psi[ithread, iblock])
        rm[site] = density
    end

    return nothing
end
"""
    scalar_contraction(psi)

Launch `krnl_scalar_contraction!` on `psi` with `lp.bsz` threads and `lp.rsz`
blocks, writing into `ymws.rm`. Then sum `ymws.rm` over its first three
dimensions, copy the result to a host `Array`, and return it reshaped to
length `lp.iL[end]`.

`lp` and `ymws` are not parameters; they are taken from the scope in which this
function is defined.
"""
function scalar_contraction(psi)
    workspace = ymws.rm

    # Fill the workspace on the device and wait for the kernel to finish.
    CUDA.@sync begin
        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_scalar_contraction!(workspace, psi, lp)
    end

    # Collapse dimensions 1-3, leaving one value per index of the last dimension.
    reduced = CUDA.reduce(+, workspace; dims=(1, 2, 3))
    on_host = Array(reduced)
    return reshape(on_host, lp.iL[end])
end
"""
    krnl_gammazero_contraction!(cm, psi, lp)

CUDA kernel body. For the calling thread, take `b` from the thread index and `r`
from the block index, and store `dot(psi[b, r], dmul(Gamma{4}, psi[b, r]))` into
`cm` at the index returned by `point_coord((b, r), lp)`.

# Arguments
- `cm`: output buffer, indexed by whatever `point_coord` returns
  (presumably a complex, lattice-shaped device array; confirm against the caller).
- `psi`: input field, indexed as `psi[b, r]`.
- `lp`: lattice parameters forwarded to `point_coord`.

Returns `nothing`, as the kernel only writes into `cm`.
"""
function krnl_gammazero_contraction!(cm, psi, lp)
    # Thread index within the block and block index within the grid.
    ithread = Int64(CUDA.threadIdx().x)
    iblock = Int64(CUDA.blockIdx().x)

    @inbounds begin
        site = point_coord((ithread, iblock), lp)
        spinor = psi[ithread, iblock]
        cm[site] = dot(spinor, dmul(Gamma{4}, spinor))
    end

    return nothing
end
"""
    gammazero_contraction(psi)

Launch `krnl_gammazero_contraction!` on `psi` with `lp.bsz` threads and `lp.rsz`
blocks, writing into `ymws.cm`. Then sum `ymws.cm` over its first three
dimensions, copy the result to a host `Array`, and return it reshaped to
length `lp.iL[end]`.

`lp` and `ymws` are not parameters; they are taken from the scope in which this
function is defined.
"""
function gammazero_contraction(psi)
    workspace = ymws.cm

    # Fill the workspace on the device and wait for the kernel to finish.
    CUDA.@sync begin
        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_gammazero_contraction!(workspace, psi, lp)
    end

    # Collapse dimensions 1-3, leaving one value per index of the last dimension.
    reduced = CUDA.reduce(+, workspace; dims=(1, 2, 3))
    on_host = Array(reduced)
    return reshape(on_host, lp.iL[end])
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment