Bug fixes. Stable I/O version.

parent 78447fa4
...@@ -316,7 +316,7 @@ function Two_pt_lagrange() # Will be 2pt lagrange mult ...@@ -316,7 +316,7 @@ function Two_pt_lagrange() # Will be 2pt lagrange mult
pptilde_t0[:,noi,f] .= zero(ComplexF64) pptilde_t0[:,noi,f] .= zero(ComplexF64)
@timeit "Volume sum" begin @timeit "Volume sum" begin
pphat_t0[:,noi,f] .= -scalar_contraction(psi) pphat_t0[:,noi,f] .= -scalar_contraction(psi)
pptilde_t0[:,noi,f] .= gammazero_contraction(psi) pptilde_t0[:,noi,f] .= dot_contraction(dws.st,psi)
end end
_,epslist = flw_adapt(U, psi, int, params["Frontflow"]["t_zero"], gp, dpar[f], lp, ymws, dws) _,epslist = flw_adapt(U, psi, int, params["Frontflow"]["t_zero"], gp, dpar[f], lp, ymws, dws)
...@@ -337,7 +337,7 @@ function Two_pt_lagrange() # Will be 2pt lagrange mult ...@@ -337,7 +337,7 @@ function Two_pt_lagrange() # Will be 2pt lagrange mult
pptilde_t[:,noi,fl,f] .= zero(ComplexF64) pptilde_t[:,noi,fl,f] .= zero(ComplexF64)
@timeit "Volume sum" begin @timeit "Volume sum" begin
pphat_t[:,noi,fl,f] .= -scalar_contraction(psi) pphat_t[:,noi,fl,f] .= -scalar_contraction(psi)
pptilde_t[:,noi,fl,f] .= gammazero_contraction(psi) pptilde_t[:,noi,fl,f] .= dot_contraction(dws.st,psi)
end end
ymws.U1 .= U ymws.U1 .= U
flw(U, psi, int, 1, params["Frontflow"]["epsilon"], gp, dpar[f], lp, ymws, dws) flw(U, psi, int, 1, params["Frontflow"]["epsilon"], gp, dpar[f], lp, ymws, dws)
...@@ -349,7 +349,7 @@ function Two_pt_lagrange() # Will be 2pt lagrange mult ...@@ -349,7 +349,7 @@ function Two_pt_lagrange() # Will be 2pt lagrange mult
pptilde_t[:,noi,end,f] .= zero(ComplexF64) pptilde_t[:,noi,end,f] .= zero(ComplexF64)
@timeit "Volume sum" begin @timeit "Volume sum" begin
pphat_t[:,noi,end,f] .= -scalar_contraction(psi) pphat_t[:,noi,end,f] .= -scalar_contraction(psi)
pptilde_t[:,noi,end,f] .= gammazero_contraction(psi) pptilde_t[:,noi,end,f] .= dot_contraction(dws.st,psi)
end end
@timeit "CPU to GPU" copyto!(U,U_CPU) @timeit "CPU to GPU" copyto!(U,U_CPU)
...@@ -387,6 +387,22 @@ function scalar_contraction(psi) ...@@ -387,6 +387,22 @@ function scalar_contraction(psi)
return reshape(Array(CUDA.reduce(+, ymws.rm; dims=(1,2,3))),lp.iL[end]) return reshape(Array(CUDA.reduce(+, ymws.rm; dims=(1,2,3))),lp.iL[end])
end end
# GPU kernel: for the element addressed by this thread's x-index and this
# block's x-index, store `dot(st[.,.], psi[.,.])` into `cm` at the coordinate
# that `point_coord` returns for that (thread, block) pair.
#
# Arguments
#   cm  : output array, written at the index produced by `point_coord`
#   st  : first field, indexed as `st[thread, block]`
#   psi : second field, indexed as `psi[thread, block]`
#   lp  : passed through to `point_coord` (presumably the lattice/space
#         parameters -- confirm against the definition of `point_coord`)
#
# Returns `nothing`; the only effect is the write into `cm`.
function krnl_dot_contraction!(cm, st, psi, lp)
    @inbounds begin
        tid = Int64(CUDA.threadIdx().x)
        bid = Int64(CUDA.blockIdx().x)

        # Map the (thread, block) pair onto the index used by `cm`.
        site = point_coord((tid, bid), lp)

        local_dot = dot(st[tid, bid], psi[tid, bid])
        cm[site] = local_dot
    end

    return nothing
end
# Compute the per-site `dot(st, psi)` on the GPU and reduce it over the first
# three dimensions of the work buffer.
#
# Launches `krnl_dot_contraction!` with `lp.bsz` threads and `lp.rsz` blocks,
# writing into `ymws.cm`, and waits for the launch to finish. The buffer is
# then summed over dims (1,2,3), copied into a host `Array`, and reshaped to
# `lp.iL[end]`.
#
# NOTE(review): `lp` and `ymws` are not parameters; they are read from the
# enclosing scope (presumably globals) -- confirm they are set before calling.
function dot_contraction(st, psi)
    # Fill ymws.cm site by site and block until the kernel has completed.
    CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_dot_contraction!(ymws.cm, st, psi, lp)

    # Collapse the first three dimensions on the device, then move to the host.
    summed = CUDA.reduce(+, ymws.cm; dims=(1,2,3))
    host_sum = Array(summed)

    return reshape(host_sum, lp.iL[end])
end
function krnl_gammazero_contraction!(cm, psi, lp) function krnl_gammazero_contraction!(cm, psi, lp)
@inbounds begin @inbounds begin
b = Int64(CUDA.threadIdx().x) b = Int64(CUDA.threadIdx().x)
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment