Allocate the sumf field in CG! only once per inversion and pass it to field_dot, instead of allocating it on every field_dot call

parent 1173a89a
...@@ -24,9 +24,8 @@ function krnl_dot!(sum,fone,ftwo) ...@@ -24,9 +24,8 @@ function krnl_dot!(sum,fone,ftwo)
return nothing return nothing
end end
function field_dot(fone::CuArray{Spinor{4, SU3fund{T}}},ftwo::CuArray{Spinor{4, SU3fund{T}}},lp) where {T} #Change the fact that it allocates memory (?) define the field in CG directly function field_dot(fone::CuArray{Spinor{4, SU3fund{T}}},ftwo::CuArray{Spinor{4, SU3fund{T}}},sumf,lp) where {T}
sumf = scalar_field(Complex{T}, lp)
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_dot!(sumf,fone,ftwo) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_dot!(sumf,fone,ftwo)
end end
...@@ -44,12 +43,13 @@ function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, ...@@ -44,12 +43,13 @@ function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T},
err = 0.0 err = 0.0
iterations = 0 iterations = 0
sumf = scalar_field(Complex{T}, lp)
niter = 0 niter = 0
for i in 1:maxiter for i in 1:maxiter
A(dws.sAp, U, dws.sp, dpar, dws.st, lp) A(dws.sAp, U, dws.sp, dpar, dws.st, lp)
prod = field_dot(dws.sp,dws.sAp,lp) prod = field_dot(dws.sp,dws.sAp,sumf,lp)
#prod = CUDA.mapreduce(x -> dot(x[1],x[2]), +, zip(dws.sp, dws.sAp)) #prod = CUDA.mapreduce(x -> dot(x[1],x[2]), +, zip(dws.sp, dws.sAp))
alpha = norm/prod alpha = norm/prod
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment