Changed the CG solver. Defined field_dot function(s)

parent 81c60e34
......@@ -14,21 +14,44 @@
Solves the linear equation `Ax = si`
"""
function CG!(si, U, m0, A, lp::SpaceParm, dws::DiracWorkspace)
function krnl_dot!(sum, fone, ftwo)
    # CUDA kernel: each thread handles the single entry addressed by
    # (thread index within the block, block index) and stores the `dot`
    # of the matching entries of `fone` and `ftwo` into `sum`.
    # No reduction happens here; the caller is expected to add up `sum`.
    tid = Int64(CUDA.threadIdx().x)
    bid = Int64(CUDA.blockIdx().x)

    sum[tid, bid] = dot(fone[tid, bid], ftwo[tid, bid])

    return nothing
end
function field_dot(
    fone::CuArray{Spinor{4, SU3fund{T}}},
    ftwo::CuArray{Spinor{4, SU3fund{T}}},
    lp,
) where {T}
    # Returns the sum over all entries of dot(fone[b, r], ftwo[b, r]).
    #
    # The entry-wise products are written by `krnl_dot!` into a scratch
    # field of `Complex{T}` obtained from `scalar_field`, then summed.
    # TODO: the scratch field is requested on every call (presumably a fresh
    # allocation - confirm); consider reusing a workspace buffer instead.
    partial = scalar_field(Complex{T}, lp)

    # One thread per entry: `lp.bsz` threads in each of `lp.rsz` blocks.
    # Wait for the kernel to finish before reducing its output.
    CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_dot!(partial, fone, ftwo)

    return sum(partial)
end
function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, maxiter::Int64 = 10, tol=1.0) where {T}
dws.sr .= si
dws.sp .= si
norm = CUDA.mapreduce(x -> norm2(x), +, si)
fill!(si,zero(eltype(so)))
fill!(si,zero(eltype(si)))
err = 0.0
tol = eps * norm
iterations = 0
niter = 0
for i in 1:maxiter
A(dws.sAp, U, dws.sp, am0, dws.st, lp)
prod = CUDA.mapreduce(x -> dot(x[1],x[2]), +, zip(dws.sp, dws.sAp))
A(dws.sAp, U, dws.sp, dpar, dws.st, lp)
prod = field_dot(dws.sp,dws.sAp,lp)
#prod = CUDA.mapreduce(x -> dot(x[1],x[2]), +, zip(dws.sp, dws.sAp))
alpha = norm/prod
si .= si .+ alpha .* dws.sp
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment