Volume sum semi-optimization

parent 2da4146e
......@@ -148,13 +148,22 @@ function one_pt()
flush(log_file)
@timeit "CPU to GPU" copyto!(dws.st,psi_CPU)
Quark_cond[:,noi,fl] .= zero(ComplexF64)
ap_density .= Array(dot(dws.st[b,r],psi[b,r]))
for t in 1:lp.iL[4]
for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3]
b,r = point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)
CUDA.@allowscalar Quark_cond[t,noi,fl] += ap_density[b,r]
end end end
end
Quark_cond_cfl[:,noi,fl] .= zero(ComplexF64)
pp_density .= Array(norm2.(dws.st))
for t in 1:lp.iL[4]
for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3]
b,r = point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)
CUDA.@allowscalar Quark_cond[t,noi,fl] += dot(dws.st[b,r],psi[b,r])
CUDA.@allowscalar Quark_cond_cfl[t,noi,fl] += -norm2(dws.st[b,r])
CUDA.@allowscalar Quark_cond_cfl[t,noi,fl] += -pp_density[b,r]
end end end
end
......@@ -180,12 +189,31 @@ function one_pt()
@timeit "CPU to GPU" copyto!(dws.st,psit_CPU)
ChiDchi[:,noi,fl] .= zero(Float64)
ap_density .= Array(dot(dws.st[b,r],dws.sp[b,r]))
for t in 1:lp.iL[4]
for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3]
b,r = point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)
CUDA.@allowscalar ChiDchi[t,noi,fl] += ap_density[b,r]
end end end
end
Dw!(dws.sp,U,dws.st,dpar,dws,lp)
ap_density .= Array(dot(dws.sp[b,r],psi[b,r]))
for t in 1:lp.iL[4]
for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3]
b,r = point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)
CUDA.@allowscalar ChiDchi[t,noi,fl] += -ap_density[b,r]
end end end
end
Quark_cond2[:,noi,fl] .= zero(ComplexF64)
ap_density .= Array(dot(dws.st[b,r],psi[b,r]))
for t in 1:lp.iL[4]
for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3]
b,r = point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)
CUDA.@allowscalar ChiDchi[t,noi,fl] += dot(dws.st[b,r],dws.sp[b,r])
CUDA.@allowscalar Quark_cond2[t,noi,fl] += dot(dws.st[b,r],psi[b,r])
CUDA.@allowscalar Quark_cond2[t,noi,fl] += ap_density[b,r]
end end end
end
......@@ -195,13 +223,29 @@ function one_pt()
flw_adapt(U, psi, int, flow_times[fl], gp, dpar, lp, ymws, dws)
Dw!(dws.sp,U,psi,dpar,dws,lp)
Quark_cond2_cfl[:,noi,fl] .= zero(ComplexF64)
ChiDchi_cfl[:,noi,fl] .= zero(ComplexF64)
ap_density .= Array(dot(dws.st[b,r],dws.sp[b,r]))
for t in 1:lp.iL[4]
for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3]
b,r = point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)
CUDA.@allowscalar ChiDchi_cfl[t,noi,fl] += ap_density[b,r]
end end end
end
Dw!(dws.sp,U,dws.st,dpar,dws,lp)
ap_density .= Array(dot(dws.sp[b,r],psi[b,r]))
for t in 1:lp.iL[4]
for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3]
b,r = point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)
CUDA.@allowscalar ChiDchi_cfl[t,noi,fl] += -ap_density[b,r]
end end end
end
Quark_cond2_cfl[:,noi,fl] .= zero(ComplexF64)
ap_density .= Array(dot(dws.st[b,r],psi[b,r]))
for t in 1:lp.iL[4]
for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3]
b,r = point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)
CUDA.@allowscalar ChiDchi_cfl[t,noi,fl] += dot(dws.st[b,r],dws.sp[b,r])
CUDA.@allowscalar Quark_cond2_cfl[t,noi,fl] += dot(dws.st[b,r],psi[b,r])
CUDA.@allowscalar Quark_cond2_cfl[t,noi,fl] += ap_density[b,r]
end end end
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment