First try for multiple fermions

parent 2b5ec8ab
...@@ -66,12 +66,14 @@ function load_structs() ...@@ -66,12 +66,14 @@ function load_structs()
CUDA.device!(parsed_args["G"]) CUDA.device!(parsed_args["G"])
global lp = SpaceParm{4}(tuple(params["Space"]["size"]...), tuple(params["Space"]["blocks"]...),params["Space"]["bc"], (0,0,0,0,0,0)) global lp = SpaceParm{4}(tuple(params["Space"]["size"]...), tuple(params["Space"]["blocks"]...),params["Space"]["bc"], (0,0,0,0,0,0))
global gp = GaugeParm{Float64}(SU3{Float64},params["HMC"]["beta"],params["HMC"]["c0"],(params["HMC"]["cG"],0.0),(0.0,0.0),lp.iL); global gp = GaugeParm{Float64}(SU3{Float64},params["HMC"]["beta"],params["HMC"]["c0"],(params["HMC"]["cG"],0.0),(0.0,0.0),lp.iL);
global dpar = DiracParam{Float64}(SU3fund,(1/(2*params["Fermion"]["kappa"])) - 4,params["Fermion"]["csw"],ntuple(i -> exp(((i!=4)*im*params["Fermion"]["theta"]/lp.iL[i]) + ((i==4)*im*params["Fermion"]["theta_t"]/lp.iL[i])),4),0.0,params["Fermion"]["ct"]);
global dws = DiracWorkspace(SU3fund,Float64,lp); global dws = DiracWorkspace(SU3fund,Float64,lp);
global ymws = YMworkspace(SU3,Float64,lp); global ymws = YMworkspace(SU3,Float64,lp);
global int = wfl_rk3(Float64, params["Frontflow"]["epsilon"], params["Backflow"]["tol"]) global int = wfl_rk3(Float64, params["Frontflow"]["epsilon"], params["Backflow"]["tol"])
global intsch = omf4(Float64,params["HMC"]["eps"], params["HMC"]["ns"]); global intsch = omf4(Float64,params["HMC"]["eps"], params["HMC"]["ns"]);
global fernames = [replace(k, "Fermion" => "") for k in keys(params) if startswith(k, "Fermion")]
global dpar = [DiracParam{Float64}(SU3fund,(1/(2*params["Fermion"*f]["kappa"])) - 4,params["Fermion"*f]["csw"],ntuple(i -> exp(((i!=4)*im*params["Fermion"*f]["theta"]/lp.iL[i]) + ((i==4)*im*params["Fermion"*f]["theta_t"]/lp.iL[i])),4),0.0,params["Fermion"*f]["ct"]) for f in fernames];
global flow_times = params["Backflow"]["Flow_times"] global flow_times = params["Backflow"]["Flow_times"]
global MCid = 0 global MCid = 0
if params["Space"]["bc"] == 0 if params["Space"]["bc"] == 0
...@@ -103,11 +105,15 @@ function write_log() ...@@ -103,11 +105,15 @@ function write_log()
println(log_file,"Parameters:") println(log_file,"Parameters:")
println(log_file,"Lattice size: ", lp.iL) println(log_file,"Lattice size: ", lp.iL)
println(log_file,"Boundary conditions: ", params["Space"]["bc"]) println(log_file,"Boundary conditions: ", params["Space"]["bc"])
println(log_file,"kappa = ", params["Fermion"]["kappa"]) for i in fernames
println(log_file,"theta = ", params["Fermion"]["theta"]) println(log_file,"Fermion"*i*" parameters:")
println(log_file,"csw = ", dpar.csw) println(log_file,"kappa = ", params["Fermion"*f]["kappa"])
println(log_file,"tolerance = ", params["Fermion"]["tolerance"]) println(log_file,"theta_t = ", params["Fermion"*f]["theta_t"])
println(log_file,"maxiter = ", params["Fermion"]["maxiter"]) println(log_file,"theta = ", params["Fermion"*f]["theta"])
println(log_file,"csw = ", params["Fermion"*f]["csw"])
println(log_file,"tolerance = ", params["Fermion"*f]["tolerance"])
println(log_file,"maxiter = ", params["Fermion"*f]["maxiter"])
end
println(log_file,"N noise frontflow = ", params["Frontflow"]["N_noise"]) println(log_file,"N noise frontflow = ", params["Frontflow"]["N_noise"])
println(log_file,"t_zero = ", params["Frontflow"]["t_zero"]) println(log_file,"t_zero = ", params["Frontflow"]["t_zero"])
println(log_file,"eps frontflw = ", params["Frontflow"]["epsilon"]) println(log_file,"eps frontflw = ", params["Frontflow"]["epsilon"])
...@@ -115,6 +121,7 @@ function write_log() ...@@ -115,6 +121,7 @@ function write_log()
println(log_file,"Flow times = ", params["Backflow"]["Flow_times"]) println(log_file,"Flow times = ", params["Backflow"]["Flow_times"])
println(log_file,"N noise backflow = ", params["Backflow"]["N_noise"]) println(log_file,"N noise backflow = ", params["Backflow"]["N_noise"])
println(log_file,"t_source backflow = ", params["Backflow"]["tsource"]) println(log_file,"t_source backflow = ", params["Backflow"]["tsource"])
println(log_file,"Backflow tolerance = ", params["Backflow"]["tol"])
flush(log_file) flush(log_file)
return nothing return nothing
...@@ -140,8 +147,11 @@ function save_data() ...@@ -140,8 +147,11 @@ function save_data()
BDIO_start_record!(fb, BDIO_BIN_GENERIC, 1) BDIO_start_record!(fb, BDIO_BIN_GENERIC, 1)
BDIO_write!(fb, [convert(Int32, 4)]) BDIO_write!(fb, [convert(Int32, 4)])
BDIO_write!(fb, [convert(Int32, lp.iL[i]) for i in 1:4]) BDIO_write!(fb, [convert(Int32, lp.iL[i]) for i in 1:4])
BDIO_write!(fb, [dpar.m0, dpar.csw]) BDIO_write!(fb, [convert(Int32, length(fernames))])
BDIO_write!(fb, [dpar.th[i] for i in 1:4]) for k in 1:length(fernames)
BDIO_write!(fb, [dpar[k].m0, dpar[k].csw])
BDIO_write!(fb, [dpar[k].th[i] for i in 1:4])
end
BDIO_write!(fb, [convert(Int32,params["Frontflow"]["N_noise"])]) BDIO_write!(fb, [convert(Int32,params["Frontflow"]["N_noise"])])
BDIO_write!(fb, [convert(Int32,params["Frontflow"]["nsteps"])]) BDIO_write!(fb, [convert(Int32,params["Frontflow"]["nsteps"])])
BDIO_write!(fb, [convert(Int32,params["Backflow"]["N_noise"])]) BDIO_write!(fb, [convert(Int32,params["Backflow"]["N_noise"])])
...@@ -158,16 +168,19 @@ function save_data() ...@@ -158,16 +168,19 @@ function save_data()
BDIO_start_record!(fb, BDIO_BIN_GENERIC, 8, true) BDIO_start_record!(fb, BDIO_BIN_GENERIC, 8, true)
for noi in 1:params["Frontflow"]["N_noise"] for f in 1:length(dpar)
BDIO_write!(fb,pp_corr_t0[:,noi]) for noi in 1:params["Frontflow"]["N_noise"]
BDIO_write!(fb,ap_corr_t0[:,noi]) BDIO_write!(fb,pp_corr_t0[:,noi,f])
BDIO_write!(fb,ap_corr_t0[:,noi,f])
for fl in 1:params["Frontflow"]["nsteps"]+1 BDIO_write!(fb,pphat_t0[:,noi,f])
BDIO_write!(fb,pp_corr_t[:,noi,fl]) BDIO_write!(fb,pptilde_t0[:,noi,f])
BDIO_write!(fb,ap_corr_t[:,noi,fl]) for fl in 1:params["Frontflow"]["nsteps"]+1
BDIO_write!(fb,pp_corr_t[:,noi,fl,f])
BDIO_write!(fb,pphat_t[:,noi,fl]) BDIO_write!(fb,ap_corr_t[:,noi,fl,f])
BDIO_write!(fb,pptilde_t[:,noi,fl])
BDIO_write!(fb,pphat_t[:,noi,fl,f])
BDIO_write!(fb,pptilde_t[:,noi,fl,f])
end
end end
end end
...@@ -179,20 +192,21 @@ function save_data() ...@@ -179,20 +192,21 @@ function save_data()
BDIO_write!(fb,Qt[fl,:]) BDIO_write!(fb,Qt[fl,:])
end end
for noi in 1:params["Backflow"]["N_noise"] for f in 1:length(dpar)
for noi in 1:params["Backflow"]["N_noise"]
for fl in 1:length(flow_times)
BDIO_write!(fb,pp_corr_tfl[:,noi,fl,f])
BDIO_write!(fb,ap_corr_tfl[:,noi,fl,f])
for fl in 1:length(flow_times) BDIO_write!(fb,Quark_cond[:,noi,fl,f])
BDIO_write!(fb,pp_corr_tfl[:,noi,fl]) BDIO_write!(fb,Quark_cond_cfl[:,noi,fl,f])
BDIO_write!(fb,ap_corr_tfl[:,noi,fl])
BDIO_write!(fb,Quark_cond[:,noi,fl]) BDIO_write!(fb,Quark_cond2[:,noi,fl,f])
BDIO_write!(fb,Quark_cond_cfl[:,noi,fl]) BDIO_write!(fb,Quark_cond2_cfl[:,noi,fl,f])
BDIO_write!(fb,Quark_cond2[:,noi,fl]) BDIO_write!(fb,ChiDchi[:,noi,fl,f])
BDIO_write!(fb,Quark_cond2_cfl[:,noi,fl]) BDIO_write!(fb,ChiDchi_cfl[:,noi,fl,f])
end
BDIO_write!(fb,ChiDchi[:,noi,fl])
BDIO_write!(fb,ChiDchi_cfl[:,noi,fl])
end end
end end
......
...@@ -18,25 +18,25 @@ function load_fields() ...@@ -18,25 +18,25 @@ function load_fields()
# Obs ending in 0 are only relevant if tzero > 0 # Obs ending in 0 are only relevant if tzero > 0
global pp_corr_t0 = fill(zero(Float64),(lp.iL[4],params["Frontflow"]["N_noise"])); global pp_corr_t0 = fill(zero(Float64),(lp.iL[4],params["Frontflow"]["N_noise"],length(dpar)));
global ap_corr_t0 = fill(zero(ComplexF64),(lp.iL[4],params["Frontflow"]["N_noise"])); global ap_corr_t0 = fill(zero(ComplexF64),(lp.iL[4],params["Frontflow"]["N_noise"],length(dpar)));
global pphat_t0 = Array{Float64}(undef,lp.iL[4],params["Frontflow"]["N_noise"]); global pphat_t0 = Array{Float64}(undef,lp.iL[4],params["Frontflow"]["N_noise"],length(dpar));
global pptilde_t0 = Array{Complex{Float64}}(undef,lp.iL[4],params["Frontflow"]["N_noise"]); global pptilde_t0 = Array{Complex{Float64}}(undef,lp.iL[4],params["Frontflow"]["N_noise"],length(dpar));
global Qt0 = Array{Complex{Float64}}(undef,lp.iL[4]); global Qt0 = Array{Complex{Float64}}(undef,lp.iL[4]);
global Eoft0 = Array{Complex{Float64}}(undef,lp.iL[4]); global Eoft0 = Array{Complex{Float64}}(undef,lp.iL[4]);
# Frontflow # Frontflow
global pp_corr_t = fill(zero(Float64),(lp.iL[4],params["Frontflow"]["N_noise"],params["Frontflow"]["nsteps"] + 1)); global pp_corr_t = fill(zero(Float64),(lp.iL[4],params["Frontflow"]["N_noise"],params["Frontflow"]["nsteps"] + 1,length(dpar)));
global ap_corr_t = fill(zero(ComplexF64),(lp.iL[4],params["Frontflow"]["N_noise"],params["Frontflow"]["nsteps"] + 1)); global ap_corr_t = fill(zero(ComplexF64),(lp.iL[4],params["Frontflow"]["N_noise"],params["Frontflow"]["nsteps"] + 1,length(dpar)));
global pp_corr_tfl = fill(zero(Float64),(lp.iL[4],params["Backflow"]["N_noise"],length(flow_times))); global pp_corr_tfl = fill(zero(Float64),(lp.iL[4],params["Backflow"]["N_noise"],length(flow_times),length(dpar)));
global ap_corr_tfl = fill(zero(ComplexF64),(lp.iL[4],params["Backflow"]["N_noise"],length(flow_times))); global ap_corr_tfl = fill(zero(ComplexF64),(lp.iL[4],params["Backflow"]["N_noise"],length(flow_times),length(dpar)));
global pphat_t = Array{Float64}(undef,lp.iL[4],params["Frontflow"]["N_noise"],params["Frontflow"]["nsteps"] + 1); global pphat_t = Array{Float64}(undef,lp.iL[4],params["Frontflow"]["N_noise"],params["Frontflow"]["nsteps"] + 1,length(dpar));
global pptilde_t = Array{Complex{Float64}}(undef,lp.iL[4],params["Frontflow"]["N_noise"],params["Frontflow"]["nsteps"] + 1); global pptilde_t = Array{Complex{Float64}}(undef,lp.iL[4],params["Frontflow"]["N_noise"],params["Frontflow"]["nsteps"] + 1,length(dpar));
global Qteuc = Array{Float64}(undef,lp.iL[4]); global Qteuc = Array{Float64}(undef,lp.iL[4]);
global Qt = Array{Complex{Float64}}(undef,params["Frontflow"]["nsteps"] + 1,lp.iL[4]); global Qt = Array{Complex{Float64}}(undef,params["Frontflow"]["nsteps"] + 1,lp.iL[4]);
...@@ -45,13 +45,13 @@ function load_fields() ...@@ -45,13 +45,13 @@ function load_fields()
# Backflow # Backflow
global Quark_cond = Array{Complex{Float64}}(undef,lp.iL[4],params["Backflow"]["N_noise"],length(flow_times)); global Quark_cond = Array{Complex{Float64}}(undef,lp.iL[4],params["Backflow"]["N_noise"],length(flow_times),length(dpar));
global Quark_cond_cfl = Array{Complex{Float64}}(undef,lp.iL[4],params["Backflow"]["N_noise"],length(flow_times)); global Quark_cond_cfl = Array{Complex{Float64}}(undef,lp.iL[4],params["Backflow"]["N_noise"],length(flow_times),length(dpar));
global Quark_cond2 = Array{Complex{Float64}}(undef,lp.iL[4],params["Backflow"]["N_noise"],length(flow_times)); global Quark_cond2 = Array{Complex{Float64}}(undef,lp.iL[4],params["Backflow"]["N_noise"],length(flow_times),length(dpar));
global Quark_cond2_cfl = Array{Complex{Float64}}(undef,lp.iL[4],params["Backflow"]["N_noise"],length(flow_times)); global Quark_cond2_cfl = Array{Complex{Float64}}(undef,lp.iL[4],params["Backflow"]["N_noise"],length(flow_times),length(dpar));
global ChiDchi = Array{Complex{Float64}}(undef,lp.iL[4],params["Backflow"]["N_noise"],length(flow_times)); global ChiDchi = Array{Complex{Float64}}(undef,lp.iL[4],params["Backflow"]["N_noise"],length(flow_times),length(dpar));
global ChiDchi_cfl = Array{Complex{Float64}}(undef,lp.iL[4],params["Backflow"]["N_noise"],length(flow_times)); global ChiDchi_cfl = Array{Complex{Float64}}(undef,lp.iL[4],params["Backflow"]["N_noise"],length(flow_times),length(dpar));
return nothing return nothing
end end
...@@ -65,77 +65,80 @@ Stores the two point function in the variables 'pp_corr_t0', 'ap_corr_t0', 'pp_c ...@@ -65,77 +65,80 @@ Stores the two point function in the variables 'pp_corr_t0', 'ap_corr_t0', 'pp_c
""" """
function Frontflow_pt() # Will be Frontflow function Frontflow_pt() # Will be Frontflow
println(log_file,"\nMeasuring 2pt...") for f in 1:length(dpar)
flush(log_file) println(log_file,"\nMeasuring 2pt for Fermion"*fernames[f]*"...")
flush(log_file)
Eoft_plaq(Eofpla,U, gp, lp, ymws) Eoft_plaq(Eofpla,U, gp, lp, ymws)
Eoft0 .= sum(Eofpla,dims = 2) Eoft0 .= sum(Eofpla,dims = 2)
Qtop(Qteuc,U, gp, lp, ymws) Qtop(Qteuc,U, gp, lp, ymws)
Qt0 .= Qteuc Qt0 .= Qteuc
for noi in 1:params["Frontflow"]["N_noise"] for noi in 1:params["Frontflow"]["N_noise"]
niter = propagator!(psi,U, dpar, dws, lp,params["Fermion"]["maxiter"], params["Fermion"]["tolerance"],params["Frontflow"]["tsource"]) niter = propagator!(psi,U, dpar[f], dws, lp,params["Fermion"*fernames[f]]["maxiter"], params["Fermion"*fernames[f]]["tolerance"],params["Frontflow"]["tsource"])
println(log_file,"CG converged after ",niter," iterations with residue ",CUDA.mapreduce(x -> norm2(x), +, dws.sr)) println(log_file,"CG converged after ",niter," iterations with residue ",CUDA.mapreduce(x -> norm2(x), +, dws.sr))
flush(log_file) flush(log_file)
pp_corr_t0[:,noi,tstep] .= zero(Float64) pp_corr_t0[:,noi,f] .= zero(Float64)
ap_corr_t0[:,noi,tstep] .= zero(ComplexF64) ap_corr_t0[:,noi,f] .= zero(ComplexF64)
@timeit "Volume sum performance test" begin @timeit "Volume sum" begin
pp_corr_t0[:,noi,tstep] .= scalar_contraction(psi) pp_corr_t0[:,noi,f] .= scalar_contraction(psi)
ap_corr_t0[:,noi,tstep] .= gammazero_contraction(psi) ap_corr_t0[:,noi,f] .= gammazero_contraction(psi)
end end
if params["Frontflow"]["t_zero"]>0.0 if params["Frontflow"]["t_zero"]>0.0
_,epslist = flw_adapt(U, psi, int, params["Frontflow"]["t_zero"], gp, dpar, lp, ymws, dws) _,epslist = flw_adapt(U, psi, int, params["Frontflow"]["t_zero"], gp, dpar[f], lp, ymws, dws)
println(log_file,"Flowed correlator to t = ",params["Frontflow"]["t_zero"]," with last stepsize ",epslist[end-1]) println(log_file,"Flowed correlator to t = ",params["Frontflow"]["t_zero"]," with last stepsize ",epslist[end-1])
end
println(log_file,"Flowing correlator ",params["Frontflow"]["nsteps"]," steps of size ",params["Frontflow"]["epsilon"])
flush(log_file) flush(log_file)
end
for tstep in 1:params["Frontflow"]["nsteps"] for tstep in 1:params["Frontflow"]["nsteps"]
if noi ==1
Eoft_plaq(Eofpla,U, gp, lp, ymws)
Eoft[tstep,:] .= sum(Eofpla,dims = 2)
Qtop(Qteuc, U, gp, lp, ymws)
Qt[tstep,:] .= Qteuc
end
pp_corr_t[:,noi,tstep,f] .= zero(Float64)
ap_corr_t[:,noi,tstep,f] .= zero(ComplexF64)
@timeit "Volume sum" begin
pp_corr_t[:,noi,tstep,f] .= scalar_contraction(psi)
ap_corr_t[:,noi,tstep,f] .= gammazero_contraction(psi)
end
flw(U, psi, int, 1, params["Frontflow"]["epsilon"], gp, dpar[f], lp, ymws, dws)
end
if noi ==1 if noi ==1
Eoft_plaq(Eofpla,U, gp, lp, ymws) Eoft_plaq(Eofpla,U, gp, lp, ymws)
Eoft[tstep,:] .= sum(Eofpla,dims = 2) Eoft[end,:] .= sum(Eofpla,dims = 2)
Qtop(Qteuc, U, gp, lp, ymws) Qtop(Qteuc, U, gp, lp, ymws)
Qt[tstep,:] .= Qteuc Qt[end,:] .= Qteuc
end end
pp_corr_t[:,noi,tstep] .= zero(Float64) pp_corr_t[:,noi,end,f] .= zero(Float64)
ap_corr_t[:,noi,tstep] .= zero(ComplexF64) ap_corr_t[:,noi,end,f] .= zero(ComplexF64)
@timeit "Volume sum performance test" begin @timeit "Volume sum" begin
pp_corr_t[:,noi,tstep] .= scalar_contraction(psi) pp_corr_t[:,noi,end,f] .= scalar_contraction(psi)
ap_corr_t[:,noi,tstep] .= gammazero_contraction(psi) ap_corr_t[:,noi,end,f] .= gammazero_contraction(psi)
end end
flw(U, psi, int, 1, params["Frontflow"]["epsilon"], gp, dpar, lp, ymws, dws) @timeit "CPU to GPU" copyto!(U,U_CPU)
end
if noi ==1
Eoft_plaq(Eofpla,U, gp, lp, ymws)
Eoft[end,:] .= sum(Eofpla,dims = 2)
Qtop(Qteuc, U, gp, lp, ymws)
Qt[end,:] .= Qteuc
end
pp_corr_t[:,noi,end] .= zero(Float64)
ap_corr_t[:,noi,end] .= zero(ComplexF64)
@timeit "Volume sum performance test" begin
pp_corr_t[:,noi,end] .= scalar_contraction(psi)
ap_corr_t[:,noi,end] .= gammazero_contraction(psi)
end end
@timeit "CPU to GPU" copyto!(U,U_CPU)
end end
pp_corr_t0 .= pp_corr_t0./prod(lp.iL[1:3])
ap_corr_t0 .= ap_corr_t0./prod(lp.iL[1:3])
pp_corr_t .= pp_corr_t./prod(lp.iL[1:3])
ap_corr_t .= ap_corr_t./prod(lp.iL[1:3])
pp_corr_t0 .= pp_corr_t0./prod(lp.iL[1:3]) println(log_file,"Finished measuring 2pt")
ap_corr_t0 .= ap_corr_t0./prod(lp.iL[1:3]) flush(log_file)
pp_corr_t .= pp_corr_t./prod(lp.iL[1:3])
ap_corr_t .= ap_corr_t./prod(lp.iL[1:3])
println(log_file,"Finished measuring 2pt")
flush(log_file)
return nothing return nothing
end end
...@@ -152,113 +155,115 @@ function Backflow_pt() # Will be backflow ...@@ -152,113 +155,115 @@ function Backflow_pt() # Will be backflow
@timeit "CPU to GPU" copyto!(U,U_CPU) @timeit "CPU to GPU" copyto!(U,U_CPU)
for fl in 1:length(flow_times) for f in 1:length(dpar)
println(log_file,"\nMeasuring 1pt at t = "*string(flow_times[fl])*"...")
flush(log_file)
for noi in 1:params["Backflow"]["N_noise"]
## The change in the field before randomize should be moved to the package
psi .= 0.0*psi
pfrandomize!(psi,lp,params["Backflow"]["tsource"])
@timeit "GPU to CPU" psit_CPU .= Array(psi)
backflow(psi, U, flow_times[fl], params["Backflow"]["Nsaves"] , gp, dpar, lp, ymws, dws)
@timeit "GPU to CPU" psi_CPU .= Array(psi) for fl in 1:length(flow_times)
println(log_file,"\nMeasuring 1pt at t = "*string(flow_times[fl])*" for Fermion"*fernames[f]*"...")
@timeit "Inversion" begin flush(log_file)
dws.sp .= dmul.(Gamma{5},psi)
g5Dw!(psi,U,dws.sp,dpar,dws,lp)
niter = CG!(psi,U,DwdagDw!,dpar,lp,dws,params["Fermion"]["maxiter"], params["Fermion"]["tolerance"]) for noi in 1:params["Backflow"]["N_noise"]
end
println(log_file,"CG converged after ",niter," iterations with residue ",CUDA.mapreduce(x -> norm2(x), +, dws.sr)) ## The change in the field before randomize should be moved to the package
flush(log_file) psi .= 0.0*psi
pfrandomize!(psi,lp,params["Backflow"]["tsource"])
@timeit "GPU to CPU" psit_CPU .= Array(psi)
@timeit "CPU to GPU" copyto!(dws.st,psi_CPU) backflow(psi, U, flow_times[fl], params["Backflow"]["Nsaves"] , gp, dpar[f], lp, ymws, dws)
Quark_cond[:,noi,fl] .= zero(ComplexF64)
ap_density .= Array(dot.(dws.st,psi))
Quark_cond_cfl[:,noi,fl] .= zero(ComplexF64)
pp_density .= Array(norm2.(dws.st))
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp)
Quark_cond[t,noi,fl] += ap_density[b,r]
Quark_cond_cfl[t,noi,fl] += -pp_density[b,r]
end end
pp_density .= Array(norm2.(psi))
ap_density .= Array(dot.(psi,dmul.(Gamma{4},psi)))
pp_corr_tfl[:,noi,fl] .= zero(Float64)
ap_corr_tfl[:,noi,fl] .= zero(ComplexF64)
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp)
pp_corr_tfl[t,noi,fl] += pp_density[b,r]
ap_corr_tfl[t,noi,fl] += ap_density[b,r]
end end
@timeit "CPU to GPU" copyto!(U,U_CPU) @timeit "GPU to CPU" psi_CPU .= Array(psi)
@timeit "ChiDchi" begin @timeit "Inversion" begin
dws.sp .= dmul.(Gamma{5},psi)
g5Dw!(psi,U,dws.sp,dpar[f],dws,lp)
niter = CG!(psi,U,DwdagDw!,dpar[f],lp,dws,params["Fermion"*fernames[f]]["maxiter"], params["Fermion"*fernames[f]]["tolerance"])
end
flw_adapt(U, psi, int, flow_times[fl], gp, dpar, lp, ymws, dws) println(log_file,"CG converged after ",niter," iterations with residue ",CUDA.mapreduce(x -> norm2(x), +, dws.sr))
Dw!(dws.sp,U,psi,dpar,dws,lp) flush(log_file)
@timeit "CPU to GPU" copyto!(dws.st,psit_CPU) @timeit "CPU to GPU" copyto!(dws.st,psi_CPU)
ChiDchi[:,noi,fl] .= zero(Float64) Quark_cond[:,noi,fl,f] .= zero(ComplexF64)
ap_density .= Array(dot.(dws.st,dws.sp)) ap_density .= Array(dot.(dws.st,psi))
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz Quark_cond_cfl[:,noi,fl,f] .= zero(ComplexF64)
t = point_time((b,r),lp) pp_density .= Array(norm2.(dws.st))
ChiDchi[t,noi,fl] += ap_density[b,r]
end end
Dw!(dws.sp,U,dws.st,dpar,dws,lp)
ap_density .= Array(dot.(dws.sp,psi))
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz @timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp) t = point_time((b,r),lp)
ChiDchi[t,noi,fl] += -ap_density[b,r] Quark_cond[t,noi,fl,f] += ap_density[b,r]
Quark_cond_cfl[t,noi,fl,f] += -pp_density[b,r]
end end end end
Quark_cond2[:,noi,fl] .= zero(ComplexF64) pp_density .= Array(norm2.(psi))
ap_density .= Array(dot.(dws.st,psi)) ap_density .= Array(dot.(psi,dmul.(Gamma{4},psi)))
pp_corr_tfl[:,noi,fl,f] .= zero(Float64)
ap_corr_tfl[:,noi,fl,f] .= zero(ComplexF64)
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz @timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp) t = point_time((b,r),lp)
Quark_cond2[t,noi,fl] += ap_density[b,r] pp_corr_tfl[t,noi,fl,f] += pp_density[b,r]
ap_corr_tfl[t,noi,fl,f] += ap_density[b,r]
end end end end
@timeit "CPU to GPU" copyto!(U,U_CPU) @timeit "CPU to GPU" copyto!(U,U_CPU)
@timeit "CPU to GPU" copyto!(psi,psi_CPU)
flw_adapt(U, psi, int, flow_times[fl], gp, dpar, lp, ymws, dws) @timeit "ChiDchi" begin
Dw!(dws.sp,U,psi,dpar,dws,lp)
flw_adapt(U, psi, int, flow_times[fl], gp, dpar[f], lp, ymws, dws)
Dw!(dws.sp,U,psi,dpar[f],dws,lp)
@timeit "CPU to GPU" copyto!(dws.st,psit_CPU)
ChiDchi[:,noi,fl,f] .= zero(Float64)
ap_density .= Array(dot.(dws.st,dws.sp))
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp)
ChiDchi[t,noi,fl,f] += ap_density[b,r]
end end
Dw!(dws.sp,U,dws.st,dpar[f],dws,lp)
ap_density .= Array(dot.(dws.sp,psi))
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp)
ChiDchi[t,noi,fl,f] += -ap_density[b,r]
end end
Quark_cond2[:,noi,fl,f] .= zero(ComplexF64)
ap_density .= Array(dot.(dws.st,psi))
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp)
Quark_cond2[t,noi,fl,f] += ap_density[b,r]
end end
@timeit "CPU to GPU" copyto!(U,U_CPU)
@timeit "CPU to GPU" copyto!(psi,psi_CPU)
flw_adapt(U, psi, int, flow_times[fl], gp, dpar[f], lp, ymws, dws)
Dw!(dws.sp,U,psi,dpar[f],dws,lp)
ChiDchi_cfl[:,noi,fl,f] .= zero(ComplexF64)
ap_density .= Array(dot.(dws.st,dws.sp))
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp)
ChiDchi_cfl[t,noi,fl,f] += ap_density[b,r]
end end
Dw!(dws.sp,U,dws.st,dpar[f],dws,lp)
ap_density .= Array(dot.(dws.sp,psi))
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp)
ChiDchi_cfl[t,noi,fl,f] += -ap_density[b,r]
end end
Quark_cond2_cfl[:,noi,fl,f] .= zero(ComplexF64)
ap_density .= Array(dot.(dws.st,psi))
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp)
Quark_cond2_cfl[t,noi,fl,f] += ap_density[b,r]
end end
end
ChiDchi_cfl[:,noi,fl] .= zero(ComplexF64)
ap_density .= Array(dot.(dws.st,dws.sp))
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp)
ChiDchi_cfl[t,noi,fl] += ap_density[b,r]
end end
Dw!(dws.sp,U,dws.st,dpar,dws,lp)
ap_density .= Array(dot.(dws.sp,psi))
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp)
ChiDchi_cfl[t,noi,fl] += -ap_density[b,r]
end end
Quark_cond2_cfl[:,noi,fl] .= zero(ComplexF64) @timeit "CPU to GPU" copyto!(U,U_CPU)
ap_density .= Array(dot.(dws.st,psi))
@timeit "Volume sum" for b in 1:lp.bsz for r in 1:lp.rsz
t = point_time((b,r),lp)
Quark_cond2_cfl[t,noi,fl] += ap_density[b,r]
end end
end end
@timeit "CPU to GPU" copyto!(U,U_CPU)
end end
end end
...@@ -290,68 +295,68 @@ function Two_pt_lagrange() # Will be 2pt lagrange mult ...@@ -290,68 +295,68 @@ function Two_pt_lagrange() # Will be 2pt lagrange mult
@timeit "CPU to GPU" copyto!(U,U_CPU) @timeit "CPU to GPU" copyto!(U,U_CPU)
println(log_file,"Measuring Lagrange multiplier correlators...") for f in 1:length(dpar)
flush(log_file) println(log_file,"\nMeasuring Lagrange multiplier correlators for Fermion"*fernames[f]*"...")
for noi in 1:params["Frontflow"]["N_noise"] flush(log_file)
for noi in 1:params["Frontflow"]["N_noise"]
pfrandomize!(psi,lp)
@timeit "GPU to CPU" psi_CPU .= Array(psi)
@timeit "Inversion" begin
dws.sp .= dmul.(Gamma{5},psi)
g5Dw!(psi,U,dws.sp,dpar,dws,lp)
niter = CG!(psi,U,DwdagDw!,dpar,lp,dws,params["Fermion"]["maxiter"], params["Fermion"]["tolerance"]) pfrandomize!(psi,lp)
end
println(log_file,"CG converged after ",niter," iterations with residue ",CUDA.mapreduce(x -> norm2(x), +, dws.sr)) @timeit "GPU to CPU" psi_CPU .= Array(psi)
flush(log_file)
pp_corr_t0[:,noi,fl] .= zero(Float64) @timeit "Inversion" begin
ap_corr_t0[:,noi,fl] .= zero(ComplexF64) dws.sp .= dmul.(Gamma{5},psi)
@timeit "Volume sum performance test" begin g5Dw!(psi,U,dws.sp,dpar[f],dws,lp)
pphat_t0[:,noi,fl] .= -scalar_contraction(psi) niter = CG!(psi,U,DwdagDw!,dpar[f],lp,dws,params["Fermion"*fernames[f]]["maxiter"], params["Fermion"*fernames[f]]["tolerance"])
pptilde_t0[:,noi,fl] .= gammazero_contraction(psi) end
end
println(log_file,"CG converged after ",niter," iterations with residue ",CUDA.mapreduce(x -> norm2(x), +, dws.sr))
flush(log_file)
pphat_t0[:,noi,f] .= zero(Float64)
pptilde_t0[:,noi,f] .= zero(ComplexF64)
@timeit "Volume sum" begin
pphat_t0[:,noi,f] .= -scalar_contraction(psi)
pptilde_t0[:,noi,f] .= gammazero_contraction(psi)
end
_,epslist = flw_adapt(U, psi, int, params["Frontflow"]["t_zero"], gp, dpar, lp, ymws, dws) _,epslist = flw_adapt(U, psi, int, params["Frontflow"]["t_zero"], gp, dpar[f], lp, ymws, dws)
@timeit "GPU to CPU" psit_CPU .= Array(psi) @timeit "GPU to CPU" psit_CPU .= Array(psi)
@timeit "CPU to GPU" copyto!(U,U_CPU) @timeit "CPU to GPU" copyto!(U,U_CPU)
@timeit "CPU to GPU" copyto!(psi,psi_CPU) @timeit "CPU to GPU" copyto!(psi,psi_CPU)
if params["Frontflow"]["t_zero"]>0.0 if params["Frontflow"]["t_zero"]>0.0
_,epslist = flw_adapt(U, psi, int, params["Frontflow"]["t_zero"], gp, dpar, lp, ymws, dws) _,epslist = flw_adapt(U, psi, int, params["Frontflow"]["t_zero"], gp, dpar[f], lp, ymws, dws)
println(log_file,"Flowed correlators to t = ",params["Frontflow"]["t_zero"]," with last stepsize ",epslist[end-1]) println(log_file,"Flowed correlator to t = ",params["Frontflow"]["t_zero"]," with last stepsize ",epslist[end-1])
end
println(log_file,"Flowing correlator ",params["Frontflow"]["nsteps"]," steps of size ",params["Frontflow"]["epsilon"])
flush(log_file) flush(log_file)
end
@timeit "CPU to GPU" copyto!(dws.st,psit_CPU) @timeit "CPU to GPU" copyto!(dws.st,psit_CPU)
for fl in 1:params["Frontflow"]["nsteps"] for fl in 1:params["Frontflow"]["nsteps"]
pp_corr_t[:,noi,fl] .= zero(Float64) pphat_t[:,noi,fl,f] .= zero(Float64)
ap_corr_t[:,noi,fl] .= zero(ComplexF64) pptilde_t[:,noi,fl,f] .= zero(ComplexF64)
@timeit "Volume sum performance test" begin @timeit "Volume sum" begin
pphat_t[:,noi,fl] .= -scalar_contraction(psi) pphat_t[:,noi,fl,f] .= -scalar_contraction(psi)
pptilde_t[:,noi,fl] .= gammazero_contraction(psi) pptilde_t[:,noi,fl,f] .= gammazero_contraction(psi)
end end
ymws.U1 .= U ymws.U1 .= U
flw(U, psi, int, 1, params["Frontflow"]["epsilon"], gp, dpar, lp, ymws, dws) flw(U, psi, int, 1, params["Frontflow"]["epsilon"], gp, dpar[f], lp, ymws, dws)
U .= ymws.U1 U .= ymws.U1
flw(U, dws.st, int, 1, params["Frontflow"]["epsilon"], gp, dpar, lp, ymws, dws) flw(U, dws.st, int, 1, params["Frontflow"]["epsilon"], gp, dpar[f], lp, ymws, dws)
end end
pp_corr_t[:,noi,end] .= zero(Float64) pphat_t[:,noi,end,f] .= zero(Float64)
ap_corr_t[:,noi,end] .= zero(ComplexF64) pptilde_t[:,noi,end,f] .= zero(ComplexF64)
@timeit "Volume sum performance test" begin @timeit "Volume sum" begin
pphat_t[:,noi,end] .= -scalar_contraction(psi) pphat_t[:,noi,end,f] .= -scalar_contraction(psi)
pptilde_t[:,noi,end] .= gammazero_contraction(psi) pptilde_t[:,noi,end,f] .= gammazero_contraction(psi)
end end
@timeit "CPU to GPU" copyto!(U,U_CPU) @timeit "CPU to GPU" copyto!(U,U_CPU)
end
end end
pphat_t0 .= pphat_t0./prod(lp.iL[1:3]) pphat_t0 .= pphat_t0./prod(lp.iL[1:3])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment