Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unprec exp-clover forces added #70

Open
wants to merge 283 commits into
base: exp-clover
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
283 commits
Select commit Hold shift + click to select a range
ee28b22
Fix compiler warnings
eromero-vlc May 21, 2022
17c87f8
Fix bug in cloneOperator
eromero-vlc May 21, 2022
db79aae
Change protocol in Operator callbacks
eromero-vlc May 21, 2022
14577cb
Clear cache and show performance metrics for superbblas
eromero-vlc May 21, 2022
124875e
Change the sparse tensor layout making the X the slowest coordinate
eromero-vlc May 21, 2022
ea68ad5
First version of even-odd preconditioner
eromero-vlc May 23, 2022
3b8d414
Merge remote-tracking branch 'origin/devel' into eloy/mgproton-progress
eromero-vlc May 24, 2022
f457c66
Fixes to compile chroma for gpus without gdpjit
eromero-vlc May 24, 2022
20c0af8
Fixes in new even-odd preconditioner
eromero-vlc May 24, 2022
8324252
Fixes for gpus
eromero-vlc May 25, 2022
0acf32c
Fix split_dimension
eromero-vlc May 25, 2022
caf535c
Fix many things.
eromero-vlc May 25, 2022
4bdb95c
Fix clone operator for even-odd preconditioner
eromero-vlc May 26, 2022
1d05f2f
Fix eo preconditioner
eromero-vlc May 27, 2022
b87ebbb
Optimize clone operator
eromero-vlc May 27, 2022
f6e2c5d
Fix extending support when it's larger than half the dimension's size
eromero-vlc May 28, 2022
2c10150
Add superbblas time tracker and track fgmres
eromero-vlc May 28, 2022
6c35067
Add eigensolver
eromero-vlc May 29, 2022
8dec86d
Add support for orthogonalization in fgmres
eromero-vlc May 29, 2022
2017c3f
Check on orthogonality level
eromero-vlc May 30, 2022
7622933
Fix ortho_level and tweak orthogonalization stopping criteria
eromero-vlc May 30, 2022
90fd475
Fix cholinv and ortho_level
eromero-vlc May 30, 2022
5d8f17d
Add check for the dense solver
eromero-vlc May 30, 2022
500b0d9
Check residual vector in fgmres; fix logic for chirality splitting
eromero-vlc May 30, 2022
e03c378
Fix minor bugs and reformat the code
eromero-vlc May 31, 2022
4949dfe
Fix bug in collapse_dimensions; add operator_ordering xml option; and…
eromero-vlc Jun 2, 2022
3e70654
Fix bug in fgmres and change the default value of operator_ordering
eromero-vlc Jun 3, 2022
f938d32
Expose the sparse operator nonzero block layout to a xml option
eromero-vlc Jun 3, 2022
88f35dd
Add blocking support (not passing eo tests)
eromero-vlc Jun 9, 2022
443f100
Fix several issues with blocking.
eromero-vlc Jun 10, 2022
bfb0acf
Fix even-odd preconditioning.
eromero-vlc Jun 11, 2022
e84b509
Merge branch 'eloy/mgproton-progress' into eloy/mgproton-progress-blo…
eromero-vlc Jun 11, 2022
63b10fc
Fix mg with blocking
eromero-vlc Jun 11, 2022
ae65bef
Add checks for compatible tensor distributions and use them on sparse…
eromero-vlc Jun 13, 2022
50c30d5
Fix more things with blocking, still basic blocking fails in parallel
eromero-vlc Jun 14, 2022
5f6ac30
Add print to SpTensor.
eromero-vlc Jun 20, 2022
8ec94a7
Fix blocking sparse operators, still it needs more work to detect edg…
eromero-vlc Jun 21, 2022
c7c2fd2
Fix reordering for sparse tensors.
eromero-vlc Jun 21, 2022
e075580
Fix support for sparse tensor powers.
eromero-vlc Jun 24, 2022
57ef775
Fix append_dimension.
eromero-vlc Jun 24, 2022
081f4dc
Fix minor thing from previous commit about blocking
eromero-vlc Jun 25, 2022
45cc074
Change Tensor::make_sure to make a compatible partition when the distri…
eromero-vlc Jun 29, 2022
50e56af
Add support for even-odd preconditioning with a non-even-odd precond…
eromero-vlc Jun 30, 2022
264351e
Fix even-odd ordering for the coarse operator
eromero-vlc Aug 6, 2022
1e14a63
Fix creating of null vectors
eromero-vlc Aug 6, 2022
f5361f0
Destroy chroma action when using mgproton to save memory
eromero-vlc Aug 8, 2022
c63c327
Missing change in last commit
eromero-vlc Aug 8, 2022
b157b10
Use Gaussian noise to build the null vectors.
eromero-vlc Aug 8, 2022
cbea6a9
Fix mgproton without chirality splitting.
eromero-vlc Aug 12, 2022
249aad6
Cache the extraction of the block diagonals while generating the even…
eromero-vlc Aug 12, 2022
dbbbd46
Merge remote-tracking branch 'origin/devel' into eloy/mgproton-progre…
eromero-vlc Aug 13, 2022
2d4eee2
Remove repartitioning of the input tensors for contractions.
eromero-vlc Aug 17, 2022
6bb3002
Merge branch 'eloy/mgproton-progress-blocking' of github.com:Jefferso…
eromero-vlc Aug 17, 2022
5176b42
superb tasks: select gpu device when not using qdp-jit
eromero-vlc Aug 17, 2022
49cb72b
superb tasks: use SB_NUM_GPUS_ON_NODE before SLURM_LOCALID when not u…
eromero-vlc Aug 19, 2022
4a8420a
Update the new interface of get_strides
eromero-vlc Aug 19, 2022
e019075
mgproton: fix blocking x direction with an odd value and improve effi…
eromero-vlc Aug 25, 2022
d4cb0d0
Merge branch 'devel' into eloy/mgproton-progress-blocking
eromero-vlc Aug 25, 2022
e248dea
Generate oddity masks more efficiently
eromero-vlc Aug 29, 2022
b0bbf08
mgproton: add casting to single/double precision.
eromero-vlc Aug 31, 2022
768fb9e
Fix std::array constructor undefined behaviour
eromero-vlc Sep 2, 2022
711e24b
Add cusolver/hipsolver dependency needed by superbblas
eromero-vlc Sep 5, 2022
e6817d3
superb: add cost tracks for cloneOperator
eromero-vlc Sep 5, 2022
b585940
superb: allow reshape_dimensions in more cases
eromero-vlc Sep 5, 2022
5ec544c
Fix previous commit
eromero-vlc Sep 5, 2022
e137e2b
superb: fix append_dimension.
eromero-vlc Sep 6, 2022
87067f9
mgproton: add options to improve the numerical stability of fgmres
eromero-vlc Sep 6, 2022
ba3b507
Merge branch 'devel' into eloy/mgproton-progress-blocking
eromero-vlc Sep 19, 2022
0a10f72
mgproton: efficient support for explicit operators
eromero-vlc Sep 20, 2022
c9a1dc5
superb: fix Maybe for clang
eromero-vlc Sep 20, 2022
fbec642
superb: fix SpTensor::kvslice_from_size from previous commit
eromero-vlc Sep 20, 2022
0331265
mgproton: fix building prolongators for only-even operators
eromero-vlc Sep 22, 2022
a7a78bb
Merge branch 'devel' into eloy/mgproton-progress-blocking
eromero-vlc Sep 24, 2022
ae9ab58
superb and mgproton: fix even-odd prec and minor things with diagonal…
eromero-vlc Sep 25, 2022
1750d3f
superb: fix compilation with quda
eromero-vlc Sep 25, 2022
8d00e47
mgproton: add left preconditioning with Op_ee to even-odd prec.
eromero-vlc Sep 26, 2022
477f185
mgproton: add block jacobi
eromero-vlc Sep 26, 2022
81d20c5
mgproton: save some matvecs with prec_ee
eromero-vlc Sep 26, 2022
9cb2ea6
mgproton: better support for block jacobi
eromero-vlc Oct 3, 2022
c81da12
superb: remove Maybe with Tensor
eromero-vlc Oct 4, 2022
ef17a2a
superb: change Tensor interface to get the pointer and the context
eromero-vlc Oct 5, 2022
589a3ab
superb: first version of automatic overlapping of communications
eromero-vlc Oct 7, 2022
bcf9f56
mgproton: add inversions with full spin-color fields.
eromero-vlc Oct 14, 2022
0463213
superb: automatic distribution for superb tensors
eromero-vlc Oct 24, 2022
475a546
Add prolongator cache
eromero-vlc Oct 24, 2022
fcbf04b
Merge remote-tracking branch 'origin/devel' into eloy/mgproton-progre…
eromero-vlc Oct 25, 2022
b9c31ec
superb: compute laplacian eigenvectors more efficiently
eromero-vlc Oct 25, 2022
42b075a
superb: fix previous commit; add support for cloning operators where …
eromero-vlc Oct 26, 2022
d308b52
superb: use superbblas for partitioning not chroma tensors
eromero-vlc Oct 28, 2022
49aebc7
mgproton: fix bug in even-odd prec cache that shows up when creating …
eromero-vlc Oct 29, 2022
66213d5
superb: use a real arithmetic for the laplacian instead of complex; i…
eromero-vlc Oct 31, 2022
9e7e316
superb:
eromero-vlc Nov 4, 2022
d2320c2
mgproton: add bicgstab
eromero-vlc Nov 5, 2022
75f4fab
superb: fix make_eg to store the preferred location for the tensor, h…
eromero-vlc Nov 6, 2022
f4758e7
superb:
eromero-vlc Nov 9, 2022
10b5781
superb: roll back change to compute eigenvectors with real arithmetic…
eromero-vlc Nov 9, 2022
3c72553
superb: add Tensor::localVolume
eromero-vlc Nov 17, 2022
e061ad9
superb: fix compiling with cuda
eromero-vlc Nov 23, 2022
99f5437
superb: Update bsr_krylov call and fix its synchronization
eromero-vlc Dec 3, 2022
9e81d14
Add missing dependencies to rocm libraries
eromero-vlc Dec 29, 2022
6c7fde0
Add alternative init sequence without qdp-jit but using gpus.
eromero-vlc Dec 29, 2022
7f7379d
superb: fix missing declaration when using print
eromero-vlc Dec 29, 2022
d024689
superb: add support for the new BSR Kronecker format, which makes the…
eromero-vlc Jan 14, 2023
b8a951f
Add cuda/rocm support even when neither quda nor qdpjit is used.
eromero-vlc Feb 10, 2023
1e1a16b
superb: fix a couple of issues with the new kron bsr format
eromero-vlc Feb 15, 2023
a6d876a
mgproton: fix memory overuse building the prolongators
eromero-vlc Feb 16, 2023
921b138
superb: add full spin splitting to prolongators; extend cloning kron …
eromero-vlc Feb 21, 2023
f36df48
mgproton: fix memory issues generating prolongators
eromero-vlc Feb 22, 2023
33e63f9
superb: minor changes in format
eromero-vlc Feb 28, 2023
e6ab1d3
Merge branch 'devel' into eloy/mgproton-progress-blocking-kron
eromero-vlc Feb 28, 2023
18005e0
mgproton:
eromero-vlc Mar 8, 2023
8065b22
superb: allow copying with implicitly conjugated tensors
eromero-vlc Mar 8, 2023
70e4488
mgproton: fix commit 18005e0b8
eromero-vlc Mar 8, 2023
551378d
superb: reduce memory footprint on cloning operators and avoid puttin…
eromero-vlc Mar 9, 2023
0ed2b89
superb: update tensor contraction to accommodate the new interface, a…
eromero-vlc Mar 29, 2023
60771c4
superb: make function identity faster;
eromero-vlc Mar 30, 2023
5abed86
superb disco task: add list of momenta and make the trace database mo…
eromero-vlc Apr 12, 2023
ce6da7c
Merge branch 'eloy/superb-disco-momenta-list' into eloy/mgproton-prog…
eromero-vlc Apr 12, 2023
f769c24
superb disco: fix empty paths
eromero-vlc Apr 13, 2023
2d7b1a0
Add a random projection implementation, useful for testing
eromero-vlc Apr 13, 2023
2fc5b51
superb disco: fix writing the databases
eromero-vlc Apr 13, 2023
63cf03d
Merge branch 'eloy/superb-disco-momenta-list' into eloy/mgproton-prog…
eromero-vlc Apr 13, 2023
d821e7a
Fix gcc 12 complaint about a missing array header
eromero-vlc Apr 13, 2023
e20a2df
superb disco: order the reported stats by momentum and displacement
eromero-vlc Apr 22, 2023
a0f8624
Merge branch 'eloy/superb-disco-momenta-list' into eloy/mgproton-prog…
eromero-vlc Apr 22, 2023
612871a
mgproton: add minimum residual solver
eromero-vlc Apr 26, 2023
6c4dd2c
superb: add glocal distributions;
eromero-vlc May 3, 2023
ec41827
mgproton: fix minor issue from prev commit
eromero-vlc May 8, 2023
b41e743
Merge remote-tracking branch 'origin/devel' into eloy/mgproton-progre…
eromero-vlc May 15, 2023
e3b16f8
Merge remote-tracking branch 'origin/devel' into eloy/mgproton-progre…
eromero-vlc May 16, 2023
5598227
Merge remote-tracking branch 'origin/devel' into eloy/mgproton-progre…
eromero-vlc May 19, 2023
4d2f7a0
superb: distribute the distillation eigenvectors computation running …
eromero-vlc May 19, 2023
ca5de20
superb: fix computation of eigenvectors, make it run on different procs
eromero-vlc May 21, 2023
2879ee1
Fix typo in superb meson preventing the detection of invalid xml
eromero-vlc May 30, 2023
0b1ed16
Fix missing close database in superb disco
eromero-vlc May 30, 2023
da108c9
Add an extension to superb dd solver that reduces the impact of inner…
eromero-vlc May 30, 2023
6543f19
Fix undefined behaviour in random projection (only used for testing p…
eromero-vlc May 31, 2023
80f7297
superb: add support for running contractions asynchronously
eromero-vlc May 31, 2023
373d6fe
superb: add summation flag for genprops
eromero-vlc Jun 1, 2023
ce04a93
superb: fix integration of primme for hip
eromero-vlc Jun 2, 2023
7fe43ee
superb: replace hanging references to diag in eo solver
eromero-vlc Jun 2, 2023
aa39f71
superb: add gcr solver
eromero-vlc Jun 27, 2023
f7df1e8
Add rocblas and rocsolver dependencies for future versions of superbblas
eromero-vlc Jul 7, 2023
979437d
superb: hierarchical preconditioner
eromero-vlc Jul 12, 2023
170ce55
Merge branch 'eloy/mgproton-progress-blocking-kron' of github.com:Jef…
eromero-vlc Jul 12, 2023
4e396e3
superb: fix bug in hierarchical precond
eromero-vlc Jul 19, 2023
ef5b838
superb: simplify getOption for vectors and support one level recursio…
eromero-vlc Jul 26, 2023
1f13928
superb: hierarchical preconditioner supports multidimensional divisions
eromero-vlc Jul 26, 2023
357b558
superb: add red-black to hie (in progress)
eromero-vlc Jul 27, 2023
000efc3
superb: fix hie precond when running in parallel
eromero-vlc Jul 28, 2023
09e1a96
superb: fix error in getOption with default value for vector types
eromero-vlc Jul 28, 2023
d66641f
superb: add a new task to compute the eigenpairs of Dirac operators w…
eromero-vlc Aug 20, 2023
9a4df09
Merge remote-tracking branch 'origin/devel' into eloy/mgproton-progre…
eromero-vlc Aug 22, 2023
a3a0e04
superb: fix format
eromero-vlc Aug 22, 2023
72914be
Merge remote-tracking branch 'origin/devel' into eloy/mgproton-progre…
eromero-vlc Aug 22, 2023
1e70b84
superb: fix minor issues.
eromero-vlc Aug 23, 2023
109e3c1
superb: add mgproto documentation (in progress)
eromero-vlc Aug 23, 2023
10de924
superb: remove dead code from superb prop task
eromero-vlc Sep 26, 2023
4e239e0
superb: improve memory footprint for contracting tensors on different…
eromero-vlc Sep 26, 2023
4e2f3bb
superb: fix compilation with rocm
eromero-vlc Sep 27, 2023
ec45031
superb: fix Dslash eigensolver task in parallel
eromero-vlc Sep 27, 2023
2d72a15
superb: fix split_dimension in a corner case
eromero-vlc Sep 27, 2023
eed2e6a
Revert "superb: improve memory footprint for contracting tensors on d…
eromero-vlc Oct 5, 2023
e2cf45f
superb: change interface to compute genprop and disco contractions: n…
eromero-vlc Oct 7, 2023
ce2fd58
Fix prev commit e2cf45fef
eromero-vlc Oct 9, 2023
68f1e5f
superb: reduce memory footprint and comms of `doMomGammaDisp_contract…
eromero-vlc Oct 12, 2023
b0adaec
superb: reduce memory footprint and comms for baryon and meson genera…
eromero-vlc Oct 16, 2023
d3108a4
Add a random projection implementation, useful for testing
eromero-vlc Apr 13, 2023
8577269
Fix undefined behaviour in random projection (only used for testing p…
eromero-vlc May 31, 2023
1d50b8d
Add explicit "close" to BinaryStoreDB database, it might avoid a mpi …
eromero-vlc Oct 18, 2023
3111b55
Replace BinaryStoreDB by LocalBinaryStoreDB in disco prob defl and th…
eromero-vlc Oct 18, 2023
7eb2a6a
superb: fix missing StopWatch start calls in computing Dslash eigenve…
eromero-vlc Oct 19, 2023
79dacf4
superb: reduce communications when phasing.
eromero-vlc Oct 19, 2023
292261e
Merge remote-tracking branch 'origin/devel' into eloy/mgproton
eromero-vlc Oct 20, 2023
17d0edf
superb: move mgproton to a .cc file to be compiled; change some publi…
eromero-vlc Oct 20, 2023
1e6d9fd
superb: make sure that exceptions produced during generating baryon show
eromero-vlc Oct 24, 2023
b9648d3
superb: fix format of mgproton.cc
eromero-vlc Oct 28, 2023
c5976c5
Fix disco probing deflation tasks: the contribution from the deflatio…
eromero-vlc Nov 1, 2023
8f65558
Merge remote-tracking branch 'origin/devel' into eloy/mgproton
eromero-vlc Nov 1, 2023
2995f6f
superb: deflation projection based on superb and add interface for su…
eromero-vlc Nov 1, 2023
da918fc
superb: add multigrid deflation and some doc.
eromero-vlc Nov 2, 2023
f3a2291
mgproton:
eromero-vlc Nov 5, 2023
0a7cb0c
mgproton: change default solver to GD and print primme options
eromero-vlc Nov 5, 2023
2cb8bfe
Extend coloring to support multiple shifts.
eromero-vlc Nov 11, 2023
2d341b8
Add a null projector for testing
eromero-vlc Nov 18, 2023
34f4f97
superb: use natural ordering for contractions in baryons, mesons, and…
eromero-vlc Nov 18, 2023
0f3f1d3
superb: fix commit 34f4f977, distribute momenta as the rest of the te…
eromero-vlc Nov 19, 2023
e5d9527
superb: baryons, fix default value for max_tslices_in_contraction
eromero-vlc Nov 28, 2023
ff4ae7e
superb: add more options to disco task to split the solution of color…
eromero-vlc Nov 28, 2023
ac354bf
superb: fix serious bug in TensorPartition::get_subpartition introduc…
eromero-vlc Dec 1, 2023
eed147a
superb: add more info and error messages for computing distillation b…
eromero-vlc Dec 1, 2023
72e0ac4
superb: fix generating genprop keys with negative t_slice value
eromero-vlc Dec 11, 2023
fa2ebee
Merge remote-tracking branch 'origin/devel' into eloy/mgproton
eromero-vlc Dec 11, 2023
8ddca2a
superb: add support for local storage and add the option for genprops…
eromero-vlc Jan 19, 2024
d7d08af
superb: fix sparse matrix slicing routines
eromero-vlc Feb 22, 2024
83b81f4
superb: add superb format storage for props
eromero-vlc Feb 26, 2024
a9d8ae1
Added an adjDisplace. Fixed up old glueball code.
grokqcd Apr 11, 2024
dc03be7
Remove unused option from baryon task
eromero-vlc Apr 19, 2024
c9363a3
Add support for time derivatives in superb genprop and disco tasks
eromero-vlc Apr 19, 2024
cbcc4ba
Fix compilation of superb tasks with libc++
eromero-vlc Apr 22, 2024
77408ad
Added MultiRHS solver (non-MG) for Clover only. Builds but requires t…
bjoo Apr 22, 2024
85aa968
Fixed a typo related to QDP-JIT
bjoo Apr 22, 2024
3d924bf
Need to test this on a multi-device system
bjoo Apr 23, 2024
08bd346
MRHS via Split Grid (1 RHS per split grid at a time)
bjoo Apr 25, 2024
3861ec2
mgproton: fix bug in bicgstab;
eromero-vlc Apr 26, 2024
366a1f9
superb: add left preconditioner to bicgstab
eromero-vlc Apr 26, 2024
416009b
superb: add recursive projector to the multigrid deflation.
eromero-vlc Apr 26, 2024
101f4c5
superb: fix preconditioning of bicgstab
eromero-vlc Apr 30, 2024
83e28b5
mgproton: add a shift action
eromero-vlc May 2, 2024
1983b6d
mgproton: add generic deflation preconditioner
eromero-vlc May 2, 2024
06b9877
mgproton: add even-odd schur preconditioner on the spin components
eromero-vlc May 3, 2024
2adcb87
Work around for icx
eromero-vlc May 3, 2024
3f41d94
Fix compilation with icc
eromero-vlc May 3, 2024
ca0b4c9
superb: fix compilation of eigenvalues task on old intel compiler
eromero-vlc May 13, 2024
e56acd5
Temporarily revert
bjoo May 16, 2024
38c0762
Works with my own QUDA without passing gauge fields.
bjoo May 18, 2024
2780cbf
Specialized QUDA-Clover Multi-Source Interface
bjoo May 30, 2024
ff209a6
Tidyup
bjoo May 30, 2024
871be09
Tidyup 2
bjoo May 30, 2024
7b870b6
Enabled to use split grid.
bjoo May 30, 2024
f103b29
Updated function call name
bjoo Jul 3, 2024
8ceccd5
Reverted these to before my attempts at split-grid MG
bjoo Jul 3, 2024
dbf9ef8
superb: improve ortho performance computing distillation vectors on gpu
eromero-vlc Jul 13, 2024
020f7d3
Merge pull request #69 from JeffersonLab/feature/quda-split-grid
bjoo Aug 1, 2024
ce17642
Added NullVectorsBatchSize Parameter
bjoo Jul 5, 2024
426bc18
Fixed MG Residuum for MRHS solver
bjoo Aug 1, 2024
032537f
Added wrappers for MRHS-MG
bjoo Aug 2, 2024
b29df25
Made FermActQprop use multi-RHS interface
bjoo Aug 12, 2024
f1dbea4
Can now solve MRHS Unpreconditioned Clover systems
bjoo Aug 12, 2024
1f531f2
Preparatory steps for next iteration of qdp-jit
fwinter Aug 26, 2024
9751d52
Unprec exp-clover forces added
henrymonge Aug 27, 2024
de6f231
Forgot entry to autotool
fwinter Sep 3, 2024
51c2299
Fix missing constant change, from commit 1f531f2
eromero-vlc Oct 1, 2024
13c24d4
superb: fix min() by lowest()
eromero-vlc Oct 1, 2024
c89b341
Add new task for testing inverters
eromero-vlc Oct 3, 2024
a201e04
Changed names of a0_2 to b0 in the chroma_gamma_matrices.
grokqcd Jul 24, 2023
dbe6bd0
Merge pull request #72 from JeffersonLab/feature/mrhs-mg
fwinter Oct 15, 2024
bfa685a
Added Projector to UnprecCloverFermAct
bjoo Oct 16, 2024
89a352e
Extend max_rhs to support several values in task INVERTER_TEST
eromero-vlc Oct 16, 2024
72915cc
Add superb version of inverter test task
eromero-vlc Oct 17, 2024
5469a0f
mgproton: fix bicgstab
eromero-vlc Oct 18, 2024
d2f9a72
mgproton: add support for unpreconditioned actions
eromero-vlc Oct 18, 2024
496703f
Added eoprec_exp-clover
henrymonge Oct 23, 2024
3ae4e0d
Merge remote-tracking branch 'origin/feature/mrhs-mg' into devel
eromero-vlc Oct 29, 2024
74da13d
Merge remote-tracking branch 'origin/devel' into exp-clover-hm
henrymonge Nov 14, 2024
d88d1ce
bring up to date with devel
henrymonge Nov 14, 2024
b93ce93
Added exp-clover file names to lib/CMakeLists
henrymonge Dec 11, 2024
ed60e9e
fixed exp quda mg
henrymonge Dec 12, 2024
8dc5dc8
fixed inverter name
henrymonge Dec 12, 2024
8084c73
Added SysSolverQUDAMULTIGRIDExpClover support
henrymonge Dec 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 54 additions & 47 deletions lib/util/ferm/mgproton.h
Original file line number Diff line number Diff line change
@@ -45,7 +45,8 @@ namespace Chroma
/// output tensors have the same dimensions

template <std::size_t NOp, typename COMPLEX>
using OperatorFun = std::function<Tensor<NOp + 1, COMPLEX>(const Tensor<NOp + 1, COMPLEX>&)>;
using OperatorFun =
std::function<void(const Tensor<NOp + 1, COMPLEX>&, Tensor<NOp + 1, COMPLEX>)>;

/// Representation of an operator together with a map to convert from domain labels (columns) to
/// image labels (rows)
@@ -91,9 +92,23 @@ namespace Chroma
// The `t` labels that are not in `d` are the column labels
std::string cols = detail::union_dimensions(t.order, "", d.order); // t.order - d.order

return fop(t.template collapse_dimensions<NOp + 1>(cols, 'n').template make_sure<COMPLEX>())
.template split_dimension<N>('n', cols, t.kvdim())
.template make_sure<T>();
auto x = t.template collapse_dimensions<NOp + 1>(cols, 'n').template make_sure<COMPLEX>();
auto y = i.template like_this<NOp + 1>("%n", '%', "", {{'n', x.kvdim()['n']}});
fop(x, y);
return y.template split_dimension<N>('n', cols, t.kvdim()).template make_sure<T>();
}

/// Apply the operator to `x`, writing the result into the caller-provided tensor `y`
/// \param x: input tensor
/// \param y: output tensor; it is taken by value and compared against the collapsed
///           output through its `data` member to decide whether a copy back is needed
template <std::size_t N, typename T>
void operator()(const Tensor<N, T>& x, Tensor<N, T> y) const
{
  // The `x` labels that are not in `d` are the column labels (x.order - d.order)
  std::string col_labels = detail::union_dimensions(x.order, "", d.order);

  // Collapse the column labels into the single label `n` and pass both tensors
  // through make_sure<COMPLEX> before handing them to the underlying function
  auto x_op =
    x.template collapse_dimensions<NOp + 1>(col_labels, 'n').template make_sure<COMPLEX>();
  auto y_op =
    y.template collapse_dimensions<NOp + 1>(col_labels, 'n').template make_sure<COMPLEX>();

  fop(x_op, y_op);

  // Copy the result back only when `y_op` does not hold the same data as `y`
  if (y.data != y_op.data)
  {
    y_op.copyTo(y);
  }
}
};

@@ -295,14 +310,14 @@ namespace Chroma
// Z(:,i) = prec * U(:,i)
if (prec.hasSome())
{
prec.getSome()(U.kvslice_from_size({{Vc, i}}, {{Vc, 1}}))
.copyTo(Z.kvslice_from_size({{Vc, i}}, {{Vc, 1}}));
prec.getSome()(U.kvslice_from_size({{Vc, i}}, {{Vc, 1}}),
Z.kvslice_from_size({{Vc, i}}, {{Vc, 1}}));
nprecs += num_cols;
}

// U(:,i+1) = op * Z(:,i)
op(Z.kvslice_from_size({{Vc, i}}, {{Vc, 1}}))
.copyTo(U.kvslice_from_size({{Vc, i + 1}}, {{Vc, 1}}));
op(Z.kvslice_from_size({{Vc, i}}, {{Vc, 1}}),
U.kvslice_from_size({{Vc, i + 1}}, {{Vc, 1}}));
nops += num_cols;
}

@@ -352,7 +367,7 @@ namespace Chroma

// Check final residual
if (error_if_not_converged) {
r = op(y);
op(y, r); // r = op(y)
nops += num_cols;
x.scale(-1).addTo(r);
auto normr = norm<1>(r, op.order_t + order_cols);
@@ -451,16 +466,14 @@ namespace Chroma
std::string prefix = getOption<std::string>(ops, "prefix", "");

// Return the solver
return {[=](const Tensor<NOp + 1, COMPLEX>& x) {
auto y = x.like_this();
return {[=](const Tensor<NOp + 1, COMPLEX>& x, Tensor<NOp + 1, COMPLEX> y) {
foreachInChuncks(
x, y, max_simultaneous_rhs,
[=](Tensor<NOp + 1, COMPLEX> x, Tensor<NOp + 1, COMPLEX> y) {
fgmres(op, prec, x, y, max_basis_size, tol, max_its, error_if_not_converged,
false /* no init guess */, verb, prefix);
},
'n');
return y;
},
op.i, op.d, nullptr, op.order_t};
}
@@ -540,12 +553,13 @@ namespace Chroma
// Construct the probing vectors, which they have as the rows the domain labels and as
// columns the domain blocking dimensions

constexpr int Nblk = NOp - Nd - 1;
auto t_bl = d.like_this(none, blkd, OnHost, OnEveryoneReplicated);
t_bl.set_zero();
auto t_bl_rows = i.template like_this<NOp - 5>("%", '%', "xyztX");
auto t_bl_rows = i.template like_this<Nblk>("%", '%', "xyztX");
t_bl_rows.set_zero();
auto t_blbl =
contract<NOp * 2 - 5>(t_bl_rows, t_bl, "").make_sure(none, none, OnEveryoneReplicated);
contract<NOp + Nblk>(t_bl_rows, t_bl, "").make_sure(none, none, OnEveryoneReplicated);
assert(!t_blbl.isSubtensor());
COMPLEX* t_blbl_data = t_blbl.data.get();
for (std::size_t i = 0, vol = t_bl.volume(); i < vol; ++i)
@@ -563,7 +577,6 @@ namespace Chroma
unsigned int num_colors = coloring.numColors();

// Get the number of neighbors
constexpr int Nblk = NOp - Nd - 1;
std::vector<Coor<Nd>> neighbors(1, Coor<Nd>{{}});
for (unsigned int j = 0; j < Nd; ++j)
{
@@ -586,7 +599,7 @@ namespace Chroma
{rd.at('y'), max_dist_neigbors + max_dist_neigbors % maxX},
{rd.at('z'), max_dist_neigbors + max_dist_neigbors % maxX},
{rd.at('t'), max_dist_neigbors + max_dist_neigbors % maxX}});
SpTensor<NOp, NOp, COMPLEX> sop{d_sop, i, NOp - 5, NOp - 5, (unsigned int)neighbors.size()};
SpTensor<NOp, NOp, COMPLEX> sop{d_sop, i, Nblk, Nblk, (unsigned int)neighbors.size()};

int maxY = std::min(2, dims[1]);
int maxZ = std::min(2, dims[2]);
@@ -737,11 +750,13 @@ namespace Chroma
sop.construct();

// Construct the operator to return
Operator<NOp, COMPLEX> rop{[=](const Tensor<NOp + 1, COMPLEX>& x) {
Operator<NOp, COMPLEX> rop{
[=](const Tensor<NOp + 1, COMPLEX>& x, Tensor<NOp + 1, COMPLEX> y) {
auto x0 = x.reorder("%Xxyztn", '%');
auto y = x0.like_this();
sop.contractWith(x0, rd, y);
return y;
auto y0 = (y.order == x0.order ? y : x0.like_this());
sop.contractWith(x0, rd, y0);
if (y.data != y0.data)
y0.copyTo(y);
},
i, i, nullptr, op.order_t};

@@ -822,30 +837,28 @@ namespace Chroma

// Return the operator
Tensor<NOp, COMPLEX> d = op.d.like_this(none, nv_blk.kvdim()), i = op.i;
return {[=](const Tensor<NOp + 1, COMPLEX>& t) -> Tensor<NOp + 1, COMPLEX> {
auto out = i.template like_this<NOp + 1>("%n", '%', "", {{'n', t.kvdim()['n']}});
auto out_blk =
out.rename_dims({{'X', 'W'}})
return {[=](const Tensor<NOp + 1, COMPLEX>& x, Tensor<NOp + 1, COMPLEX> y) {
auto y_blk =
y.rename_dims({{'X', 'W'}})
.template split_dimension<NOp + 1 + 3 - 1>(
'x', "wXx", {{'w', dimw}, {'X', 1}, {'x', opdims.at('x') / bx * X}})
.split_dimension('y', "Yy", blocking.at('y'))
.split_dimension('z', "Zz", blocking.at('z'))
.split_dimension('t', "Tt", blocking.at('t'))
.rename_dims({{'c', 'C'}, {'s', 'S'}});
contract(nv_blk, t, "cs", CopyTo, out_blk);
return out;
contract(nv_blk, x, "cs", CopyTo, y_blk);
},
d, i,
[=](const Tensor<NOp + 1, COMPLEX>& t) -> Tensor<NOp + 1, COMPLEX> {
auto t_blk =
t.rename_dims({{'X', 'W'}})
[=](const Tensor<NOp + 1, COMPLEX>& x, Tensor<NOp + 1, COMPLEX> y) {
auto x_blk =
x.rename_dims({{'X', 'W'}})
.template split_dimension<NOp + 1 + 3 - 1>(
'x', "wXx", {{'w', dimw}, {'X', 1}, {'x', opdims.at('x') / bx * X}})
.split_dimension('y', "Yy", blocking.at('y'))
.split_dimension('z', "Zz", blocking.at('z'))
.split_dimension('t', "Tt", blocking.at('t'))
.rename_dims({{'c', 'C'}, {'s', 'S'}});
return contract<NOp + 1>(nv_blk.conj(), t_blk, "WwYZTSC");
contract<NOp + 1>(nv_blk.conj(), x_blk, "WwYZTSC", CopyTo, y);
},
op.order_t};
}
@@ -887,13 +900,11 @@ namespace Chroma
unsigned int create_coarse_max_rhs =
getOption<unsigned int>(ops, "create_coarse_max_rhs", 0);
const Operator<NOp, COMPLEX> op_c = cloneOperator(Operator<NOp, COMPLEX>{
[&](Tensor<NOp + 1, COMPLEX> x) {
auto y = x.like_this();
[&](const Tensor<NOp + 1, COMPLEX>& x, Tensor<NOp + 1, COMPLEX> y) {
foreachInChuncks(x, y, create_coarse_max_rhs,
[&](Tensor<NOp + 1, COMPLEX> x, Tensor<NOp + 1, COMPLEX> y) {
V.tconj()(op(V(x))).copyTo(y);
V.tconj()(op(V(x)), y);
});
return y;
},
V.d, V.d, nullptr, op.order_t});

@@ -905,7 +916,7 @@ namespace Chroma
const Operator<NOp, COMPLEX> opSolver = getSolver(op, getOptions(ops, "solver_smoother"));

// Return the solver
return {[=](Tensor<NOp + 1, COMPLEX> x) {
return {[=](const Tensor<NOp + 1, COMPLEX>& x, Tensor<NOp + 1, COMPLEX> y) {
// y0 = V*solver(V'*Op*V, V'x)
auto y0 = V(coarseSolver(V.tconj()(x)));

@@ -914,10 +925,8 @@ namespace Chroma
x.addTo(x1);

// y = y0 + solver(Op, x1)
auto y = opSolver(std::move(x1));
opSolver(std::move(x1), y);
y0.addTo(y);

return y;
},
op.i, op.d, nullptr, op.order_t};
}
@@ -943,18 +952,16 @@ namespace Chroma
{
LatticeFermion a;
auto d = asTensorView(a).toComplex().make_eg();
return {[&](Tensor<Nd + 4, Complex> t) {
auto r = t.like_this();
LatticeFermion x, y;
unsigned int n = t.kvdim()['n'];
return {[&](const Tensor<Nd + 4, Complex>& x, Tensor<Nd + 4, Complex> y) {
LatticeFermion x0, y0;
unsigned int n = x.kvdim()['n'];
for (unsigned int i = 0; i < n; ++i)
{
t.kvslice_from_size({{'n', i}}, {{'n', 1}}).copyTo(asTensorView(x));
y = zero;
linOp(y, x, PLUS /* I believe, it's ignored */);
asTensorView(y).copyTo(r.kvslice_from_size({{'n', i}}, {{'n', 1}}));
x.kvslice_from_size({{'n', i}}, {{'n', 1}}).copyTo(asTensorView(x0));
y0 = zero;
linOp(y0, x0, PLUS /* I believe, it's ignored */);
asTensorView(y0).copyTo(y.kvslice_from_size({{'n', i}}, {{'n', 1}}));
}
return r;
},
d, d};
}