diff --git a/CMakeLists.txt b/CMakeLists.txt
index eb33534f81..d475d0987f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,6 +28,8 @@ option(Chroma_ENABLE_LAPACK "Enable Lapack Binding in QDPLapack" OFF)
option(Chroma_ENABLE_OPENMP "Enable OpenMP" OFF)
option(Chroma_ENABLE_SUPERBBLAS "Enable tasks using Superbblas" OFF)
option(Chroma_ENABLE_PRIMME "Build with PRIMME" OFF)
+option(Chroma_ENABLE_CUDA "Build with CUDA" OFF)
+option(Chroma_ENABLE_ROCM "Build with ROCM" OFF)
set(BLAS_SUFFIX_LINK_FLAGS "" CACHE STRING "Extra linking flags placed after BLAS")
separate_arguments(Chroma_extra_link_flags UNIX_COMMAND ${BLAS_SUFFIX_LINK_FLAGS})
message(STATUS "Chroma extra links: ${Chroma_extra_link_flags}")
@@ -77,7 +79,7 @@ endif()
list( APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake )
find_package(GMP)
-if( GMP_FOUND )
+if( GMP_FOUND )
set(BUILD_GMP_REMEZ 1) # For the header file
endif()
# Create an imported target
@@ -93,7 +95,7 @@ if(Chroma_ENABLE_OPENMP)
if( NOT OpenMP_FOUND )
find_package(OpenMP REQUIRED)
endif()
-
+
if( NOT Threads_FOUND )
find_package(Threads REQUIRED)
endif()
@@ -116,7 +118,7 @@ endif()
# 2nd: Deal with QPhiX
##########################
if( Chroma_ENABLE_QPHIX )
-#
+#
# Deal with QPhix
# find_package(QPhiX)
endif()
@@ -141,8 +143,10 @@ if( Chroma_ENABLE_QUDA )
find_package(QUDA REQUIRED)
if( QUDA_TARGET_CUDA )
set(CHROMA_TARGET_CUDA 1)
+ set(Chroma_ENABLE_CUDA ON)
elseif( QUDA_TARGET_HIP )
set(CHROMA_TARGET_HIP 1)
+ set(Chroma_ENABLE_ROCM ON)
endif()
if (QDP_IS_QDPJIT)
message(STATUS "QDP is QDPJIT so enabling QDPJIT Interfaces")
@@ -151,14 +155,17 @@ if( Chroma_ENABLE_QUDA )
set(BUILD_QUDA_DEVIFACE_SPINOR 1)
endif()
endif()
+if ( Chroma_ENABLE_CUDA AND Chroma_ENABLE_ROCM )
+ message( FATAL_ERROR "Don't set Chroma_ENABLE_CUDA and Chroma_ENABLE_ROCM" )
+endif()
###########################
# 5th: Deal with MDWF
###########################
if( Chroma_ENABLE_MDWF )
-#
+#
# Gonna need to write a find module for this
-#
+#
endif()
####################################################
@@ -180,7 +187,7 @@ if( Chroma_ENABLE_LAPACK )
set(QDPLapack_BINDING "lapack")
endif()
add_subdirectory(other_libs/qdp-lapack)
-
+
###########################
# 6th: Deal with superbblas
###########################
@@ -189,7 +196,7 @@ if( Chroma_ENABLE_SUPERBBLAS )
include(FindBLAS)
find_package(BLAS REQUIRED)
-
+
if( QDP_BACKEND_ROCM AND NOT HIP_CPP_CONFIG )
find_package(HIP REQUIRED)
execute_process(
@@ -203,10 +210,14 @@ if( Chroma_ENABLE_SUPERBBLAS )
mark_as_advanced(HIP_CPP_CONFIG)
endif()
- if( QDP_BACKEND_ROCM )
+ if( QDP_BACKEND_ROCM OR Chroma_ENABLE_ROCM)
find_package(hipBLAS REQUIRED)
+ find_package(rocBLAS REQUIRED)
+ find_package(hipSPARSE REQUIRED)
+ find_package(hipSOLVER REQUIRED)
+ find_package(rocSOLVER REQUIRED)
endif()
- if( Chroma_ENABLE_CUDA )
+ if( QDP_BACKEND_CUDA OR Chroma_ENABLE_CUDA)
find_package(CUDAToolkit REQUIRED)
endif()
@@ -225,11 +236,11 @@ endif()
#####################################
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lib/chroma_config_internal.h.cmake.in
lib/chroma_config_internal.h)
-
+
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/lib/chroma_config_internal.h DESTINATION include)
# Now we are ready to build in lib
-# Heaven help us.
+# Heaven help us.
add_subdirectory(lib)
add_subdirectory(mainprogs/main)
@@ -239,7 +250,7 @@ add_subdirectory(mainprogs/tests)
# if needed
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/FindGMP.cmake
DESTINATION lib/cmake/Chroma )
-
+
#install the export description of the targets
install(EXPORT ChromaTargets
FILE ChromaTargets.cmake
diff --git a/configure.ac b/configure.ac
index 580fd3a98f..2b71641074 100755
--- a/configure.ac
+++ b/configure.ac
@@ -1282,7 +1282,7 @@ then
AC_SUBST(CUDA_CXXFLAGS, ["-I${CUDA_HOME}/include"] )
AC_SUBST(CUDA_LDFLAGS, ["-L${CUDA_LIBDIR} -Wl,-rpath,${CUDA_LIBDIR}"] )
- AC_SUBST(CUDA_LIBS, ["-lcublas -lcublasLt -lcudart -lcuda"])
+ AC_SUBST(CUDA_LIBS, ["-lcusolver -lcusparse -lcublas -lcublasLt -lcudart -lcuda"])
fi
###############################################
diff --git a/docs/notes/chroma_gamma_matrices.tex b/docs/notes/chroma_gamma_matrices.tex
index 8f82391405..ec9039b2ea 100644
--- a/docs/notes/chroma_gamma_matrices.tex
+++ b/docs/notes/chroma_gamma_matrices.tex
@@ -50,7 +50,7 @@
5 & 0101 & b1\_y & $\gamma_1 \gamma_3$ & $- b_1(y)$ & 10\\
6 & 0110 & b1\_x & $\gamma_2 \gamma_3$ & $b_1(x)$ & 9\\
7 & 0111 & pion\_2 & $\gamma_1 \gamma_2 \gamma_3 = \gamma_5 \gamma_4$ & $\pi$& 8 \\
-8 & 1000 & a0\_2 & $\gamma_4$ & $a_0$ & 7 \\
+8 & 1000 & b0 & $\gamma_4$ & $b_0$ & 7 \\
9 & 1001 & rho\_x\_2 & $\gamma_1 \gamma_4$ & $\varrho(x)$ & 6\\
10 & 1010 & rho\_y\_2 & $\gamma_2 \gamma_4$ & $\varrho(y)$ & 5\\
11 & 1011 & a1\_z & $\gamma_1 \gamma_2 \gamma_4 = \gamma_3 \gamma_5$ & $a_1(z)$ & 4\\
diff --git a/docs/superb_tasks.md b/docs/superb_tasks.md
index 1af97b6d59..47b8b7f973 100644
--- a/docs/superb_tasks.md
+++ b/docs/superb_tasks.md
@@ -75,7 +75,7 @@ Main options:
* Also all superb tasks support spatial phasing on the fly, so the option `phase` isn't usually needed.
-* The smearing information is stored in the created file's metadata. When a filed created with `write_fingerprint` being `true` is passed as input to another Chroma tasks, the smearing infomation passed as input to the task should be the same as the smearing used for generating the eigenvectors. In case is not, the Chroma task will emmit a runtime error.
+* The smearing information is stored in the created file's metadata. When a filed created with `write_fingerprint` being `true` is passed as input to another Chroma tasks, the smearing information passed as input to the task should be the same as the smearing used for generating the eigenvectors. In case is not, the Chroma task will emit a runtime error.
# Creation of mesons
@@ -844,3 +844,329 @@ Main options:
* `Param/eigensolver/verbosity`: (optional, default `nooutput`) one of `nooutput`, `summary`, `detailed`.
* `Param/Propagator`: propagator configuration.
* `NamedObject/eigs_file`: file to store the results.
+
+# `mgproton` solver collection
+
+## Flexible GMRES
+
+Example:
+```
+
+ MGPROTON
+ fgmres
+ 1e-7
+ 3
+ 20000
+ Detailed
+
+```
+
+Main options:
+
+* `tol`: residual norm tolerance, stopping when $\|Dx-b\|_2 \leq \text{tol}\ \|b\|_2$.
+* `max_basis_size`: (optional, default `5`) maximum size of the search subspace.
+* `max_its`: (optional, default infinity) maximum number of iterations.
+* `error_if_not_converged`: (optional, default `true`) whether to complain if tolerance is not achieved.
+* `prec`: (optional, default none) left preconditioning, does not affect residual norm.
+* `ortho_each_its`: (optional, default `8` for double and `4` for single precision) orthogonalize the basis every this number of iterations.
+* `max_residual_updates`: (optional, default `4` for double and `2` for single precision) recompute the residual vector every this number of iterations.
+* `max_simultaneous_rhs`: (optional, default infinity) solver this many right-hand-sides at once.
+* `verbosity`: (optional, default `nooutput`) level of verbosity, one of `nonoutput`, `summary`, `detailed`.
+* `prefix`: (optional, default none) prefix output related with this solver with this string.
+
+## BiCGstab
+
+Example:
+```
+
+ MGPROTON
+ bicgstab
+ 1e-7
+ 20000
+ Detailed
+
+```
+
+Main options:
+
+* `tol`: residual norm tolerance, stopping when $\|Dx-b\|_2 \leq \text{tol}\ \|b\|_2$.
+* `max_its`: (optional, default infinity) maximum number of iterations.
+* `error_if_not_converged`: (optional, default `true`) whether to complain if tolerance is not achieved.
+* `prec`: (optional, default none) left preconditioning, does not affect the residual norm.
+* `max_simultaneous_rhs`: (optional, default infinity) solver this many right-hand-sides at once.
+* `verbosity`: (optional, default `nooutput`) level of verbosity, one of `nonoutput`, `summary`, `detailed`.
+* `prefix`: (optional, default none) prefix output related with this solver with this string.
+
+## Minimum Residual (MR)
+
+Example:
+```
+
+ MGPROTON
+ mr
+ 1e-7
+ 20000
+ Detailed
+
+```
+
+Main options:
+
+* `tol`: residual norm tolerance, stopping when $\|Dx-b\|_2 \leq \text{tol}\ \|b\|_2$.
+* `max_its`: (optional, default infinity) maximum number of iterations.
+* `error_if_not_converged`: (optional, default `true`) whether to complain if tolerance is not achieved.
+* `prec`: (optional, default none) left preconditioning, does not affect residual norm.
+* `max_simultaneous_rhs`: (optional, default infinity) solver this many right-hand-sides at once.
+* `verbosity`: (optional, default `nooutput`) level of verbosity, one of `nonoutput`, `summary`, `detailed`.
+* `prefix`: (optional, default none) prefix output related with this solver with this string.
+
+## Generalized Conjugate Residual (GCR)
+
+Example:
+```
+
+ MGPROTON
+ gcr
+ 3
+ 1e-7
+ 20000
+ Detailed
+
+```
+
+Main options:
+
+* `tol`: residual norm tolerance, stopping when $\|Dx-b\|_2 \leq \text{tol}\ \|b\|_2$.
+* `max_basis_size`: (optional, default `3`) maximum size of the search subspace.
+* `max_its`: (optional, default infinity) maximum number of iterations.
+* `error_if_not_converged`: (optional, default `true`) whether to complain if tolerance is not achieved.
+* `prec`: (optional, default none) left preconditioning, does not affect residual norm.
+* `max_simultaneous_rhs`: (optional, default infinity) solver this many right-hand-sides at once.
+* `verbosity`: (optional, default `nooutput`) level of verbosity, one of `nonoutput`, `summary`, `detailed`.
+* `prefix`: (optional, default none) prefix output related with this solver with this string.
+
+## Even-odd preconditioning
+
+Approximate $D^{-1}$ by splitting the sites of $D$ into two colors (red-black, even-odd) and solving the Schur complement iteratively.
+
+Example:
+```
+
+ MGPROTON
+ eo
+
+ gcr
+ 1e-7
+ 3
+ 20000
+ Detailed
+
+
+```
+
+Main options:
+
+* `use_Aee_prec`: (optional, default `false`) whether to preconditioning the Schur complement with $D_{ee}^{-1}$, resulting in solving $(I-D_{eo}D_{oo}^{-1}D_{oe}D_{ee}^{-1})^{-1}$, insted of $(D_{ee}-D_{eo}D_{oo}^{-1}D_{oe})^{-1}$.
+* `prec_ee`: (optional, default none): left preconditioning acting on the even sites for solving the Schur complement.
+* `solver`: solver for the Schur complement.
+* `prefix`: (optional, default none) prefix output related with this solver with this string.
+
+## Multigrid preconditioner
+
+Example:
+```
+
+ MGPROTON
+ eo
+
+ mr
+ 1e-7
+ 20000
+ l0
+ Detailed
+
+
+ mg
+ 24
+ 4 4 4 4
+
+
+ eo
+
+ mr
+ 1e-3
+ 50
+ false
+ nv0
+ Detailed
+
+ dd
+
+ mr
+ 2
+ 1e-1
+ false
+ Detailed
+ nv0_dd
+
+
+
+
+
+
+ eo
+
+ mr
+ 1e-1
+ Detailed
+ c0
+
+ dd
+
+ mr
+ 2
+ 1e-1
+ false
+ Detailed
+ c0_dd
+
+
+
+
+
+ eo
+
+ mr
+ 1e-1
+ 5
+ false
+ s0
+
+ dd
+
+ mr
+ 4
+ 1e-1
+ false
+ Detailed
+ s0_dd
+
+
+
+
+
+
+```
+
+Main options:
+
+* `num_null_vecs`: number of null vectors.
+* `blocking`: sites factor reduction in each lattice direction for producing the prolongator $V$.
+* `null_vecs/solver`: if `null_vecs/eigensolver` is not given, solver to compute the null vectors approximating solutions of $Dx=0$; otherwise, solver used as the operator for the eigensolver.
+* `null_vecs/eigensolver`: (optional) if given, it uses as null vectors the approximated largest singular vectors of $D^{-1}$.
+* `null_vecs/tol`: (optional) if `null_vecs/eigensolver` is given, the tolerance for the eigensolver.
+* `solver_coarse`: solver for the coarse operator, $V^* D V$, where $V$ is the prolongator.
+* `solver_smoother`: solver for the correction (post-smoothing).
+
+# `mgproton` projection collection
+
+## Deflation projector
+
+If $U$ and $V$ and $\Sigma$ are the smallest singular triplets of $D$, that is $DV=\Sigma U$, then this builds the
+following oblique projector, $V(U^*D*V)^{-1}U^*D$.
+
+```
+
+ MGPROTON
+ defl
+ 200
+ 1e-1
+
+ bicgstab
+ 1e-2
+ 20000
+ Detailed
+ eig
+
+
+ Detailed
+
+
+```
+
+Main options:
+* `rank`: number of singular triplets to compute.
+* `tol`: (optional, default 0.1): relative error of the singular triplets relative to $\|D^{-1}\|_2$, $\|\gamma_5 D^{-1} v - \sigma v \|_2 \leq \text{tol}\ \|D^{-1}\|_2$.
+* `solver`: solver to estimate $D^{-1}$ used by the eigensolver.
+* `eigensolver/max_block_size`: (optional, default is `1`) maximum number of vectors expanding the search subspace in each iteration.
+* `eigensolver/max_basis_size`: (optional, default is `PRIMME`'s default) maximum rank of the search subspace.
+* `eigensolver/verbosity`: (optional, default `nooutput`) one of `nooutput`, `summary`, `detailed`.
+
+## Multigrid-based deflation projector
+
+If $U$ and $V$ and $\Sigma$ are the smallest singular triplets of $P^*DP$, that is $DV=\Sigma U$, and $P$ is a multigrid prolongator, then this builds the
+following oblique projector, $PV(U^*P^*D*PV)^{-1}U^*P^*D$.
+
+```
+
+ MGPROTON
+ mg
+
+ 24
+ 4 4 4 4
+
+
+ eo
+
+ mr
+ 1e-3
+ 50
+ false
+ nv0
+ Detailed
+
+ dd
+
+ mr
+ 2
+ 1e-1
+ false
+ Detailed
+ nv0_dd
+
+
+
+
+ 1e-2
+
+ Detailed
+
+
+
+
+ defl
+ 200
+ 1e-1
+
+ bicgstab
+ 1e-2
+ 20000
+ Detailed
+ eig
+
+
+ Detailed
+
+
+
+```
+
+Main options:
+* `prolongator/num_null_vecs`: number of null vectors.
+* `prolongator/blocking`: sites factor reduction in each lattice direction for producing the prolongator $V$.
+* `prolongator/null_vecs/solver`: if `prolongator/null_vecs/eigensolver` is not given, solver to compute the null vectors approximating solutions of $Dx=0$; otherwise, solver used as the operator for the eigensolver.
+* `prolongator/null_vecs/eigensolver`: (optional) if given, it uses as null vectors the approximated largest singular vectors of $D^{-1}$.
+* `prolongator/null_vecs/tol`: (optional) if `null_vecs/eigensolver` is given, the tolerance for the eigensolver.
+* `solver_coarse`: solver for the coarse operator, $V^* D V$, where $V$ is the prolongator.
+* `solver_smoother`: solver for the correction (post-smoothing).
+* `proj`: projector options onto the coarse operator, $V^* D V$.
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 10924b2507..82d1128a58 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -549,10 +549,11 @@ list(APPEND ChromaLIB_HEADERS
actions/ferm/fermacts/fermact_factory_w.h
actions/ferm/fermacts/clover_fermact_params_w.h
actions/ferm/fermacts/eoprec_clover_fermact_w.h
+ actions/ferm/fermacts/eoprec_exp_clover_fermact_w.h
actions/ferm/fermacts/seoprec_clover_fermact_w.h
actions/ferm/fermacts/eoprec_clover_orbifold_fermact_w.h
actions/ferm/fermacts/unprec_clover_fermact_w.h
- actions/ferm/fermacts/unprec_exp_clover_fermact_w.h
+ actions/ferm/fermacts/unprec_exp_clover_fermact_w.h
actions/ferm/fermacts/nef_fermact_params_w.h
actions/ferm/fermacts/eoprec_slic_fermact_w.h
actions/ferm/fermacts/eoprec_slrc_fermact_w.h
@@ -654,19 +655,20 @@ list(APPEND ChromaLIB_HEADERS
actions/ferm/linop/lwldslash_base_3d_w.h
actions/ferm/linop/lwldslash_3d_qdp_w.h
actions/ferm/linop/clover_term_w.h
- actions/ferm/linop/clov_triang_qdp_w.h
+ actions/ferm/linop/clov_triang_qdp_w.h
actions/ferm/linop/clover_term_base_w.h
actions/ferm/linop/exp_clover_term_base_w.h
actions/ferm/linop/clover_term_qdp_w.h
- actions/ferm/linop/exp_clover_term_w.h
+ actions/ferm/linop/exp_clover_term_w.h
actions/ferm/linop/exp_clover_term_qdp_w.h
actions/ferm/linop/eoprec_clover_linop_w.h
+ actions/ferm/linop/eoprec_exp_clover_linop_w.h
actions/ferm/linop/seoprec_clover_linop_w.h
actions/ferm/linop/shifted_linop_w.h
actions/ferm/linop/eoprec_clover_dumb_linop_w.h
actions/ferm/linop/eoprec_clover_orbifold_linop_w.h
actions/ferm/linop/unprec_clover_linop_w.h
- actions/ferm/linop/unprec_exp_clover_linop_w.h
+ actions/ferm/linop/unprec_exp_clover_linop_w.h
actions/ferm/linop/eoprec_clover_extfield_linop_w.h
actions/ferm/linop/eoprec_dwflike_linop_base_array_w.h
actions/ferm/linop/eoprec_dwf_linop_array_w.h
@@ -1015,6 +1017,7 @@ list(APPEND ChromaLIB_HEADERS
meas/inline/hadron/inline_genprop_matelem_colorvec_w.h
meas/inline/hadron/inline_genprop_matelem_da_colorvec_w.h
meas/inline/hadron/inline_genprop_matelem_pt_colorvec_w.h
+ meas/inline/hadron/inline_inverter_test_w.h
meas/inline/hadron/inline_mres_w.h
meas/inline/hadron/inline_qpropqio_w.h
meas/inline/hadron/inline_qpropadd_w.h
@@ -1372,6 +1375,7 @@ target_sources(chromalib PRIVATE
util/ferm/subset_vectors.cc
util/ferm/block_couplings.cc
util/ferm/disp_soln_cache.cc
+ util/ferm/mgproton.cc
util/ft/sftmom.cc
util/ft/single_phase.cc
util/ft/time_slice_set.cc
@@ -1446,6 +1450,7 @@ target_sources(chromalib PRIVATE
meas/hadron/dilution_quark_source_const_w.cc
util/gauge/cern_gauge_init.cc
io/readcern.cc
+ constant.cc
)
########################################################
@@ -1469,10 +1474,11 @@ target_sources(chromalib PRIVATE
actions/ferm/fermacts/eoprec_ovext_fermact_array_w.cc
actions/ferm/fermacts/clover_fermact_params_w.cc
actions/ferm/fermacts/eoprec_clover_fermact_w.cc
+ actions/ferm/fermacts/eoprec_exp_clover_fermact_w.cc
actions/ferm/fermacts/seoprec_clover_fermact_w.cc
actions/ferm/fermacts/eoprec_clover_orbifold_fermact_w.cc
actions/ferm/fermacts/unprec_clover_fermact_w.cc
- actions/ferm/fermacts/unprec_exp_clover_fermact_w.cc
+ actions/ferm/fermacts/unprec_exp_clover_fermact_w.cc
actions/ferm/fermacts/nef_fermact_params_w.cc
actions/ferm/fermacts/eoprec_slic_fermact_w.cc
actions/ferm/fermacts/eoprec_slrc_fermact_w.cc
@@ -1565,12 +1571,13 @@ target_sources(chromalib PRIVATE
actions/ferm/linop/unprec_wilson_linop_w.cc
actions/ferm/linop/clover_term_base_w.cc
actions/ferm/linop/clover_term_qdp_w.cc
- actions/ferm/linop/eoprec_clover_linop_w.cc
+ actions/ferm/linop/eoprec_clover_linop_w.cc
+ actions/ferm/linop/eoprec_exp_clover_linop_w.cc
actions/ferm/linop/seoprec_clover_linop_w.cc
actions/ferm/linop/eoprec_clover_dumb_linop_w.cc
actions/ferm/linop/eoprec_clover_orbifold_linop_w.cc
actions/ferm/linop/unprec_clover_linop_w.cc
- actions/ferm/linop/unprec_exp_clover_linop_w.cc
+ actions/ferm/linop/unprec_exp_clover_linop_w.cc
actions/ferm/linop/eoprec_clover_extfield_linop_w.cc
actions/ferm/linop/eoprec_slic_linop_w.cc
actions/ferm/linop/eoprec_slrc_linop_w.cc
@@ -1783,6 +1790,7 @@ target_sources(chromalib PRIVATE
meas/inline/hadron/inline_genprop_matelem_colorvec_w.cc
meas/inline/hadron/inline_genprop_matelem_da_colorvec_w.cc
meas/inline/hadron/inline_genprop_matelem_pt_colorvec_w.cc
+ meas/inline/hadron/inline_inverter_test_w.cc
meas/inline/hadron/inline_mres_w.cc
meas/inline/hadron/inline_qpropqio_w.cc
meas/inline/hadron/inline_qpropadd_w.cc
@@ -1929,7 +1937,9 @@ endif()
#######################################################
if( Chroma_ENABLE_JIT_CLOVER )
list(APPEND ChromaLIB_HEADERS actions/ferm/linop/clover_term_jit_w.h)
+ list(APPEND ChromaLIB_HEADERS actions/ferm/linop/clover_term_jit2_w.h)
target_sources(chromalib PRIVATE util/gauge/stout_utils_jit.cc)
+ target_sources(chromalib PRIVATE util/gauge/stout_utils_jit2.cc)
endif()
@@ -1946,11 +1956,13 @@ if( Chroma_ENABLE_QUDA )
actions/ferm/invert/quda_solvers/multi_syssolver_quda_clover_params.h
actions/ferm/invert/quda_solvers/syssolver_linop_clover_quda_w.h
actions/ferm/invert/quda_solvers/syssolver_linop_clover_quda_multigrid_w.h
+ actions/ferm/invert/quda_solvers/syssolver_linop_exp_clover_quda_multigrid_w.h
actions/ferm/invert/quda_solvers/syssolver_linop_nef_quda_w.h
actions/ferm/invert/quda_solvers/syssolver_linop_wilson_quda_w.h
actions/ferm/invert/quda_solvers/syssolver_linop_wilson_quda_multigrid_w.h
actions/ferm/invert/quda_solvers/syssolver_mdagm_clover_quda_w.h
actions/ferm/invert/quda_solvers/syssolver_mdagm_clover_quda_multigrid_w.h
+ actions/ferm/invert/quda_solvers/syssolver_mdagm_exp_clover_quda_multigrid_w.h
actions/ferm/invert/quda_solvers/syssolver_mdagm_wilson_quda_w.h
actions/ferm/invert/quda_solvers/multi_syssolver_mdagm_cg_clover_quda_w.h
actions/ferm/invert/quda_solvers/multi_syssolver_mdagm_cg_wilson_quda_w.h
@@ -1976,11 +1988,13 @@ target_sources(chromalib PRIVATE
actions/ferm/invert/quda_solvers/syssolver_quda_multigrid_wilson_params.cc
actions/ferm/invert/quda_solvers/syssolver_linop_clover_quda_w.cc
actions/ferm/invert/quda_solvers/syssolver_linop_clover_quda_multigrid_w.cc
+ actions/ferm/invert/quda_solvers/syssolver_linop_exp_clover_quda_multigrid_w.cc
actions/ferm/invert/quda_solvers/syssolver_linop_nef_quda_w.cc
actions/ferm/invert/quda_solvers/syssolver_linop_wilson_quda_w.cc
actions/ferm/invert/quda_solvers/syssolver_linop_wilson_quda_multigrid_w.cc
actions/ferm/invert/quda_solvers/syssolver_mdagm_clover_quda_w.cc
actions/ferm/invert/quda_solvers/syssolver_mdagm_clover_quda_multigrid_w.cc
+ actions/ferm/invert/quda_solvers/syssolver_mdagm_exp_clover_quda_multigrid_w.cc
actions/ferm/invert/quda_solvers/syssolver_mdagm_wilson_quda_w.cc
actions/ferm/invert/quda_solvers/multi_syssolver_mdagm_cg_clover_quda_w.cc
actions/ferm/invert/quda_solvers/multi_syssolver_mdagm_cg_wilson_quda_w.cc
@@ -2011,6 +2025,8 @@ if( Chroma_ENABLE_SUPERBBLAS )
meas/inline/hadron/inline_baryon_matelem_colorvec_superb_w.h
meas/inline/hadron/inline_meson_matelem_colorvec_superb_w.h
meas/inline/hadron/inline_create_colorvecs_superb.h
+ meas/inline/hadron/inline_eigenvalues_superb_w.h
+ meas/inline/hadron/inline_inverter_test_superb_w.h
)
target_sources(chromalib PRIVATE
meas/inline/hadron/inline_disco_prob_defl_superb_w.cc
@@ -2019,6 +2035,8 @@ if( Chroma_ENABLE_SUPERBBLAS )
meas/inline/hadron/inline_baryon_matelem_colorvec_superb_w.cc
meas/inline/hadron/inline_meson_matelem_colorvec_superb_w.cc
meas/inline/hadron/inline_create_colorvecs_superb.cc
+ meas/inline/hadron/inline_eigenvalues_superb_w.cc
+ meas/inline/hadron/inline_inverter_test_superb_w.cc
)
target_include_directories(chromalib PUBLIC ${SUPERBBLAS_DIR}/include)
add_library(superbblas STATIC IMPORTED )
@@ -2030,7 +2048,7 @@ if( Chroma_ENABLE_SUPERBBLAS )
add_library(magma STATIC IMPORTED )
set_target_properties( magma PROPERTIES IMPORTED_LOCATION ${MAGMA_DIR}/lib/libmagma.a )
target_link_libraries(chromalib PUBLIC magma )
- if( QDP_BACKEND_CUDA )
+ if( QDP_BACKEND_CUDA OR Chroma_ENABLE_CUDA)
target_link_libraries(magma INTERFACE CUDA::cublas CUDA::cublasLt CUDA::cusparse CUDA::cudart)
endif()
target_link_libraries(magma INTERFACE BLAS::BLAS ${Chroma_extra_link_flags})
@@ -2043,19 +2061,19 @@ if( Chroma_ENABLE_SUPERBBLAS )
target_link_libraries(chromalib PUBLIC primme )
if( Chroma_ENABLE_MAGMA )
target_link_libraries(primme INTERFACE magma)
- elseif( QDP_BACKEND_CUDA )
+ elseif( QDP_BACKEND_CUDA OR Chroma_ENABLE_CUDA)
target_link_libraries(primme INTERFACE CUDA::cublas CUDA::cudart BLAS::BLAS ${Chroma_extra_link_flags})
else()
target_link_libraries(primme INTERFACE BLAS::BLAS ${Chroma_extra_link_flags})
endif()
endif()
- if( QDP_BACKEND_ROCM )
+ if( QDP_BACKEND_ROCM OR Chroma_ENABLE_ROCM)
target_compile_options(chromalib PUBLIC ${HIP_CPP_CONFIG})
- target_link_libraries(superbblas INTERFACE roc::hipblas)
+ target_link_libraries(superbblas INTERFACE roc::hipblas roc::hipsparse roc::hipsolver roc::rocblas roc::rocsolver)
endif()
- if( QDP_BACKEND_CUDA )
- target_link_libraries(superbblas INTERFACE CUDA::cublas CUDA::cublasLt CUDA::cusparse CUDA::cudart)
+ if( QDP_BACKEND_CUDA OR Chroma_ENABLE_CUDA)
+ target_link_libraries(superbblas INTERFACE CUDA::cublas CUDA::cublasLt CUDA::cusparse CUDA::cusolver CUDA::cudart)
endif()
target_link_libraries(superbblas INTERFACE BLAS::BLAS ${Chroma_extra_link_flags})
endif()
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 95376eb4ae..6688c1ca39 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -927,9 +927,12 @@ nobase_include_HEADERS += \
meas/inline/hadron/inline_meson_matelem_colorvec_superb_w.h \
meas/inline/hadron/inline_unsmeared_hadron_node_distillation_w.h \
meas/inline/hadron/inline_unsmeared_hadron_node_distillation_superb_w.h \
+ meas/inline/hadron/inline_eigenvalues_superb_w.h \
meas/inline/hadron/inline_genprop_matelem_colorvec_w.h \
meas/inline/hadron/inline_genprop_matelem_da_colorvec_w.h \
meas/inline/hadron/inline_genprop_matelem_pt_colorvec_w.h \
+ meas/inline/hadron/inline_inverter_test_w.h \
+ meas/inline/hadron/inline_inverter_test_superb_w.h \
meas/inline/hadron/inline_mres_w.h \
meas/inline/hadron/inline_qpropqio_w.h \
meas/inline/hadron/inline_qpropadd_w.h \
@@ -1080,6 +1083,7 @@ lib_LIBRARIES = libchroma.a
# chroma/scripts/build_libchroma_sources.pl
#
libchroma_a_SOURCES = \
+ constant.cc \
actions/boson/operator/klein_gord.cc \
actions/ferm/fermacts/zolotarev_coeffs.cc \
actions/ferm/fermstates/hex_fermstate_params.cc \
@@ -1344,6 +1348,7 @@ libchroma_a_SOURCES = \
util/ferm/subset_vectors.cc \
util/ferm/block_couplings.cc \
util/ferm/disp_soln_cache.cc \
+ util/ferm/mgproton.cc \
util/ft/sftmom.cc \
util/ft/single_phase.cc \
util/ft/time_slice_set.cc \
@@ -1849,9 +1854,12 @@ libchroma_a_SOURCES += \
meas/inline/hadron/inline_meson_matelem_colorvec_superb_w.cc \
meas/inline/hadron/inline_unsmeared_hadron_node_distillation_w.cc \
meas/inline/hadron/inline_unsmeared_hadron_node_distillation_superb_w.cc \
+ meas/inline/hadron/inline_eigenvalues_superb_w.cc \
meas/inline/hadron/inline_genprop_matelem_colorvec_w.cc \
meas/inline/hadron/inline_genprop_matelem_da_colorvec_w.cc \
meas/inline/hadron/inline_genprop_matelem_pt_colorvec_w.cc \
+ meas/inline/hadron/inline_inverter_test_w.cc \
+ meas/inline/hadron/inline_inverter_test_superb_w.cc \
meas/inline/hadron/inline_mres_w.cc \
meas/inline/hadron/inline_qpropqio_w.cc \
meas/inline/hadron/inline_qpropadd_w.cc \
diff --git a/lib/actions/ferm/fermacts/eo3dprec_s_cprec_t_clover_fermact_w.cc b/lib/actions/ferm/fermacts/eo3dprec_s_cprec_t_clover_fermact_w.cc
index e244b71f87..26d1c694ec 100644
--- a/lib/actions/ferm/fermacts/eo3dprec_s_cprec_t_clover_fermact_w.cc
+++ b/lib/actions/ferm/fermacts/eo3dprec_s_cprec_t_clover_fermact_w.cc
@@ -7,6 +7,8 @@
#if QDP_ND == 4
#if QDP_NC == 3
+#if ! defined (QDP_IS_QDPJIT2)
+
#include "chromabase.h"
#include "actions/ferm/fermacts/eo3dprec_s_cprec_t_clover_fermact_w.h"
#include "actions/ferm/linop/eo3dprec_s_cprec_t_clover_linop_w.h"
@@ -79,6 +81,8 @@ namespace Chroma
}
+#endif
+
#endif
#endif
#endif
diff --git a/lib/actions/ferm/fermacts/eo3dprec_s_cprec_t_wilson_fermact_w.cc b/lib/actions/ferm/fermacts/eo3dprec_s_cprec_t_wilson_fermact_w.cc
index 16acd42f26..be5f70c6b9 100644
--- a/lib/actions/ferm/fermacts/eo3dprec_s_cprec_t_wilson_fermact_w.cc
+++ b/lib/actions/ferm/fermacts/eo3dprec_s_cprec_t_wilson_fermact_w.cc
@@ -7,6 +7,8 @@
#if QDP_NC == 3
#if QDP_ND == 4
+#if ! defined (QDP_IS_QDPJIT2)
+
#include "chromabase.h"
#include "actions/ferm/fermacts/eo3dprec_s_cprec_t_wilson_fermact_w.h"
#include "actions/ferm/linop/eo3dprec_s_cprec_t_wilson_linop_w.h"
@@ -79,6 +81,8 @@ namespace Chroma
}
+#endif
+
#endif
#endif
#endif
diff --git a/lib/actions/ferm/fermacts/eoprec_exp_clover_fermact_w.cc b/lib/actions/ferm/fermacts/eoprec_exp_clover_fermact_w.cc
new file mode 100644
index 0000000000..acc79f4fec
--- /dev/null
+++ b/lib/actions/ferm/fermacts/eoprec_exp_clover_fermact_w.cc
@@ -0,0 +1,91 @@
+/*! \file
+ * \brief Even-odd preconditioned ExpClover fermion action
+ */
+
+#include "chromabase.h"
+#include "actions/ferm/fermacts/fermact_factory_w.h"
+
+#include "actions/ferm/linop/eoprec_exp_clover_linop_w.h"
+#include "actions/ferm/fermacts/eoprec_exp_clover_fermact_w.h"
+#include "actions/ferm/invert/syssolver_linop_factory.h"
+
+//#include "actions/ferm/fermacts/fermact_factory_w.h"
+#include "actions/ferm/fermstates/ferm_createstate_reader_w.h"
+
+namespace Chroma
+{
+
+ //! Hooks to register the class with the fermact factory
+ namespace EvenOddPrecExpCloverFermActEnv
+ {
+ //! Callback function
+ WilsonTypeFermAct,
+ multi1d >* createFermAct4D(XMLReader& xml_in,
+ const std::string& path)
+ {
+ return new EvenOddPrecExpCloverFermAct(CreateFermStateEnv::reader(xml_in, path),
+ CloverFermActParams(xml_in, path));
+ }
+
+ //! Callback function
+ /*! Differs in return type */
+ FermionAction,
+ multi1d >* createFermAct(XMLReader& xml_in,
+ const std::string& path)
+ {
+ return createFermAct4D(xml_in, path);
+ }
+
+ //! Name to be used
+ const std::string name = "EXP_CLOVER";
+
+ //! Local registration flag
+ static bool registered = false;
+
+ //! Register all the factories
+ bool registerAll()
+ {
+ bool success = true;
+ if (! registered)
+ {
+ success &= Chroma::TheFermionActionFactory::Instance().registerObject(name, createFermAct);
+ success &= Chroma::TheWilsonTypeFermActFactory::Instance().registerObject(name, createFermAct4D);
+ registered = true;
+ }
+ return success;
+ }
+ }
+
+ //! Produce a linear operator for this action
+ /*!
+ * The operator acts on the odd subset
+ *
+ * \param state gauge field (Read)
+ */
+ EvenOddPrecLogDetLinearOperator,
+ multi1d >*
+ EvenOddPrecExpCloverFermAct::linOp(Handle< FermState > state) const
+ {
+ return new EvenOddPrecExpCloverLinOp(state,param);
+ }
+
+ //! Return a linear operator solver for this action to solve M*psi=chi
+ Projector*
+ EvenOddPrecExpCloverFermAct::projector(Handle< FermState > state,
+ const GroupXML_t& projParam) const
+ {
+ std::istringstream is(projParam.xml);
+ XMLReader paramtop(is);
+
+ return TheLinOpFermProjectorFactory::Instance().createObject(projParam.id,
+ paramtop,
+ projParam.path,
+ state,
+ linOp(state));
+ }
+
+}
+
diff --git a/lib/actions/ferm/fermacts/eoprec_exp_clover_fermact_w.h b/lib/actions/ferm/fermacts/eoprec_exp_clover_fermact_w.h
new file mode 100644
index 0000000000..7758b3dbfe
--- /dev/null
+++ b/lib/actions/ferm/fermacts/eoprec_exp_clover_fermact_w.h
@@ -0,0 +1,83 @@
+// -*- C++ -*-
+/*! \file
+ * \brief Even-odd preconditioned ExpClover fermion action
+ */
+
+#ifndef __prec_exp_clover_fermact_w_h__
+#define __prec_exp_clover_fermact_w_h__
+
+#include "eoprec_logdet_wilstype_fermact_w.h"
+#include "actions/ferm/linop/lgherm_w.h"
+#include "actions/ferm/fermacts/clover_fermact_params_w.h"
+
+namespace Chroma
+{
+ //! Name and registration
+ /*! \ingroup fermacts */
+ namespace EvenOddPrecExpCloverFermActEnv
+ {
+ extern const std::string name;
+ bool registerAll();
+ }
+
+
+ //! Even-odd preconditioned ExpClover fermion action
+ /*! \ingroup fermacts
+ *
+ * Even-odd preconditioned exponentiated clover fermion action.
+ * Only defined on odd subset.
+ */
+
+ class EvenOddPrecExpCloverFermAct : public EvenOddPrecLogDetWilsonTypeFermAct, multi1d >
+ {
+ public:
+ // Typedefs to save typing
+ typedef LatticeFermion T;
+ typedef multi1d P;
+ typedef multi1d Q;
+
+ //! Partial constructor
+ EvenOddPrecExpCloverFermAct() {}
+
+ //! General FermState
+ EvenOddPrecExpCloverFermAct(Handle< CreateFermState > cfs_,
+ const CloverFermActParams& param_) :
+ cfs(cfs_), param(param_) {}
+
+ //! Copy constructor
+ EvenOddPrecExpCloverFermAct(const EvenOddPrecExpCloverFermAct& a) :
+ cfs(a.cfs), param(a.param) {}
+
+ //! Produce a linear operator for this action
+ EvenOddPrecLogDetLinearOperator* linOp(Handle< FermState > state) const;
+
+ //! Produce the gamma_5 hermitian operator H_w
+ LinearOperator* hermitianLinOp(Handle< FermState > state) const
+ {
+ return new lgherm(linOp(state));
+ }
+
+ //! Return a projector after this action
+ Projector* projector(Handle< FermState > state,
+ const GroupXML_t& projParam) const override;
+
+ //! Destructor is automatic
+ ~EvenOddPrecExpCloverFermAct() {}
+
+ protected:
+ //! Return the fermion BC object for this action
+ const CreateFermState& getCreateState() const {return *cfs;}
+
+ //! Assignment
+ void operator=(const EvenOddPrecExpCloverFermAct& a) {}
+
+ private:
+ Handle< CreateFermState > cfs;
+ CloverFermActParams param;
+ };
+
+} // End Namespace Chroma
+
+
+#endif
diff --git a/lib/actions/ferm/fermacts/fermacts_aggregate_w.cc b/lib/actions/ferm/fermacts/fermacts_aggregate_w.cc
index 5b008c052f..cf43831133 100644
--- a/lib/actions/ferm/fermacts/fermacts_aggregate_w.cc
+++ b/lib/actions/ferm/fermacts/fermacts_aggregate_w.cc
@@ -6,6 +6,7 @@
#include "actions/ferm/fermacts/fermacts_aggregate_w.h"
#include "actions/ferm/fermacts/unprec_clover_fermact_w.h"
+#include "actions/ferm/fermacts/unprec_exp_clover_fermact_w.h"
#include "actions/ferm/fermacts/unprec_wilson_fermact_w.h"
#include "actions/ferm/fermacts/unprec_parwilson_fermact_w.h"
#include "actions/ferm/fermacts/unprec_graphene_fermact_w.h"
@@ -14,6 +15,7 @@
#include "actions/ferm/fermacts/unprec_w12_fermact_w.h"
#include "actions/ferm/fermacts/eoprec_clover_fermact_w.h"
+#include "actions/ferm/fermacts/eoprec_exp_clover_fermact_w.h"
#include "actions/ferm/fermacts/eoprec_clover_orbifold_fermact_w.h"
#include "actions/ferm/fermacts/eoprec_clover_extfield_fermact_w.h"
#include "actions/ferm/fermacts/eoprec_wilson_fermact_w.h"
@@ -100,8 +102,10 @@ namespace Chroma
success &= UnprecParWilsonFermActEnv::registerAll();
success &= EvenOddPrecCloverFermActEnv::registerAll();
+ success &= EvenOddPrecExpCloverFermActEnv::registerAll();
success &= SymEvenOddPrecCloverFermActEnv::registerAll();
success &= UnprecCloverFermActEnv::registerAll();
+ success &= UnprecExpCloverFermActEnv::registerAll();
success &= EvenOddPrecCloverOrbifoldFermActEnv::registerAll();
success &= EvenOddPrecSLICFermActEnv::registerAll();
success &= EvenOddPrecSLRCFermActEnv::registerAll();
@@ -116,6 +120,7 @@ namespace Chroma
#if QDP_NS == 4
#if QDP_NC == 3
#if QDP_ND == 4
+#if ! defined (QDP_IS_QDPJIT2)
success &= UnprecSpaceCentralPrecTimeWilsonFermActEnv::registerAll();
success &= ILUPrecSpaceCentralPrecTimeWilsonFermActEnv::registerAll();
success &= ILUPrecSpaceCentralPrecTimeCloverFermActEnv::registerAll();
@@ -125,6 +130,7 @@ namespace Chroma
success &= EO3DPrecSpaceCentralPrecTimeCloverFermActEnv::registerAll();
#endif
#endif
+#endif
#endif
registered = true;
}
diff --git a/lib/actions/ferm/fermacts/ilu2prec_s_cprec_t_clover_fermact_w.cc b/lib/actions/ferm/fermacts/ilu2prec_s_cprec_t_clover_fermact_w.cc
index 88f47349aa..71724edde2 100644
--- a/lib/actions/ferm/fermacts/ilu2prec_s_cprec_t_clover_fermact_w.cc
+++ b/lib/actions/ferm/fermacts/ilu2prec_s_cprec_t_clover_fermact_w.cc
@@ -6,6 +6,8 @@
#if QDP_ND == 4
#if QDP_NC == 3
+#if ! defined (QDP_IS_QDPJIT2)
+
#include "chromabase.h"
#include "actions/ferm/fermacts/ilu2prec_s_cprec_t_clover_fermact_w.h"
#include "actions/ferm/linop/ilu2prec_s_cprec_t_clover_linop_w.h"
@@ -81,3 +83,4 @@ namespace Chroma
#endif
#endif
#endif
+#endif
diff --git a/lib/actions/ferm/fermacts/ilu2prec_s_cprec_t_wilson_fermact_w.cc b/lib/actions/ferm/fermacts/ilu2prec_s_cprec_t_wilson_fermact_w.cc
index c0e2b1b27a..c1639e2d4a 100644
--- a/lib/actions/ferm/fermacts/ilu2prec_s_cprec_t_wilson_fermact_w.cc
+++ b/lib/actions/ferm/fermacts/ilu2prec_s_cprec_t_wilson_fermact_w.cc
@@ -6,6 +6,8 @@
#if QDP_ND == 4
#if QDP_NC == 3
+#if ! defined (QDP_IS_QDPJIT2)
+
#include "chromabase.h"
#include "actions/ferm/fermacts/ilu2prec_s_cprec_t_wilson_fermact_w.h"
#include "actions/ferm/linop/ilu2prec_s_cprec_t_wilson_linop_w.h"
@@ -81,3 +83,4 @@ namespace Chroma
#endif
#endif
#endif
+#endif
diff --git a/lib/actions/ferm/fermacts/iluprec_s_cprec_t_clover_fermact_w.cc b/lib/actions/ferm/fermacts/iluprec_s_cprec_t_clover_fermact_w.cc
index 4b25148ec8..a95745a8ab 100644
--- a/lib/actions/ferm/fermacts/iluprec_s_cprec_t_clover_fermact_w.cc
+++ b/lib/actions/ferm/fermacts/iluprec_s_cprec_t_clover_fermact_w.cc
@@ -6,6 +6,8 @@
#if QDP_ND == 4
#if QDP_NC == 3
+#if ! defined (QDP_IS_QDPJIT2)
+
#include "chromabase.h"
#include "actions/ferm/fermacts/iluprec_s_cprec_t_clover_fermact_w.h"
#include "actions/ferm/linop/iluprec_s_cprec_t_clover_linop_w.h"
@@ -81,3 +83,4 @@ namespace Chroma
#endif
#endif
#endif
+#endif
diff --git a/lib/actions/ferm/fermacts/iluprec_s_cprec_t_wilson_fermact_w.cc b/lib/actions/ferm/fermacts/iluprec_s_cprec_t_wilson_fermact_w.cc
index 2b06d244cf..82bc4e97c1 100644
--- a/lib/actions/ferm/fermacts/iluprec_s_cprec_t_wilson_fermact_w.cc
+++ b/lib/actions/ferm/fermacts/iluprec_s_cprec_t_wilson_fermact_w.cc
@@ -6,6 +6,8 @@
#if QDP_ND == 4
#if QDP_NC == 3
+#if ! defined (QDP_IS_QDPJIT2)
+
#include "chromabase.h"
#include "actions/ferm/fermacts/iluprec_s_cprec_t_wilson_fermact_w.h"
#include "actions/ferm/linop/iluprec_s_cprec_t_wilson_linop_w.h"
@@ -81,3 +83,4 @@ namespace Chroma
#endif
#endif
#endif
+#endif
diff --git a/lib/actions/ferm/fermacts/unprec_clover_fermact_w.cc b/lib/actions/ferm/fermacts/unprec_clover_fermact_w.cc
index d33ab30f9e..70ec53ea1e 100644
--- a/lib/actions/ferm/fermacts/unprec_clover_fermact_w.cc
+++ b/lib/actions/ferm/fermacts/unprec_clover_fermact_w.cc
@@ -7,6 +7,7 @@
#include "actions/ferm/linop/unprec_clover_linop_w.h"
#include "actions/ferm/fermacts/unprec_clover_fermact_w.h"
+#include "actions/ferm/invert/syssolver_linop_factory.h"
//#include "actions/ferm/fermacts/fermact_factory_w.h"
#include "actions/ferm/fermstates/ferm_createstate_reader_w.h"
@@ -72,5 +73,20 @@ namespace Chroma
return new UnprecCloverLinOp(state,param);
}
+
+ //! Return a linear operator solver for this action to solve M*psi=chi
+ Projector*
+ UnprecCloverFermAct::projector(Handle< FermState > state,
+ const GroupXML_t& projParam) const
+ {
+ std::istringstream is(projParam.xml);
+ XMLReader paramtop(is);
+
+ return TheLinOpFermProjectorFactory::Instance().createObject(projParam.id,
+ paramtop,
+ projParam.path,
+ state,
+ linOp(state));
+ }
}
diff --git a/lib/actions/ferm/fermacts/unprec_clover_fermact_w.h b/lib/actions/ferm/fermacts/unprec_clover_fermact_w.h
index 79e058b8a7..618bda9d05 100644
--- a/lib/actions/ferm/fermacts/unprec_clover_fermact_w.h
+++ b/lib/actions/ferm/fermacts/unprec_clover_fermact_w.h
@@ -52,6 +52,10 @@ namespace Chroma
return new lgherm(linOp(state));
}
+ //! Return a projector after this action
+ Projector* projector(Handle< FermState > state,
+ const GroupXML_t& projParam) const override;
+
//! Destructor is automatic
~UnprecCloverFermAct() {}
diff --git a/lib/actions/ferm/fermacts/unprec_s_cprec_t_wilson_fermact_w.cc b/lib/actions/ferm/fermacts/unprec_s_cprec_t_wilson_fermact_w.cc
index dabfde0ab3..90c24501de 100644
--- a/lib/actions/ferm/fermacts/unprec_s_cprec_t_wilson_fermact_w.cc
+++ b/lib/actions/ferm/fermacts/unprec_s_cprec_t_wilson_fermact_w.cc
@@ -8,6 +8,8 @@
#if QDP_NC == 3
#if QDP_ND == 4
+#if ! defined (QDP_IS_QDPJIT2)
+
#include "chromabase.h"
#include "actions/ferm/fermacts/unprec_s_cprec_t_wilson_fermact_w.h"
#include "actions/ferm/linop/unprec_s_cprec_t_wilson_linop_w.h"
@@ -83,3 +85,4 @@ namespace Chroma
#endif
#endif
#endif
+#endif
diff --git a/lib/actions/ferm/fermbcs/schr_sf_fermbc_w.cc b/lib/actions/ferm/fermbcs/schr_sf_fermbc_w.cc
index e4aad651e1..bb5b0eb5e1 100644
--- a/lib/actions/ferm/fermbcs/schr_sf_fermbc_w.cc
+++ b/lib/actions/ferm/fermbcs/schr_sf_fermbc_w.cc
@@ -90,7 +90,7 @@ namespace Chroma
if (mu != j_decay)
{
- Real ftmp = Chroma::twopi * getTheta()[i] / Real(QDP::Layout::lattSize()[mu]);
+ Real ftmp = Chroma::constant().twopi * getTheta()[i] / Real(QDP::Layout::lattSize()[mu]);
SFBndFld[mu] *= cmplx(cos(ftmp),sin(ftmp));
++i;
}
diff --git a/lib/actions/ferm/invert/containers.h b/lib/actions/ferm/invert/containers.h
index 2c7e1d9862..3df2139697 100644
--- a/lib/actions/ferm/invert/containers.h
+++ b/lib/actions/ferm/invert/containers.h
@@ -5,6 +5,9 @@
#include "chromabase.h"
+#if ! defined (QDP_IS_QDPJIT2)
+
+
namespace Chroma
{
namespace LinAlg
@@ -489,3 +492,4 @@ namespace Chroma
} // namespace Chroma
#endif
+#endif
diff --git a/lib/actions/ferm/invert/inv_eigcg2.cc b/lib/actions/ferm/invert/inv_eigcg2.cc
index 44151a9ae9..f08540eca9 100644
--- a/lib/actions/ferm/invert/inv_eigcg2.cc
+++ b/lib/actions/ferm/invert/inv_eigcg2.cc
@@ -12,6 +12,8 @@
#include "actions/ferm/invert/containers.h"
#include "actions/ferm/invert/norm_gram_schm.h"
+#if ! defined (QDP_IS_QDPJIT2)
+
//#define DEBUG
#define DEBUG_FINAL
@@ -1281,3 +1283,4 @@ namespace Chroma
}// End Namespace Chroma
+#endif
diff --git a/lib/actions/ferm/invert/inv_eigcg2.h b/lib/actions/ferm/invert/inv_eigcg2.h
index c7a1837ac5..89d911fa48 100644
--- a/lib/actions/ferm/invert/inv_eigcg2.h
+++ b/lib/actions/ferm/invert/inv_eigcg2.h
@@ -10,6 +10,8 @@
#include "syssolver.h"
#include "actions/ferm/invert/containers.h"
+#if ! defined (QDP_IS_QDPJIT2)
+
namespace Chroma
{
@@ -116,3 +118,4 @@ namespace Chroma
}// End Namespace Chroma
#endif
+#endif
diff --git a/lib/actions/ferm/invert/inv_eigcg2_array.cc b/lib/actions/ferm/invert/inv_eigcg2_array.cc
index db0418e2be..c51b7f79c8 100644
--- a/lib/actions/ferm/invert/inv_eigcg2_array.cc
+++ b/lib/actions/ferm/invert/inv_eigcg2_array.cc
@@ -12,6 +12,8 @@
#include "actions/ferm/invert/containers.h"
#include "actions/ferm/invert/norm_gram_schm.h"
+#if ! defined (QDP_IS_QDPJIT2)
+
//#define DEBUG
#define DEBUG_FINAL
@@ -997,3 +999,4 @@ namespace Chroma
}// End Namespace Chroma
+#endif
diff --git a/lib/actions/ferm/invert/inv_eigcg2_array.h b/lib/actions/ferm/invert/inv_eigcg2_array.h
index 9186f3d688..51b6b704ea 100644
--- a/lib/actions/ferm/invert/inv_eigcg2_array.h
+++ b/lib/actions/ferm/invert/inv_eigcg2_array.h
@@ -10,6 +10,8 @@
#include "syssolver.h"
#include "actions/ferm/invert/containers.h"
+#if ! defined (QDP_IS_QDPJIT2)
+
namespace Chroma
{
@@ -104,3 +106,4 @@ namespace Chroma
}// End Namespace Chroma
#endif
+#endif
diff --git a/lib/actions/ferm/invert/minv_rel_cg.cc b/lib/actions/ferm/invert/minv_rel_cg.cc
index 8343f89c25..cf1c1d8aa6 100644
--- a/lib/actions/ferm/invert/minv_rel_cg.cc
+++ b/lib/actions/ferm/invert/minv_rel_cg.cc
@@ -116,7 +116,7 @@ void MInvRelCG_a(const LinearOperator& A,
Double chi_norm = sqrt(chi_norm_sq);
- if( toBool( chi_norm < fuzz )) {
+ if( toBool( chi_norm < Chroma::constant().fuzz )) {
n_count = 0;
// The psi are all zero anyway at this point
diff --git a/lib/actions/ferm/invert/minvcg.cc b/lib/actions/ferm/invert/minvcg.cc
index 48d08ddc17..ab68bb4a7a 100644
--- a/lib/actions/ferm/invert/minvcg.cc
+++ b/lib/actions/ferm/invert/minvcg.cc
@@ -135,7 +135,7 @@ namespace Chroma
Double chi_norm_sq = norm2(chi_internal,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
Double chi_norm = sqrt(chi_norm_sq);
- if( toBool( chi_norm < fuzz ))
+ if( toBool( chi_norm < Chroma::constant().fuzz ))
{
swatch.stop();
diff --git a/lib/actions/ferm/invert/minvcg2.cc b/lib/actions/ferm/invert/minvcg2.cc
index c07d84d09a..d9014cf2f1 100644
--- a/lib/actions/ferm/invert/minvcg2.cc
+++ b/lib/actions/ferm/invert/minvcg2.cc
@@ -134,7 +134,7 @@ namespace Chroma
Double chi_norm_sq = norm2(chi_internal,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
Double chi_norm = sqrt(chi_norm_sq);
- if( toBool( chi_norm < fuzz ))
+ if( toBool( chi_norm < Chroma::constant().fuzz ))
{
swatch.stop();
diff --git a/lib/actions/ferm/invert/minvcg2_accum.cc b/lib/actions/ferm/invert/minvcg2_accum.cc
index c187cfe089..11433b6fde 100644
--- a/lib/actions/ferm/invert/minvcg2_accum.cc
+++ b/lib/actions/ferm/invert/minvcg2_accum.cc
@@ -132,7 +132,7 @@ namespace Chroma
Double chi_norm_sq = norm2(chi_internal,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
Double chi_norm = sqrt(chi_norm_sq);
- if( toBool( chi_norm < fuzz ))
+ if( toBool( chi_norm < Chroma::constant().fuzz ))
{
swatch.stop();
diff --git a/lib/actions/ferm/invert/minvcg_accumulate_array.cc b/lib/actions/ferm/invert/minvcg_accumulate_array.cc
index 362e3975c4..3e13fafe82 100644
--- a/lib/actions/ferm/invert/minvcg_accumulate_array.cc
+++ b/lib/actions/ferm/invert/minvcg_accumulate_array.cc
@@ -183,7 +183,7 @@ namespace Chroma
Double chi_norm = sqrt(chi_norm_sq);
- if( toBool( chi_norm < fuzz )) {
+ if( toBool( chi_norm < Chroma::constant().fuzz )) {
n_count = 0;
swatch.stop();
QDPIO::cout << "MinvCG: Finished. Iters taken = " << n_count << std::endl;
diff --git a/lib/actions/ferm/invert/minvcg_array.cc b/lib/actions/ferm/invert/minvcg_array.cc
index 87140756a3..6b764feaf5 100644
--- a/lib/actions/ferm/invert/minvcg_array.cc
+++ b/lib/actions/ferm/invert/minvcg_array.cc
@@ -174,7 +174,7 @@ namespace Chroma
Double chi_norm = sqrt(chi_norm_sq);
- if( toBool( chi_norm < fuzz )) {
+ if( toBool( chi_norm < Chroma::constant().fuzz )) {
n_count = 0;
swatch.stop();
QDPIO::cout << "MinvCG: Finished. Iters taken = " << n_count << std::endl;
diff --git a/lib/actions/ferm/invert/projector_null.h b/lib/actions/ferm/invert/projector_null.h
new file mode 100644
index 0000000000..f24ad1ab62
--- /dev/null
+++ b/lib/actions/ferm/invert/projector_null.h
@@ -0,0 +1,118 @@
+// -*- C++ -*-
+/*! \file
+ * \brief Oblique projector to a random vector (useful for testing)
+ */
+
+#ifndef __projector_null_h__
+#define __projector_null_h__
+
+#include "chroma_config.h"
+#include "handle.h"
+#include "syssolver.h"
+#include "linearop.h"
+
+namespace Chroma
+{
+
+ //! Return a null projector
+ /*! \ingroup invert
+ */
+ template
+ class ProjectorNull : public Projector
+ {
+ public:
+ using Ts = const std::vector>;
+ using const_Ts = const std::vector>;
+
+ //! Constructor
+ /*!
+ * \param A_ Linear operator ( Read )
+ */
+ ProjectorNull(Handle> A_) : A(A_)
+ {
+ }
+
+ //! Destructor is automatic
+ ~ProjectorNull() {}
+
+ //! Return the subset on which the operator acts
+ const Subset& subset() const
+ {
+ return A->subset();
+ }
+
+ //! Apply the oblique projector A*V*inv(U^H*A*V)*U^H
+ /*!
+ *! Returns A*V*inv(U^H*A*V)*U^H*chi = psi
+ */
+ void AVUObliqueProjector(Ts& psi, const_Ts&) const override
+ {
+ for (int i = 0; i < psi.size(); ++i)
+ {
+ *psi[i] = zero;
+ }
+ }
+
+ //! Apply the oblique projector V*inv(U^H*A*V)*U^H*A
+ /*!
+ * Returns V*inv(U^H*A*V)*U^H*A*chi = psi
+ */
+ void VUAObliqueProjector(Ts& psi, const_Ts&) const override
+ {
+ for (int i = 0; i < psi.size(); ++i)
+ {
+ *psi[i] = zero;
+ }
+ }
+
+ //! Rank of the projector, which is the rank of U and V also
+ unsigned int rank() const override
+ {
+ return 0;
+ }
+
+ //! Return U[i]
+ void U(unsigned int, T&) const override
+ {
+ throw std::runtime_error("ProjectorNull: rank of the projector is null");
+ }
+
+ //! Return V[i]
+ void V(unsigned int, T&) const override
+ {
+ throw std::runtime_error("ProjectorNull: rank of the projector is null");
+ }
+
+ //! Return U[i]^H*A*V[i]
+ void lambda(unsigned int, DComplex&) const override
+ {
+ throw std::runtime_error("ProjectorNull: rank of the projector is null");
+ }
+
+ private:
+ Handle> A;
+ };
+
+ //! Null projector namespace
+ namespace ProjectorNullEnv
+ {
+ Projector* createProjector(
+ XMLReader&, const std::string&,
+ Handle, multi1d>>,
+ Handle> A)
+ {
+ return new ProjectorNull(A);
+ }
+
+ //! Register the projector
+ inline bool registerAll() {
+ static bool registered = false;
+ if (registered) return true;
+ registered = true;
+ return Chroma::TheLinOpFermProjectorFactory::Instance().registerObject("NULL_PROJECTOR",
+ createProjector);
+ }
+ }
+} // End namespace
+
+#endif
diff --git a/lib/actions/ferm/invert/projector_random.h b/lib/actions/ferm/invert/projector_random.h
new file mode 100644
index 0000000000..4e2f310d84
--- /dev/null
+++ b/lib/actions/ferm/invert/projector_random.h
@@ -0,0 +1,130 @@
+// -*- C++ -*-
+/*! \file
+ * \brief Oblique projector to a random vector (useful for testing)
+ */
+
+#ifndef __projector_random_h__
+#define __projector_random_h__
+
+#include "chroma_config.h"
+#include "handle.h"
+#include "syssolver.h"
+#include "linearop.h"
+
+namespace Chroma
+{
+
+ //! Solve a M*psi=chi linear system by BICGSTAB
+ /*! \ingroup invert
+ */
+ template
+ class ProjectorRandom : public Projector
+ {
+ public:
+ using Ts = const std::vector>;
+ using const_Ts = const std::vector>;
+
+ //! Constructor
+ /*!
+ * \param A_ Linear operator ( Read )
+ */
+ ProjectorRandom(Handle> A_) : A(A_)
+ {
+ T u0 = zero, v0 = zero;
+ random(v0, A->subset());
+ v = v0 / sqrt(norm2(v0, A->subset()));
+ (*A)(u0, v, PLUS);
+ l = sqrt(norm2(u0, A->subset()));
+ u = u0 / l;
+ }
+
+ //! Destructor is automatic
+ ~ProjectorRandom() {}
+
+ //! Return the subset on which the operator acts
+ const Subset& subset() const
+ {
+ return A->subset();
+ }
+
+ //! Apply the oblique projector A*V*inv(U^H*A*V)*U^H
+ /*!
+ *! Returns A*V*inv(U^H*A*V)*U^H*chi = psi
+ */
+ void AVUObliqueProjector(Ts& psi, const_Ts& chi) const override
+ {
+ for (int i = 0; i < psi.size(); ++i)
+ {
+ // Return u * (u^* * chi)
+ *psi[i] = u * innerProduct(u, *chi[i], A->subset());
+ }
+ }
+
+ //! Apply the oblique projector V*inv(U^H*A*V)*U^H*A
+ /*!
+ * Returns V*inv(U^H*A*V)*U^H*A*chi = psi
+ */
+ void VUAObliqueProjector(Ts& psi, const_Ts& chi) const override
+ {
+ for (int i = 0; i < psi.size(); ++i)
+ {
+ T A_chi = zero;
+ (*A)(A_chi, *chi[i], PLUS);
+ // Return v * (u^* A * chi) / l
+ *psi[i] = v * (innerProduct(u, A_chi, A->subset()) / l);
+ }
+ }
+
+ //! Rank of the projector, which is the rank of U and V also
+ unsigned int rank() const override
+ {
+ return 1;
+ }
+
+ //! Return U[i]
+ void U(unsigned int i, T& psi) const override
+ {
+ psi = u;
+ }
+
+ //! Return V[i]
+ void V(unsigned int i, T& psi) const override
+ {
+ psi = v;
+ }
+
+ //! Return U[i]^H*A*V[i]
+ void lambda(unsigned int i, DComplex& lambda) const override
+ {
+ lambda = l;
+ }
+
+ private:
+ Handle> A;
+ T u, v;
+ DComplex l; ///< = u^* * A * v
+ };
+
+ //! Random projector namespace
+ namespace ProjectorRandomEnv
+ {
+ Projector* createProjector(
+ XMLReader&, const std::string&,
+ Handle, multi1d>>,
+ Handle> A)
+ {
+ return new ProjectorRandom(A);
+ }
+
+ //! Register the projector
+ inline bool registerAll() {
+ static bool registered = false;
+ if (registered) return true;
+ registered = true;
+ return Chroma::TheLinOpFermProjectorFactory::Instance().registerObject("RANDOM_PROJECTOR",
+ createProjector);
+ }
+ }
+} // End namespace
+
+#endif
diff --git a/lib/actions/ferm/invert/quda_solvers/multi_syssolver_mdagm_cg_clover_quda_w.h b/lib/actions/ferm/invert/quda_solvers/multi_syssolver_mdagm_cg_clover_quda_w.h
index 453db40d56..177011181a 100644
--- a/lib/actions/ferm/invert/quda_solvers/multi_syssolver_mdagm_cg_clover_quda_w.h
+++ b/lib/actions/ferm/invert/quda_solvers/multi_syssolver_mdagm_cg_clover_quda_w.h
@@ -365,19 +365,6 @@ namespace Chroma
#endif
- // Autotuning
- if( invParam.tuneDslashP ) {
- QDPIO::cout << "Enabling Dslash Autotuning" << std::endl;
-
- quda_inv_param.tune = QUDA_TUNE_YES;
- }
- else {
- QDPIO::cout << "Disabling Dslash Autotuning" << std::endl;
-
- quda_inv_param.tune = QUDA_TUNE_NO;
- }
-
-
// PADDING
// Setup padding
diff --git a/lib/actions/ferm/invert/quda_solvers/multi_syssolver_mdagm_cg_wilson_quda_w.h b/lib/actions/ferm/invert/quda_solvers/multi_syssolver_mdagm_cg_wilson_quda_w.h
index 55ca6ec98d..4301c1cb7b 100644
--- a/lib/actions/ferm/invert/quda_solvers/multi_syssolver_mdagm_cg_wilson_quda_w.h
+++ b/lib/actions/ferm/invert/quda_solvers/multi_syssolver_mdagm_cg_wilson_quda_w.h
@@ -270,17 +270,6 @@ namespace Chroma
quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
- // Autotuning
- if( invParam.tuneDslashP ) {
- QDPIO::cout << "Enabling Dslash Autotuning" << std::endl;
-
- quda_inv_param.tune = QUDA_TUNE_YES;
- }
- else {
- QDPIO::cout << "Disabling Dslash Autotuning" << std::endl;
-
- quda_inv_param.tune = QUDA_TUNE_NO;
- }
// PADDING
diff --git a/lib/actions/ferm/invert/quda_solvers/multi_syssolver_quda_clover_params.cc b/lib/actions/ferm/invert/quda_solvers/multi_syssolver_quda_clover_params.cc
index 06de67f519..e5025a2f6b 100644
--- a/lib/actions/ferm/invert/quda_solvers/multi_syssolver_quda_clover_params.cc
+++ b/lib/actions/ferm/invert/quda_solvers/multi_syssolver_quda_clover_params.cc
@@ -34,7 +34,7 @@ namespace Chroma {
read(paramtop, "AsymmetricLinop", asymmetricP);
}
else {
- asymmetricP = false; // Symmetric is default
+ asymmetricP = true; // Asymmetric (i.e. CLOVER) is default
}
if( paramtop.count("CudaPrecision") > 0 ) {
@@ -100,14 +100,6 @@ namespace Chroma {
RsdToleranceFactor = Real(10); // Tolerate an order of magnitude difference by default.
}
- if( paramtop.count("AutotuneDslash") > 0 ) {
- read(paramtop, "AutotuneDslash", tuneDslashP);
- }
- else {
- tuneDslashP = false;
- }
- QDPIO::cout << "tuneDslasP = " << tuneDslashP << std::endl;
-
if( paramtop.count("GCRInnerParams") > 0 ) {
innerParams = new GCRInnerSolverParams(paramtop, "./GCRInnerParams");
@@ -160,7 +152,6 @@ namespace Chroma {
write(xml, "SilentFail", p.SilentFailP);
write(xml, "RsdToleranceFactor", p.RsdToleranceFactor);
write(xml, "CheckShifts", p.checkShiftsP);
- write(xml, "AutotuneDslash", p.tuneDslashP);
write(xml, "Pipeline", p.Pipeline);
if( p.innerParamsP ) {
diff --git a/lib/actions/ferm/invert/quda_solvers/multi_syssolver_quda_clover_params.h b/lib/actions/ferm/invert/quda_solvers/multi_syssolver_quda_clover_params.h
index 7657db3a24..e0b719b18c 100644
--- a/lib/actions/ferm/invert/quda_solvers/multi_syssolver_quda_clover_params.h
+++ b/lib/actions/ferm/invert/quda_solvers/multi_syssolver_quda_clover_params.h
@@ -22,11 +22,10 @@ namespace Chroma
cudaSloppyReconstruct=RECONS_12;
cudaRefinementPrecision=DEFAULT;
cudaRefinementReconstruct=RECONS_12;
- asymmetricP = false; //< Use asymmetric version of the linear operator
+ asymmetricP = true; //< Use asymmetric version of the linear operator
axialGaugeP = false; //< Fix Axial Gauge?
SilentFailP = false; //< If set to true ignore lack of convergence. Default is 'loud'
RsdToleranceFactor = Real(10); //< Tolerate if the solution achived is better (less) than rsdToleranceFactor*RsdTarget
- tuneDslashP = false ; //< v0.3 autotune feature
verboseP = false;
innerParamsP = false;
checkShiftsP = true;
@@ -50,7 +49,6 @@ namespace Chroma
axialGaugeP = p.axialGaugeP;
SilentFailP = p.SilentFailP;
RsdToleranceFactor = p.RsdToleranceFactor;
- tuneDslashP = p.tuneDslashP;
innerParamsP = p.innerParamsP;
innerParams = p.innerParams;
checkShiftsP = p.checkShiftsP;
@@ -75,7 +73,6 @@ namespace Chroma
bool axialGaugeP;
bool SilentFailP;
Real RsdToleranceFactor;
- bool tuneDslashP;
bool innerParamsP;
bool checkShiftsP;
int Pipeline;
diff --git a/lib/actions/ferm/invert/quda_solvers/quda_mg_utils.h b/lib/actions/ferm/invert/quda_solvers/quda_mg_utils.h
index e73e17e738..0eb3339c91 100644
--- a/lib/actions/ferm/invert/quda_solvers/quda_mg_utils.h
+++ b/lib/actions/ferm/invert/quda_solvers/quda_mg_utils.h
@@ -136,15 +136,6 @@ namespace Chroma {
mg_inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
//
//Done...
- // Autotuning
- if( invParam.tuneDslashP ) {
- QDPIO::cout << "Enabling MG Dslash Autotuning" << std::endl;
- mg_inv_param.tune = QUDA_TUNE_YES;
- }
- else {
- QDPIO::cout << "Disabling MG Dslash Autotuning" << std::endl;
- mg_inv_param.tune = QUDA_TUNE_NO;
- }
if( invParam.MULTIGRIDParamsP ) {
QDPIO::cout << "Setting MULTIGRID solver params" << std::endl;
// Dereference handle
@@ -255,6 +246,7 @@ namespace Chroma {
// FIXME: Elevate ip.nvec, ip.nu_pre, ip.nu_post, ip.tol to arrays in the XML
if ( i < mg_param.n_level-1) {
mg_param.n_vec[i] = ip.nvec[i];
+ mg_param.n_vec_batch[i] = ip.nvec_batch[i];
mg_param.nu_pre[i] = ip.nu_pre[i];
mg_param.nu_post[i] = ip.nu_post[i];
}
diff --git a/lib/actions/ferm/invert/quda_solvers/quda_multigrid_params.cc b/lib/actions/ferm/invert/quda_solvers/quda_multigrid_params.cc
index e4eefdf7a9..d876cb2c6d 100644
--- a/lib/actions/ferm/invert/quda_solvers/quda_multigrid_params.cc
+++ b/lib/actions/ferm/invert/quda_solvers/quda_multigrid_params.cc
@@ -62,6 +62,7 @@ namespace Chroma {
nvec.resize(mg_levels-1);
+ nvec_batch.resize(mg_levels-1);
nu_pre.resize(mg_levels-1);
nu_post.resize(mg_levels-1);
maxIterSubspaceCreate.resize(mg_levels-1);
@@ -104,7 +105,6 @@ namespace Chroma {
<< " blockings but only " << nvec.size() << " sets of NullVectors" << std::endl;
QDP_abort(1);
}
-
if (nu_pre.size() != mg_levels-1 ) {
QDPIO::cout<<"Error. There are "<< (mg_levels-1)
@@ -112,6 +112,31 @@ namespace Chroma {
QDP_abort(1);
}
+ {
+ int paramcount = paramtop.count("NullVectorsBatchSize");
+ if ( paramcount == 1 ) {
+ read(paramtop, "NullVectorsBatchSize", nvec_batch);
+ if (nvec_batch.size() != mg_levels - 1 ) {
+ QDPIO::cout << "If NullVectorsBatchSize is given, then for "
+ << mg_levels << " levels, there must be " << mg_levels-1
+ << " values in the input. Currently the input has "
+ << nvec_batch.size() << " values \n";
+ QDP_abort(1);
+ }
+ }
+ else if ( paramcount > 1 ) {
+ QDPIO::cout << "NullVectorsBatchSize occurs more than once in this input\n";
+ QDP_abort(1);
+ }
+ else {
+ // Not found in output
+ nvec_batch.resize(mg_levels-1);
+ for( int i=0; i < mg_levels-1; i++) nvec_batch[i] = 1;
+ }
+ }
+
+
+
subspaceSolver.resize(mg_levels-1);
readArray(paramtop, "SubspaceSolver", subspaceSolver, CG);
@@ -258,6 +283,7 @@ namespace Chroma {
write(xml, "Reconstruct", p.reconstruct);
write(xml, "SchwarzType", p.schwarzType);
write(xml, "NullVectors", p.nvec);
+ write(xml, "NullVectorsBatchSize", p.nvec_batch);
write(xml, "MultiGridLevels", p.mg_levels);
write(xml, "GenerateNullSpace", p.generate_nullspace);
write(xml, "GenerateAllLevels", p.generate_all_levels);
diff --git a/lib/actions/ferm/invert/quda_solvers/quda_multigrid_params.h b/lib/actions/ferm/invert/quda_solvers/quda_multigrid_params.h
index e95a0aeede..0614d05a02 100644
--- a/lib/actions/ferm/invert/quda_solvers/quda_multigrid_params.h
+++ b/lib/actions/ferm/invert/quda_solvers/quda_multigrid_params.h
@@ -35,6 +35,7 @@ namespace Chroma
multi1d nu_pre;
multi1d nu_post;
multi1d< multi1d > blocking;
+ multi1d nvec_batch;
int outer_gcr_nkrylov;
int precond_gcr_nkrylov;
std::string cycle_type;
@@ -65,6 +66,7 @@ namespace Chroma
}
blocking.resize(mg_levels-1);
nvec.resize(mg_levels-1);
+ nvec_batch.resize(mg_levels-1);
nu_pre.resize(mg_levels-1);
nu_post.resize(mg_levels-1);
maxIterSubspaceCreate.resize(mg_levels-1);
@@ -87,7 +89,7 @@ namespace Chroma
nu_pre[l] = 2;
nu_post[l] = 2;
nvec[l] = 16;
-
+ nvec_batch[l]=1; // the batch size for Nvec solves is 1 by default
// Default params:
maxIterSubspaceCreate[l] = 500;
rsdTargetSubspaceCreate[l] = 5.0e-6;
diff --git a/lib/actions/ferm/invert/quda_solvers/syssolver_linop_clover_quda_multigrid_w.cc b/lib/actions/ferm/invert/quda_solvers/syssolver_linop_clover_quda_multigrid_w.cc
index 00a2ecba50..58211879fe 100644
--- a/lib/actions/ferm/invert/quda_solvers/syssolver_linop_clover_quda_multigrid_w.cc
+++ b/lib/actions/ferm/invert/quda_solvers/syssolver_linop_clover_quda_multigrid_w.cc
@@ -64,14 +64,13 @@ namespace Chroma
T& psi_s) const{
SystemSolverResults_t ret;
-
+ const auto& sub = A->subset();
T mod_chi;
// Copy source into mod_chi, and zero the off-parity
- mod_chi[rb[0]] = zero;
-
-
- if( invParam.asymmetricP ) {
+ if( is_precond ) {
+ mod_chi[rb[0]] = zero;
+ if( invParam.asymmetricP ) {
//
// symmetric
// Solve with M_symm = 1 - A^{-1}_oo D A^{-1}ee D
@@ -80,15 +79,18 @@ namespace Chroma
//
// So M x = b => A_oo (M_symm) x = b
// => M_symm x = A^{-1}_oo b = chi_mod
- invclov.apply(mod_chi, chi_s, PLUS, 1);
- }
- else {
- mod_chi[rb[1]] = chi_s;
- }
-
+ invclov.apply(mod_chi, chi_s, PLUS, 1);
+ }
+ else {
+ mod_chi[rb[1]] = chi_s;
+ }
+ }
+ else {
+ mod_chi = chi_s;
+ }
#ifndef BUILD_QUDA_DEVIFACE_SPINOR
- void* spinorIn =(void *)&(mod_chi.elem(rb[1].start()).elem(0).elem(0).real());
- void* spinorOut =(void *)&(psi_s.elem(rb[1].start()).elem(0).elem(0).real());
+ void* spinorIn =(void *)&(mod_chi.elem(sub.start()).elem(0).elem(0).real());
+ void* spinorOut =(void *)&(psi_s.elem(sub.start()).elem(0).elem(0).real());
#else
// void* spinorIn = GetMemoryPtr( mod_chi.getId() );
// void* spinorOut = GetMemoryPtr( psi_s.getId() );
@@ -111,11 +113,141 @@ namespace Chroma
QDPIO::cout << "\tTotal Time (incl. load gauge)=" << swatch1.getTimeInSeconds() <<" s"<& invclov,
+ const std::vector>& psi_s,
+ const std::vector>& chi_s,
+ std::vector& res) const {
+
+
+ StopWatch source_prep;
+ source_prep.reset();
+ source_prep.start();
+
+ multi1d mod_chi(chi_s.size());
+ const auto& sub = A->subset();
+
+ for(int i=0; i < chi_s.size(); i++) {
+
+ if( is_precond ) {
+ // Copy source into mod_chi, and zero the off-parity
+ mod_chi[i][rb[0]] = zero;
+
+ if( invParam.asymmetricP ) {
+ //
+ // symmetric
+ // Solve with M_symm = 1 - A^{-1}_oo D A^{-1}ee D
+ //
+ // Chroma M = A_oo ( M_symm )
+ //
+ // So M x = b => A_oo (M_symm) x = b
+ // => M_symm x = A^{-1}_oo b = chi_mod
+ invclov.apply(mod_chi[i], *(chi_s[i]), PLUS, 1);
+ }
+ else {
+ mod_chi[i][rb[1]] = *(chi_s[i]);
+ }
+ }
+ else {
+ mod_chi[i] = *(chi_s[i]);
+ }
+ }
+
+ std::vector spinorIn(chi_s.size());
+ std::vector spinorOut(psi_s.size());
+
+ int N_src = chi_s.size();
+#ifndef BUILD_QUDA_DEVIFACE_SPINOR
+ // Regular non-qdpjit approach. Just collect the pointers
+ for(int soln=0; soln < chi_s.size(); soln++) {
+ spinorIn[soln] = (void *)&(mod_chi[soln].elem(sub.start()).elem(0).elem(0).real());
+ spinorOut[soln] = (void *)&(psi_s[soln]->elem(sub.start()).elem(0).elem(0).real());
+ }
+#else
+ std::vector ids(2*N_src);
+
+ for(int soln=0; soln < N_src; soln++) {
+ ids[soln] = mod_chi[soln].getId();
+ ids[N_src+soln] = psi_s[soln]->getId();
+ }
+
+ // Grab all the keys
+ auto dev_ptr = QDP_get_global_cache().get_dev_ptrs( multi1d( ids.data(), ids.size()) );
+
+
+ for(int soln=0; soln < N_src; soln++) {
+ spinorIn[soln] = dev_ptr(soln);
+ spinorOut[soln] = dev_ptr(N_src+soln);
+ }
+ source_prep.stop();
+#endif
+
+ // Local quda_inv_param (?)
+ // Relies on quda_inv_param being just a dumb/struct and or copyable
+ QudaInvertParam local_quda_inv_param = quda_inv_param ;
+ local_quda_inv_param.num_src = mod_chi.size();
+
+ // No grid splitting for MG yet, so commenting that out
+#if 0
+ int totalSubgrids=1;
+ const multi1d& machine_size=QDP::Layout::logicalSize();
+
+ for (int i = 0; i < Nd; i++) {
+ local_quda_inv_param.split_grid[i] = invParam.GridSplitDims[i];
+ totalSubgrids *= invParam.GridSplitDims[i];
+ if ( machine_size[i] % invParam.GridSplitDims[i] != 0 ) {
+ QDPIO::cerr << "The split-grid-subgrid dimensions must divide the number ranks in each dimension exactly\n";
+ QDPIO::cerr << "Currently this is not the case: dim=" << i << " machine_size["< > A_,
Handle< FermState > state_,
const SysSolverQUDAMULTIGRIDCloverParams& invParam_) :
- A(A_), invParam(invParam_), clov(new CloverTermT() ), invclov(new CloverTermT())
+ A(A_), is_precond( true ), invParam(invParam_), clov(new CloverTermT() ), invclov(new CloverTermT())
{
StopWatch init_swatch;
init_swatch.reset(); init_swatch.start();
@@ -85,7 +85,11 @@ namespace Chroma
QDPIO::cout << solver_string << "Initializing" << std::endl;
// FOLLOWING INITIALIZATION in test QUDA program
-
+ const auto& sub = A->subset();
+ if( sub.start() == all.start() && sub.numSiteTable() == all.numSiteTable()) {
+ is_precond = false;
+ }
+
// 1) work out cpu_prec, cuda_prec, cuda_prec_sloppy
int s = sizeof( WordType::Type_t );
if (s == 4) {
@@ -266,32 +270,9 @@ namespace Chroma
// Solution type
//quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
//Taken from invert test.
- quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
+ quda_inv_param.solution_type = is_precond ? QUDA_MATPC_SOLUTION : QUDA_MAT_SOLUTION;
quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
-
- // Solve type
- /*switch( invParam.solverType ) {
- case CG:
- quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
- break;
- case BICGSTAB:
- quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
- break;
- case GCR:
- quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
- break;
-
- case MR:
- quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
- break;
-
- default:
- quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
-
- break;
- }*/
-
- quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
+ quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD; // Always
quda_inv_param.dagger = QUDA_DAG_NO;
quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
@@ -317,13 +298,6 @@ namespace Chroma
quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
#endif
- // Autotuning
- if( invParam.tuneDslashP ) {
- quda_inv_param.tune = QUDA_TUNE_YES;
- }
- else {
- quda_inv_param.tune = QUDA_TUNE_NO;
- }
// Setup padding
@@ -583,13 +557,13 @@ namespace Chroma
T g_chi,g_psi;
// Gauge Fix source and initial guess
- g_chi[ rb[1] ] = GFixMat * chi;
- g_psi[ rb[1] ] = GFixMat * psi;
+ g_chi[ A->subset() ] = GFixMat * chi;
+ g_psi[ A->subset() ] = GFixMat * psi;
res = qudaInvert(*clov,
*invclov,
g_chi,
g_psi);
- psi[ rb[1]] = adj(GFixMat)*g_psi;
+ psi[ A->subset() ] = adj(GFixMat)*g_psi;
}
else {
@@ -600,33 +574,25 @@ namespace Chroma
}
swatch.stop();
- Double rel_resid;
if( invParam.SolutionCheckP ) {
-
- {
T r;
r[A->subset()]=chi;
T tmp;
(*A)(tmp, psi, PLUS);
r[A->subset()] -= tmp;
- res.resid = sqrt(norm2(r, A->subset()));
- }
-
- rel_resid = res.resid/sqrt(norm2(chi,A->subset()));
-
- QDPIO::cout << solver_string << res.n_count << " iterations. Rsd = " << res.resid << " Relative Rsd = " << rel_resid << std::endl;
+ res.resid = sqrt(norm2(r, A->subset()))/sqrt(norm2(chi,A->subset()));
}
- else {
- // just believe the QUDA residuum.
- // which is always a true reiduum
- rel_resid = res.resid;
+ else {
+ QDPIO::cout << "Chroma <-> QUDA solution check disabled. Using (trusting) QUDA residuum\n";
}
+ QDPIO::cout << solver_string << res.n_count << " iterations. Relative Rsd = " << res.resid << std::endl;
+
// Convergence Check/Blow Up
if ( ! invParam.SilentFailP ) {
- if ( toBool( rel_resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
- QDPIO::cerr << solver_string << "ERROR: Solver residuum is outside tolerance: QUDA resid="<< rel_resid << " Desired =" << invParam.RsdTarget << " Max Tolerated = " << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
+ if ( toBool( res.resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
+ QDPIO::cerr << solver_string << "ERROR: Solver residuum is outside tolerance: QUDA resid="<< res.resid << " Desired =" << invParam.RsdTarget << " Max Tolerated = " << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
QDP_abort(1);
}
}
@@ -635,6 +601,68 @@ namespace Chroma
return res;
}
+ std::vector operator() (const std::vector>& psi, const std::vector>& chi) const override
+ {
+
+ START_CODE();
+ QDPIO::cout << "Entering MRHS solution: N_src = " << chi.size() << "\n";
+
+ std::vector res(chi.size());
+ if( psi.size() != chi.size() ) {
+ QDPIO::cout << "Number of sources does not match number of solutions\n";
+ QDPIO::cout << "psi.size() = " << psi.size() << " but chi.size() = " << chi.size() << "\n";
+ QDP_abort(1);
+ }
+
+ StopWatch swatch;
+ swatch.start();
+
+ if ( invParam.axialGaugeP ) {
+ QDPIO::cerr << "Multi RHS solve in axial gauge not yet implemented\n";
+ QDP_abort(1);
+ }
+
+ qudaInvertMultiSrc(*invclov, psi, chi, res);
+
+ swatch.stop();
+
+ // Check solutions -- if desired
+
+ if( invParam.SolutionCheckP ) {
+ for(int soln =0; soln < psi.size(); soln++) {
+ T r;
+ r[A->subset()]=*(chi[ soln ]);
+ T tmp;
+ (*A)(tmp, *(psi[soln]), PLUS);
+ r[A->subset()] -= tmp;
+ res[soln].resid = sqrt(norm2(r, A->subset()))/sqrt(norm2(*(chi[soln]),A->subset()));
+ }
+ }
+ else {
+ QDPIO::cout << "Chroma <-> QUDA solution check disabled. Using (trusting) QUDA residua\n";
+ }
+
+ for(int soln=0; soln < psi.size(); soln++ ) {
+ QDPIO::cout << "QUDA_"<< solver_string <<" solution " << soln <<
+ " : " << res[soln].n_count << " iterations. Relative Rsd = " << res[soln].resid << std::endl;
+
+ // Convergence Check/Blow Up
+ if ( ! invParam.SilentFailP ) {
+ if ( toBool( res[soln].resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
+ QDPIO::cerr << "ERROR: QUDA Solver residuum for solution " << soln
+ << " is outside tolerance: QUDA resid="<< res[soln].resid << " Desired ="
+ << invParam.RsdTarget << " Max Tolerated = "
+ << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
+ QDP_abort(1);
+ }
+ }
+ }
+
+
+ END_CODE();
+ return res;
+ }
+
private:
// Hide default constructor
LinOpSysSolverQUDAMULTIGRIDClover() {}
@@ -642,6 +670,7 @@ namespace Chroma
#if 1
Q links_orig;
#endif
+ bool is_precond;
U GFixMat;
QudaPrecision_s cpu_prec;
@@ -658,11 +687,16 @@ namespace Chroma
Handle< CloverTermT > invclov;
SystemSolverResults_t qudaInvert(const CloverTermT& clover,
- const CloverTermT& inv_clov,
+ const CloverTermT& invclov,
const T& chi_s,
T& psi_s
)const;
+ void qudaInvertMultiSrc(const CloverTermT& invclov,
+ const std::vector>& psi_s,
+ const std::vector>& chi_s,
+ std::vector& res) const;
+
std::string solver_string;
};
diff --git a/lib/actions/ferm/invert/quda_solvers/syssolver_linop_clover_quda_w.cc b/lib/actions/ferm/invert/quda_solvers/syssolver_linop_clover_quda_w.cc
index 45de3adef7..3dc0b552d2 100644
--- a/lib/actions/ferm/invert/quda_solvers/syssolver_linop_clover_quda_w.cc
+++ b/lib/actions/ferm/invert/quda_solvers/syssolver_linop_clover_quda_w.cc
@@ -19,161 +19,179 @@
namespace Chroma
{
- namespace LinOpSysSolverQUDACloverEnv
- {
-
- //! Anonymous namespace
- namespace
- {
- //! Name to be used
- const std::string name("QUDA_CLOVER_INVERTER");
-
- //! Local registration flag
- bool registered = false;
- }
-
-
-
- LinOpSystemSolver* createFerm(XMLReader& xml_in,
- const std::string& path,
- Handle< FermState< LatticeFermion, multi1d, multi1d > > state,
-
- Handle< LinearOperator > A)
+ namespace LinOpSysSolverQUDACloverEnv
{
- return new LinOpSysSolverQUDAClover(A, state,SysSolverQUDACloverParams(xml_in, path));
- }
- //! Register all the factories
- bool registerAll()
- {
- bool success = true;
- if (! registered)
- {
- success &= Chroma::TheLinOpFermSystemSolverFactory::Instance().registerObject(name, createFerm);
- registered = true;
- }
- return success;
+ //! Anonymous namespace
+ namespace
+ {
+ //! Name to be used
+ const std::string name("QUDA_CLOVER_INVERTER");
+
+ //! Local registration flag
+ bool registered = false;
+ }
+
+
+
+ LinOpSystemSolver* createFerm(XMLReader& xml_in,
+ const std::string& path,
+ Handle< FermState< LatticeFermion, multi1d, multi1d > > state,
+
+ Handle< LinearOperator > A)
+ {
+ return new LinOpSysSolverQUDAClover(A, state,SysSolverQUDACloverParams(xml_in, path));
+ }
+
+ //! Register all the factories
+ bool registerAll()
+ {
+ bool success = true;
+ if (! registered)
+ {
+ success &= Chroma::TheLinOpFermSystemSolverFactory::Instance().registerObject(name, createFerm);
+ registered = true;
+ }
+ return success;
+ }
}
- }
- SystemSolverResults_t
- LinOpSysSolverQUDAClover::qudaInvert(const CloverTermT& clover,
- const CloverTermT& invclov,
- const T& chi_s,
- T& psi_s) const{
+ SystemSolverResults_t LinOpSysSolverQUDAClover::qudaInvert( const T& chi_s, T& psi_s) const {
+ SystemSolverResults_t ret;
- SystemSolverResults_t ret;
-
- void *spinorIn;
- void *spinorOut;
+ void *spinorIn;
+ void *spinorOut;
+ auto sub = A->subset();
#ifdef BUILD_QUDA_DEVIFACE_SPINOR
- std::vector ids;
+ std::vector ids;
#endif
-
- // No need to transform source
-#ifndef BUILD_QUDA_DEVIFACE_SPINOR
- spinorIn =(void *)&(chi_s.elem(rb[1].start()).elem(0).elem(0).real());
-#else
- //spinorIn = GetMemoryPtr( chi_s.);
- //QDPIO::cout << "MDAGM spinor in = " << spinorIn << "\n";
- ids.push_back(chi_s.getId());
-#endif
-
+ // No need to transform source
#ifndef BUILD_QUDA_DEVIFACE_SPINOR
- spinorOut =(void *)&(psi_s.elem(rb[1].start()).elem(0).elem(0).real());
+ spinorIn =(void *)&(chi_s.elem(sub.start()).elem(0).elem(0).real());
#else
- ids.push_back(psi_s.getId());
- auto dev_ptr = GetMemoryPtr(ids);
- spinorIn = dev_ptr[0];
- spinorOut = dev_ptr[1];
-
+ //spinorIn = GetMemoryPtr( chi_s.);
+ //QDPIO::cout << "MDAGM spinor in = " << spinorIn << "\n";
+ ids.push_back(chi_s.getId());
#endif
- // Do the solve here
- StopWatch swatch1;
- swatch1.reset();
- swatch1.start();
- invertQuda(spinorOut, spinorIn, (QudaInvertParam*)&quda_inv_param);
- swatch1.stop();
-
- QDPIO::cout << "QUDA_"<>& psi_s,
+ const std::vector>& chi_s,
+ std::vector& res) const {
+ std::vector spinorIn(chi_s.size());
+ std::vector spinorOut(psi_s.size());
+ auto sub = A->subset();
- // Need to create a simple ferm state from the links_single...
- Handle< FermState > pstate(new PeriodicFermState(links_orig));
- const AnisoParam_t& aniso = invParam.CloverParams.anisoParam;
- QDPWilsonDslashT qdp_dslash(pstate, aniso);
-
- T tmp,psi2;
- tmp=zero;
- psi2=zero;
- // qdp_dslash.apply(psi2, mod_chi, PLUS, 0);
- qdp_dslash.apply(tmp, mod_chi, PLUS, 0);
- invclov.apply(psi2,tmp, PLUS, 0);
+ int N_src = chi_s.size();
+#ifndef BUILD_QUDA_DEVIFACE_SPINOR
+ // Regular non-qdpjit approach. Just collect the pointers
+ for(int soln=0; soln < chi_s.size(); soln++) {
+ spinorIn[soln] = (void *)&(chi_s[soln]->elem(sub.start()).elem(0).elem(0).real());
+ spinorOut[soln] = (void *)&(psi_s[soln]->elem(sub.start()).elem(0).elem(0).real());
+ }
+#else
+ std::vector ids(2*N_src);
+ for(int soln=0; soln < N_src; soln++) {
+ ids[soln] = chi_s[soln]->getId();
+ ids[N_src+soln] = psi_s[soln]->getId();
+ }
+
+ // Grab all the keys
+ auto dev_ptr = QDP_get_global_cache().get_dev_ptrs( multi1d( ids.data(), ids.size()) );
- T r=zero;
- r = psi2 - psi_s;
- QDPIO::cout << "CB=0" << std::endl;
- QDPIO::cout << "Dslash Test: || r || = " << sqrt(norm2(r,rb[0])) << std::endl;
- // QDPIO::cout << "Dslash Test: || r ||/|| psi || = " << sqrt(norm2(r,rb[0])/norm2(psi_s, rb[0])) << std::endl;
+ for(int soln=0; soln < N_src; soln++) {
+ spinorIn[soln] = dev_ptr(soln);
+ spinorOut[soln] = dev_ptr(N_src+soln);
+ }
+#endif
- QDPIO::cout << "CB=1: Should be zero" << std::endl;
- QDPIO::cout << "Dslash Test: || r || = " << sqrt(norm2(r,rb[1])) << std::endl;
- //QDPIO::cout << "Dslash Test: || r ||/|| psi || = " << sqrt(norm2(r,rb[1])/norm2(psi_s, rb[1])) << std::endl;
-
- const int* tab = rb[0].siteTable().slice();
- for(int i=0; i < rb[0].numSiteTable(); i++) {
- int j = tab[i];
- bool printSite=false;
-
- for(int spin=0; spin < 4; spin++) {
- for(int col=0; col < 3; col++) {
- if( (fabs(r.elem(j).elem(spin).elem(col).real()) > 1.0e-5 )
- || (fabs(r.elem(j).elem(spin).elem(col).imag()) > 1.0e-5 )) {
- printSite=true;
- }
- }
- }
- if( printSite ) {
-
- for(int spin=0; spin < 4; spin++) {
- for(int col=0; col < 3; col++) {
- QDPIO::cout << "Site= " << j << " Spin= "<< spin << " Col= " << col << " spinor = ( "
- << psi2.elem(j).elem(spin).elem(col).real() << " , "
- << psi2.elem(j).elem(spin).elem(col).imag() << " )" << std::endl;
- }
- }
- QDPIO::cout << std::endl;
- }
+ // Local quda_inv_param (?)
+ // Relies on quda_inv_param being just a dumb/struct and or copyable
+ QudaInvertParam local_quda_inv_param = quda_inv_param ;
+
+ int totalSubgrids=1;
+ const multi1d& machine_size=QDP::Layout::logicalSize();
+
+ for (int i = 0; i < Nd; i++) {
+ local_quda_inv_param.split_grid[i] = invParam.GridSplitDims[i];
+ totalSubgrids *= invParam.GridSplitDims[i];
+ if ( machine_size[i] % invParam.GridSplitDims[i] != 0 ) {
+ QDPIO::cerr << "The split-grid-subgrid dimensions must divide the number ranks in each dimension exactly\n";
+ QDPIO::cerr << "Currently this is not the case: dim=" << i << " machine_size["<
+#include
#include "util/gauge/reunit.h"
#ifdef QDP_IS_QDPJIT
@@ -33,650 +34,694 @@
namespace Chroma
{
-//! Richardson system solver namespace
-namespace LinOpSysSolverQUDACloverEnv
-{
-//! Register the syssolver
-bool registerAll();
-}
-
-
-
-//! Solve a Clover Fermion System using the QUDA inverter
-/*! \ingroup invert
- *** WARNING THIS SOLVER WORKS FOR Clover FERMIONS ONLY ***
- */
-
-class LinOpSysSolverQUDAClover : public LinOpSystemSolver
-{
-public:
- typedef LatticeFermion T;
- typedef LatticeColorMatrix U;
- typedef multi1d Q;
-
- typedef LatticeFermionF TF;
- typedef LatticeColorMatrixF UF;
- typedef multi1d QF;
-
- typedef LatticeFermionF TD;
- typedef LatticeColorMatrixF UD;
- typedef multi1d QD;
-
- typedef WordType::Type_t REALT;
- //! Constructor
- /*!
- * \param M_ Linear operator ( Read )
- * \param invParam inverter parameters ( Read )
- */
- LinOpSysSolverQUDAClover(Handle< LinearOperator > A_,
- Handle< FermState > state_,
- const SysSolverQUDACloverParams& invParam_) :
- A(A_), invParam(invParam_), clov(new CloverTermT() ), invclov(new CloverTermT())
- {
- QDPIO::cout << "LinOpSysSolverQUDAClover:" << std::endl;
-
- // FOLLOWING INITIALIZATION in test QUDA program
-
- // 1) work out cpu_prec, cuda_prec, cuda_prec_sloppy
- int s = sizeof( WordType::Type_t );
- if (s == 4) {
- cpu_prec = QUDA_SINGLE_PRECISION;
- }
- else {
- cpu_prec = QUDA_DOUBLE_PRECISION;
- }
-
-
- // Work out GPU precision
- switch( invParam.cudaPrecision ) {
- case HALF:
- gpu_prec = QUDA_HALF_PRECISION;
- break;
- case SINGLE:
- gpu_prec = QUDA_SINGLE_PRECISION;
- break;
- case DOUBLE:
- gpu_prec = QUDA_DOUBLE_PRECISION;
- break;
- default:
- gpu_prec = cpu_prec;
- break;
- }
-
- // Work out GPU Sloppy precision
- // Default: No Sloppy
- switch( invParam.cudaSloppyPrecision ) {
- case HALF:
- gpu_half_prec = QUDA_HALF_PRECISION;
- break;
- case SINGLE:
- gpu_half_prec = QUDA_SINGLE_PRECISION;
- break;
- case DOUBLE:
- gpu_half_prec = QUDA_DOUBLE_PRECISION;
- break;
- default:
- gpu_half_prec = gpu_prec;
- break;
- }
-
- // 2) pull 'new; GAUGE and Invert params
- q_gauge_param = newQudaGaugeParam();
- quda_inv_param = newQudaInvertParam();
-
- // 3) set lattice size
- const multi1d& latdims = Layout::subgridLattSize();
-
- q_gauge_param.X[0] = latdims[0];
- q_gauge_param.X[1] = latdims[1];
- q_gauge_param.X[2] = latdims[2];
- q_gauge_param.X[3] = latdims[3];
-
- // 4) - deferred (anisotropy)
-
- // 5) - set QUDA_WILSON_LINKS, QUDA_GAUGE_ORDER
- q_gauge_param.type = QUDA_WILSON_LINKS;
+ //! Richardson system solver namespace
+ namespace LinOpSysSolverQUDACloverEnv
+ {
+ //! Register the syssolver
+ bool registerAll();
+ }
+
+
+
+ //! Solve a Clover Fermion System using the QUDA inverter
+ /*! \ingroup invert
+ *** WARNING THIS SOLVER WORKS FOR Clover FERMIONS ONLY ***
+ */
+
+ class LinOpSysSolverQUDAClover : public LinOpSystemSolver
+ {
+ public:
+ typedef LatticeFermion T;
+ typedef LatticeColorMatrix U;
+ typedef multi1d Q;
+
+ typedef LatticeFermionF TF;
+ typedef LatticeColorMatrixF UF;
+ typedef multi1d QF;
+
+ typedef LatticeFermionF TD;
+ typedef LatticeColorMatrixF UD;
+ typedef multi1d QD;
+
+ typedef WordType::Type_t REALT;
+ //! Constructor
+ /*!
+ * \param M_ Linear operator ( Read )
+ * \param invParam inverter parameters ( Read )
+ */
+ LinOpSysSolverQUDAClover(Handle< LinearOperator > A_,
+ Handle< FermState > state_,
+ const SysSolverQUDACloverParams& invParam_) :
+ A(A_), invParam(invParam_)
+ {
+
+ QDPIO::cout << "LinOpSysSolverQUDAClover:" << std::endl;
+
+ bool is_precond = true;
+ auto sub = A->subset();
+
+ if ( sub.start() == all.start() && sub.numSiteTable() == all.numSiteTable() ) is_precond = false;
+ // FOLLOWING INITIALIZATION in test QUDA program
+
+ // 1) work out cpu_prec, cuda_prec, cuda_prec_sloppy
+ int s = sizeof( WordType::Type_t );
+ if (s == 4) {
+ cpu_prec = QUDA_SINGLE_PRECISION;
+ }
+ else {
+ cpu_prec = QUDA_DOUBLE_PRECISION;
+ }
+
+
+ // Work out GPU precision
+ switch( invParam.cudaPrecision ) {
+ case HALF:
+ gpu_prec = QUDA_HALF_PRECISION;
+ break;
+ case SINGLE:
+ gpu_prec = QUDA_SINGLE_PRECISION;
+ break;
+ case DOUBLE:
+ gpu_prec = QUDA_DOUBLE_PRECISION;
+ break;
+ default:
+ gpu_prec = cpu_prec;
+ break;
+ }
+
+ // Work out GPU Sloppy precision
+ // Default: No Sloppy
+ switch( invParam.cudaSloppyPrecision ) {
+ case HALF:
+ gpu_half_prec = QUDA_HALF_PRECISION;
+ break;
+ case SINGLE:
+ gpu_half_prec = QUDA_SINGLE_PRECISION;
+ break;
+ case DOUBLE:
+ gpu_half_prec = QUDA_DOUBLE_PRECISION;
+ break;
+ default:
+ gpu_half_prec = gpu_prec;
+ break;
+ }
+
+ // 2) pull 'new; GAUGE and Invert params
+ q_gauge_param = newQudaGaugeParam();
+ quda_inv_param = newQudaInvertParam();
+
+ // 3) set lattice size
+ const multi1d& latdims = Layout::subgridLattSize();
+
+ q_gauge_param.X[0] = latdims[0];
+ q_gauge_param.X[1] = latdims[1];
+ q_gauge_param.X[2] = latdims[2];
+ q_gauge_param.X[3] = latdims[3];
+
+ // 4) - deferred (anisotropy)
+
+ // 5) - set QUDA_WILSON_LINKS, QUDA_GAUGE_ORDER
+ q_gauge_param.type = QUDA_WILSON_LINKS;
#ifndef BUILD_QUDA_DEVIFACE_GAUGE
- q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; // gauge[mu], p
+ q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; // gauge[mu], p
#else
- QDPIO::cout << "MDAGM Using QDP-JIT gauge order" << std::endl;
- q_gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
- q_gauge_param.gauge_order = QUDA_QDPJIT_GAUGE_ORDER;
+ QDPIO::cout << "MDAGM Using QDP-JIT gauge order" << std::endl;
+ q_gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
+ q_gauge_param.gauge_order = QUDA_QDPJIT_GAUGE_ORDER;
#endif
- // 6) - set t_boundary
- // Convention: BC has to be applied already
- // This flag just tells QUDA that this is so,
- // so that QUDA can take care in the reconstruct
- if( invParam.AntiPeriodicT ) {
- q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
- }
- else {
- q_gauge_param.t_boundary = QUDA_PERIODIC_T;
- }
-
- // Set cpu_prec, cuda_prec, reconstruct and sloppy versions
- q_gauge_param.cpu_prec = cpu_prec;
- q_gauge_param.cuda_prec = gpu_prec;
-
-
- switch( invParam.cudaReconstruct ) {
- case RECONS_NONE:
- q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
- break;
- case RECONS_8:
- q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
- break;
- case RECONS_12:
- q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
- break;
- default:
- q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
- break;
- };
-
- q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
-
- // Default for no preconditioner -- may be overwritten based
- // on innerParams
- q_gauge_param.cuda_prec_precondition = gpu_half_prec;
-
- switch( invParam.cudaSloppyReconstruct ) {
- case RECONS_NONE:
- q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
- break;
- case RECONS_8:
- q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
- break;
- case RECONS_12:
- q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
- break;
- default:
- q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
- break;
- };
-
- // Default. This may be overrridden later.
- q_gauge_param.reconstruct_precondition = q_gauge_param.reconstruct_sloppy;
- // Gauge fixing:
-
- // These are the links
- // They may be smeared and the BC's may be applied
- Q links_single(Nd);
-
- // Now downcast to single prec fields.
- for(int mu=0; mu < Nd; mu++) {
- links_single[mu] = (state_->getLinks())[mu];
- }
-
- // GaugeFix
- if( invParam.axialGaugeP ) {
- QDPIO::cout << "Fixing Temporal Gauge" << std::endl;
- temporalGauge(links_single, GFixMat, Nd-1);
- for(int mu=0; mu < Nd; mu++){
- links_single[mu] = GFixMat*(state_->getLinks())[mu]*adj(shift(GFixMat, FORWARD, mu));
- }
- q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
- }
- else {
- // No GaugeFix
- q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; // No Gfix yet
- }
-
- // deferred 4) Gauge Anisotropy
- const AnisoParam_t& aniso = invParam.CloverParams.anisoParam;
- if( aniso.anisoP ) { // Anisotropic case
- Real gamma_f = aniso.xi_0 / aniso.nu;
- q_gauge_param.anisotropy = toDouble(gamma_f);
- }
- else {
- q_gauge_param.anisotropy = 1.0;
- }
-
- // MAKE FSTATE BEFORE RESCALING links_single
- // Because the clover term expects the unrescaled links...
- Handle > fstate( new PeriodicFermState(links_single));
-
- if( aniso.anisoP ) { // Anisotropic case
- multi1d cf=makeFermCoeffs(aniso);
- for(int mu=0; mu < Nd; mu++) {
- links_single[mu] *= cf[mu];
- }
- }
-
- // Now onto the inv param:
- // Dslash type
- quda_inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
-
- // Invert type:
- switch( invParam.solverType ) {
- case CG:
- quda_inv_param.inv_type = QUDA_CG_INVERTER;
- solver_string = "CG";
- break;
- case BICGSTAB:
- quda_inv_param.inv_type = QUDA_BICGSTAB_INVERTER;
- solver_string = "BICGSTAB";
- break;
- case GCR:
- quda_inv_param.inv_type = QUDA_GCR_INVERTER;
- solver_string = "GCR";
- break;
- default:
- QDPIO::cerr << "Unknown SOlver type" << std::endl;
- QDP_abort(1);
- break;
- }
-
- // Mass
-
- // Fiendish idea from Ron. Set the kappa=1/2 and use
- // unmodified clover term, and ask for Kappa normalization
- // This should give us A - (1/2)D as the unpreconditioned operator
- // and probabl 1 - {1/4} A^{-1} D A^{-1} D as the preconditioned
- // op. Apart from the A_oo stuff on the antisymmetric we have
- // nothing to do...
- quda_inv_param.kappa = 0.5;
-
- // FIXME: If we want QUDA to compute the clover coeff, we need to be able to deal
- // with awfuless of anisotropy
- // The value below is a dummy one.
- quda_inv_param.clover_coeff = 1.0; // Dummy tree level value. Not used
- quda_inv_param.tol = toDouble(invParam.RsdTarget);
- quda_inv_param.maxiter = invParam.MaxIter;
- quda_inv_param.reliable_delta = toDouble(invParam.Delta);
- quda_inv_param.pipeline = invParam.Pipeline;
-
- // Solution type
- quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
-
- // Solve type
- switch( invParam.solverType ) {
- case CG:
- quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
- break;
- case BICGSTAB:
- quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
- break;
- case GCR:
- quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
- break;
- case CA_GCR:
- quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
- break;
- case MR:
- quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
- break;
-
- default:
- quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
-
- break;
- }
-
- if( invParam.asymmetricP ) {
- QDPIO::cout << "Using Asymmetric Linop: A_oo - D A^{-1}_ee D" << std::endl;
- quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD_ASYMMETRIC;
- }
- else {
- QDPIO::cout << "Using Symmetric Linop: 1 - A^{-1}_oo D A^{-1}_ee D" << std::endl;
- quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
- }
-
- quda_inv_param.dagger = QUDA_DAG_NO;
- quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
-
- quda_inv_param.cpu_prec = cpu_prec;
- quda_inv_param.cuda_prec = gpu_prec;
- quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
-
- // Default. May be overridden by inner params
- quda_inv_param.cuda_prec_precondition = gpu_half_prec;
-
-
- quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
- quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
+ // 6) - set t_boundary
+ // Convention: BC has to be applied already
+ // This flag just tells QUDA that this is so,
+ // so that QUDA can take care in the reconstruct
+ if( invParam.AntiPeriodicT ) {
+ q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
+ }
+ else {
+ q_gauge_param.t_boundary = QUDA_PERIODIC_T;
+ }
+
+ // Set cpu_prec, cuda_prec, reconstruct and sloppy versions
+ q_gauge_param.cpu_prec = cpu_prec;
+ q_gauge_param.cuda_prec = gpu_prec;
+
+
+ switch( invParam.cudaReconstruct ) {
+ case RECONS_NONE:
+ q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
+ break;
+ case RECONS_8:
+ q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
+ break;
+ case RECONS_12:
+ q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
+ break;
+ default:
+ q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
+ break;
+ };
+
+ q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
+
+ // Default for no preconditioner -- may be overwritten based
+ // on innerParams
+ q_gauge_param.cuda_prec_precondition = gpu_half_prec;
+
+ switch( invParam.cudaSloppyReconstruct ) {
+ case RECONS_NONE:
+ q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
+ break;
+ case RECONS_8:
+ q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
+ break;
+ case RECONS_12:
+ q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
+ break;
+ default:
+ q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
+ break;
+ };
+
+ // Default. This may be overrridden later.
+ q_gauge_param.reconstruct_precondition = q_gauge_param.reconstruct_sloppy;
+ // Gauge fixing:
+
+ // These are the links
+ // They may be smeared and the BC's may be applied
+
+ Q links_single(Nd);
+
+ // Now downcast to single prec fields.
+ for(int mu=0; mu < Nd; mu++) {
+ links_single[mu] = (state_->getLinks())[mu];
+ }
+
+ // GaugeFix
+ if( invParam.axialGaugeP ) {
+ QDPIO::cout << "Fixing Temporal Gauge" << std::endl;
+ temporalGauge(links_single, GFixMat, Nd-1);
+ for(int mu=0; mu < Nd; mu++){
+ links_single[mu] = GFixMat*(state_->getLinks())[mu]*adj(shift(GFixMat, FORWARD, mu));
+ }
+ q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
+ }
+ else {
+ // No GaugeFix
+ q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; // No Gfix yet
+ }
+
+ // deferred 4) Gauge Anisotropy
+ const AnisoParam_t& aniso = invParam.CloverParams.anisoParam;
+ if( aniso.anisoP ) { // Anisotropic case
+ Real gamma_f = aniso.xi_0 / aniso.nu;
+ q_gauge_param.anisotropy = toDouble(gamma_f);
+ }
+ else {
+ q_gauge_param.anisotropy = 1.0;
+ }
+
+ // MAKE FSTATE BEFORE RESCALING links_single
+ // Because the clover term expects the unrescaled links...
+ Handle > fstate( new PeriodicFermState(links_single));
+
+ if( aniso.anisoP ) { // Anisotropic case
+ multi1d cf=makeFermCoeffs(aniso);
+ for(int mu=0; mu < Nd; mu++) {
+ links_single[mu] *= cf[mu];
+ }
+ }
+
+ // Now onto the inv param:
+ // Dslash type
+ quda_inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
+
+ // Invert type:
+ switch( invParam.solverType ) {
+ case CG:
+ quda_inv_param.inv_type = QUDA_CG_INVERTER;
+ solver_string = "CG";
+ break;
+ case BICGSTAB:
+ quda_inv_param.inv_type = QUDA_BICGSTAB_INVERTER;
+ solver_string = "BICGSTAB";
+ break;
+ case GCR:
+ quda_inv_param.inv_type = QUDA_GCR_INVERTER;
+ solver_string = "GCR";
+ break;
+ default:
+ QDPIO::cerr << "Unknown SOlver type" << std::endl;
+ QDP_abort(1);
+ break;
+ }
+
+ // Mass
+
+ // Fiendish idea from Ron. Set the kappa=1/2 and use
+ // unmodified clover term, and ask for Kappa normalization
+ // This should give us A - (1/2)D as the unpreconditioned operator
+ // and probabl 1 - {1/4} A^{-1} D A^{-1} D as the preconditioned
+ // op. Apart from the A_oo stuff on the antisymmetric we have
+ // nothing to do...
+ quda_inv_param.kappa = 0.5;
+
+ // FIXME: If we want QUDA to compute the clover coeff, we need to be able to deal
+ // with awfuless of anisotropy
+ // The value below is a dummy one.
+ quda_inv_param.clover_coeff = 1.0; // Dummy tree level value. Not used
+ quda_inv_param.tol = toDouble(invParam.RsdTarget);
+ quda_inv_param.maxiter = invParam.MaxIter;
+ quda_inv_param.reliable_delta = toDouble(invParam.Delta);
+ quda_inv_param.pipeline = invParam.Pipeline;
+
+ // Solve type: We always solve with a PC solve (even for a MAT solution)
+ // because PC solves have better conditioning. QUDA needs to do the EO reconstructs
+ switch( invParam.solverType ) {
+ case CG:
+ quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
+ break;
+ case BICGSTAB:
+ quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
+ break;
+ case GCR:
+ quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
+ break;
+ case CA_GCR:
+ quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
+ break;
+ case MR:
+ quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
+ break;
+
+ default:
+ quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
+ break;
+ }
+
+ if( is_precond ) {
+
+ // Solution type
+ quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
+
+ // Preconditioned system
+ if( invParam.asymmetricP ) {
+ QDPIO::cout << "Using Asymmetric Linop: A_oo - D A^{-1}_ee D" << std::endl;
+ quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD_ASYMMETRIC;
+ }
+ else {
+ QDPIO::cout << "Using Symmetric Linop: 1 - A^{-1}_oo D A^{-1}_ee D" << std::endl;
+ quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
+ }
+ }
+ else {
+ // Solution type
+ quda_inv_param.solution_type = QUDA_MAT_SOLUTION;
+ quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD; //QUDA's native preconditioning when using QUDA_DIRECT_PC solution
+ }
+
+ quda_inv_param.dagger = QUDA_DAG_NO;
+ quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
+
+ quda_inv_param.cpu_prec = cpu_prec;
+ quda_inv_param.cuda_prec = gpu_prec;
+ quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
+
+ // Default. May be overridden by inner params
+ quda_inv_param.cuda_prec_precondition = gpu_half_prec;
+
+
+ quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
+ quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
#ifndef BUILD_QUDA_DEVIFACE_SPINOR
- quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
+ quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
#else
- QDPIO::cout << "MDAGM Using QDP-JIT spinor order" << std::endl;
- quda_inv_param.dirac_order = QUDA_QDPJIT_DIRAC_ORDER;
- quda_inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
- quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
+ QDPIO::cout << "MDAGM Using QDP-JIT spinor order" << std::endl;
+ quda_inv_param.dirac_order = QUDA_QDPJIT_DIRAC_ORDER;
+ quda_inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
+ quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
#endif
- // Clover precision and order
- quda_inv_param.clover_cpu_prec = cpu_prec;
- quda_inv_param.clover_cuda_prec = gpu_prec;
- quda_inv_param.clover_cuda_prec_sloppy = gpu_half_prec;
+ // Clover precision and order
+ quda_inv_param.clover_cpu_prec = cpu_prec;
+ quda_inv_param.clover_cuda_prec = gpu_prec;
+ quda_inv_param.clover_cuda_prec_sloppy = gpu_half_prec;
- // Default. may be overrridden by inner params
- quda_inv_param.clover_cuda_prec_precondition = gpu_half_prec;
+ // Default. may be overrridden by inner params
+ quda_inv_param.clover_cuda_prec_precondition = gpu_half_prec;
#ifndef BUILD_QUDA_DEVIFACE_CLOVER
#warning "NOT USING QUDA DEVICE IFACE"
- quda_inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
+ quda_inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
#else
#warning "USING QUDA DEVICE IFACE"
- QDPIO::cout << "MDAGM clover CUDA location\n";
- quda_inv_param.clover_location = QUDA_CUDA_FIELD_LOCATION;
- quda_inv_param.clover_order = QUDA_QDPJIT_CLOVER_ORDER;
+ QDPIO::cout << "MDAGM clover CUDA location\n";
+ quda_inv_param.clover_location = QUDA_CUDA_FIELD_LOCATION;
+ quda_inv_param.clover_order = QUDA_QDPJIT_CLOVER_ORDER;
#endif
- // Autotuning
- if( invParam.tuneDslashP ) {
- QDPIO::cout << "Enabling Dslash Autotuning" << std::endl;
-
- quda_inv_param.tune = QUDA_TUNE_YES;
- }
- else {
- QDPIO::cout << "Disabling Dslash Autotuning" << std::endl;
-
- quda_inv_param.tune = QUDA_TUNE_NO;
- }
-
-
- // Setup padding
- multi1d face_size(4);
- face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
- face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
- face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
- face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
-
- int max_face = face_size[0];
- for(int i=1; i <=3; i++) {
- if ( face_size[i] > max_face ) {
- max_face = face_size[i];
- }
- }
-
-
- q_gauge_param.ga_pad = max_face;
-
- if( invParam.innerParamsP ) {
- QDPIO::cout << "Setting inner solver params" << std::endl;
- // Dereference handle
- const GCRInnerSolverParams& ip = *(invParam.innerParams);
-
- // Set preconditioner precision
- switch( ip.precPrecondition ) {
- case HALF:
- quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
- quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
- q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
- break;
-
- case SINGLE:
- quda_inv_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
- quda_inv_param.clover_cuda_prec_precondition = QUDA_SINGLE_PRECISION;
- q_gauge_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
- break;
-
- case DOUBLE:
- quda_inv_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
- quda_inv_param.clover_cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
- q_gauge_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
- break;
- default:
- quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
- quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
- q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
- break;
- }
-
- switch( ip.reconstructPrecondition ) {
- case RECONS_NONE:
- q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
- break;
- case RECONS_8:
- q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_8;
- break;
- case RECONS_12:
- q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
- break;
- default:
- q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
- break;
- };
-
- quda_inv_param.tol_precondition = toDouble(ip.tolPrecondition);
- quda_inv_param.maxiter_precondition = ip.maxIterPrecondition;
- quda_inv_param.gcrNkrylov = ip.gcrNkrylov;
- switch( ip.schwarzType ) {
- case ADDITIVE_SCHWARZ :
- quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
- break;
- case MULTIPLICATIVE_SCHWARZ :
- quda_inv_param.schwarz_type = QUDA_MULTIPLICATIVE_SCHWARZ;
- break;
- default:
- quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
- break;
- }
- quda_inv_param.precondition_cycle = ip.preconditionCycle;
-
- if( ip.verboseInner ) {
- quda_inv_param.verbosity_precondition = QUDA_VERBOSE;
- }
- else {
- quda_inv_param.verbosity_precondition = QUDA_SILENT;
- }
-
- switch( ip.invTypePrecondition ) {
- case CG:
- quda_inv_param.inv_type_precondition = QUDA_CG_INVERTER;
- break;
- case BICGSTAB:
- quda_inv_param.inv_type_precondition = QUDA_BICGSTAB_INVERTER;
-
- break;
-
- case CA_GCR:
- quda_inv_param.inv_type_precondition= QUDA_CA_GCR_INVERTER;
-
- case MR:
- quda_inv_param.inv_type_precondition= QUDA_MR_INVERTER;
- break;
-
- default:
- quda_inv_param.inv_type_precondition = QUDA_MR_INVERTER;
- break;
- }
- }
- else {
- QDPIO::cout << "Setting Precondition stuff to defaults for not using" << std::endl;
- quda_inv_param.inv_type_precondition= QUDA_INVALID_INVERTER;
- quda_inv_param.tol_precondition = 1.0e-1;
- quda_inv_param.maxiter_precondition = 1000;
- quda_inv_param.verbosity_precondition = QUDA_SILENT;
- q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
- quda_inv_param.gcrNkrylov = 1;
- }
-
-
- if( invParam.verboseP ) {
- quda_inv_param.verbosity = QUDA_VERBOSE;
- }
- else {
- quda_inv_param.verbosity = QUDA_SUMMARIZE;
- }
-
- // Set up the links
- void* gauge[4];
-
+ // Setup padding
+ multi1d face_size(4);
+ face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
+ face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
+ face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
+ face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
+
+ int max_face = face_size[0];
+ for(int i=1; i <=3; i++) {
+ if ( face_size[i] > max_face ) {
+ max_face = face_size[i];
+ }
+ }
+
+
+ q_gauge_param.ga_pad = max_face;
+
+ if( invParam.innerParamsP ) {
+ QDPIO::cout << "Setting inner solver params" << std::endl;
+ // Dereference handle
+ const GCRInnerSolverParams& ip = *(invParam.innerParams);
+
+ // Set preconditioner precision
+ switch( ip.precPrecondition ) {
+ case HALF:
+ quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
+ quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
+ q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
+ break;
+
+ case SINGLE:
+ quda_inv_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
+ quda_inv_param.clover_cuda_prec_precondition = QUDA_SINGLE_PRECISION;
+ q_gauge_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
+ break;
+
+ case DOUBLE:
+ quda_inv_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
+ quda_inv_param.clover_cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
+ q_gauge_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
+ break;
+ default:
+ quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
+ quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
+ q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
+ break;
+ }
+
+ switch( ip.reconstructPrecondition ) {
+ case RECONS_NONE:
+ q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
+ break;
+ case RECONS_8:
+ q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_8;
+ break;
+ case RECONS_12:
+ q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
+ break;
+ default:
+ q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
+ break;
+ };
+
+ quda_inv_param.tol_precondition = toDouble(ip.tolPrecondition);
+ quda_inv_param.maxiter_precondition = ip.maxIterPrecondition;
+ quda_inv_param.gcrNkrylov = ip.gcrNkrylov;
+ switch( ip.schwarzType ) {
+ case ADDITIVE_SCHWARZ :
+ quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
+ break;
+ case MULTIPLICATIVE_SCHWARZ :
+ quda_inv_param.schwarz_type = QUDA_MULTIPLICATIVE_SCHWARZ;
+ break;
+ default:
+ quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
+ break;
+ }
+ quda_inv_param.precondition_cycle = ip.preconditionCycle;
+
+ if( ip.verboseInner ) {
+ quda_inv_param.verbosity_precondition = QUDA_VERBOSE;
+ }
+ else {
+ quda_inv_param.verbosity_precondition = QUDA_SILENT;
+ }
+
+ switch( ip.invTypePrecondition ) {
+ case CG:
+ quda_inv_param.inv_type_precondition = QUDA_CG_INVERTER;
+ break;
+ case BICGSTAB:
+ quda_inv_param.inv_type_precondition = QUDA_BICGSTAB_INVERTER;
+
+ break;
+
+ case CA_GCR:
+ quda_inv_param.inv_type_precondition= QUDA_CA_GCR_INVERTER;
+
+ case MR:
+ quda_inv_param.inv_type_precondition= QUDA_MR_INVERTER;
+ break;
+
+ default:
+ quda_inv_param.inv_type_precondition = QUDA_MR_INVERTER;
+ break;
+ }
+ }
+ else {
+ QDPIO::cout << "Setting Precondition stuff to defaults for not using" << std::endl;
+ quda_inv_param.inv_type_precondition= QUDA_INVALID_INVERTER;
+ quda_inv_param.tol_precondition = 1.0e-1;
+ quda_inv_param.maxiter_precondition = 1000;
+ quda_inv_param.verbosity_precondition = QUDA_SILENT;
+ q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
+ quda_inv_param.gcrNkrylov = 1;
+ }
+
+
+ if( invParam.verboseP ) {
+ quda_inv_param.verbosity = QUDA_VERBOSE;
+ }
+ else {
+ quda_inv_param.verbosity = QUDA_SUMMARIZE;
+ }
+
+ void* gauge[4]= { nullptr, nullptr, nullptr, nullptr };
+
#ifndef BUILD_QUDA_DEVIFACE_GAUGE
- for(int mu=0; mu < Nd; mu++) {
- gauge[mu] = (void *)&(links_single[mu].elem(all.start()).elem().elem(0,0).real());
- }
+ for(int mu=0; mu < Nd; mu++) {
+ gauge[mu] = (void *)&(links_single[mu].elem(all.start()).elem().elem(0,0).real());
+ }
#else
- GetMemoryPtrGauge(gauge,links_single);
- // gauge[mu] = GetMemoryPtr( links_single[mu].getId() );
- // QDPIO::cout << "MDAGM CUDA gauge[" << mu << "] in = " << gauge[mu] << "\n";
+ GetMemoryPtrGauge(gauge,links_single);
+ // gauge[mu] = GetMemoryPtr( links_single[mu].getId() );
+ // QDPIO::cout << "MDAGM CUDA gauge[" << mu << "] in = " << gauge[mu] << "\n";
#endif
- loadGaugeQuda((void *)gauge, &q_gauge_param);
+ loadGaugeQuda((void *)gauge, &q_gauge_param);
- // Setup the clover term...
- QDPIO::cout << "Creating CloverTerm" << std::endl;
- clov->create(fstate, invParam_.CloverParams);
- // Don't recompute, just copy
- invclov->create(fstate, invParam_.CloverParams);
+ // Setup the clover term...
+ CloverTermT clov;
+ CloverTermT invclov;
- QDPIO::cout << "Inverting CloverTerm" << std::endl;
- invclov->choles(0);
- invclov->choles(1);
+ QDPIO::cout << "Creating CloverTerm" << std::endl;
+ clov.create(fstate, invParam_.CloverParams);
+ // Don't recompute, just copy
+ invclov.create(fstate, invParam_.CloverParams);
+ QDPIO::cout << "Inverting CloverTerm" << std::endl;
+ invclov.choles(0);
+ invclov.choles(1);
-#ifndef BUILD_QUDA_DEVIFACE_CLOVER
- multi1d > packed_clov;
-
-
- packed_clov.resize(all.siteTable().size());
- clov->packForQUDA(packed_clov, 0);
- clov->packForQUDA(packed_clov, 1);
-
+#ifndef BUILD_QUDA_DEVIFACE_CLOVER
+ multi1d > packed_clov(all.siteTable().size());
+ multi1d > packed_invclov(all.siteTable().size());;
+
+ clov.packForQUDA(packed_clov, 0);
+ clov.packForQUDA(packed_clov, 1);
- // Always need inverse
- multi1d > packed_invclov(all.siteTable().size());
- invclov->packForQUDA(packed_invclov, 0);
- invclov->packForQUDA(packed_invclov, 1);
+ invclov.packForQUDA(packed_invclov, 0);
+ invclov.packForQUDA(packed_invclov, 1);
-
- loadCloverQuda(&(packed_clov[0]), &(packed_invclov[0]),&quda_inv_param);
-
+ loadCloverQuda(&(packed_clov[0]), &(packed_invclov[0]),&quda_inv_param);
#else
- void *clover[2];
- void *cloverInv[2];
-
- GetMemoryPtrClover(clov->getOffId(),clov->getDiaId(),invclov->getOffId(),invclov->getDiaId());
-
- loadCloverQuda( (void*)(clover) , (void*)(cloverInv) ,&quda_inv_param);
-
-#endif
-
+ void *clover[2];
+ void *cloverInv[2];
+ // This is a yucky macro and needs the existence of 'clover' and 'cloverInv' to work
+ GetMemoryPtrClover(clov.getOffId(),clov.getDiaId(),invclov.getOffId(),invclov.getDiaId());
- }
-
-
- //! Destructor is automatic
- ~LinOpSysSolverQUDAClover()
- {
- QDPIO::cout << "Destructing" << std::endl;
- freeGaugeQuda();
- freeCloverQuda();
- }
-
- //! Return the subset on which the operator acts
- const Subset& subset() const {return A->subset();}
-
- //! Solver the linear system
- /*!
- * \param psi solution ( Modify )
- * \param chi source ( Read )
- * \return syssolver results
- */
- SystemSolverResults_t operator() (T& psi, const T& chi) const
- {
- SystemSolverResults_t res;
-
- START_CODE();
- StopWatch swatch;
- swatch.start();
-
- // T MdagChi;
-
- // This is a CGNE. So create new RHS
- // (*A)(MdagChi, chi, MINUS);
- // Handle< LinearOperator > MM(new MdagMLinOp(A));
- if ( invParam.axialGaugeP ) {
- T g_chi,g_psi;
-
- // Gauge Fix source and initial guess
- QDPIO::cout << "Gauge Fixing source and initial guess" << std::endl;
- g_chi[ rb[1] ] = GFixMat * chi;
- g_psi[ rb[1] ] = GFixMat * psi;
- QDPIO::cout << "Solving" << std::endl;
- res = qudaInvert(*clov,
- *invclov,
- g_chi,
- g_psi);
- QDPIO::cout << "Untransforming solution." << std::endl;
- psi[ rb[1]] = adj(GFixMat)*g_psi;
-
- }
- else {
- res = qudaInvert(*clov,
- *invclov,
- chi,
- psi);
- }
-
- swatch.stop();
-
-
- {
- T r;
- r[A->subset()]=chi;
- T tmp;
- (*A)(tmp, psi, PLUS);
- r[A->subset()] -= tmp;
- res.resid = sqrt(norm2(r, A->subset()));
- }
-
- Double rel_resid = res.resid/sqrt(norm2(chi,A->subset()));
-
- QDPIO::cout << "QUDA_"<< solver_string <<"_CLOVER_SOLVER: " << res.n_count << " iterations. Rsd = " << res.resid << " Relative Rsd = " << rel_resid << std::endl;
-
- // Convergence Check/Blow Up
- if ( ! invParam.SilentFailP ) {
- if ( toBool( rel_resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
- QDPIO::cerr << "ERROR: QUDA Solver residuum is outside tolerance: QUDA resid="<< rel_resid << " Desired =" << invParam.RsdTarget << " Max Tolerated = " << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
- QDP_abort(1);
- }
- }
-
- END_CODE();
- return res;
- }
-
-
-private:
- // Hide default constructor
- LinOpSysSolverQUDAClover() {}
-
-#if 1
- Q links_orig;
+ loadCloverQuda( (void*)(clover) , (void*)(cloverInv) ,&quda_inv_param);
#endif
- U GFixMat;
- QudaPrecision_s cpu_prec;
- QudaPrecision_s gpu_prec;
- QudaPrecision_s gpu_half_prec;
-
- Handle< LinearOperator > A;
- const SysSolverQUDACloverParams invParam;
- QudaGaugeParam q_gauge_param;
- QudaInvertParam quda_inv_param;
-
- Handle< CloverTermT > clov;
- Handle< CloverTermT > invclov;
-
- SystemSolverResults_t qudaInvert(const CloverTermT& clover,
- const CloverTermT& inv_clov,
- const T& chi_s,
- T& psi_s
- )const ;
-
- std::string solver_string;
-};
+ }
+
+
+ //! Destructor is automatic
+ ~LinOpSysSolverQUDAClover()
+ {
+ QDPIO::cout << "Destructing" << std::endl;
+ freeGaugeQuda();
+ freeCloverQuda();
+ }
+
+ //! Return the subset on which the operator acts
+ const Subset& subset() const {return A->subset();}
+
+ //! Solver the linear system
+ /*!
+ * \param psi solution ( Modify )
+ * \param chi source ( Read )
+ * \return syssolver results
+ */
+ SystemSolverResults_t operator() (T& psi, const T& chi) const
+ {
+ SystemSolverResults_t res;
+
+ START_CODE();
+ StopWatch swatch;
+ swatch.start();
+
+ // T MdagChi;
+
+ // This is a CGNE. So create new RHS
+ // (*A)(MdagChi, chi, MINUS);
+ // Handle< LinearOperator > MM(new MdagMLinOp(A));
+ if ( invParam.axialGaugeP ) {
+ T g_chi,g_psi;
+
+ // Gauge Fix source and initial guess
+ QDPIO::cout << "Gauge Fixing source and initial guess" << std::endl;
+ g_chi[ A->subset() ] = GFixMat * chi;
+ g_psi[ A->subset() ] = GFixMat * psi;
+ QDPIO::cout << "Solving" << std::endl;
+ res = qudaInvert( g_chi, g_psi);
+ QDPIO::cout << "Untransforming solution." << std::endl;
+ psi[ A->subset() ] = adj(GFixMat)*g_psi;
+
+ }
+ else {
+ res = qudaInvert( chi, psi);
+ }
+
+ swatch.stop();
+
+ // If required, check the solutions
+ if( invParam.SolutionCheckP ) {
+ T r;
+ r[A->subset()]=chi;
+ T tmp;
+ (*A)(tmp, psi, PLUS);
+ r[A->subset()] -= tmp;
+ res.resid = sqrt(norm2(r, A->subset())) / sqrt(norm2(chi,A->subset()));
+ }
+ else {
+ QDPIO::cout << "Chroma <-> QUDA Solution Check disabled. Using (trusting) QUDA Residuum\n";
+ }
+
+ QDPIO::cout << "QUDA_"<< solver_string <<"_CLOVER_SOLVER: " << res.n_count << " iterations. Relative Rsd = " << res.resid << std::endl;
+
+ // Convergence Check/Blow Up
+ if ( ! invParam.SilentFailP ) {
+ if ( toBool( res.resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
+ QDPIO::cerr << "ERROR: QUDA Solver residuum is outside tolerance: QUDA resid="<< res.resid << " Desired =" << invParam.RsdTarget << " Max Tolerated = " << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
+ QDP_abort(1);
+ }
+ }
+
+ END_CODE();
+ return res;
+ }
+
+
+
+ std::vector operator() (const std::vector>& psi, const std::vector>& chi) const override
+ {
+
+ START_CODE();
+ QDPIO::cout << "Entering MRHS solution: N_src = " << chi.size() << "\n";
+
+ std::vector res(chi.size());
+ if( psi.size() != chi.size() ) {
+ QDPIO::cout << "Number of sources does not match number of solutions\n";
+ QDPIO::cout << "psi.size() = " << psi.size() << " but chi.size() = " << chi.size() << "\n";
+ QDP_abort(1);
+ }
+
+ StopWatch swatch;
+ swatch.start();
+
+ if ( invParam.axialGaugeP ) {
+ QDPIO::cerr << "Multi RHS solve in axial gauge not yet implemented\n";
+ QDP_abort(1);
+ }
+
+ qudaInvertMultiSrc( psi, chi, res);
+
+ swatch.stop();
+
+ // Check solutions
+ if( invParam.SolutionCheckP ) {
+ T r;
+ T tmp;
+ for(int soln =0; soln < psi.size(); soln++) {
+ r[A->subset()]=*(chi[ soln ]);
+ (*A)(tmp, *(psi[soln]), PLUS);
+ r[A->subset()] -= tmp;
+ res[soln].resid = sqrt(norm2(r, A->subset())) / sqrt(norm2(*(chi[soln]),A->subset()));
+ }
+ }
+ else {
+ QDPIO::cout << "Chroma <-> QUDA Solution Check disabled. Using (trusting) QUDA Residua\n";
+ }
+
+
+ for(int soln=0; soln < psi.size(); soln++) {
+ QDPIO::cout << "QUDA_"<< solver_string <<"_CLOVER_SOLVER: solution " << soln <<
+ " : " << res[soln].n_count << " iterations. Relative Rsd = " << res[soln].resid << std::endl;
+
+ // Convergence Check/Blow Up
+ if ( ! invParam.SilentFailP ) {
+ if ( toBool( res[soln].resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
+ QDPIO::cerr << "ERROR: QUDA Solver residuum for solution " << soln
+ << " is outside tolerance: QUDA resid="<< res[soln].resid << " Desired ="
+ << invParam.RsdTarget << " Max Tolerated = "
+ << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
+ QDP_abort(1);
+ }
+ }
+ }
+
+ END_CODE();
+ return res;
+ }
+
+ private:
+ // Hide default constructor
+ LinOpSysSolverQUDAClover() {}
+
+ U GFixMat;
+ QudaPrecision_s cpu_prec;
+ QudaPrecision_s gpu_prec;
+ QudaPrecision_s gpu_half_prec;
+
+ Handle< LinearOperator > A;
+ const SysSolverQUDACloverParams invParam;
+ QudaGaugeParam q_gauge_param;
+ QudaInvertParam quda_inv_param;
+
+ SystemSolverResults_t qudaInvert( const T& chi_s, T& psi_s) const ;
+
+ void qudaInvertMultiSrc( const std::vector>& psi,
+ const std::vector>& chi,
+ std::vector& res) const;
+
+ std::string solver_string;
+ };
} // End namespace
-
#endif // BUILD_QUDA
#endif
diff --git a/lib/actions/ferm/invert/quda_solvers/syssolver_linop_exp_clover_quda_multigrid_w.cc b/lib/actions/ferm/invert/quda_solvers/syssolver_linop_exp_clover_quda_multigrid_w.cc
new file mode 100644
index 0000000000..5f75968bf3
--- /dev/null
+++ b/lib/actions/ferm/invert/quda_solvers/syssolver_linop_exp_clover_quda_multigrid_w.cc
@@ -0,0 +1,253 @@
+/*! \file
+ * \QUDA MULTIGRID ExpClover solver.
+ */
+// comment
+#include "actions/ferm/invert/syssolver_linop_factory.h"
+#include "actions/ferm/invert/syssolver_linop_aggregate.h"
+#include "actions/ferm/invert/quda_solvers/syssolver_quda_multigrid_clover_params.h"
+#include "actions/ferm/invert/quda_solvers/syssolver_linop_exp_clover_quda_multigrid_w.h"
+#include "io/aniso_io.h"
+
+
+#include "handle.h"
+#include "actions/ferm/fermstates/periodic_fermstate.h"
+#include "actions/ferm/linop/lwldslash_w.h"
+#include "meas/glue/mesplq.h"
+// QUDA Headers
+#include
+// #include
+#include "actions/ferm/invert/quda_solvers/quda_mg_utils.h"
+
+namespace Chroma
+{
+ namespace LinOpSysSolverQUDAMULTIGRIDExpCloverEnv
+ {
+
+ //! Anonymous namespace
+ namespace
+ {
+ //! Name to be used
+ const std::string name("QUDA_MULTIGRID_EXP_CLOVER_INVERTER");
+
+ //! Local registration flag
+ bool registered = false;
+ }
+
+
+
+ LinOpSystemSolver* createFerm(XMLReader& xml_in,
+ const std::string& path,
+ Handle< FermState< LatticeFermion, multi1d, multi1d > > state,
+
+ Handle< LinearOperator > A)
+ {
+ return new LinOpSysSolverQUDAMULTIGRIDExpClover(A, state,SysSolverQUDAMULTIGRIDCloverParams(xml_in, path));
+ }
+
+ //! Register all the factories
+ bool registerAll()
+ {
+ bool success = true;
+ if (! registered)
+ {
+ success &= Chroma::TheLinOpFermSystemSolverFactory::Instance().registerObject(name, createFerm);
+ registered = true;
+ }
+ return success;
+ }
+ }
+
+ SystemSolverResults_t
+ LinOpSysSolverQUDAMULTIGRIDExpClover::qudaInvert(const ExpCloverTermT& clover,
+ const ExpCloverTermT& invclov,
+ const T& chi_s,
+ T& psi_s) const{
+
+ SystemSolverResults_t ret;
+ const auto& sub = A->subset();
+ T mod_chi;
+
+ // Copy source into mod_chi, and zero the off-parity
+ if( is_precond ) {
+ mod_chi[rb[0]] = zero;
+ if( invParam.asymmetricP ) {
+ //
+ // symmetric
+ // Solve with M_symm = 1 - A^{-1}_oo D A^{-1}ee D
+ //
+ // Chroma M = A_oo ( M_symm )
+ //
+ // So M x = b => A_oo (M_symm) x = b
+ // => M_symm x = A^{-1}_oo b = chi_mod
+ invclov.apply(mod_chi, chi_s, PLUS, 1);
+ }
+ else {
+ mod_chi[rb[1]] = chi_s;
+ }
+ }
+ else {
+ mod_chi = chi_s;
+ }
+#ifndef BUILD_QUDA_DEVIFACE_SPINOR
+ void* spinorIn =(void *)&(mod_chi.elem(sub.start()).elem(0).elem(0).real());
+ void* spinorOut =(void *)&(psi_s.elem(sub.start()).elem(0).elem(0).real());
+#else
+ // void* spinorIn = GetMemoryPtr( mod_chi.getId() );
+ // void* spinorOut = GetMemoryPtr( psi_s.getId() );
+ void* spinorIn;
+ void* spinorOut;
+ GetMemoryPtr2(spinorIn,spinorOut,mod_chi.getId(),psi_s.getId())
+
+#endif
+
+ // Do the solve here
+ StopWatch swatch1;
+ swatch1.reset();
+ swatch1.start();
+ invertQuda(spinorOut, spinorIn, (QudaInvertParam*)&quda_inv_param);
+ swatch1.stop();
+
+
+ QDPIO::cout << solver_string<< "time="<< quda_inv_param.secs <<" s" ;
+ QDPIO::cout << "\tPerformance="<< quda_inv_param.gflops/quda_inv_param.secs<<" GFLOPS" ;
+ QDPIO::cout << "\tTotal Time (incl. load gauge)=" << swatch1.getTimeInSeconds() <<" s"<& invclov,
+ const std::vector>& psi_s,
+ const std::vector>& chi_s,
+ std::vector& res) const {
+
+
+ StopWatch source_prep;
+ source_prep.reset();
+ source_prep.start();
+
+ multi1d mod_chi(chi_s.size());
+ const auto& sub = A->subset();
+
+ for(int i=0; i < chi_s.size(); i++) {
+
+ if( is_precond ) {
+ // Copy source into mod_chi, and zero the off-parity
+ mod_chi[i][rb[0]] = zero;
+
+ if( invParam.asymmetricP ) {
+ //
+ // symmetric
+ // Solve with M_symm = 1 - A^{-1}_oo D A^{-1}ee D
+ //
+ // Chroma M = A_oo ( M_symm )
+ //
+ // So M x = b => A_oo (M_symm) x = b
+ // => M_symm x = A^{-1}_oo b = chi_mod
+ invclov.apply(mod_chi[i], *(chi_s[i]), PLUS, 1);
+ }
+ else {
+ mod_chi[i][rb[1]] = *(chi_s[i]);
+ }
+ }
+ else {
+ mod_chi[i] = *(chi_s[i]);
+ }
+ }
+
+ std::vector spinorIn(chi_s.size());
+ std::vector spinorOut(psi_s.size());
+
+ int N_src = chi_s.size();
+#ifndef BUILD_QUDA_DEVIFACE_SPINOR
+ // Regular non-qdpjit approach. Just collect the pointers
+ for(int soln=0; soln < chi_s.size(); soln++) {
+ spinorIn[soln] = (void *)&(mod_chi[soln].elem(sub.start()).elem(0).elem(0).real());
+ spinorOut[soln] = (void *)&(psi_s[soln]->elem(sub.start()).elem(0).elem(0).real());
+ }
+#else
+ std::vector ids(2*N_src);
+
+ for(int soln=0; soln < N_src; soln++) {
+ ids[soln] = mod_chi[soln].getId();
+ ids[N_src+soln] = psi_s[soln]->getId();
+ }
+
+ // Grab all the keys
+ auto dev_ptr = QDP_get_global_cache().get_dev_ptrs( multi1d( ids.data(), ids.size()) );
+
+
+ for(int soln=0; soln < N_src; soln++) {
+ spinorIn[soln] = dev_ptr(soln);
+ spinorOut[soln] = dev_ptr(N_src+soln);
+ }
+ source_prep.stop();
+#endif
+
+ // Local quda_inv_param (?)
+ // Relies on quda_inv_param being just a dumb/struct and or copyable
+ QudaInvertParam local_quda_inv_param = quda_inv_param ;
+ local_quda_inv_param.num_src = mod_chi.size();
+
+ // No grid splitting for MG yet, so commenting that out
+#if 0
+ int totalSubgrids=1;
+ const multi1d& machine_size=QDP::Layout::logicalSize();
+
+ for (int i = 0; i < Nd; i++) {
+ local_quda_inv_param.split_grid[i] = invParam.GridSplitDims[i];
+ totalSubgrids *= invParam.GridSplitDims[i];
+ if ( machine_size[i] % invParam.GridSplitDims[i] != 0 ) {
+ QDPIO::cerr << "The split-grid-subgrid dimensions must divide the number ranks in each dimension exactly\n";
+ QDPIO::cerr << "Currently this is not the case: dim=" << i << " machine_size["<
+
+#include "handle.h"
+#include "state.h"
+#include "syssolver.h"
+#include "linearop.h"
+#include "actions/ferm/fermbcs/simple_fermbc.h"
+#include "actions/ferm/fermstates/periodic_fermstate.h"
+#include "actions/ferm/invert/quda_solvers/syssolver_quda_multigrid_clover_params.h"
+#include "actions/ferm/linop/exp_clover_term_w.h"
+#include "meas/gfix/temporal_gauge.h"
+#include "io/aniso_io.h"
+#include "quda_mg_utils.h"
+#include
+#include
+#include "util/gauge/reunit.h"
+#ifdef QDP_IS_QDPJIT
+#include "actions/ferm/invert/quda_solvers/qdpjit_memory_wrapper.h"
+#endif
+
+//#include
+
+namespace Chroma
+{
+
+ //! Richardson system solver namespace
+ namespace LinOpSysSolverQUDAMULTIGRIDExpCloverEnv
+ {
+ //! Register the syssolver
+ bool registerAll();
+ }
+
+ //! Solve a ExpClover Fermion System using the QUDA inverter
+ /*! \ingroup invert
+ *** WARNING THIS SOLVER WORKS FOR ExpClover FERMIONS ONLY ***
+ */
+
+ class LinOpSysSolverQUDAMULTIGRIDExpClover : public LinOpSystemSolver
+ {
+ public:
+ typedef LatticeFermion T;
+ typedef LatticeColorMatrix U;
+ typedef multi1d Q;
+
+ typedef LatticeFermionF TF;
+ typedef LatticeColorMatrixF UF;
+ typedef multi1d QF;
+
+ typedef LatticeFermionF TD;
+ typedef LatticeColorMatrixF UD;
+ typedef multi1d QD;
+
+ typedef WordType::Type_t REALT;
+ //! Constructor
+ /*!
+ * \param M_ Linear operator ( Read )
+ * \param invParam inverter parameters ( Read )
+ */
+ LinOpSysSolverQUDAMULTIGRIDExpClover(Handle< LinearOperator > A_,
+ Handle< FermState > state_,
+ const SysSolverQUDAMULTIGRIDCloverParams& invParam_) :
+ A(A_), is_precond( true ), invParam(invParam_), clov(new ExpCloverTermT() ), invclov(new ExpCloverTermT())
+ {
+ StopWatch init_swatch;
+ init_swatch.reset(); init_swatch.start();
+ // Set the solver string
+ {
+ std::ostringstream solver_string_stream;
+ solver_string_stream << "QUDA_MULTIGRID_EXP_CLOVER_LINOP_SOLVER( "
+ << invParam.SaveSubspaceID << " ): ";
+ solver_string = solver_string_stream.str();
+
+ }
+ QDPIO::cout << solver_string << "Initializing" << std::endl;
+
+ // FOLLOWING INITIALIZATION in test QUDA program
+ const auto& sub = A->subset();
+ if( sub.start() == all.start() && sub.numSiteTable() == all.numSiteTable()) {
+ is_precond = false;
+ }
+
+ // 1) work out cpu_prec, cuda_prec, cuda_prec_sloppy
+ int s = sizeof( WordType::Type_t );
+ if (s == 4) {
+ cpu_prec = QUDA_SINGLE_PRECISION;
+ }
+ else {
+ cpu_prec = QUDA_DOUBLE_PRECISION;
+ }
+
+ // Work out GPU precision
+ switch( invParam.cudaPrecision ) {
+ case HALF:
+ gpu_prec = QUDA_HALF_PRECISION;
+ break;
+ case SINGLE:
+ gpu_prec = QUDA_SINGLE_PRECISION;
+ break;
+ case DOUBLE:
+ gpu_prec = QUDA_DOUBLE_PRECISION;
+ break;
+ default:
+ gpu_prec = cpu_prec;
+ break;
+ }
+
+ // Work out GPU Sloppy precision
+ // Default: No Sloppy
+ switch( invParam.cudaSloppyPrecision ) {
+ case HALF:
+ gpu_half_prec = QUDA_HALF_PRECISION;
+ break;
+ case SINGLE:
+ gpu_half_prec = QUDA_SINGLE_PRECISION;
+ break;
+ case DOUBLE:
+ gpu_half_prec = QUDA_DOUBLE_PRECISION;
+ break;
+ default:
+ gpu_half_prec = gpu_prec;
+ break;
+ }
+
+ // 2) pull 'new; GAUGE and Invert params
+ q_gauge_param = newQudaGaugeParam();
+ quda_inv_param = newQudaInvertParam();
+
+ // 3) set lattice size
+ const multi1d& latdims = Layout::subgridLattSize();
+
+ q_gauge_param.X[0] = latdims[0];
+ q_gauge_param.X[1] = latdims[1];
+ q_gauge_param.X[2] = latdims[2];
+ q_gauge_param.X[3] = latdims[3];
+
+ // 4) - deferred (anisotropy)
+
+ // 5) - set QUDA_WILSON_LINKS, QUDA_GAUGE_ORDER
+ q_gauge_param.type = QUDA_WILSON_LINKS;
+#ifndef BUILD_QUDA_DEVIFACE_GAUGE
+ q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; // gauge[mu], p
+#else
+ q_gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
+ q_gauge_param.gauge_order = QUDA_QDPJIT_GAUGE_ORDER;
+#endif
+
+ // 6) - set t_boundary
+ // Convention: BC has to be applied already
+ // This flag just tells QUDA that this is so,
+ // so that QUDA can take care in the reconstruct
+ if( invParam.AntiPeriodicT ) {
+ q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
+ }
+ else {
+ q_gauge_param.t_boundary = QUDA_PERIODIC_T;
+ }
+
+ // Set cpu_prec, cuda_prec, reconstruct and sloppy versions
+ q_gauge_param.cpu_prec = cpu_prec;
+ q_gauge_param.cuda_prec = gpu_prec;
+
+ switch( invParam.cudaReconstruct ) {
+ case RECONS_NONE:
+ q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
+ break;
+ case RECONS_8:
+ q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
+ break;
+ case RECONS_12:
+ q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
+ break;
+ default:
+ q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
+ break;
+ };
+
+ q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
+
+ // Default. This may be overwritten by inner params
+ q_gauge_param.cuda_prec_precondition = gpu_half_prec;
+
+ switch( invParam.cudaSloppyReconstruct ) {
+ case RECONS_NONE:
+ q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
+ break;
+ case RECONS_8:
+ q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
+ break;
+ case RECONS_12:
+ q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
+ break;
+ default:
+ q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
+ break;
+ };
+ q_gauge_param.reconstruct_precondition = q_gauge_param.reconstruct_sloppy;
+ // Gauge fixing:
+
+ // These are the links
+ // They may be smeared and the BC's may be applied
+ Q links_single(Nd);
+
+ // Now downcast to single prec fields.
+ for(int mu=0; mu < Nd; mu++) {
+ links_single[mu] = (state_->getLinks())[mu];
+ }
+
+ // GaugeFix
+ if( invParam.axialGaugeP ) {
+ temporalGauge(links_single, GFixMat, Nd-1);
+ for(int mu=0; mu < Nd; mu++) {
+ links_single[mu] = GFixMat*(state_->getLinks())[mu]*adj(shift(GFixMat, FORWARD, mu));
+ }
+ q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
+ }
+ else {
+ // No GaugeFix
+ q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;// No Gfix yet
+ }
+
+ // deferred 4) Gauge Anisotropy
+ const AnisoParam_t& aniso = invParam.CloverParams.anisoParam;
+ if( aniso.anisoP ) { // Anisotropic case
+ Real gamma_f = aniso.xi_0 / aniso.nu;
+ q_gauge_param.anisotropy = toDouble(gamma_f);
+ }
+ else {
+ q_gauge_param.anisotropy = 1.0;
+ }
+
+ // MAKE FSTATE BEFORE RESCALING links_single
+ // Because the clover term expects the unrescaled links...
+ Handle > fstate( new PeriodicFermState(links_single));
+
+ if( aniso.anisoP ) { // Anisotropic case
+ multi1d cf=makeFermCoeffs(aniso);
+ for(int mu=0; mu < Nd; mu++) {
+ links_single[mu] *= cf[mu];
+ }
+ }
+
+ // Now onto the inv param:
+ // Dslash type
+ quda_inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
+
+ // Hardwire to GCR
+ quda_inv_param.inv_type = QUDA_GCR_INVERTER;
+
+
+ quda_inv_param.kappa = 0.5;
+ quda_inv_param.clover_coeff = 1.0; // Dummy, not used
+ quda_inv_param.Ls = 1;
+
+ quda_inv_param.tol = toDouble(invParam.RsdTarget);
+ quda_inv_param.maxiter = invParam.MaxIter;
+ quda_inv_param.reliable_delta = toDouble(invParam.Delta);
+ quda_inv_param.pipeline = invParam.Pipeline;
+
+ // Solution type
+ //quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
+ //Taken from invert test.
+ quda_inv_param.solution_type = is_precond ? QUDA_MATPC_SOLUTION : QUDA_MAT_SOLUTION;
+ quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
+ quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD; // Always
+
+ quda_inv_param.dagger = QUDA_DAG_NO;
+ quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
+
+ quda_inv_param.cpu_prec = cpu_prec;
+ quda_inv_param.cuda_prec = gpu_prec;
+ quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
+ quda_inv_param.cuda_prec_precondition = gpu_half_prec;
+
+ //
+ //Done...
+ quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
+ quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
+
+#ifndef BUILD_QUDA_DEVIFACE_SPINOR
+ quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
+ quda_inv_param.input_location = QUDA_CPU_FIELD_LOCATION;
+ quda_inv_param.output_location = QUDA_CPU_FIELD_LOCATION;
+
+#else
+ quda_inv_param.dirac_order = QUDA_QDPJIT_DIRAC_ORDER;
+ quda_inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
+ quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
+#endif
+
+
+ // Setup padding
+
+ multi1d face_size(4);
+ face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
+ face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
+ face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
+ face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
+
+ int max_face = face_size[0];
+ for(int i=1; i <=3; i++) {
+ if ( face_size[i] > max_face ) {
+ max_face = face_size[i];
+ }
+ }
+
+ q_gauge_param.ga_pad = max_face;
+
+ // ExpClover precision and order
+ quda_inv_param.clover_cpu_prec = cpu_prec;
+ quda_inv_param.clover_cuda_prec = gpu_prec;
+ quda_inv_param.clover_cuda_prec_sloppy = gpu_half_prec;
+ quda_inv_param.clover_cuda_prec_precondition = gpu_half_prec;
+ if( invParam.MULTIGRIDParamsP ) {
+ const MULTIGRIDSolverParams& ip = *(invParam.MULTIGRIDParams);
+
+ // Set preconditioner precision
+ switch( ip.prec ) {
+ case HALF:
+ quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
+ quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
+ q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
+ break;
+
+ case SINGLE:
+ quda_inv_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
+ quda_inv_param.clover_cuda_prec_precondition = QUDA_SINGLE_PRECISION;
+ q_gauge_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
+ break;
+
+ case DOUBLE:
+ quda_inv_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
+ quda_inv_param.clover_cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
+ q_gauge_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
+ break;
+ default:
+ quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
+ quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
+ q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
+ break;
+ }
+
+ switch( ip.reconstruct ) {
+ case RECONS_NONE:
+ q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
+ break;
+ case RECONS_8:
+ q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_8;
+ break;
+ case RECONS_12:
+ q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
+ break;
+ default:
+ q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
+ break;
+ };
+ }
+ // Set up the links
+ void* gauge[4];
+
+#ifndef BUILD_QUDA_DEVIFACE_GAUGE
+ for(int mu=0; mu < Nd; mu++) {
+ gauge[mu] = (void *)&(links_single[mu].elem(all.start()).elem().elem(0,0).real());
+ }
+#else
+ //gauge[mu] = GetMemoryPtr( links_single[mu].getId() );
+ GetMemoryPtrGauge(gauge,links_single);
+#endif
+
+ loadGaugeQuda((void *)gauge, &q_gauge_param);
+
+ MULTIGRIDSolverParams ip = *(invParam.MULTIGRIDParams);
+ //
+ quda_inv_param.tol_precondition = toDouble(ip.tol[0]);
+ quda_inv_param.maxiter_precondition = ip.maxIterations[0];
+ quda_inv_param.gcrNkrylov = ip.outer_gcr_nkrylov;
+ quda_inv_param.residual_type = static_cast(QUDA_L2_RELATIVE_RESIDUAL);
+
+
+ //Replacing above with what's in the invert test.
+ switch( ip.schwarzType ) {
+ case ADDITIVE_SCHWARZ :
+ quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
+ break;
+ case MULTIPLICATIVE_SCHWARZ :
+ quda_inv_param.schwarz_type = QUDA_MULTIPLICATIVE_SCHWARZ;
+ break;
+ default:
+ quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
+ break;
+ }
+ quda_inv_param.precondition_cycle = 1;
+ //Invert test always sets this to 1.
+
+
+ if( invParam.verboseP ) {
+ quda_inv_param.verbosity = QUDA_VERBOSE;
+ }
+ else {
+ quda_inv_param.verbosity = QUDA_SUMMARIZE;
+ }
+
+ quda_inv_param.verbosity_precondition = QUDA_SILENT;
+
+ quda_inv_param.inv_type_precondition = QUDA_MG_INVERTER;
+
+ QDPIO::cout<< solver_string << "Basic MULTIGRID params copied."<create(fstate, invParam_.CloverParams);
+
+ // Don't recompute, just copy
+ invclov->create(fstate, invParam_.CloverParams);
+
+ QDPIO::cout << solver_string << "Inverting ExpCloverTerm" << std::endl;
+ invclov->choles(0);
+ invclov->choles(1);
+
+#ifndef BUILD_QUDA_DEVIFACE_CLOVER
+#warning "NOT USING QUDA DEVICE IFACE"
+ quda_inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
+
+ multi1d > packed_clov;
+
+
+ packed_clov.resize(all.siteTable().size());
+
+ clov->packForQUDA(packed_clov, 0);
+ clov->packForQUDA(packed_clov, 1);
+
+ // Always need inverse
+ multi1d > packed_invclov(all.siteTable().size());
+ invclov->packForQUDA(packed_invclov, 0);
+ invclov->packForQUDA(packed_invclov, 1);
+
+ loadCloverQuda(&(packed_clov[0]), &(packed_invclov[0]), &quda_inv_param);
+
+#else
+
+#warning "USING QUDA DEVICE IFACE"
+ quda_inv_param.clover_location = QUDA_CUDA_FIELD_LOCATION;
+ quda_inv_param.clover_order = QUDA_QDPJIT_CLOVER_ORDER;
+
+ void *clover[2];
+ void *cloverInv[2];
+
+ GetMemoryPtrClover(clov->getOffId(),clov->getDiaId(),invclov->getOffId(),invclov->getDiaId());
+
+ loadCloverQuda( (void*)(clover), (void *)(cloverInv), &quda_inv_param);
+
+#endif
+
+ quda_inv_param.omega = toDouble(ip.relaxationOmegaOuter);
+
+
+// merged from mdgam_clover_quda_multigrid, begin
+ if(TheNamedObjMap::Instance().check(invParam.SaveSubspaceID))
+ {
+ StopWatch update_swatch;
+ update_swatch.reset(); update_swatch.start();
+ // Subspace ID exists add it to mg_state
+ QDPIO::cout<< solver_string <<"Recovering subspace..."<(invParam.SaveSubspaceID);
+ for(int j=0; j < ip.mg_levels-1;++j) {
+ (subspace_pointers->mg_param).setup_maxiter_refresh[j] = 0;
+ }
+ updateMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
+ update_swatch.stop();
+
+ QDPIO::cout << solver_string << " subspace_update_time = "
+ << update_swatch.getTimeInSeconds() << " sec. " << std::endl;
+ }
+ else
+ {
+ // Create the subspace.
+ StopWatch create_swatch;
+ create_swatch.reset(); create_swatch.start();
+ QDPIO::cout << solver_string << "Creating Subspace" << std::endl;
+ subspace_pointers = QUDAMGUtils::create_subspace(invParam);
+ XMLBufferWriter file_xml;
+ push(file_xml, "FileXML");
+ pop(file_xml);
+
+ int foo = 5;
+
+ XMLBufferWriter record_xml;
+ push(record_xml, "RecordXML");
+ write(record_xml, "foo", foo);
+ pop(record_xml);
+
+
+ TheNamedObjMap::Instance().create< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
+ TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setFileXML(file_xml);
+ TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setRecordXML(record_xml);
+
+ TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID) = subspace_pointers;
+ create_swatch.stop();
+ QDPIO::cout << solver_string << " subspace_create_time = "
+ << create_swatch.getTimeInSeconds() << " sec. " << std::endl;
+
+ }
+ quda_inv_param.preconditioner = subspace_pointers->preconditioner;
+// merged from mdgam_clover_quda_multigrid, end
+
+ init_swatch.stop();
+ QDPIO::cout << solver_string << "init_time = "
+ << init_swatch.getTimeInSeconds() << " sec. " << std::endl;
+
+ }
+
+ //! Destructor is automatic
+ ~LinOpSysSolverQUDAMULTIGRIDExpClover()
+ {
+ QDPIO::cout << solver_string << "Destructing" << std::endl;
+ quda_inv_param.preconditioner = nullptr;
+ subspace_pointers = nullptr;
+ freeGaugeQuda();
+ freeCloverQuda();
+// destroyMultigridQuda(quda_inv_param.preconditioner);
+ }
+
+ //! Return the subset on which the operator acts
+ const Subset& subset() const {return A->subset();}
+
+ //! Solver the linear system
+ /*!
+ * \param psi solution ( Modify )
+ * \param chi source ( Read )
+ * \return syssolver results
+ */
+ SystemSolverResults_t operator() (T& psi, const T& chi) const
+ {
+ SystemSolverResults_t res;
+
+ START_CODE();
+ StopWatch swatch;
+ swatch.start();
+
+ psi = zero; // Zero initial guess
+ // T MdagChi;
+
+ // This is a CGNE. So create new RHS
+ // (*A)(MdagChi, chi, MINUS);
+ // Handle< LinearOperator > MM(new MdagMLinOp(A));
+ if ( invParam.axialGaugeP ) {
+ T g_chi,g_psi;
+
+ // Gauge Fix source and initial guess
+ g_chi[ A->subset() ] = GFixMat * chi;
+ g_psi[ A->subset() ] = GFixMat * psi;
+ res = qudaInvert(*clov,
+ *invclov,
+ g_chi,
+ g_psi);
+ psi[ A->subset() ] = adj(GFixMat)*g_psi;
+
+ }
+ else {
+ res = qudaInvert(*clov,
+ *invclov,
+ chi,
+ psi);
+ }
+
+ swatch.stop();
+
+ if( invParam.SolutionCheckP ) {
+ T r;
+ r[A->subset()]=chi;
+ T tmp;
+ (*A)(tmp, psi, PLUS);
+ r[A->subset()] -= tmp;
+ res.resid = sqrt(norm2(r, A->subset()))/sqrt(norm2(chi,A->subset()));
+ }
+ else {
+ QDPIO::cout << "Chroma <-> QUDA solution check disabled. Using (trusting) QUDA residuum\n";
+ }
+
+ QDPIO::cout << solver_string << res.n_count << " iterations. Relative Rsd = " << res.resid << std::endl;
+
+ // Convergence Check/Blow Up
+ if ( ! invParam.SilentFailP ) {
+ if ( toBool( res.resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
+ QDPIO::cerr << solver_string << "ERROR: Solver residuum is outside tolerance: QUDA resid="<< res.resid << " Desired =" << invParam.RsdTarget << " Max Tolerated = " << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
+ QDP_abort(1);
+ }
+ }
+
+ END_CODE();
+ return res;
+ }
+
+ std::vector operator() (const std::vector>& psi, const std::vector>& chi) const override
+ {
+
+ START_CODE();
+ QDPIO::cout << "Entering MRHS solution: N_src = " << chi.size() << "\n";
+
+ std::vector res(chi.size());
+ if( psi.size() != chi.size() ) {
+ QDPIO::cout << "Number of sources does not match number of solutions\n";
+ QDPIO::cout << "psi.size() = " << psi.size() << " but chi.size() = " << chi.size() << "\n";
+ QDP_abort(1);
+ }
+
+ StopWatch swatch;
+ swatch.start();
+
+ if ( invParam.axialGaugeP ) {
+ QDPIO::cerr << "Multi RHS solve in axial gauge not yet implemented\n";
+ QDP_abort(1);
+ }
+
+ qudaInvertMultiSrc(*invclov, psi, chi, res);
+
+ swatch.stop();
+
+ // Check solutions -- if desired
+
+ if( invParam.SolutionCheckP ) {
+ for(int soln =0; soln < psi.size(); soln++) {
+ T r;
+ r[A->subset()]=*(chi[ soln ]);
+ T tmp;
+ (*A)(tmp, *(psi[soln]), PLUS);
+ r[A->subset()] -= tmp;
+ res[soln].resid = sqrt(norm2(r, A->subset()))/sqrt(norm2(*(chi[soln]),A->subset()));
+ }
+ }
+ else {
+ QDPIO::cout << "Chroma <-> QUDA solution check disabled. Using (trusting) QUDA residua\n";
+ }
+
+ for(int soln=0; soln < psi.size(); soln++ ) {
+ QDPIO::cout << "QUDA_"<< solver_string <<" solution " << soln <<
+ " : " << res[soln].n_count << " iterations. Relative Rsd = " << res[soln].resid << std::endl;
+
+ // Convergence Check/Blow Up
+ if ( ! invParam.SilentFailP ) {
+ if ( toBool( res[soln].resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
+ QDPIO::cerr << "ERROR: QUDA Solver residuum for solution " << soln
+ << " is outside tolerance: QUDA resid="<< res[soln].resid << " Desired ="
+ << invParam.RsdTarget << " Max Tolerated = "
+ << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
+ QDP_abort(1);
+ }
+ }
+ }
+
+
+ END_CODE();
+ return res;
+ }
+
+ private:
+ // Hide default constructor
+ LinOpSysSolverQUDAMULTIGRIDExpClover() {}
+
+#if 1
+ Q links_orig;
+#endif
+ bool is_precond;
+
+ U GFixMat;
+ QudaPrecision_s cpu_prec;
+ QudaPrecision_s gpu_prec;
+ QudaPrecision_s gpu_half_prec;
+
+ Handle< LinearOperator > A;
+ const SysSolverQUDAMULTIGRIDCloverParams invParam;
+ QudaGaugeParam q_gauge_param;
+ QudaInvertParam quda_inv_param;
+ mutable QUDAMGUtils::MGSubspacePointers* subspace_pointers;
+
+ Handle< ExpCloverTermT > clov;
+ Handle< ExpCloverTermT > invclov;
+
+ SystemSolverResults_t qudaInvert(const ExpCloverTermT& clover,
+ const ExpCloverTermT& invclov,
+ const T& chi_s,
+ T& psi_s
+ )const;
+
+ void qudaInvertMultiSrc(const ExpCloverTermT& invclov,
+ const std::vector>& psi_s,
+ const std::vector>& chi_s,
+ std::vector& res) const;
+
+ std::string solver_string;
+ };
+
+} // End namespace
+
+#endif // BUILD_QUDA
+#endif
+
diff --git a/lib/actions/ferm/invert/quda_solvers/syssolver_linop_nef_quda_w.h b/lib/actions/ferm/invert/quda_solvers/syssolver_linop_nef_quda_w.h
index 3e6c830d7d..669a4807cf 100644
--- a/lib/actions/ferm/invert/quda_solvers/syssolver_linop_nef_quda_w.h
+++ b/lib/actions/ferm/invert/quda_solvers/syssolver_linop_nef_quda_w.h
@@ -319,16 +319,6 @@ namespace Chroma
quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
#endif
- // Autotuning
- if( invParam.tuneDslashP ) {
- QDPIO::cout << "Enabling Dslash Autotuning" << std::endl;
- quda_inv_param.tune = QUDA_TUNE_YES;
- }
- else {
- QDPIO::cout << "Disabling Dslash Autotuning" << std::endl;
- quda_inv_param.tune = QUDA_TUNE_NO;
- }
-
// Setup padding
multi1d face_size(4);
diff --git a/lib/actions/ferm/invert/quda_solvers/syssolver_linop_wilson_quda_multigrid_w.h b/lib/actions/ferm/invert/quda_solvers/syssolver_linop_wilson_quda_multigrid_w.h
index c8a7fa436f..85eedcfaed 100644
--- a/lib/actions/ferm/invert/quda_solvers/syssolver_linop_wilson_quda_multigrid_w.h
+++ b/lib/actions/ferm/invert/quda_solvers/syssolver_linop_wilson_quda_multigrid_w.h
@@ -340,19 +340,6 @@ class LinOpSysSolverQUDAMULTIGRIDWilson : public LinOpSystemSolver
quda_inv_param.use_init_guess = QUDA_USE_INIT_GUESS_NO;
quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
- // Autotuning
- if( invParam.tuneDslashP ) {
- QDPIO::cout << "Enabling Dslash Autotuning" << std::endl;
-
- quda_inv_param.tune = QUDA_TUNE_YES;
- }
- else {
- QDPIO::cout << "Disabling Dslash Autotuning" << std::endl;
-
- quda_inv_param.tune = QUDA_TUNE_NO;
- }
-
-
+
// Setup padding
multi1d face_size(4);
face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
diff --git a/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_clover_quda_multigrid_w.h b/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_clover_quda_multigrid_w.h
index 693f6b2dea..85d280a829 100644
--- a/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_clover_quda_multigrid_w.h
+++ b/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_clover_quda_multigrid_w.h
@@ -302,14 +302,6 @@ class MdagMSysSolverQUDAMULTIGRIDClover : public MdagMSystemSolver face_size(4);
face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
diff --git a/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_clover_quda_w.h b/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_clover_quda_w.h
index 60a73c14fe..0bcbe673ee 100644
--- a/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_clover_quda_w.h
+++ b/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_clover_quda_w.h
@@ -382,19 +382,6 @@ class MdagMSysSolverQUDAClover : public MdagMSystemSolver
#endif
- // Autotuning
- if( invParam.tuneDslashP ) {
- QDPIO::cout << "Enabling Dslash Autotuning" << std::endl;
-
- quda_inv_param.tune = QUDA_TUNE_YES;
- }
- else {
- QDPIO::cout << "Disabling Dslash Autotuning" << std::endl;
-
- quda_inv_param.tune = QUDA_TUNE_NO;
- }
-
-
// Setup padding
multi1d face_size(4);
face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
diff --git a/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_exp_clover_quda_multigrid_w.cc b/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_exp_clover_quda_multigrid_w.cc
new file mode 100644
index 0000000000..99c6528b9f
--- /dev/null
+++ b/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_exp_clover_quda_multigrid_w.cc
@@ -0,0 +1,368 @@
+/*! \file
+ * \QUDA MULTIGRID MdagM Clover solver.
+ */
+// comment
+#include "actions/ferm/invert/syssolver_mdagm_factory.h"
+#include "actions/ferm/invert/syssolver_mdagm_aggregate.h"
+#include "actions/ferm/invert/quda_solvers/syssolver_quda_multigrid_clover_params.h"
+#include "actions/ferm/invert/quda_solvers/syssolver_mdagm_exp_clover_quda_multigrid_w.h"
+#include "io/aniso_io.h"
+
+
+#include "handle.h"
+#include "actions/ferm/fermstates/periodic_fermstate.h"
+#include "actions/ferm/linop/lwldslash_w.h"
+#include "meas/glue/mesplq.h"
+// QUDA Headers
+#include
+// #include
+
+#include
+#include
+#include
+#include
+namespace Chroma
+{
+ namespace MdagMSysSolverQUDAMULTIGRIDExpCloverEnv
+ {
+
+ //! Anonymous namespace
+ namespace
+ {
+ //! Name to be used
+ const std::string name("QUDA_MULTIGRID_EXP_CLOVER_INVERTER");
+
+ //! Local registration flag
+ bool registered = false;
+ }
+
+
+
+ MdagMSystemSolver* createFerm(XMLReader& xml_in,
+ const std::string& path,
+ Handle< FermState< LatticeFermion, multi1d, multi1d > > state,
+
+ Handle< LinearOperator > A)
+ {
+ return new MdagMSysSolverQUDAMULTIGRIDExpClover(A, state,SysSolverQUDAMULTIGRIDCloverParams(xml_in, path));
+ }
+
+ //! Register all the factories
+ bool registerAll()
+ {
+ bool success = true;
+ if (! registered)
+ {
+ success &= Chroma::TheMdagMFermSystemSolverFactory::Instance().registerObject(name, createFerm);
+ registered = true;
+ }
+ return success;
+ }
+ }
+
+ SystemSolverResults_t
+ MdagMSysSolverQUDAMULTIGRIDExpClover::qudaInvert(const ExpCloverTermT& clover,
+ const ExpCloverTermT& invclov,
+ const T& chi_s,
+ T& psi_s) const{
+
+ SystemSolverResults_t ret;
+
+ T mod_chi;
+
+ // Copy source into mod_chi, and zero the off-parity
+ mod_chi[rb[0]] = zero;
+
+
+ // This solver always solves with the SYMMETRIC preconditioned
+ // Operator. If we are working with Asymmetric preconditioning
+ // Then we must apply a clover inverse.
+ if( invParam.asymmetricP) {
+
+ //
+ // symmetric
+ // Solve with M_symm = 1 - A^{-1}_oo D A^{-1}ee D
+ //
+ // Chroma M = A_oo ( M_symm )
+ //
+ // So M x = b => A_oo (M_symm) x = b
+ // => M_symm x = A^{-1}_oo b = chi_mod
+ invclov.apply(mod_chi, chi_s, PLUS, 1);
+ }
+ else {
+ // If we work with symmetric preconditioning nothing else needs done
+ mod_chi[rb[1]] = chi_s;
+ }
+
+#ifndef BUILD_QUDA_DEVIFACE_SPINOR
+ void* spinorIn =(void *)&(mod_chi.elem(rb[1].start()).elem(0).elem(0).real());
+ void* spinorOut =(void *)&(psi_s.elem(rb[1].start()).elem(0).elem(0).real());
+#else
+ // void* spinorIn = GetMemoryPtr( mod_chi.getId() );
+ // void* spinorOut = GetMemoryPtr( psi_s.getId() );
+ void* spinorIn;
+ void* spinorOut;
+ GetMemoryPtr2(spinorIn,spinorOut,mod_chi.getId(),psi_s.getId());
+#endif
+
+ // Do the solve here
+ StopWatch swatch1;
+ swatch1.reset();
+ swatch1.start();
+ invertQuda(spinorOut, spinorIn, (QudaInvertParam*)&quda_inv_param);
+ swatch1.stop();
+
+
+
+ QDPIO::cout << solver_string<<"time="<< quda_inv_param.secs <<" s" ;
+ QDPIO::cout << "\tPerformance="<< quda_inv_param.gflops/quda_inv_param.secs<<" GFLOPS" ;
+ QDPIO::cout << "\tTotal Time (incl. load gauge)=" << swatch1.getTimeInSeconds() <<" s"<getLinks());
+ writer.close();
+ }
+
+ // Dump chi
+ {
+ XMLBufferWriter filebuf;
+ XMLBufferWriter recbuf;
+ push( filebuf, "ChiFile" );
+ write( filebuf, "FilePrefix", file_prefix);
+ pop( filebuf);
+
+ push( recbuf, "ChiRecord" );
+ write( recbuf, "FilePrefix", file_prefix);
+ pop( recbuf );
+
+ QDPIO::cout << "Dumping chi (original source) vector to " << chi_filename << std::endl;
+
+ QDPFileWriter writer(filebuf, chi_filename, QDPIO_SINGLEFILE, QDPIO_PARALLEL);
+ write(writer, recbuf, chi);
+ writer.close();
+
+
+ }
+
+ // Dump Y
+ {
+ XMLBufferWriter filebuf;
+ XMLBufferWriter recbuf;
+ push( filebuf, "YFile" );
+ write( filebuf, "FilePrefix", file_prefix);
+ pop( filebuf);
+
+ push( recbuf, "YRecord" );
+ write( recbuf, "FilePrefix", file_prefix);
+ pop( recbuf );
+
+ QDPIO::cout << "Dumping Y (source) vector to " << Y_filename << std::endl;
+
+ QDPFileWriter writer(filebuf, Y_filename, QDPIO_SINGLEFILE, QDPIO_PARALLEL);
+ write(writer, recbuf, Y);
+ writer.close();
+ }
+
+ // Dump MG state
+ {
+ auto mg_params = *(invParam.MULTIGRIDParams);
+ for(int l = 0; l < mg_params.mg_levels; ++l) {
+ std::ostringstream subspace_prefix;
+ subspace_prefix << file_prefix << "_subspace_l" << l;
+
+ // Up to the length of the buffer (256) padded with zeros
+ std::strncpy((subspace_pointers->mg_param).vec_outfile[l], (subspace_prefix.str()).c_str(), 256);
+ // If source string is too long it will be truncated and not null terminated, so null terminate
+ if( subspace_prefix.str().size() > 255 ) { (subspace_pointers->mg_param).vec_outfile[l][255] = '\0'; }
+
+ }
+ // Make sure everyone has thei varibles set before calling dump Multigrid
+ // Strictly speaking I am using a sum as a barrier;
+ double i=10;
+ QDPInternal::globalSum(i);
+
+#ifdef QUDA_MG_DUMP_ENABLED
+ dumpMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
+#endif
+
+ for(int l = 0; l < mg_params.mg_levels; ++l) {
+ (subspace_pointers->mg_param).vec_outfile[l][0] ='\0';
+ }
+ QDPInternal::globalSum(i);
+ }
+ }
+
+
+ unsigned long MdagMSysSolverQUDAMULTIGRIDExpClover::seqno = 0;
+
+ void MdagMSysSolverQUDAMULTIGRIDExpClover::dumpXSolver(const LatticeFermion& chi,
+ const LatticeFermion& Y,
+ const LatticeFermion& X) const
+
+ {
+ // Grab the time - took this C++ way from stackoverflow
+ auto time_value = std::time(nullptr);
+ auto local_time = std::localtime(&time_value);
+ std::ostringstream time_strstream;
+ time_strstream << "./failed_X_solve_" << std::put_time(local_time, "%d-%m-%Y-%H-%M-%S");
+
+
+ std::string file_prefix( time_strstream.str() );
+
+ std::string gauge_filename = file_prefix + "_gauge_field.lime";
+ std::string chi_filename = file_prefix + "_chi.lime";
+ std::string Y_filename = file_prefix + "_Y.lime";
+ std::string X_filename = file_prefix + "_X.lime";
+
+ int foo = 5; // Some rubbish for the XML Files
+ // Dump gauge field
+ {
+ XMLBufferWriter filebuf;
+ XMLBufferWriter recbuf;
+ push( filebuf, "GaugeFile" );
+ write( filebuf, "FilePrefix", file_prefix);
+ pop( filebuf);
+
+ push( recbuf, "GaugeRecord" );
+ write( recbuf, "FilePrefix", file_prefix);
+ pop( recbuf );
+
+ QDPIO::cout << "Dumping gauge links to " << gauge_filename << std::endl;
+
+ QDPFileWriter writer(filebuf,gauge_filename, QDPIO_SINGLEFILE, QDPIO_PARALLEL);
+ write(writer, recbuf, gstate->getLinks());
+ writer.close();
+ }
+
+ // Dump chi
+ {
+ XMLBufferWriter filebuf;
+ XMLBufferWriter recbuf;
+ push( filebuf, "ChiFile" );
+ write( filebuf, "FilePrefix", file_prefix);
+ pop( filebuf);
+
+ push( recbuf, "ChiRecord" );
+ write( recbuf, "FilePrefix", file_prefix);
+ pop( recbuf );
+
+ QDPIO::cout << "Dumping chi (original source) vector to " << chi_filename << std::endl;
+
+ QDPFileWriter writer(filebuf, chi_filename, QDPIO_SINGLEFILE, QDPIO_PARALLEL);
+ write(writer, recbuf, chi);
+ writer.close();
+
+
+ }
+
+ // Dump Y
+ {
+ XMLBufferWriter filebuf;
+ XMLBufferWriter recbuf;
+ push( filebuf, "YFile" );
+ write( filebuf, "FilePrefix", file_prefix);
+ pop( filebuf);
+
+ push( recbuf, "YRecord" );
+ write( recbuf, "FilePrefix", file_prefix);
+ pop( recbuf );
+
+ QDPIO::cout << "Dumping Y (source) vector to " << Y_filename << std::endl;
+
+ QDPFileWriter writer(filebuf, Y_filename, QDPIO_SINGLEFILE, QDPIO_PARALLEL);
+ write(writer, recbuf, Y);
+ writer.close();
+ }
+
+ // Dump final X
+ {
+ XMLBufferWriter filebuf;
+ XMLBufferWriter recbuf;
+ push( filebuf, "XFile" );
+ write( filebuf, "FilePrefix", file_prefix);
+ pop( filebuf);
+
+ push( recbuf, "XRecord" );
+ write( recbuf, "FilePrefix", file_prefix);
+ pop( recbuf );
+
+ QDPIO::cout << "Dumping X (solution) vector to " << X_filename << std::endl;
+
+ QDPFileWriter writer(filebuf, X_filename, QDPIO_SINGLEFILE, QDPIO_PARALLEL);
+ write(writer, recbuf, X);
+ writer.close();
+ }
+
+
+ // Dump MG state
+ {
+ auto mg_params = *(invParam.MULTIGRIDParams);
+ for(int l = 0; l < mg_params.mg_levels; ++l) {
+ std::ostringstream subspace_prefix;
+ subspace_prefix << file_prefix << "_subspace_l" << l;
+
+ // Up to the length of the buffer (256) padded with zeros
+ std::strncpy((subspace_pointers->mg_param).vec_outfile[l], (subspace_prefix.str()).c_str(), 256);
+ // If source string is too long it will be truncated and not null terminated, so null terminate
+ if( subspace_prefix.str().size() > 255 ) { (subspace_pointers->mg_param).vec_outfile[l][255] = '\0'; }
+
+ }
+ // Make sure everyone has thei varibles set before calling dump Multigrid
+ // I use a global sum as a barrier here
+ double i=10;
+ QDPInternal::globalSum(i);
+
+
+#ifdef QUDA_MG_DUMP_ENABLED
+ dumpMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
+#endif
+
+ for(int l = 0; l < mg_params.mg_levels; ++l) {
+ (subspace_pointers->mg_param).vec_outfile[l][0] ='\0';
+ }
+ // I use a global sum as a weak barrier here.
+ QDPInternal::globalSum(i); // Make sure everyone is done
+ }
+ }
+
+
+
+} // namespace
+
diff --git a/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_exp_clover_quda_multigrid_w.h b/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_exp_clover_quda_multigrid_w.h
new file mode 100644
index 0000000000..138600b1b3
--- /dev/null
+++ b/lib/actions/ferm/invert/quda_solvers/syssolver_mdagm_exp_clover_quda_multigrid_w.h
@@ -0,0 +1,1540 @@
+// -*- C++ -*-
+/*! \file
+ * \QUDA MULTIGRID MdagM Clover solver.
+ */
+
+#ifndef __syssolver_mdagm_quda_multigrid_exp_clover_h__
+#define __syssolver_mdagm_quda_multigrid_exp_clover_h__
+
+#include "chroma_config.h"
+#include "chromabase.h"
+#include
+#include
+
+using namespace QDP;
+
+
+
+#include "handle.h"
+#include "state.h"
+#include "syssolver.h"
+#include "linearop.h"
+#include "actions/ferm/fermbcs/simple_fermbc.h"
+#include "actions/ferm/fermstates/periodic_fermstate.h"
+#include "actions/ferm/invert/quda_solvers/syssolver_quda_multigrid_clover_params.h"
+#include "actions/ferm/linop/exp_clover_term_w.h"
+#include "meas/gfix/temporal_gauge.h"
+#include "io/aniso_io.h"
+#include
+#include
+
+#include "lmdagm.h"
+#include "util/gauge/reunit.h"
+#include "actions/ferm/invert/quda_solvers/quda_mg_utils.h"
+#include "actions/ferm/invert/mg_solver_exception.h"
+
+//#include
+#ifdef BUILD_QUDA
+#include
+#ifdef QDP_IS_QDPJIT
+#include "actions/ferm/invert/quda_solvers/qdpjit_memory_wrapper.h"
+#endif
+
+#include "update/molecdyn/predictor/zero_guess_predictor.h"
+#include "update/molecdyn/predictor/quda_predictor.h"
+#include "meas/inline/io/named_objmap.h"
+
+namespace Chroma
+{
+
+namespace MdagMSysSolverQUDAMULTIGRIDExpCloverEnv
+{
+//! Register the syssolver
+bool registerAll();
+
+}
+
+class MdagMSysSolverQUDAMULTIGRIDExpClover : public MdagMSystemSolver
+{
+public:
+ typedef LatticeFermion T;
+ typedef LatticeColorMatrix U;
+ typedef multi1d Q;
+
+ typedef LatticeFermionF TF;
+ typedef LatticeColorMatrixF UF;
+ typedef multi1d QF;
+
+ typedef LatticeFermionF TD;
+ typedef LatticeColorMatrixF UD;
+ typedef multi1d QD;
+
+ typedef WordType::Type_t REALT;
+
+
+
+ MdagMSysSolverQUDAMULTIGRIDExpClover(Handle< LinearOperator > A_,
+ Handle< FermState > state_,
+ const SysSolverQUDAMULTIGRIDCloverParams& invParam_) :
+ A(A_), gstate(state_), invParam(invParam_), clov(new ExpCloverTermT() ), invclov(new ExpCloverTermT())
+ {
+ StopWatch init_swatch;
+ init_swatch.reset(); init_swatch.start();
+
+ // Set the solver string
+ {
+ std::ostringstream solver_string_stream;
+ solver_string_stream << "QUDA_MULTIGRID_CLOVER_MDAGM_SOLVER( Mass = " << invParam.CloverParams.Mass <<" , Id = "
+ << invParam.SaveSubspaceID << " ): ";
+ solver_string = solver_string_stream.str();
+
+ }
+ QDPIO::cout << solver_string << "Initializing" << std::endl;
+
+ // Check free mem
+#if 0
+ size_t free_mem = QUDAMGUtils::getCUDAFreeMem();
+ QDPIO::cout << solver_string << "MEMCHECK: free mem = " << free_mem << std::endl;
+#endif
+ // FOLLOWING INITIALIZATION in test QUDA program
+
+ // 1) work out cpu_prec, cuda_prec, cuda_prec_sloppy
+ int s = sizeof( WordType::Type_t );
+ if (s == 4) {
+ cpu_prec = QUDA_SINGLE_PRECISION;
+ }
+ else {
+ cpu_prec = QUDA_DOUBLE_PRECISION;
+ }
+
+ // Work out GPU precision
+ switch( invParam.cudaPrecision ) {
+ case HALF:
+ gpu_prec = QUDA_HALF_PRECISION;
+ break;
+ case SINGLE:
+ gpu_prec = QUDA_SINGLE_PRECISION;
+ break;
+ case DOUBLE:
+ gpu_prec = QUDA_DOUBLE_PRECISION;
+ break;
+ default:
+ gpu_prec = cpu_prec;
+ break;
+ }
+
+ // Work out GPU Sloppy precision
+ // Default: No Sloppy
+ switch( invParam.cudaSloppyPrecision ) {
+ case HALF:
+ gpu_half_prec = QUDA_HALF_PRECISION;
+ break;
+ case SINGLE:
+ gpu_half_prec = QUDA_SINGLE_PRECISION;
+ break;
+ case DOUBLE:
+ gpu_half_prec = QUDA_DOUBLE_PRECISION;
+ break;
+ default:
+ gpu_half_prec = gpu_prec;
+ break;
+ }
+
+ // 2) pull 'new; GAUGE and Invert params
+ q_gauge_param = newQudaGaugeParam();
+ quda_inv_param = newQudaInvertParam();
+
+ // 3) set lattice size
+ const multi1d& latdims = Layout::subgridLattSize();
+
+ q_gauge_param.X[0] = latdims[0];
+ q_gauge_param.X[1] = latdims[1];
+ q_gauge_param.X[2] = latdims[2];
+ q_gauge_param.X[3] = latdims[3];
+
+ // 4) - deferred (anisotropy)
+
+ // 5) - set QUDA_WILSON_LINKS, QUDA_GAUGE_ORDER
+ q_gauge_param.type = QUDA_WILSON_LINKS;
+#ifndef BUILD_QUDA_DEVIFACE_GAUGE
+ q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; // gauge[mu], p
+#else
+ q_gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
+ q_gauge_param.gauge_order = QUDA_QDPJIT_GAUGE_ORDER;
+#endif
+
+ // 6) - set t_boundary
+ // Convention: BC has to be applied already
+ // This flag just tells QUDA that this is so,
+ // so that QUDA can take care in the reconstruct
+ if( invParam.AntiPeriodicT ) {
+ q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
+ }
+ else {
+ q_gauge_param.t_boundary = QUDA_PERIODIC_T;
+ }
+
+ // Set cpu_prec, cuda_prec, reconstruct and sloppy versions
+ q_gauge_param.cpu_prec = cpu_prec;
+ q_gauge_param.cuda_prec = gpu_prec;
+
+ switch( invParam.cudaReconstruct ) {
+ case RECONS_NONE:
+ q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
+ break;
+ case RECONS_8:
+ q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
+ break;
+ case RECONS_12:
+ q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
+ break;
+ default:
+ q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
+ break;
+ };
+
+ q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
+ q_gauge_param.cuda_prec_precondition = gpu_half_prec;
+
+ switch( invParam.cudaSloppyReconstruct ) {
+ case RECONS_NONE:
+ q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
+ break;
+ case RECONS_8:
+ q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
+ break;
+ case RECONS_12:
+ q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
+ break;
+ default:
+ q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
+ break;
+ };
+
+ // This may be overridden later
+ q_gauge_param.reconstruct_precondition=q_gauge_param.reconstruct_sloppy;
+
+ // Gauge fixing:
+
+ // These are the links
+ // They may be smeared and the BC's may be applied
+ Q links_single(Nd);
+
+ // Now downcast to single prec fields.
+ for(int mu=0; mu < Nd; mu++) {
+ links_single[mu] = (state_->getLinks())[mu];
+ }
+
+ // GaugeFix
+ if( invParam.axialGaugeP ) {
+ temporalGauge(links_single, GFixMat, Nd-1);
+ for(int mu=0; mu < Nd; mu++) {
+ links_single[mu] = GFixMat*(state_->getLinks())[mu]*adj(shift(GFixMat, FORWARD, mu));
+ }
+ q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
+ }
+ else {
+ // No GaugeFix
+ q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;// No Gfix yet
+ }
+
+ // deferred 4) Gauge Anisotropy
+ const AnisoParam_t& aniso = invParam.CloverParams.anisoParam;
+ if( aniso.anisoP ) { // Anisotropic case
+ Real gamma_f = aniso.xi_0 / aniso.nu;
+ q_gauge_param.anisotropy = toDouble(gamma_f);
+ }
+ else {
+ q_gauge_param.anisotropy = 1.0;
+ }
+
+ // MAKE FSTATE BEFORE RESCALING links_single
+ // Because the clover term expects the unrescaled links...
+ Handle > fstate( new PeriodicFermState(links_single));
+
+ if( aniso.anisoP ) { // Anisotropic case
+ multi1d cf=makeFermCoeffs(aniso);
+ for(int mu=0; mu < Nd; mu++) {
+ links_single[mu] *= cf[mu];
+ }
+ }
+
+ // Now onto the inv param:
+ // Dslash type
+ quda_inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
+
+ // Hardwire to GCR
+ quda_inv_param.inv_type = QUDA_GCR_INVERTER;
+ quda_inv_param.compute_true_res = 0;
+
+ quda_inv_param.kappa = 0.5;
+ quda_inv_param.clover_coeff = 1.0; // Dummy, not used
+ quda_inv_param.Ls=1;
+ quda_inv_param.tol = toDouble(invParam.RsdTarget);
+ quda_inv_param.maxiter = invParam.MaxIter;
+ quda_inv_param.reliable_delta = toDouble(invParam.Delta);
+ quda_inv_param.pipeline = invParam.Pipeline;
+
+ quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
+ quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
+
+
+ quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
+
+ quda_inv_param.dagger = QUDA_DAG_NO;
+ quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
+
+ quda_inv_param.cpu_prec = cpu_prec;
+ quda_inv_param.cuda_prec = gpu_prec;
+ quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
+ quda_inv_param.cuda_prec_precondition = gpu_half_prec;
+ quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
+ quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
+
+#ifndef BUILD_QUDA_DEVIFACE_SPINOR
+ quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
+ quda_inv_param.input_location = QUDA_CPU_FIELD_LOCATION;
+ quda_inv_param.output_location = QUDA_CPU_FIELD_LOCATION;
+
+#else
+ quda_inv_param.dirac_order = QUDA_QDPJIT_DIRAC_ORDER;
+ quda_inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
+ quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
+#endif
+
+ // Setup padding
+ multi1d