Skip to content

Commit c3c876d

Browse files
jwaldrop107, ryanmrichard, and Copilot
authored
OpenMP Threading (#145)
* OpenMP refactor of integral generation
* OpenMP optional build
* OpenMP enabled test
* resolve test errors
* Update CMakeLists.txt

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

--------

Co-authored-by: Ryan Richard <ryanmrichard1@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 4577f63 commit c3c876d

File tree

4 files changed

+68
-22
lines changed

4 files changed

+68
-22
lines changed

.github/enable_openmp.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
set(INTEGRALS_ENABLE_OPENMP ON)

.github/workflows/pull_request.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,9 @@ jobs:
3939
with:
4040
compilers: '["gcc-11", "clang-14"]'
4141
repo_toolchain: ".github/enable_sigma.cmake"
42+
43+
test_enable_openmp:
44+
uses: NWChemEx/.github/.github/workflows/test_nwx_library.yaml@master
45+
with:
46+
compilers: '["gcc-11", "clang-14"]'
47+
repo_toolchain: ".github/enable_openmp.cmake"

CMakeLists.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ cmaize_option_list(
3939
BUILD_TESTING OFF "Should we build the tests?"
4040
BUILD_PYBIND11_PYBINDINGS ON "Build pybind11 python3 bindings?"
4141
ENABLE_SIGMA OFF "Should we enable Sigma for uncertainty tracking?"
42+
INTEGRALS_ENABLE_OPENMP OFF "Should we enable OpenMP for threading?"
4243
)
4344

4445
# Can't build from github due to Libint setup.
@@ -60,11 +61,18 @@ cmaize_find_or_build_dependency(
6061
ENABLE_SIGMA=${ENABLE_SIGMA}
6162
)
6263

64+
set(project_depends Libint2 simde)
65+
66+
if("${INTEGRALS_ENABLE_OPENMP}")
67+
find_package(OpenMP REQUIRED)
68+
list(APPEND project_depends OpenMP::OpenMP_CXX)
69+
endif()
70+
6371
cmaize_add_library(
6472
${PROJECT_NAME}
6573
SOURCE_DIR "${INTEGRALS_SOURCE_DIR}/${PROJECT_NAME}"
6674
INCLUDE_DIRS "${INTEGRALS_INCLUDE_DIR}/${PROJECT_NAME}"
67-
DEPENDS Libint2 simde
75+
DEPENDS "${project_depends}"
6876
)
6977

7078
include(nwx_pybind11)

src/integrals/libint/libint.cpp

Lines changed: 52 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,30 @@
2323
#include "libint_visitor.hpp"
2424
#include <type_traits>
2525

26+
#ifdef _OPENMP
27+
#include <omp.h>
28+
#endif
29+
2630
namespace integrals::libint {
2731
namespace {
2832

33+
#ifdef _OPENMP
/** @brief Upper bound on the number of threads a parallel region may use.
 *
 *  Callers size per-thread storage (one entry per thread) with this value.
 *  omp_get_max_threads() is queried directly rather than opening a probe
 *  parallel region: the probe made every thread write the same shared
 *  variable without synchronisation, and the team size it observed is not
 *  guaranteed to bound the team size of a later parallel region.
 *
 *  @return The maximum team size OpenMP would use for a new parallel region.
 */
int get_num_threads() { return omp_get_max_threads(); }

/** @brief Index of the calling thread within its current OpenMP team.
 *
 *  @return The value of omp_get_thread_num() (0 outside a parallel region).
 */
int get_thread_num() { return omp_get_thread_num(); }
#else

/** @brief Serial fallback: without OpenMP there is exactly one thread.
 *
 *  @return Always 1.
 */
int get_num_threads() { return 1; }

/** @brief Serial fallback: the only thread has index 0.
 *
 *  @return Always 0.
 */
int get_thread_num() { return 0; }

#endif
49+
2950
template<typename FloatType>
3051
auto build_eigen_buffer(const std::vector<libint2::BasisSet>& basis_sets,
3152
parallelzone::runtime::RuntimeView& rv, double thresh) {
@@ -52,25 +73,46 @@ template<std::size_t N, typename FloatType>
5273
auto fill_tensor(const std::vector<libint2::BasisSet>& basis_sets,
5374
const chemist::qm_operator::OperatorBase& op,
5475
parallelzone::runtime::RuntimeView& rv, double thresh) {
76+
using size_type = decltype(N);
77+
5578
// Dimensional information
56-
std::vector<std::size_t> dims_shells(N);
57-
for(decltype(N) i = 0; i < N; ++i) dims_shells[i] = basis_sets[i].size();
79+
std::vector<size_type> dim_stepsizes(N, 1);
80+
size_type num_shell_combinations = 1;
5881

59-
auto pbuffer = build_eigen_buffer<FloatType>(basis_sets, rv, thresh);
82+
for(size_type i = 0; i < N; ++i) {
83+
num_shell_combinations *= basis_sets[i].size();
84+
for(size_type j = i; j < N - 1; ++j) {
85+
dim_stepsizes[i] *= basis_sets[j].size();
86+
}
87+
}
6088

61-
// Make libint engine
89+
// Make an engine for each thread
90+
int num_threads = get_num_threads();
91+
std::vector<libint2::Engine> engines(num_threads);
6292
LibintVisitor visitor(basis_sets, thresh);
6393
op.visit(visitor);
64-
auto engine = visitor.engine();
65-
const auto& buf = engine.results();
94+
for(int i = 0; i != num_threads; ++i) { engines[i] = visitor.engine(); }
6695

6796
// Fill in values
68-
std::vector<std::size_t> shells(N, 0);
69-
while(shells[0] < dims_shells[0]) {
70-
detail_::run_engine_(engine, basis_sets, shells,
97+
auto pbuffer = build_eigen_buffer<FloatType>(basis_sets, rv, thresh);
98+
#ifdef _OPENMP
99+
#pragma omp parallel for
100+
#endif
101+
for(size_type i_pair = 0; i_pair != num_shell_combinations; ++i_pair) {
102+
auto thread_id = get_thread_num();
103+
104+
std::vector<size_type> shells(N);
105+
auto shell_ord = i_pair;
106+
for(size_type i = 0; i < N; ++i) {
107+
shells[i] = shell_ord / dim_stepsizes[i];
108+
shell_ord = shell_ord % dim_stepsizes[i];
109+
}
110+
111+
detail_::run_engine_(engines[thread_id], basis_sets, shells,
71112
std::make_index_sequence<N>());
72113

73-
auto vals = buf[0];
114+
const auto& buf = engines[thread_id].results();
115+
auto vals = buf[0];
74116
if(vals) {
75117
auto ord = detail_::shells2ord(basis_sets, shells);
76118
auto n_ord = ord.size();
@@ -79,17 +121,6 @@ auto fill_tensor(const std::vector<libint2::BasisSet>& basis_sets,
79121
pbuffer->set_data(ord[i_ord], update);
80122
}
81123
}
82-
83-
// Increment index
84-
shells[N - 1] += 1;
85-
for(decltype(N) i = 1; i < N; ++i) {
86-
if(shells[N - i] >= dims_shells[N - i]) {
87-
// Reset this dimension and increment the next one
88-
// shells[0] accumulates until we reach the end
89-
shells[N - i] = 0;
90-
shells[N - i - 1] += 1;
91-
}
92-
}
93124
}
94125

95126
auto pshape = pbuffer->layout().shape().clone();

0 commit comments

Comments
 (0)