2323#include " libint_visitor.hpp"
2424#include < type_traits>
2525
26+ #ifdef _OPENMP
27+ #include < omp.h>
28+ #endif
29+
2630namespace integrals ::libint {
2731namespace {
2832
33+ #ifdef _OPENMP
34+ int get_num_threads () {
35+ int num_threads;
36+ #pragma omp parallel
37+ { num_threads = omp_get_num_threads (); }
38+ return num_threads;
39+ }
40+
41+ int get_thread_num () { return omp_get_thread_num (); }
42+ #else
43+
44+ int get_num_threads () { return 1 ; }
45+
46+ int get_thread_num () { return 0 ; }
47+
48+ #endif
49+
2950template <typename FloatType>
3051auto build_eigen_buffer (const std::vector<libint2::BasisSet>& basis_sets,
3152 parallelzone::runtime::RuntimeView& rv, double thresh) {
@@ -52,25 +73,46 @@ template<std::size_t N, typename FloatType>
5273auto fill_tensor (const std::vector<libint2::BasisSet>& basis_sets,
5374 const chemist::qm_operator::OperatorBase& op,
5475 parallelzone::runtime::RuntimeView& rv, double thresh) {
76+ using size_type = decltype (N);
77+
5578 // Dimensional information
56- std::vector<std:: size_t > dims_shells (N );
57- for ( decltype (N) i = 0 ; i < N; ++i) dims_shells[i] = basis_sets[i]. size () ;
79+ std::vector<size_type> dim_stepsizes (N, 1 );
80+ size_type num_shell_combinations = 1 ;
5881
59- auto pbuffer = build_eigen_buffer<FloatType>(basis_sets, rv, thresh);
82+ for (size_type i = 0 ; i < N; ++i) {
83+ num_shell_combinations *= basis_sets[i].size ();
84+ for (size_type j = i; j < N - 1 ; ++j) {
85+ dim_stepsizes[i] *= basis_sets[j].size ();
86+ }
87+ }
6088
61- // Make libint engine
89+ // Make an engine for each thread
90+ int num_threads = get_num_threads ();
91+ std::vector<libint2::Engine> engines (num_threads);
6292 LibintVisitor visitor (basis_sets, thresh);
6393 op.visit (visitor);
64- auto engine = visitor.engine ();
65- const auto & buf = engine.results ();
94+ for (int i = 0 ; i != num_threads; ++i) { engines[i] = visitor.engine (); }
6695
6796 // Fill in values
68- std::vector<std::size_t > shells (N, 0 );
69- while (shells[0 ] < dims_shells[0 ]) {
70- detail_::run_engine_ (engine, basis_sets, shells,
97+ auto pbuffer = build_eigen_buffer<FloatType>(basis_sets, rv, thresh);
98+ #ifdef _OPENMP
99+ #pragma omp parallel for
100+ #endif
101+ for (size_type i_pair = 0 ; i_pair != num_shell_combinations; ++i_pair) {
102+ auto thread_id = get_thread_num ();
103+
104+ std::vector<size_type> shells (N);
105+ auto shell_ord = i_pair;
106+ for (size_type i = 0 ; i < N; ++i) {
107+ shells[i] = shell_ord / dim_stepsizes[i];
108+ shell_ord = shell_ord % dim_stepsizes[i];
109+ }
110+
111+ detail_::run_engine_ (engines[thread_id], basis_sets, shells,
71112 std::make_index_sequence<N>());
72113
73- auto vals = buf[0 ];
114+ const auto & buf = engines[thread_id].results ();
115+ auto vals = buf[0 ];
74116 if (vals) {
75117 auto ord = detail_::shells2ord (basis_sets, shells);
76118 auto n_ord = ord.size ();
@@ -79,17 +121,6 @@ auto fill_tensor(const std::vector<libint2::BasisSet>& basis_sets,
79121 pbuffer->set_data (ord[i_ord], update);
80122 }
81123 }
82-
83- // Increment index
84- shells[N - 1 ] += 1 ;
85- for (decltype (N) i = 1 ; i < N; ++i) {
86- if (shells[N - i] >= dims_shells[N - i]) {
87- // Reset this dimension and increment the next one
88- // shells[0] accumulates until we reach the end
89- shells[N - i] = 0 ;
90- shells[N - i - 1 ] += 1 ;
91- }
92- }
93124 }
94125
95126 auto pshape = pbuffer->layout ().shape ().clone ();
0 commit comments