Skip to content

Commit 05c3e83

Browse files
committed
Add concurrent tree build support
1 parent d4c8358 commit 05c3e83

File tree

5 files changed

+290
-9
lines changed

5 files changed

+290
-9
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ nanoflann 1.5.0: UNRELEASED
1010
ctor parameters.
1111
- Added method RadiusResultSet::empty()
1212
- Template argument rename: `AccesorType` => `IndexType` (does not actually affect user code at all).
13+
- Added concurrent tree building support; see `KDTreeSingleIndexAdaptorParams::n_thread_build`.
1314
* **Other changes:**
1415
- Macros to avoid conflicts with X11 symbols.
1516
- Inline an auxiliary example function in case users want to use it and

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,10 @@ So, it seems that a `leaf_max_size` **between 10 and 50** would be optimum in ap
189189

190190
This parameter is really ignored in `nanoflann`, but was kept for backward compatibility with the original FLANN interface. Just ignore it.
191191

192+
### 2.3. `KDTreeSingleIndexAdaptorParams::n_thread_build`
193+
194+
This parameter sets the maximum number of threads that may run concurrently while the KD-tree is being built. The default value is 1 (single-threaded build). When the parameter is set to 0, `nanoflann` chooses the number of threads automatically, using `std::thread::hardware_concurrency()` (and never fewer than 1).
195+
192196
-----
193197

194198
## 3. Performance

examples/KDTreeVectorOfVectorsAdaptor.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ struct KDTreeVectorOfVectorsAdaptor
6969
/// data points
7070
KDTreeVectorOfVectorsAdaptor(
7171
const size_t /* dimensionality */, const VectorOfVectorsType& mat,
72-
const int leaf_max_size = 10)
72+
const int leaf_max_size = 10, const unsigned int n_thread_build = 1)
7373
: m_data(mat)
7474
{
7575
assert(mat.size() != 0 && mat[0].size() != 0);
@@ -80,7 +80,7 @@ struct KDTreeVectorOfVectorsAdaptor
8080
"argument");
8181
index = new index_t(
8282
static_cast<int>(dims), *this /* adaptor */,
83-
nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size));
83+
nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size, KDTreeSingleIndexAdaptorFlags::None, n_thread_build));
8484
}
8585

8686
~KDTreeVectorOfVectorsAdaptor() { delete index; }

include/nanoflann.hpp

Lines changed: 165 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,12 @@
4646

4747
#include <algorithm>
4848
#include <array>
49+
#include <atomic>
4950
#include <cassert>
5051
#include <cmath> // for abs()
5152
#include <cstdlib> // for abs()
5253
#include <functional> // std::reference_wrapper
54+
#include <future>
5355
#include <istream>
5456
#include <limits> // std::numeric_limits
5557
#include <ostream>
@@ -692,14 +694,19 @@ inline std::underlying_type<KDTreeSingleIndexAdaptorFlags>::type operator&(
692694
struct KDTreeSingleIndexAdaptorParams
693695
{
694696
KDTreeSingleIndexAdaptorParams(
695-
size_t _leaf_max_size = 10, KDTreeSingleIndexAdaptorFlags _flags =
696-
KDTreeSingleIndexAdaptorFlags::None)
697-
: leaf_max_size(_leaf_max_size), flags(_flags)
697+
size_t _leaf_max_size = 10,
698+
KDTreeSingleIndexAdaptorFlags _flags =
699+
KDTreeSingleIndexAdaptorFlags::None,
700+
unsigned int _n_thread_build = 1)
701+
: leaf_max_size(_leaf_max_size),
702+
flags(_flags),
703+
n_thread_build(_n_thread_build)
698704
{
699705
}
700706

701707
size_t leaf_max_size;
702708
KDTreeSingleIndexAdaptorFlags flags;
709+
unsigned int n_thread_build;
703710
};
704711

705712
/** Search options for KDTreeSingleIndexAdaptor::findNeighbors() */
@@ -954,6 +961,8 @@ class KDTreeBaseClass
954961

955962
Size leaf_max_size_ = 0;
956963

964+
/// Maximum number of threads used for a concurrent tree build
965+
Size n_thread_build_ = 1;
957966
/// Number of current points in the dataset
958967
Size size_ = 0;
959968
/// Number of points in the dataset when the index was built
@@ -1084,6 +1093,117 @@ class KDTreeBaseClass
10841093
return node;
10851094
}
10861095

1096+
/**
1097+
* Create a tree node that subdivides the list of vecs from vind[first] to
1098+
* vind[last] concurrently. The routine is called recursively on each
1099+
* sublist.
1100+
*
1101+
* @param left index of the first vector
1102+
* @param right index of the last vector
1103+
* @param thread_count count of std::async threads
1104+
* @param mutex mutex for mempool allocation
1105+
*/
1106+
NodePtr divideTreeConcurrent(
1107+
Derived& obj, const Offset left, const Offset right, BoundingBox& bbox,
1108+
std::atomic<unsigned int>& thread_count, std::mutex& mutex)
1109+
{
1110+
std::unique_lock lock(mutex);
1111+
NodePtr node = obj.pool_.template allocate<Node>(); // allocate memory
1112+
lock.unlock();
1113+
1114+
const auto dims = (DIM > 0 ? DIM : obj.dim_);
1115+
1116+
/* If too few exemplars remain, then make this a leaf node. */
1117+
if ((right - left) <= static_cast<Offset>(obj.leaf_max_size_))
1118+
{
1119+
node->child1 = node->child2 = nullptr; /* Mark as leaf node. */
1120+
node->node_type.lr.left = left;
1121+
node->node_type.lr.right = right;
1122+
1123+
// compute bounding-box of leaf points
1124+
for (Dimension i = 0; i < dims; ++i)
1125+
{
1126+
bbox[i].low = dataset_get(obj, obj.vAcc_[left], i);
1127+
bbox[i].high = dataset_get(obj, obj.vAcc_[left], i);
1128+
}
1129+
for (Offset k = left + 1; k < right; ++k)
1130+
{
1131+
for (Dimension i = 0; i < dims; ++i)
1132+
{
1133+
const auto val = dataset_get(obj, obj.vAcc_[k], i);
1134+
if (bbox[i].low > val) bbox[i].low = val;
1135+
if (bbox[i].high < val) bbox[i].high = val;
1136+
}
1137+
}
1138+
}
1139+
else
1140+
{
1141+
Offset idx;
1142+
Dimension cutfeat;
1143+
DistanceType cutval;
1144+
middleSplit_(obj, left, right - left, idx, cutfeat, cutval, bbox);
1145+
1146+
node->node_type.sub.divfeat = cutfeat;
1147+
1148+
std::future<NodePtr> left_future, right_future;
1149+
1150+
BoundingBox left_bbox(bbox);
1151+
left_bbox[cutfeat].high = cutval;
1152+
if (++thread_count < n_thread_build_)
1153+
{
1154+
left_future = std::async(
1155+
std::launch::async, &KDTreeBaseClass::divideTreeConcurrent,
1156+
this, std::ref(obj), left, left + idx, std::ref(left_bbox),
1157+
std::ref(thread_count), std::ref(mutex));
1158+
}
1159+
else
1160+
{
1161+
--thread_count;
1162+
node->child1 = this->divideTreeConcurrent(
1163+
obj, left, left + idx, left_bbox, thread_count, mutex);
1164+
}
1165+
1166+
BoundingBox right_bbox(bbox);
1167+
right_bbox[cutfeat].low = cutval;
1168+
if (++thread_count < n_thread_build_)
1169+
{
1170+
right_future = std::async(
1171+
std::launch::async, &KDTreeBaseClass::divideTreeConcurrent,
1172+
this, std::ref(obj), left + idx, right,
1173+
std::ref(right_bbox), std::ref(thread_count),
1174+
std::ref(mutex));
1175+
}
1176+
else
1177+
{
1178+
--thread_count;
1179+
node->child2 = this->divideTreeConcurrent(
1180+
obj, left + idx, right, right_bbox, thread_count, mutex);
1181+
}
1182+
1183+
if (left_future.valid())
1184+
{
1185+
node->child1 = left_future.get();
1186+
--thread_count;
1187+
}
1188+
if (right_future.valid())
1189+
{
1190+
node->child2 = right_future.get();
1191+
--thread_count;
1192+
}
1193+
1194+
node->node_type.sub.divlow = left_bbox[cutfeat].high;
1195+
node->node_type.sub.divhigh = right_bbox[cutfeat].low;
1196+
1197+
for (Dimension i = 0; i < dims; ++i)
1198+
{
1199+
bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low);
1200+
bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high);
1201+
}
1202+
}
1203+
1204+
return node;
1205+
}
1206+
10871207
void middleSplit_(
10881208
const Derived& obj, const Offset ind, const Size count, Offset& index,
10891209
Dimension& cutfeat, DistanceType& cutval, const BoundingBox& bbox)
@@ -1397,6 +1517,15 @@ class KDTreeSingleIndexAdaptor
13971517
Base::dim_ = dimensionality;
13981518
if (DIM > 0) Base::dim_ = DIM;
13991519
Base::leaf_max_size_ = params.leaf_max_size;
1520+
if (params.n_thread_build > 0)
1521+
{
1522+
Base::n_thread_build_ = params.n_thread_build;
1523+
}
1524+
else
1525+
{
1526+
Base::n_thread_build_ =
1527+
std::max(std::thread::hardware_concurrency(), 1u);
1528+
}
14001529

14011530
if (!(params.flags &
14021531
KDTreeSingleIndexAdaptorFlags::SkipInitialBuildIndex))
@@ -1420,8 +1549,18 @@ class KDTreeSingleIndexAdaptor
14201549
if (Base::size_ == 0) return;
14211550
computeBoundingBox(Base::root_bbox_);
14221551
// construct the tree
1423-
Base::root_node_ =
1424-
this->divideTree(*this, 0, Base::size_, Base::root_bbox_);
1552+
if (Base::n_thread_build_ == 1)
1553+
{
1554+
Base::root_node_ =
1555+
this->divideTree(*this, 0, Base::size_, Base::root_bbox_);
1556+
}
1557+
else
1558+
{
1559+
std::atomic<unsigned int> thread_count = 0;
1560+
std::mutex mutex;
1561+
Base::root_node_ = this->divideTreeConcurrent(
1562+
*this, 0, Base::size_, Base::root_bbox_, thread_count, mutex);
1563+
}
14251564
}
14261565

14271566
/** \name Query methods
@@ -1803,6 +1942,15 @@ class KDTreeSingleIndexDynamicAdaptor_
18031942
Base::dim_ = dimensionality;
18041943
if (DIM > 0) Base::dim_ = DIM;
18051944
Base::leaf_max_size_ = params.leaf_max_size;
1945+
if (params.n_thread_build > 0)
1946+
{
1947+
Base::n_thread_build_ = params.n_thread_build;
1948+
}
1949+
else
1950+
{
1951+
Base::n_thread_build_ =
1952+
std::max(std::thread::hardware_concurrency(), 1u);
1953+
}
18061954
}
18071955

18081956
/** Explicitly default the copy constructor */
@@ -1837,8 +1985,18 @@ class KDTreeSingleIndexDynamicAdaptor_
18371985
if (Base::size_ == 0) return;
18381986
computeBoundingBox(Base::root_bbox_);
18391987
// construct the tree
1840-
Base::root_node_ =
1841-
this->divideTree(*this, 0, Base::size_, Base::root_bbox_);
1988+
if (Base::n_thread_build_ == 1)
1989+
{
1990+
Base::root_node_ =
1991+
this->divideTree(*this, 0, Base::size_, Base::root_bbox_);
1992+
}
1993+
else
1994+
{
1995+
std::atomic<unsigned int> thread_count = 0;
1996+
std::mutex mutex;
1997+
Base::root_node_ = this->divideTreeConcurrent(
1998+
*this, 0, Base::size_, Base::root_bbox_, thread_count, mutex);
1999+
}
18422000
}
18432001

18442002
/** \name Query methods

0 commit comments

Comments
 (0)