46
46
47
47
#include < algorithm>
48
48
#include < array>
49
+ #include < atomic>
49
50
#include < cassert>
50
51
#include < cmath> // for abs()
51
52
#include < cstdlib> // for abs()
52
53
#include < functional> // std::reference_wrapper
54
+ #include < future>
53
55
#include < istream>
54
56
#include < limits> // std::numeric_limits
55
57
#include < ostream>
@@ -692,14 +694,19 @@ inline std::underlying_type<KDTreeSingleIndexAdaptorFlags>::type operator&(
692
694
struct KDTreeSingleIndexAdaptorParams
693
695
{
694
696
KDTreeSingleIndexAdaptorParams (
695
- size_t _leaf_max_size = 10 , KDTreeSingleIndexAdaptorFlags _flags =
696
- KDTreeSingleIndexAdaptorFlags::None)
697
- : leaf_max_size(_leaf_max_size), flags(_flags)
697
+ size_t _leaf_max_size = 10 ,
698
+ KDTreeSingleIndexAdaptorFlags _flags =
699
+ KDTreeSingleIndexAdaptorFlags::None,
700
+ unsigned int _n_thread_build = 1 )
701
+ : leaf_max_size(_leaf_max_size),
702
+ flags (_flags),
703
+ n_thread_build(_n_thread_build)
698
704
{
699
705
}
700
706
701
707
size_t leaf_max_size;
702
708
KDTreeSingleIndexAdaptorFlags flags;
709
+ unsigned int n_thread_build;
703
710
};
704
711
705
712
/* * Search options for KDTreeSingleIndexAdaptor::findNeighbors() */
@@ -954,6 +961,8 @@ class KDTreeBaseClass
954
961
955
962
Size leaf_max_size_ = 0 ;
956
963
964
+ /// Number of threads used for the concurrent tree build
965
+ Size n_thread_build_ = 1 ;
957
966
// / Number of current points in the dataset
958
967
Size size_ = 0 ;
959
968
// / Number of points in the dataset when the index was built
@@ -1084,6 +1093,117 @@ class KDTreeBaseClass
1084
1093
return node;
1085
1094
}
1086
1095
1096
+ /* *
1097
+ * Create a tree node that subdivides the list of vecs from vind[first] to
1098
+ * vind[last] concurrently. The routine is called recursively on each
1099
+ * sublist.
1100
+ *
1101
+ * @param left index of the first vector
1102
+ * @param right index of the last vector
1103
+ * @param thread_count count of std::async threads
1104
+ * @param mutex mutex for mempool allocation
1105
+ */
1106
+ NodePtr divideTreeConcurrent (
1107
+ Derived& obj, const Offset left, const Offset right, BoundingBox& bbox,
1108
+ std::atomic<unsigned int >& thread_count, std::mutex& mutex)
1109
+ {
1110
+ std::unique_lock lock (mutex);
1111
+ NodePtr node = obj.pool_ .template allocate <Node>(); // allocate memory
1112
+ lock.unlock ();
1113
+
1114
+ const auto dims = (DIM > 0 ? DIM : obj.dim_ );
1115
+
1116
+ /* If too few exemplars remain, then make this a leaf node. */
1117
+ if ((right - left) <= static_cast <Offset>(obj.leaf_max_size_ ))
1118
+ {
1119
+ node->child1 = node->child2 = nullptr ; /* Mark as leaf node. */
1120
+ node->node_type .lr .left = left;
1121
+ node->node_type .lr .right = right;
1122
+
1123
+ // compute bounding-box of leaf points
1124
+ for (Dimension i = 0 ; i < dims; ++i)
1125
+ {
1126
+ bbox[i].low = dataset_get (obj, obj.vAcc_ [left], i);
1127
+ bbox[i].high = dataset_get (obj, obj.vAcc_ [left], i);
1128
+ }
1129
+ for (Offset k = left + 1 ; k < right; ++k)
1130
+ {
1131
+ for (Dimension i = 0 ; i < dims; ++i)
1132
+ {
1133
+ const auto val = dataset_get (obj, obj.vAcc_ [k], i);
1134
+ if (bbox[i].low > val) bbox[i].low = val;
1135
+ if (bbox[i].high < val) bbox[i].high = val;
1136
+ }
1137
+ }
1138
+ }
1139
+ else
1140
+ {
1141
+ Offset idx;
1142
+ Dimension cutfeat;
1143
+ DistanceType cutval;
1144
+ middleSplit_ (obj, left, right - left, idx, cutfeat, cutval, bbox);
1145
+
1146
+ node->node_type .sub .divfeat = cutfeat;
1147
+
1148
+ std::future<NodePtr> left_future, right_future;
1149
+
1150
+ BoundingBox left_bbox (bbox);
1151
+ left_bbox[cutfeat].high = cutval;
1152
+ if (++thread_count < n_thread_build_)
1153
+ {
1154
+ left_future = std::async (
1155
+ std::launch::async, &KDTreeBaseClass::divideTreeConcurrent,
1156
+ this , std::ref (obj), left, left + idx, std::ref (left_bbox),
1157
+ std::ref (thread_count), std::ref (mutex));
1158
+ }
1159
+ else
1160
+ {
1161
+ --thread_count;
1162
+ node->child1 = this ->divideTreeConcurrent (
1163
+ obj, left, left + idx, left_bbox, thread_count, mutex);
1164
+ }
1165
+
1166
+ BoundingBox right_bbox (bbox);
1167
+ right_bbox[cutfeat].low = cutval;
1168
+ if (++thread_count < n_thread_build_)
1169
+ {
1170
+ right_future = std::async (
1171
+ std::launch::async, &KDTreeBaseClass::divideTreeConcurrent,
1172
+ this , std::ref (obj), left + idx, right,
1173
+ std::ref (right_bbox), std::ref (thread_count),
1174
+ std::ref (mutex));
1175
+ }
1176
+ else
1177
+ {
1178
+ --thread_count;
1179
+ node->child2 = this ->divideTreeConcurrent (
1180
+ obj, left + idx, right, right_bbox, thread_count, mutex);
1181
+ }
1182
+
1183
+ if (left_future.valid ())
1184
+ {
1185
+ node->child1 = left_future.get ();
1186
+ --thread_count;
1187
+ }
1188
+ if (right_future.valid ())
1189
+ {
1190
+ node->child2 = right_future.get ();
1191
+ --thread_count;
1192
+ }
1193
+
1194
+ node->node_type .sub .divlow = left_bbox[cutfeat].high ;
1195
+ node->node_type .sub .divhigh = right_bbox[cutfeat].low ;
1196
+
1197
+ for (Dimension i = 0 ; i < dims; ++i)
1198
+ {
1199
+ bbox[i].low = std::min (left_bbox[i].low , right_bbox[i].low );
1200
+ bbox[i].high = std::max (left_bbox[i].high , right_bbox[i].high );
1201
+ }
1202
+ }
1203
+
1204
+ return node;
1205
+ }
1206
+
1087
1207
void middleSplit_ (
1088
1208
const Derived& obj, const Offset ind, const Size count, Offset& index,
1089
1209
Dimension& cutfeat, DistanceType& cutval, const BoundingBox& bbox)
@@ -1397,6 +1517,15 @@ class KDTreeSingleIndexAdaptor
1397
1517
Base::dim_ = dimensionality;
1398
1518
if (DIM > 0 ) Base::dim_ = DIM;
1399
1519
Base::leaf_max_size_ = params.leaf_max_size ;
1520
+ if (params.n_thread_build > 0 )
1521
+ {
1522
+ Base::n_thread_build_ = params.n_thread_build ;
1523
+ }
1524
+ else
1525
+ {
1526
+ Base::n_thread_build_ =
1527
+ std::max (std::thread::hardware_concurrency (), 1u );
1528
+ }
1400
1529
1401
1530
if (!(params.flags &
1402
1531
KDTreeSingleIndexAdaptorFlags::SkipInitialBuildIndex))
@@ -1420,8 +1549,18 @@ class KDTreeSingleIndexAdaptor
1420
1549
if (Base::size_ == 0 ) return ;
1421
1550
computeBoundingBox (Base::root_bbox_);
1422
1551
// construct the tree
1423
- Base::root_node_ =
1424
- this ->divideTree (*this , 0 , Base::size_, Base::root_bbox_);
1552
+ if (Base::n_thread_build_ == 1 )
1553
+ {
1554
+ Base::root_node_ =
1555
+ this ->divideTree (*this , 0 , Base::size_, Base::root_bbox_);
1556
+ }
1557
+ else
1558
+ {
1559
+ std::atomic<unsigned int > thread_count = 0 ;
1560
+ std::mutex mutex;
1561
+ Base::root_node_ = this ->divideTreeConcurrent (
1562
+ *this , 0 , Base::size_, Base::root_bbox_, thread_count, mutex);
1563
+ }
1425
1564
}
1426
1565
1427
1566
/* * \name Query methods
@@ -1803,6 +1942,15 @@ class KDTreeSingleIndexDynamicAdaptor_
1803
1942
Base::dim_ = dimensionality;
1804
1943
if (DIM > 0 ) Base::dim_ = DIM;
1805
1944
Base::leaf_max_size_ = params.leaf_max_size ;
1945
+ if (params.n_thread_build > 0 )
1946
+ {
1947
+ Base::n_thread_build_ = params.n_thread_build ;
1948
+ }
1949
+ else
1950
+ {
1951
+ Base::n_thread_build_ =
1952
+ std::max (std::thread::hardware_concurrency (), 1u );
1953
+ }
1806
1954
}
1807
1955
1808
1956
/* * Explicitly default the copy constructor */
@@ -1837,8 +1985,18 @@ class KDTreeSingleIndexDynamicAdaptor_
1837
1985
if (Base::size_ == 0 ) return ;
1838
1986
computeBoundingBox (Base::root_bbox_);
1839
1987
// construct the tree
1840
- Base::root_node_ =
1841
- this ->divideTree (*this , 0 , Base::size_, Base::root_bbox_);
1988
+ if (Base::n_thread_build_ == 1 )
1989
+ {
1990
+ Base::root_node_ =
1991
+ this ->divideTree (*this , 0 , Base::size_, Base::root_bbox_);
1992
+ }
1993
+ else
1994
+ {
1995
+ std::atomic<unsigned int > thread_count = 0 ;
1996
+ std::mutex mutex;
1997
+ Base::root_node_ = this ->divideTreeConcurrent (
1998
+ *this , 0 , Base::size_, Base::root_bbox_, thread_count, mutex);
1999
+ }
1842
2000
}
1843
2001
1844
2002
/* * \name Query methods
0 commit comments