@@ -11,7 +11,7 @@ namespace ftk {
11
11
// implementation of https://www.nvidia.com/content/gtc-2010/pdfs/2140_gtc2010.pdf
12
12
template <int nd, typename I=int , typename F=double >
13
13
__host__
14
- void kd_build_recursive (
14
+ void kdlite_build_recursive (
15
15
const I n,
16
16
const I current,
17
17
const F *X, // coordinates
@@ -47,15 +47,15 @@ void kd_build_recursive(
47
47
// fprintf(stderr, "current=%d, offset=%d, length=%d, lbm=%d, median=%d\n", current, offset, length, lbm, heap[current]);
48
48
49
49
if (lbm - 1 >= 1 )
50
- kd_build_recursive <nd, I, F>(n, current*2 +1 , X, level+1 , offset, lbm-1 , heap, ids); // left
50
+ kdlite_build_recursive <nd, I, F>(n, current*2 +1 , X, level+1 , offset, lbm-1 , heap, ids); // left
51
51
if (length - lbm >= 1 )
52
- kd_build_recursive <nd, I, F>(n, current*2 +2 , X, level+1 , offset+lbm, length-lbm, heap, ids); // right
52
+ kdlite_build_recursive <nd, I, F>(n, current*2 +2 , X, level+1 , offset+lbm, length-lbm, heap, ids); // right
53
53
}
54
54
}
55
55
56
56
template <int nd, typename I=int , typename F=double >
57
57
__host__
58
- void kd_build (
58
+ void kdlite_build (
59
59
const I n, // number of points
60
60
const F *X, // coordinates
61
61
I *heap) // out: pre-allocated heap
@@ -65,15 +65,77 @@ void kd_build(
65
65
for (int i = 0 ; i < n; i ++)
66
66
ids[i] = i;
67
67
68
- kd_build_recursive <nd, I, F>(n, 0 , X, 0 , 0 , n, heap, ids.data ());
68
+ kdlite_build_recursive <nd, I, F>(n, 0 , X, 0 , 0 , n, heap, ids.data ());
69
69
70
70
// for (int i = 0; i < n; i ++)
71
71
// fprintf(stderr, "i=%d, heap=%d\n", i, heap[i]);
72
72
}
73
73
74
74
// Nearest-neighbor query on an implicit, heap-ordered k-d tree, using an
// explicit stack instead of recursion.
//
// Layout read by this function:
//   - heap[i] holds the point id stored at tree node i; the children of node i
//     are nodes 2*i+1 (left) and 2*i+2 (right), and a node exists iff its
//     index is < n.
//   - X stores nd consecutive coordinates per point: point p starts at X + nd*p.
//   - the splitting axis of a node is (depth of the node) % nd.
//
// Parameters:
//   n     number of tree nodes / points
//   X     point coordinates
//   heap  node index -> point id
//   x     query point (nd coordinates)
//
// Returns the id of the closest point found, or -1 if n <= 0 (or if no point
// is closer than the initial 1e32 bound).
template <int nd, typename I=int, typename F=double>
__device__ __host__
I kdlite_nearest(I n, const F *X, const I *heap, const F *x)
{
  // Must be a compile-time constant: a runtime-sized local array is a
  // non-standard VLA and is not usable in device code.
  constexpr I max_stack_size = 32; // TODO

  if (n <= 0)
    return -1; // empty tree; heap[0] would be read out of bounds

  I S[max_stack_size];
  I top = 0;

  S[top++] = 0; // push root

  I best = -1;       // no best yet
  F best_d2 = 1e32;  // no best distance yet

  while (top != 0) { // stack is not empty
    const I i = S[--top]; // pop stack

    const I xid = heap[i];

    // depth = floor(log2(i+1)), computed with integer shifts so that it is
    // exact and does not rely on a floating-point log2 in device code.
    I depth = 0;
    for (I m = i + 1; m > 1; m >>= 1)
      ++depth;
    const I axis = depth % nd;

    // signed offset of the query from this node's splitting plane
    const F dp = x[axis] - X[nd*xid+axis];

    I next, other; // child on the query's side / child on the opposite side
    if (dp < 0) {
      next  = i * 2 + 1; // left child
      other = i * 2 + 2; // right child
    } else {
      next  = i * 2 + 2; // right child
      other = i * 2 + 1; // left child
    }

    // distance from the query to the point stored at this node
    // (presumably the squared Euclidean distance -- confirm against
    // vector_dist_2norm2's definition)
    const F d2 = vector_dist_2norm2<F>(nd, x, X + nd*xid);
    if (d2 < best_d2) {
      best = xid;
      best_d2 = d2;
    }

    if (next < n) { // the next node exists
      assert(top < max_stack_size);
      S[top++] = next; // push stack
    }

    // Only descend into the opposite side if the splitting plane is no
    // farther away than the best candidate found so far.
    // NOTE(review): `other` is pushed after `next`, so it is popped (visited)
    // first; pushing it before `next` would likely prune more, but would
    // change which point wins among exact distance ties.
    if (other < n) {
      const F dp2 = dp * dp;

      if (dp2 <= best_d2) {
        assert(top < max_stack_size);
        S[top++] = other; // push stack
      }
    }
  }

  return best;
}
135
+
136
+ template <int nd, typename I=int , typename F=double >
137
+ __device__ __host__
138
+ I kdlite_nearest_bfs (I n, const F *X, const I *heap, const F *x)
77
139
{
78
140
static size_t max_queue_size = 32768 ; // TODO
79
141
typedef struct {
0 commit comments