Skip to content

Commit 31aeb20

Browse files
author
Pierre Paleo
committed
for the merge with Jerome
1 parent 844a43b commit 31aeb20

File tree

4 files changed

+101
-46
lines changed

4 files changed

+101
-46
lines changed

comparisons/compare.c

+61-30
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,25 @@
33
*
44
* Usage : make && ./compare extrema_cpp.txt extrema_opencl.txt
55
*
6+
* Format of the keypoints in the files :
7+
[ -3.41704249 368.90612793 14.99513245 2.78045988]
8+
[ -6.37356377 449.18521118 69.4825592 2.53365469]
9+
610
********************************************************************
711
*/
812

9-
10-
#include <stdio.h>
11-
#include <stdlib.h>
12-
#include <string.h>
13-
#include <math.h>
13+
#include "compare.h"
1414
#define MAX_KP 300 //for a given octave
1515
#define DIGITS 3 //for comparison precision (10e-DIGITS)
1616

17-
typedef struct keypoint {
18-
float p;
19-
float r;
20-
float c;
21-
float s;
22-
} keypoint;
2317

2418
/*
2519
Swap the contents of the two keypoints pointed to by k1 and k2
2620
*/
27-
void keypoint_swap(keypoint k1, keypoint k2) {
28-
keypoint tmp_ptr = k1;
29-
k1 = k2;
30-
k2 = tmp_ptr;
21+
void keypoint_swap(keypoint* k1, keypoint* k2) {
22+
keypoint tmp_ptr = *k1;
23+
*k1 = *k2;
24+
*k2 = tmp_ptr;
3125
}
3226

3327

@@ -55,7 +49,7 @@ int parse_keypoints(char* filename, keypoint* keypoints, unsigned int* total_key
5549
FILE* stream = fopen(filename,"r+");
5650
if (!stream) { printf("Error: Could not open file %s\n",filename); return -1; }
5751
float p=0,r=0,c=0,s=0;
58-
keypoint* kp = (keypoint*) calloc(1,sizeof(keypoint));
52+
keypoint* kp = &(keypoints[0]);
5953
unsigned int j= 1, k =0;
6054
char str[511];
6155
while (EOF != fscanf(stream,"%s",str)) {
@@ -73,17 +67,15 @@ int parse_keypoints(char* filename, keypoint* keypoints, unsigned int* total_key
7367
case 0:
7468
kp->s = atof(str);
7569
keypoints[k] = *kp;
76-
kp = (keypoint*) calloc(1,sizeof(keypoint));
70+
kp = (keypoint*) &keypoints[k+1];
7771
k++;
7872
break;
7973
} //end switch
8074
j++;
8175
} //end isdigit
8276
} //end read loop
8377
//puts an "end of keypoints" marker
84-
kp = (keypoint*) calloc(1,sizeof(keypoint));
85-
kp->r = -1.0;
86-
keypoints[k] = *kp;
78+
keypoints[k].r = -1.0;
8779
*total_keypoints = k;
8880
return 1;
8981
}
@@ -127,32 +119,71 @@ int main(int args, char* argv[]) {
127119
}
128120
printf("End of comparison -- %d/(%d,%d) keypoints matches\n",
129121
kp_ok,total_keypoints_opencl,total_keypoints_cpp);
122+
123+
keypoint* output = (keypoint*) calloc(total_keypoints_opencl-1,sizeof(keypoint));
124+
// puts("before cut sort");
125+
for(i=0;6 >= i; i++) printf("%f ",k_opencl[i].r);
126+
//puts(""); puts("after cut sort");
127+
merge_sort(k_opencl,0,6/*total_keypoints_opencl-1*/,output);
128+
//for(i=0;30 > i; i++) printf("%f ",k_opencl[i].r);
129+
puts("");
130+
free(k_cpp);
131+
free(k_opencl);
132+
free(output);
130133
return 1;
131134

132135

133136
}
134137

135-
/*
136-
void cut_sort(keypoint* input, unsigned int start, unsigned int end) {
138+
//[start,...,end]
139+
void merge_sort(keypoint* input, unsigned int start, unsigned int end, keypoint* output) {
137140
unsigned int len = end-start+1;
141+
unsigned int middle = (end+start)/2;
138142
if (len > 2) {
139-
cut_sort(input, start, (end+start)/2);
140-
cut_sort(input,(end+start)/2+1,end);
143+
merge_sort(input, start, middle,output);
144+
merge_sort(input,middle+1,end,output);
141145
}
142146
else {
143-
if (len == 1) ; //return input[start];
147+
if (len == 2) {
148+
if (input[start].r > input[end].r) keypoint_swap(&input[start],&input[end]);
149+
}
150+
}
151+
/* printf("Call to %d %d %d\n",start,middle,end);*/
152+
if (len > 2) merge(input, output, start, middle, end);
153+
}
154+
/*
155+
Merge 2 sorted lists
156+
*/
157+
void merge(keypoint* input, keypoint* output, unsigned int start, unsigned int middle, unsigned int end) {
158+
int i1 = 0, i2 = 0, stop = 0;
159+
printf("I am %d %d %d\n",start,middle,end);
160+
while (!stop) {
161+
if (input[start+i1].r > input[middle+1+i2].r) {
162+
output[start+i1+i2] = input[middle+1+i2];
163+
i2++;
164+
}
144165
else {
145-
if (input[start].r > input[end].r) keypoint_swap(input[start],input[end]);
146-
//else ;
166+
output[start+i1+i2] = input[start+i1];
167+
i1++;
147168
}
148-
}
169+
if (i1 == middle+1) stop = 1;
170+
if (i2 == end+1) stop = 2;
171+
}
172+
int i;
173+
if (stop == 1) //recopy the end of the 2nd list
174+
for (i = i2; end >= i; i++) output[start+i1+i] = input[middle+1+i];
175+
if (stop == 2) //recopy the end of the 1st list
176+
for (i = i1; middle >= i; i++) output[start+i2+i] = input[start+i];
149177

150178

179+
puts("Merged :");
180+
for (i = 0; i1+i2 >= i; i++) printf("%f ",output[start+i].r);
181+
puts("");
182+
151183
}
152184

153185

154-
//TODO: print kp_opencl, see if kp_opencl[:].r have been sorted by parts of 2
155-
*/
186+
156187

157188

158189

compile

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
export TD=dummy
22
python setup.py install --install-lib ${TD}/libs --install-scripts ${TD}/scripts
33

4-
echo "N OUBLIE PAS DE INITIALISER PYTHONPATH VERS ${TD}/libs"
4+
echo "N OUBLIE PAS DE INITIALISER PYTHONPATH VERS -path absolu- de ${TD}/libs"

openCL/image.cl

+35-14
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ __kernel void interp_keypoint(
385385
* @param grad: Pointer to global memory with gradient norm previously calculated
386386
* @param ori: Pointer to global memory with gradient orientation previously calculated
387387
* @param counter: Pointer to global memory with actual number of keypoints previously found
388+
* @param hist: Pointer to shared memory with histogram (36 values per thread)
388389
* @param octsize: initially 1, then doubled at each octave
389390
* @param OriSigma : a SIFT parameter, default is 1.5. Warning : it is not "InitSigma".
390391
* @param nb_keypoints : maximum number of keypoints
@@ -400,13 +401,28 @@ par.OriHistThresh = 0.8;
400401
-replace "36" by an external parameter ?
401402
-replace "0.8" by an external parameter ?
402403
404+
TODO:
405+
-Memory optimization
406+
--Use less registers (re-use, calculation instead of assignation)
407+
--Use local memory for float histogram[36]
408+
-Speed-up
409+
--Less access to global memory (k.s1 is OK because this is a register)
410+
--leave the loops as soon as possible
411+
--Avoid divisions
412+
413+
TODO: QUESTION :
414+
keypoint k = keypoints[gid0];
415+
keypoint is float4.
416+
Is this a copy (register) or a pointer ? would explain bad performances...
417+
403418
*/
404419

405420
__kernel void orientation_assignment(
406421
__global keypoint* keypoints,
407422
__global float* grad,
408423
__global float* ori,
409424
__global int* counter,
425+
__local float* hist,
410426
int octsize,
411427
float OriSigma, //WARNING: (1.5), it is not "InitSigma (=1.6)"
412428
int nb_keypoints,
@@ -420,33 +436,37 @@ __kernel void orientation_assignment(
420436
if (keypoints_start <= gid0 && gid0 < keypoints_end) { //do not use *counter, for it will be modified below
421437
keypoint k = keypoints[gid0];
422438
if (k.s1 != -1.0f) { //if the keypoint is valid
439+
440+
//Local memory memset
441+
for (int i=0; i < 36; i++)
442+
hist[36*gid0+i] = 0.0f;
443+
423444
int bin, prev, next;
424-
int old;
445+
int old; //counter value
425446
float distsq, gval, angle, interp=0.0;
426-
float hist[36] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
427-
int row = (int) (k.s1 + 0.5),
428-
col = (int) (k.s2 + 0.5);
447+
//int row = (int) (k.s1 + 0.5),
448+
// col = (int) (k.s2 + 0.5);
429449

430450
/* Look at pixels within 3 sigma around the point and sum their
431451
Gaussian weighted gradient magnitudes into the histogram. */
432452

433453
float sigma = OriSigma * k.s3;
434454
int radius = (int) (sigma * 3.0);
435-
int rmin = MAX(0,row - radius);
436-
int cmin = MAX(0,col - radius);
437-
int rmax = MIN(row + radius,grad_height - 2);
438-
int cmax = MIN(col + radius,grad_width - 2);
455+
int rmin = MAX(0,((int) (k.s1 + 0.5)) - radius);
456+
int cmin = MAX(0,((int) (k.s2 + 0.5)) - radius);
457+
int rmax = MIN(((int) (k.s1 + 0.5)) + radius,grad_height - 2);
458+
int cmax = MIN(((int) (k.s2 + 0.5)) + radius,grad_width - 2);
439459
int i,j,r,c;
440460
for (r = rmin; r <= rmax; r++) {
441461
for (c = cmin; c <= cmax; c++) {
442462

443463
gval = grad[r*grad_width+c];
444-
distsq = (r-k.s1)*(r-k.s1) + (c-k.s2)*(c-k.s2);
464+
distsq = (r-k.s1,2)*(r-k.s1,2) + (c-k.s2,2)*(c-k.s2,2);
445465

446466
if (gval > 0.0f && distsq < ((float) (radius*radius)) + 0.5f) {
447467
/* Ori is in range of -PI to PI. */
448468
angle = ori[r*grad_width+c];
449-
bin = (int) (36 * (angle + M_PI_F + 0.001f) / (2.0f * M_PI_F)); //FIXME: why this offset ?
469+
bin = (int) (36 * (angle + M_PI_F + 0.001f) / (2.0f * M_PI_F)); //why this offset ?
450470
if (bin >= 0 && bin <= 36) {
451471
bin = MIN(bin, 35);
452472
hist[bin] += exp(- distsq / (2.0f*sigma*sigma)) * gval;
@@ -485,22 +505,23 @@ __kernel void orientation_assignment(
485505
hist[next] = -hist[next];
486506
}
487507
interp = 0.5f * (hist[prev] - hist[next]) / (hist[prev] - 2.0f * maxval + hist[next]);
488-
angle = 2.0f * M_PI_F * (argmax + 0.5f + interp) / 36 - M_PI_F;
489-
508+
angle = 2.0f * M_PI_F * (argmax + 0.5f + interp) * 0.027778f - M_PI_F; //1/36 = 0.027777777777777776
490509

510+
/*
511+
Re-arrange coordinates to be coherent with sift.cpp
512+
*/
491513
k.s0 = k.s2; //c
492514
k.s1 = k.s1; //r
493515
k.s2 = k.s3; //sigma
494516
k.s3 = angle; //angle
495-
496517
keypoints[gid0] = k;
497518

498519
/*
499520
An orientation is now assigned to our current keypoint.
500521
We can create new keypoints of same (x,y,sigma) but a different angle.
501522
For every local peak in histogram, every peak of value >= 80% of maxval generates a new keypoint
502523
*/
503-
524+
//TODO: use k instead of k2 for memory ?
504525
keypoint k2 = 0.0; k2.s0 = k.s0; k2.s1 = k.s1; k2.s2 = k.s2;
505526
for (i = 0; i < 36; i++) {
506527
prev = (i == 0 ? 36 -1 : i - 1);

test/test_image.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -269,10 +269,13 @@ def test_orientation(self):
269269
keypoints_start = numpy.int32(0)
270270
keypoints_end = numpy.int32(actual_nb_keypoints)
271271
counter = pyopencl.array.to_device(queue, keypoints_end) #actual_nb_keypoints)
272+
#shared memory : 36-bin histograms
273+
local_size = (keypoints_end-keypoints_start+1)*36*4
274+
local_mem = pyopencl.LocalMemory(local_size)
272275

273276
t0 = time.time()
274277
k1 = self.program.orientation_assignment(queue, shape, wg,
275-
gpu_keypoints.data, gpu_grad.data, gpu_ori.data, counter.data,
278+
gpu_keypoints.data, gpu_grad.data, gpu_ori.data, counter.data, local_mem,
276279
octsize, orisigma, nb_keypoints, keypoints_start, keypoints_end, grad_width, grad_height)
277280
res = gpu_keypoints.get()
278281
cnt = counter.get()

0 commit comments

Comments
 (0)