@@ -385,6 +385,7 @@ __kernel void interp_keypoint(
385
385
* @param grad: Pointer to global memory with gradient norm previously calculated
386
386
* @param ori: Pointer to global memory with gradient orientation previously calculated
387
387
* @param counter: Pointer to global memory with actual number of keypoints previously found
388
+ * @param hist: Pointer to local (work-group shared) memory with histogram (36 values per thread)
388
389
* @param octsize: initially 1, then doubled at each octave
389
390
* @param OriSigma : a SIFT parameter, default is 1.5. Warning : it is not "InitSigma".
390
391
* @param nb_keypoints : maximum number of keypoints
@@ -400,13 +401,28 @@ par.OriHistThresh = 0.8;
400
401
-replace "36" by an external parameter ?
401
402
-replace "0.8" by an external parameter ?
402
403
404
+ TODO:
405
+ -Memory optimization
406
+ --Use fewer registers (re-use, computation instead of assignment)
407
+ --Use local memory for float histogram[36]
408
+ -Speed-up
409
+ --Less access to global memory (k.s1 is OK because this is a register)
410
+ --leave the loops as soon as possible
411
+ --Avoid divisions
412
+
413
+ TODO: QUESTION :
414
+ keypoint k = keypoints[gid0];
415
+ keypoint is float4.
416
+ Is this a copy (register) or a pointer ? That would explain the bad performance...
417
+
403
418
*/
404
419
405
420
__kernel void orientation_assignment (
406
421
__global keypoint * keypoints ,
407
422
__global float * grad ,
408
423
__global float * ori ,
409
424
__global int * counter ,
425
+ __local float * hist ,
410
426
int octsize ,
411
427
float OriSigma , //WARNING: (1.5), it is not "InitSigma (=1.6)"
412
428
int nb_keypoints ,
@@ -420,33 +436,37 @@ __kernel void orientation_assignment(
420
436
if (keypoints_start <= gid0 && gid0 < keypoints_end ) { //do not use *counter, for it will be modified below
421
437
keypoint k = keypoints [gid0 ];
422
438
if (k .s1 != -1.0f ) { //if the keypoint is valid
439
+
440
+ //Local memory memset
441
+ for (int i = 0 ; i < 36 ; i ++ )
442
+ hist [36 * gid0 + i ] = 0.0f ;
443
+
423
444
int bin , prev , next ;
424
- int old ;
445
+ int old ; //counter value
425
446
float distsq , gval , angle , interp = 0.0 ;
426
- float hist [36 ] = { 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f };
427
- int row = (int ) (k .s1 + 0.5 ),
428
- col = (int ) (k .s2 + 0.5 );
447
+ //int row = (int) (k.s1 + 0.5),
448
+ // col = (int) (k.s2 + 0.5);
429
449
430
450
/* Look at pixels within 3 sigma around the point and sum their
431
451
Gaussian weighted gradient magnitudes into the histogram. */
432
452
433
453
float sigma = OriSigma * k .s3 ;
434
454
int radius = (int ) (sigma * 3.0 );
435
- int rmin = MAX (0 ,row - radius );
436
- int cmin = MAX (0 ,col - radius );
437
- int rmax = MIN (row + radius ,grad_height - 2 );
438
- int cmax = MIN (col + radius ,grad_width - 2 );
455
+ int rmin = MAX (0 ,(( int ) ( k . s1 + 0.5 )) - radius );
456
+ int cmin = MAX (0 ,(( int ) ( k . s2 + 0.5 )) - radius );
457
+ int rmax = MIN ((( int ) ( k . s1 + 0.5 )) + radius ,grad_height - 2 );
458
+ int cmax = MIN ((( int ) ( k . s2 + 0.5 )) + radius ,grad_width - 2 );
439
459
int i ,j ,r ,c ;
440
460
for (r = rmin ; r <= rmax ; r ++ ) {
441
461
for (c = cmin ; c <= cmax ; c ++ ) {
442
462
443
463
gval = grad [r * grad_width + c ];
444
- distsq = (r - k .s1 )* (r - k .s1 ) + (c - k .s2 )* (c - k .s2 );
464
+ distsq = (r - k .s1 , 2 )* (r - k .s1 , 2 ) + (c - k .s2 , 2 )* (c - k .s2 , 2 );
445
465
446
466
if (gval > 0.0f && distsq < ((float ) (radius * radius )) + 0.5f ) {
447
467
/* Ori is in range of -PI to PI. */
448
468
angle = ori [r * grad_width + c ];
449
- bin = (int ) (36 * (angle + M_PI_F + 0.001f ) / (2.0f * M_PI_F )); //FIXME: why this offset ?
469
+ bin = (int ) (36 * (angle + M_PI_F + 0.001f ) / (2.0f * M_PI_F )); //why this offset ?
450
470
if (bin >= 0 && bin <= 36 ) {
451
471
bin = MIN (bin , 35 );
452
472
hist [bin ] += exp (- distsq / (2.0f * sigma * sigma )) * gval ;
@@ -485,22 +505,23 @@ __kernel void orientation_assignment(
485
505
hist [next ] = - hist [next ];
486
506
}
487
507
interp = 0.5f * (hist [prev ] - hist [next ]) / (hist [prev ] - 2.0f * maxval + hist [next ]);
488
- angle = 2.0f * M_PI_F * (argmax + 0.5f + interp ) / 36 - M_PI_F ;
489
-
508
+ angle = 2.0f * M_PI_F * (argmax + 0.5f + interp ) * 0.027778f - M_PI_F ; //1/36 = 0.027777777777777776
490
509
510
+ /*
511
+ Re-arrange coordinates to be coherent with sift.cpp
512
+ */
491
513
k .s0 = k .s2 ; //c
492
514
k .s1 = k .s1 ; //r
493
515
k .s2 = k .s3 ; //sigma
494
516
k .s3 = angle ; //angle
495
-
496
517
keypoints [gid0 ] = k ;
497
518
498
519
/*
499
520
An orientation is now assigned to our current keypoint.
500
521
We can create new keypoints of same (x,y,sigma) but a different angle.
501
522
For every local peak in histogram, every peak of value >= 80% of maxval generates a new keypoint
502
523
*/
503
-
524
+ //TODO: use k instead of k2 for memory ?
504
525
keypoint k2 = 0.0 ; k2 .s0 = k .s0 ; k2 .s1 = k .s1 ; k2 .s2 = k .s2 ;
505
526
for (i = 0 ; i < 36 ; i ++ ) {
506
527
prev = (i == 0 ? 36 - 1 : i - 1 );
0 commit comments