forked from opencv/opencv
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request opencv#1731 from perping:2.4_haar
- Loading branch information
Showing 3 changed files with 70 additions and 25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
// Jia Haipeng, [email protected] | ||
// Nathan, [email protected] | ||
// Peng Xiao, [email protected] | ||
// Erping Pang, [email protected] | ||
// Redistribution and use in source and binary forms, with or without modification, | ||
// are permitted provided that the following conditions are met: | ||
// | ||
|
@@ -320,7 +321,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa | |
int glb_x = grpoffx + (lcl_x<<2); | ||
int glb_y = grpoffy + lcl_y; | ||
|
||
int glb_off = mad24(min(glb_y, height - 1),pixelstep,glb_x); | ||
int glb_off = mad24(min(glb_y, height + WINDOWSIZE - 1),pixelstep,glb_x); | ||
int4 data = *(__global int4*)&sum[glb_off]; | ||
int lcl_off = mad24(lcl_y, readwidth, lcl_x<<2); | ||
|
||
|
@@ -420,12 +421,23 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa | |
|
||
result = (stage_sum >= stagethreshold); | ||
} | ||
|
||
if(result && (x < width) && (y < height)) | ||
if(factor < 2) | ||
{ | ||
if(result && lclidx %2 ==0 && lclidy %2 ==0 ) | ||
{ | ||
int queueindex = atomic_inc(lclcount); | ||
lcloutindex[queueindex<<1] = (lclidy << 16) | lclidx; | ||
lcloutindex[(queueindex<<1)+1] = as_int((float)variance_norm_factor); | ||
} | ||
} | ||
else | ||
{ | ||
int queueindex = atomic_inc(lclcount); | ||
lcloutindex[queueindex<<1] = (lclidy << 16) | lclidx; | ||
lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor); | ||
if(result) | ||
{ | ||
int queueindex = atomic_inc(lclcount); | ||
lcloutindex[queueindex<<1] = (lclidy << 16) | lclidx; | ||
lcloutindex[(queueindex<<1)+1] = as_int((float)variance_norm_factor); | ||
} | ||
} | ||
barrier(CLK_LOCAL_MEM_FENCE); | ||
int queuecount = lclcount[0]; | ||
|
@@ -548,11 +560,27 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa | |
int y = mad24(grpidy,grpszy,((temp & (int)0xffff0000) >> 16)); | ||
temp = glboutindex[0]; | ||
int4 candidate_result; | ||
candidate_result.zw = (int2)convert_int_rtn(factor*20.f); | ||
candidate_result.x = convert_int_rtn(x*factor); | ||
candidate_result.y = convert_int_rtn(y*factor); | ||
candidate_result.zw = (int2)convert_int_rte(factor*20.f); | ||
candidate_result.x = convert_int_rte(x*factor); | ||
candidate_result.y = convert_int_rte(y*factor); | ||
atomic_inc(glboutindex); | ||
candidate[outputoff+temp+lcl_id] = candidate_result; | ||
|
||
int i = outputoff+temp+lcl_id; | ||
if(candidate[i].z == 0) | ||
{ | ||
candidate[i] = candidate_result; | ||
} | ||
else | ||
{ | ||
for(i=i+1;;i++) | ||
{ | ||
if(candidate[i].z == 0) | ||
{ | ||
candidate[i] = candidate_result; | ||
break; | ||
} | ||
} | ||
} | ||
} | ||
barrier(CLK_LOCAL_MEM_FENCE); | ||
}//end for(int grploop=grpidx;grploop<totalgrp;grploop+=grpnumx) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
// Wu Xinglong, [email protected] | ||
// Sen Liu, [email protected] | ||
// Peng Xiao, [email protected] | ||
// Erping Pang, [email protected] | ||
// Redistribution and use in source and binary forms, with or without modification, | ||
// are permitted provided that the following conditions are met: | ||
// | ||
|
@@ -141,7 +142,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( | |
int totalgrp = scaleinfo1.y & 0xffff; | ||
float factor = as_float(scaleinfo1.w); | ||
float correction_t = correction[scalei]; | ||
int ystep = (int)(max(2.0f, factor) + 0.5f); | ||
float ystep = max(2.0f, factor); | ||
|
||
for (int grploop = get_group_id(0); grploop < totalgrp; grploop += grpnumx) | ||
{ | ||
|
@@ -150,8 +151,8 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( | |
int grpidx = grploop - mul24(grpidy, grpnumperline); | ||
int ix = mad24(grpidx, grpszx, lclidx); | ||
int iy = mad24(grpidy, grpszy, lclidy); | ||
int x = ix * ystep; | ||
int y = iy * ystep; | ||
int x = round(ix * ystep); | ||
int y = round(iy * ystep); | ||
lcloutindex[lcl_id] = 0; | ||
lclcount[0] = 0; | ||
int nodecounter; | ||
|
@@ -242,7 +243,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( | |
|
||
barrier(CLK_LOCAL_MEM_FENCE); | ||
|
||
if (result && (ix < width) && (iy < height)) | ||
if (result) | ||
{ | ||
int queueindex = atomic_inc(lclcount); | ||
lcloutindex[queueindex] = (y << 16) | x; | ||
|
@@ -257,10 +258,26 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( | |
int y = (temp & (int)0xffff0000) >> 16; | ||
temp = atomic_inc(glboutindex); | ||
int4 candidate_result; | ||
candidate_result.zw = (int2)convert_int_rtn(factor * 20.f); | ||
candidate_result.zw = (int2)convert_int_rte(factor * 20.f); | ||
candidate_result.x = x; | ||
candidate_result.y = y; | ||
candidate[outputoff + temp + lcl_id] = candidate_result; | ||
|
||
int i = outputoff+temp+lcl_id; | ||
if(candidate[i].z == 0) | ||
{ | ||
candidate[i] = candidate_result; | ||
} | ||
else | ||
{ | ||
for(i=i+1;;i++) | ||
{ | ||
if(candidate[i].z == 0) | ||
{ | ||
candidate[i] = candidate_result; | ||
break; | ||
} | ||
} | ||
} | ||
} | ||
|
||
barrier(CLK_LOCAL_MEM_FENCE); | ||
|
@@ -283,7 +300,7 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH | |
tr_h[i] = (int)(t1.p[i][3] * scale + 0.5f); | ||
} | ||
|
||
t1.weight[0] = t1.p[2][0] ? -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]) : -t1.weight[1] * tr_h[1] * tr_w[1] / (tr_h[0] * tr_w[0]); | ||
t1.weight[0] = -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]); | ||
counter += nodenum; | ||
#pragma unroll | ||
|
||
|