@@ -224,15 +224,17 @@ __device__ void calculateParamsParallel_TJ(int tarRowId, int curRowId, int seqLe
224224 int temp=ec;
225225 ec=et,et=temp;
226226 }
227- if (ec==et){
228- sharedMatch[tx]++;
229- }
227+ if (ec==et) sharedMatch[tx]++;
228+ if (ec==0 &&et==2 ) sharedP0[tx]++;
229+ else if (ec==0 &&et==3 ) sharedP1[tx]++;
230+ else if (ec==1 &&et==2 ) sharedP2[tx]++;
231+ else if (ec==1 &&et==3 ) sharedP3[tx]++;
230232 }
231233 }
234+ __syncthreads ();
232235
233236 // reduction
234237 for (int stride=bs/2 ; stride>0 ; stride/=2 ){
235- __syncthreads ();
236238 if (tx<stride){
237239 sharedP0[tx] += sharedP0[tx + stride];
238240 sharedP1[tx] += sharedP1[tx + stride];
@@ -245,8 +247,11 @@ __device__ void calculateParamsParallel_TJ(int tarRowId, int curRowId, int seqLe
245247 sharedMatch[tx] += sharedMatch[tx + stride];
246248 sharedTotal[tx] += sharedTotal[tx + stride];
247249 }
250+ __syncthreads ();
248251 }
249252
253+
254+
250255 // write the final results to the first thread
251256 if (tx == 0 ) {
252257 frac[0 ] = sharedFrac0[0 ];
@@ -260,6 +265,7 @@ __device__ void calculateParamsParallel_TJ(int tarRowId, int curRowId, int seqLe
260265 pr[2 ] = sharedP2[0 ];
261266 pr[3 ] = sharedP3[0 ];
262267 }
268+ __syncthreads ();
263269}
264270
265271__device__ void calculateParams_K2P (int tarRowId, int curRowId, int seqLen, uint64_t * compressedSeqs, int &p, int &q, int &tot){
@@ -428,7 +434,7 @@ __global__ void MSADistConstruction(
428434 double fr[4 ]={};
429435 // calculateParams_TJ(rowId, idx, seqLen, compressedSeqs, frac, tot, match, pr);
430436 calculateParamsParallel_TJ (rowId, blockID, seqLen, compressedSeqs, frac, tot, match, pr);
431-
437+ // __syncthreads();
432438 if (tx == 0 ) {
433439 for (int i=0 ;i<4 ;i++) fr[i]=double (frac[i])/tot/2.0 ;
434440 double h=0 ;
0 commit comments