@@ -24,7 +24,7 @@ static inline cublasOperation_t convT(cb_transpose trans) {
24
24
}
25
25
}
26
26
27
- static const char * error (cublasStatus_t err ) {
27
+ static const char * estr (cublasStatus_t err ) {
28
28
switch (err ) {
29
29
case CUBLAS_STATUS_SUCCESS :
30
30
return "(cublas) Operation completed successfully." ;
@@ -53,12 +53,12 @@ static const char *error(cublasStatus_t err) {
53
53
54
54
static inline int error_cublas (error * e , const char * msg , cublasStatus_t err ) {
55
55
return error_fmt (e , (err == CUBLAS_STATUS_ARCH_MISMATCH ) ? GA_DEVSUP_ERROR : GA_BLAS_ERROR ,
56
- "%s: %s" , msg , error (err ));
56
+ "%s: %s" , msg , estr (err ));
57
57
}
58
58
59
59
#define CUBLAS_EXIT_ON_ERROR (ctx , cmd ) do { \
60
60
cublasStatus_t err = (cmd); \
61
- if (err != CUBLAS_SUCCESS ) { \
61
+ if (err != CUBLAS_STATUS_SUCCESS ) { \
62
62
cuda_exit(ctx); \
63
63
return error_cublas((ctx)->err, #cmd, err); \
64
64
} \
@@ -525,13 +525,14 @@ static int sgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
525
525
const size_t threshold = 650 ;
526
526
cb_transpose transT ;
527
527
528
+ ASSERT_BUF (A [0 ]);
529
+ ctx = A [0 ]-> ctx ;
530
+
528
531
if (LARGE_VAL (M ) || LARGE_VAL (N ) || LARGE_VAL (K ) ||
529
532
LARGE_VAL (lda ) || LARGE_VAL (ldb ) || LARGE_VAL (ldc ) ||
530
533
LARGE_VAL (M * N ) || LARGE_VAL (M * K ) || LARGE_VAL (K * N ))
531
534
return error_set (ctx -> err , GA_XLARGE_ERROR , "Passed-in sizes would overflow the ints in the cublas interface" );
532
535
533
- ASSERT_BUF (A [0 ]);
534
- ctx = A [0 ]-> ctx ;
535
536
h = (blas_handle * )ctx -> blas_handle ;
536
537
cuda_enter (ctx );
537
538
@@ -623,7 +624,7 @@ static int sgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
623
624
gpudata_release (Ta );
624
625
if (err != CUBLAS_STATUS_SUCCESS ) {
625
626
cuda_exit (ctx );
626
- return error_cublas (ctx , "cublasSgemmBatched" , err );
627
+ return error_cublas (ctx -> err , "cublasSgemmBatched" , err );
627
628
}
628
629
629
630
for (i = 0 ; i < batchCount ; i ++ ) {
@@ -651,13 +652,14 @@ static int dgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
651
652
const size_t threshold = 650 ;
652
653
cb_transpose transT ;
653
654
655
+ ASSERT_BUF (A [0 ]);
656
+ ctx = A [0 ]-> ctx ;
657
+
654
658
if (LARGE_VAL (M ) || LARGE_VAL (N ) || LARGE_VAL (K ) ||
655
659
LARGE_VAL (lda ) || LARGE_VAL (ldb ) || LARGE_VAL (ldc ) ||
656
660
LARGE_VAL (M * N ) || LARGE_VAL (M * K ) || LARGE_VAL (K * N ))
657
661
return error_set (ctx -> err , GA_XLARGE_ERROR , "Passed-in sizes would overflow the ints in the cublas interface" );
658
662
659
- ASSERT_BUF (A [0 ]);
660
- ctx = A [0 ]-> ctx ;
661
663
h = (blas_handle * )ctx -> blas_handle ;
662
664
cuda_enter (ctx );
663
665
@@ -697,7 +699,7 @@ static int dgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
697
699
(double * )A [i ]-> ptr + offA [i ], lda ,
698
700
(double * )B [i ]-> ptr + offB [i ], ldb ,
699
701
& beta ,
700
- (double * )C [i ]-> ptr + offC [i ], ldc );
702
+ (double * )C [i ]-> ptr + offC [i ], ldc )) ;
701
703
702
704
GA_CUDA_EXIT_ON_ERROR (ctx , cuda_record (A [i ], CUDA_WAIT_READ ));
703
705
GA_CUDA_EXIT_ON_ERROR (ctx , cuda_record (B [i ], CUDA_WAIT_READ ));
0 commit comments