Skip to content

Commit ad9173e

Browse files
committed
Fix all the compile problems for the cuda stuff.
1 parent e7a7094 commit ad9173e

17 files changed

+198
-170
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ debug: install-debugc py
1111

1212
.PHONY: install-debugc py debug install-relc rel config
1313

14-
Debug/Makefile: Debug Makefile.conf
14+
Debug/Makefile: Makefile.conf
1515
mkdir -p Debug
1616
ifndef INSTALL_PREFIX
1717
(cd Debug && NUM_DEVS=${NUM_DEVS} DEV_NAMES=${DEV_NAMES} cmake .. -DCMAKE_BUILD_TYPE=Debug)

src/cache.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ cache *cache_twoq(size_t hot_size, size_t warm_size,
8989

9090
cache *cache_disk(const char *dirpath, cache *mem,
9191
kwrite_fn kwrite, vwrite_fn vwrite,
92-
kread_fn kread, vread_fn vread);
92+
kread_fn kread, vread_fn vread,
93+
error *e);
9394

9495
/* API functions */
9596
static inline int cache_add(cache *c, cache_key_t k, cache_value_t v) {

src/cache/disk.c

+15-5
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ static void disk_destroy(cache *_c) {
397397

398398
cache *cache_disk(const char *dirpath, cache *mem,
399399
kwrite_fn kwrite, vwrite_fn vwrite,
400-
kread_fn kread, vread_fn vread) {
400+
kread_fn kread, vread_fn vread, error *e) {
401401
struct stat st;
402402
disk_cache *res;
403403
char *dirp;
@@ -414,7 +414,10 @@ cache *cache_disk(const char *dirpath, cache *mem,
414414

415415
dirp = malloc(dirl + 1); /* With the NUL */
416416

417-
if (dirp == NULL) return NULL;
417+
if (dirp == NULL) {
418+
error_sys(e, "malloc");
419+
return NULL;
420+
}
418421

419422
strlcpy(dirp, dirpath, dirl + 1);
420423

@@ -425,6 +428,7 @@ cache *cache_disk(const char *dirpath, cache *mem,
425428

426429
if (ensurep(NULL, dirp) != 0) {
427430
free(dirp);
431+
error_sys(e, "ensurep");
428432
return NULL;
429433
}
430434

@@ -433,18 +437,24 @@ cache *cache_disk(const char *dirpath, cache *mem,
433437

434438
mkdir(dirp, 0777); /* This may fail, but it's ok */
435439

436-
if (lstat(dirp, &st) != 0)
440+
if (lstat(dirp, &st) != 0) {
441+
error_sys(e, "lstat");
437442
return NULL;
443+
}
438444

439445
/* Restore the good path at the end */
440446
dirp[dirl - 1] = sep;
441447

442-
if (!(st.st_mode & S_IFDIR))
448+
if (!(st.st_mode & S_IFDIR)) {
449+
error_set(e, GA_SYS_ERROR, "Cache path exists but is not a directory");
443450
return NULL;
451+
}
444452

445453
res = calloc(sizeof(*res), 1);
446-
if (res == NULL)
454+
if (res == NULL) {
455+
error_sys(e, "calloc");
447456
return NULL;
457+
}
448458

449459
res->dirp = dirp;
450460
res->mem = mem;

src/cache/twoq.c

+2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#include <assert.h>
22
#include <stdlib.h>
33

4+
#include <gpuarray/error.h>
5+
46
#include "cache.h"
57
#include "private_config.h"
68

src/gpuarray/buffer.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ GPUARRAY_PUBLIC int gpukernel_call(gpukernel *k, unsigned int n,
500500
*
501501
* This can be used to cache kernel binaries after compilation for a
502502
* specific device. The kernel can be recreated by calling
503-
* kernel_alloc with the binary and size and passing `GA_USE_BINARY`
503+
* gpukernel_alloc with the binary and size and passing `GA_USE_BINARY`
504504
* as the use flags.
505505
*
506506
* The returned pointer is allocated and must be freed by the caller.

src/gpuarray_array.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ static int ga_extcopy(GpuArray *dst, const GpuArray *src) {
7171
if (ctx->extcopy_cache == NULL)
7272
ctx->extcopy_cache = cache_twoq(4, 8, 8, 2, extcopy_eq, extcopy_hash,
7373
extcopy_free,
74-
(cache_freev_fn)GpuElemwise_free);
74+
(cache_freev_fn)GpuElemwise_free,
75+
ctx->err);
7576
if (ctx->extcopy_cache == NULL)
7677
return GA_MISC_ERROR;
7778
if (cache_add(ctx->extcopy_cache, aa, k) != 0)

src/gpuarray_blas_cuda_cublas.c

+11-9
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ static inline cublasOperation_t convT(cb_transpose trans) {
2424
}
2525
}
2626

27-
static const char *error(cublasStatus_t err) {
27+
static const char *estr(cublasStatus_t err) {
2828
switch (err) {
2929
case CUBLAS_STATUS_SUCCESS:
3030
return "(cublas) Operation completed successfully.";
@@ -53,12 +53,12 @@ static const char *error(cublasStatus_t err) {
5353

5454
static inline int error_cublas(error *e, const char *msg, cublasStatus_t err) {
5555
return error_fmt(e, (err == CUBLAS_STATUS_ARCH_MISMATCH) ? GA_DEVSUP_ERROR : GA_BLAS_ERROR,
56-
"%s: %s", msg, error(err));
56+
"%s: %s", msg, estr(err));
5757
}
5858

5959
#define CUBLAS_EXIT_ON_ERROR(ctx, cmd) do { \
6060
cublasStatus_t err = (cmd); \
61-
if (err != CUBLAS_SUCCESS) { \
61+
if (err != CUBLAS_STATUS_SUCCESS) { \
6262
cuda_exit(ctx); \
6363
return error_cublas((ctx)->err, #cmd, err); \
6464
} \
@@ -525,13 +525,14 @@ static int sgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
525525
const size_t threshold = 650;
526526
cb_transpose transT;
527527

528+
ASSERT_BUF(A[0]);
529+
ctx = A[0]->ctx;
530+
528531
if (LARGE_VAL(M) || LARGE_VAL(N) || LARGE_VAL(K) ||
529532
LARGE_VAL(lda) || LARGE_VAL(ldb) || LARGE_VAL(ldc) ||
530533
LARGE_VAL(M * N) || LARGE_VAL(M * K) || LARGE_VAL(K * N))
531534
return error_set(ctx->err, GA_XLARGE_ERROR, "Passed-in sizes would overflow the ints in the cublas interface");
532535

533-
ASSERT_BUF(A[0]);
534-
ctx = A[0]->ctx;
535536
h = (blas_handle *)ctx->blas_handle;
536537
cuda_enter(ctx);
537538

@@ -623,7 +624,7 @@ static int sgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
623624
gpudata_release(Ta);
624625
if (err != CUBLAS_STATUS_SUCCESS) {
625626
cuda_exit(ctx);
626-
return error_cublas(ctx, "cublasSgemmBatched", err);
627+
return error_cublas(ctx->err, "cublasSgemmBatched", err);
627628
}
628629

629630
for (i = 0; i < batchCount; i++) {
@@ -651,13 +652,14 @@ static int dgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
651652
const size_t threshold = 650;
652653
cb_transpose transT;
653654

655+
ASSERT_BUF(A[0]);
656+
ctx = A[0]->ctx;
657+
654658
if (LARGE_VAL(M) || LARGE_VAL(N) || LARGE_VAL(K) ||
655659
LARGE_VAL(lda) || LARGE_VAL(ldb) || LARGE_VAL(ldc) ||
656660
LARGE_VAL(M * N) || LARGE_VAL(M * K) || LARGE_VAL(K * N))
657661
return error_set(ctx->err, GA_XLARGE_ERROR, "Passed-in sizes would overflow the ints in the cublas interface");
658662

659-
ASSERT_BUF(A[0]);
660-
ctx = A[0]->ctx;
661663
h = (blas_handle *)ctx->blas_handle;
662664
cuda_enter(ctx);
663665

@@ -697,7 +699,7 @@ static int dgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
697699
(double*)A[i]->ptr + offA[i], lda,
698700
(double*)B[i]->ptr + offB[i], ldb,
699701
&beta,
700-
(double*)C[i]->ptr + offC[i], ldc);
702+
(double*)C[i]->ptr + offC[i], ldc));
701703

702704
GA_CUDA_EXIT_ON_ERROR(ctx, cuda_record(A[i], CUDA_WAIT_READ));
703705
GA_CUDA_EXIT_ON_ERROR(ctx, cuda_record(B[i], CUDA_WAIT_READ));

src/gpuarray_buffer.c

+6-2
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,12 @@ gpukernel *gpukernel_init(gpucontext *ctx, unsigned int count,
163163
const char *fname, unsigned int numargs,
164164
const int *typecodes, int flags, int *ret,
165165
char **err_str) {
166-
return ctx->ops->kernel_alloc(ctx, count, strings, lengths, fname, numargs,
167-
typecodes, flags, ret, err_str);
166+
gpukernel *res;
167+
res = ctx->ops->kernel_alloc(ctx, count, strings, lengths, fname, numargs,
168+
typecodes, flags, err_str);
169+
if (res == NULL && ret)
170+
*ret = ctx->err->code;
171+
return res;
168172
}
169173

170174
void gpukernel_retain(gpukernel *k) {

src/gpuarray_buffer_collectives.c

+1-4
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,7 @@ void gpucomm_free(gpucomm* comm) {
2222
}
2323

2424
const char* gpucomm_error(gpucontext* ctx) {
25-
if (ctx->comm_ops != NULL)
26-
return ctx->error->msg;
27-
return "No collective ops available, API error. Is a collectives library "
28-
"installed?";
25+
return ctx->err->msg;
2926
}
3027

3128
gpucontext* gpucomm_context(gpucomm* comm) {

0 commit comments

Comments
 (0)