From 28e7c1b3cef01150feee1d0f77748bbd2ebfd7a0 Mon Sep 17 00:00:00 2001 From: Wong Hang Date: Sat, 8 Feb 2020 15:49:51 +0900 Subject: [PATCH 1/2] compute and pass ls, gs directly to kernel_call in tril, triu --- pygpu/basic.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/pygpu/basic.py b/pygpu/basic.py index 90ffd93b49..dcdbc8b446 100644 --- a/pygpu/basic.py +++ b/pygpu/basic.py @@ -38,7 +38,6 @@ def _generate_kernel(ctx, cols, dtype, upper=True): have_complex=have_complex) return k - def triu(A, inplace=True): if A.ndim != 2: raise ValueError("triu only works for 2d arrays") @@ -54,7 +53,17 @@ def triu(A, inplace=True): upper = True cols = A.shape[1] k = _generate_kernel(A.context, cols, A.dtype, upper) - k(A, A.offset, A.shape[0] * A.shape[1], n=A.shape[0] * A.shape[1]) + n = int(A.shape[0]*A.shape[1]) + ls = 256 + if n < ls: + ls = n + gs = 1 + else: + (gs,r) = divmod(n,ls) + if r > 0: + gs += 1 + + k(A, A.offset, A.shape[0] * A.shape[1], ls=ls, gs=gs) return A @@ -73,5 +82,15 @@ def tril(A, inplace=True): upper = False cols = A.shape[1] k = _generate_kernel(A.context, cols, A.dtype, upper) - k(A, A.offset, A.shape[0] * A.shape[1], n=A.shape[0] * A.shape[1]) + n = int(A.shape[0]*A.shape[1]) + ls = 256 + if n < ls: + ls = n + gs = 1 + else: + (gs,r) = divmod(n,ls) + if r > 0: + gs += 1 + + k(A, A.offset, A.shape[0] * A.shape[1], ls=ls, gs=gs) return A From ac8006cde96edc981ed185cf30c82d93618a1bc5 Mon Sep 17 00:00:00 2001 From: Wong Hang Date: Sat, 8 Feb 2020 15:52:23 +0900 Subject: [PATCH 2/2] fix flake8 --- pygpu/basic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pygpu/basic.py b/pygpu/basic.py index dcdbc8b446..02253e340f 100644 --- a/pygpu/basic.py +++ b/pygpu/basic.py @@ -2,6 +2,7 @@ from .gpuarray import GpuArray, GpuKernel, SIZE, dtype_to_ctype import numpy + def _generate_kernel(ctx, cols, dtype, upper=True): tmpl = Template(""" #include "cluda.h" @@ -38,6 +39,7 @@ def _generate_kernel(ctx, cols, dtype, upper=True): have_complex=have_complex) return k + def triu(A, inplace=True): if A.ndim != 2: raise ValueError("triu only works for 2d arrays") @@ -59,7 +61,7 @@ def triu(A, inplace=True): ls = n gs = 1 else: - (gs,r) = divmod(n,ls) + (gs, r) = divmod(n, ls) if r > 0: gs += 1 @@ -88,7 +90,7 @@ def tril(A, inplace=True): ls = n gs = 1 else: - (gs,r) = divmod(n,ls) + (gs, r) = divmod(n, ls) if r > 0: gs += 1