diff --git a/performance/__main__.py b/performance/__main__.py
index 9e31dc4d..ac9d8018 100644
--- a/performance/__main__.py
+++ b/performance/__main__.py
@@ -17,6 +17,7 @@
 from performance.reference.util import dtype_from_element as dtype_from_element_ref
 from performance.reference.util import array_deepcopy as array_deepcopy_ref
 from performance.reference.util import isna_element as isna_element_ref
+from performance.reference.util import roll_1d as roll_1d_ref
 
 from performance.reference.array_go import ArrayGO as ArrayGOREF
 
@@ -32,6 +33,7 @@
 from arraykit import dtype_from_element as dtype_from_element_ak
 from arraykit import array_deepcopy as array_deepcopy_ak
 from arraykit import isna_element as isna_element_ak
+from arraykit import roll_1d as roll_1d_ak
 
 from arraykit import ArrayGO as ArrayGOAK
 
@@ -359,6 +361,93 @@ class IsNaElementPerfREF(IsNaElementPerf):
     entry = staticmethod(isna_element_ref)
 
 
+#-------------------------------------------------------------------------------
+
+# Holds a reference to every class created by build_subclassses.
+storage = []
+def build_subclassses(klass, ak_meth, ref_meth):
+    '''
+    Create the `AK` and `REF` subclasses of `klass`, each with its function bound as a static `entry`.
+    '''
+    storage.append(type(f'{klass.__name__}AK', (klass,), dict(entry=staticmethod(ak_meth))))
+    storage.append(type(f'{klass.__name__}REF', (klass,), dict(entry=staticmethod(ref_meth))))
+
+
+#-------------------------------------------------------------------------------
+class Roll1d20kInt(Perf):
+    NUMBER = 10
+    SIZE = 20_000
+
+    def __init__(self):
+        self.array = np.arange(self.SIZE)
+
+    def main(self):
+        for i in range(-20_001, 20_001):
+            self.entry(self.array, i)
+
+class Roll1d20kFloat(Perf):
+    NUMBER = 10
+    SIZE = 20_000
+
+    def __init__(self):
+        self.array = np.arange(self.SIZE).astype(float)
+
+    def main(self):
+        for i in range(-20_001, 20_001):
+            self.entry(self.array, i)
+
+class Roll1d20kObject(Perf):
+    NUMBER = 2
+    SIZE = 20_000
+
+    def __init__(self):
+        self.array = np.arange(self.SIZE).astype(object)
+
+    def main(self):
+        for i in range(-20_001, 20_001):
+            self.entry(self.array, i)
+
+class Roll1d1kInt(Perf):
+    NUMBER = 10
+    SIZE = 1_000
+
+    def __init__(self):
+        self.array = np.arange(self.SIZE)
+
+    def main(self):
+        for i in range(-20_000, 20_000):
+            self.entry(self.array, i)
+
+class Roll1d1kFloat(Perf):
+    NUMBER = 10
+    SIZE = 1_000
+
+    def __init__(self):
+        self.array = np.arange(self.SIZE).astype(float)
+
+    def main(self):
+        for i in range(-20_000, 20_000):
+            self.entry(self.array, i)
+
+class Roll1d1kObject(Perf):
+    NUMBER = 10
+    SIZE = 1_000
+
+    def __init__(self):
+        self.array = np.arange(self.SIZE).astype(object)
+
+    def main(self):
+        for i in range(-20_000, 20_000):
+            self.entry(self.array, i)
+
+
+build_subclassses(Roll1d20kInt, roll_1d_ak, roll_1d_ref)
+build_subclassses(Roll1d20kFloat, roll_1d_ak, roll_1d_ref)
+build_subclassses(Roll1d20kObject, roll_1d_ak, roll_1d_ref)
+build_subclassses(Roll1d1kInt, roll_1d_ak, roll_1d_ref)
+build_subclassses(Roll1d1kFloat, roll_1d_ak, roll_1d_ref)
+build_subclassses(Roll1d1kObject, roll_1d_ak, roll_1d_ref)
+
 #-------------------------------------------------------------------------------
 
 def get_arg_parser():
diff --git a/performance/reference/util.py b/performance/reference/util.py
index 0f2d0efc..6c876b78 100644
--- a/performance/reference/util.py
+++ b/performance/reference/util.py
@@ -216,3 +216,23 @@ def dtype_from_element(value: tp.Optional[tp.Hashable]) -> np.dtype:
 
     # NOTE: calling array and getting dtype on np.nan is faster than combining isinstance, isnan calls
     return np.array(value).dtype
+
+def roll_1d(array: np.ndarray, shift: int) -> np.ndarray:
+    '''
+    Specialized form of np.roll that, by focusing on the 1D solution, is at least four times faster.
+    '''
+    size = len(array)
+    if size <= 1:
+        return array.copy()
+
+    # result will be positive
+    shift = shift % size
+    if shift == 0:
+        return array.copy()
+
+    post = np.empty(size, dtype=array.dtype)
+
+    post[0:shift] = array[-shift:]
+    post[shift:] = array[0:-shift]
+    return post
+
diff --git a/src/__init__.py b/src/__init__.py
index 988ca110..52945c33 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -16,3 +16,4 @@
 from ._arraykit import resolve_dtype_iter as resolve_dtype_iter
 from ._arraykit import isna_element as isna_element
 from ._arraykit import dtype_from_element as dtype_from_element
+from ._arraykit import roll_1d as roll_1d
diff --git a/src/__init__.pyi b/src/__init__.pyi
index 4ff12eb9..9a6fa848 100644
--- a/src/__init__.pyi
+++ b/src/__init__.pyi
@@ -32,4 +32,5 @@ def resolve_dtype(__d1: np.dtype, __d2: np.dtype) -> np.dtype: ...
 def resolve_dtype_iter(__dtypes: tp.Iterable[np.dtype]) -> np.dtype: ...
 def isna_element(__value: tp.Any) -> bool: ...
 def dtype_from_element(__value: tp.Optional[tp.Hashable]) -> np.dtype: ...
+def roll_1d(__array: np.ndarray, __shift: int) -> np.ndarray: ...
 
diff --git a/src/_arraykit.c b/src/_arraykit.c
index f8906a5c..7b82b3c8 100644
--- a/src/_arraykit.c
+++ b/src/_arraykit.c
@@ -257,9 +257,9 @@ shape_filter(PyObject *Py_UNUSED(m), PyObject *a)
     AK_CHECK_NUMPY_ARRAY_1D_2D(a);
     PyArrayObject *array = (PyArrayObject *)a;
 
-    int size0 = PyArray_DIM(array, 0);
+    int size0 = (int)PyArray_DIM(array, 0);
     // If 1D array, set size for axis 1 at 1, else use 2D array to get the size of axis 1
-    int size1 = PyArray_NDIM(array) == 1 ? 1 : PyArray_DIM(array, 1);
+    int size1 = (int)(PyArray_NDIM(array) == 1 ? 1 : PyArray_DIM(array, 1));
     return Py_BuildValue("ii", size0, size1);
 }
 
@@ -490,6 +490,126 @@ isna_element(PyObject *Py_UNUSED(m), PyObject *arg)
     Py_RETURN_FALSE;
 }
 
+//------------------------------------------------------------------------------
+// rolling
+
+static PyObject *
+_roll_1d(PyArrayObject *array, int shift)
+{
+    // Tell the constructor to automatically allocate the output.
+    // The data type of the output will match that of the input.
+    PyArrayObject *arrays[2];
+    npy_uint32 arrays_flags[2];
+    arrays[0] = array;
+    arrays[1] = NULL;
+    arrays_flags[0] = NPY_ITER_READONLY;
+    arrays_flags[1] = NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE;
+
+    // No inner iteration - inner loop is handled by CopyArray code
+    // Reference objects are OK.
+    int iter_flags = NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK;
+
+    // Construct the iterator
+    NpyIter *iter = NpyIter_MultiNew(
+            2,              // number of arrays
+            arrays,
+            iter_flags,
+            NPY_KEEPORDER,  // Maintain existing order for `array`
+            NPY_NO_CASTING, // Both arrays will have the same dtype so casting isn't needed or allowed
+            arrays_flags,
+            NULL);          // We don't have to specify dtypes since it will use array's
+
+    if (iter == NULL) {
+        return NULL;
+    }
+
+    NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
+    if (!iternext) {
+        NpyIter_Deallocate(iter);
+        return NULL;
+    }
+
+    char** dataptr = NpyIter_GetDataPtrArray(iter);
+    npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter);
+    npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize;
+
+    // If we don't need the GIL, iteration can be multi-threaded!
+    NPY_BEGIN_THREADS_DEF;
+    if (!NpyIter_IterationNeedsAPI(iter)) {
+        NPY_BEGIN_THREADS;
+    }
+
+    do {
+        char* src_data = dataptr[0];
+        char* dst_data = dataptr[1];
+        npy_intp size = *sizeptr;
+
+        npy_intp offset = ((size - shift) % size) * itemsize;
+        npy_intp first_chunk = (size * itemsize) - offset;
+
+        memcpy(dst_data, src_data + offset, first_chunk);
+        memcpy(dst_data + first_chunk, src_data, offset);
+
+        // Increment ref counts of objects.
+        if (PyDataType_ISOBJECT(PyArray_DESCR(array))) {
+            dst_data = dataptr[1];
+            while (size--) {
+                Py_INCREF(*(PyObject**)dst_data);
+                dst_data += itemsize;
+            }
+        }
+    } while (iternext(iter));
+
+    NPY_END_THREADS;
+
+    // Get the result from the iterator object array
+    PyArrayObject *ret = NpyIter_GetOperandArray(iter)[1];
+    if (!ret) {
+        NpyIter_Deallocate(iter);
+        return NULL;
+    }
+    Py_INCREF(ret);
+
+    if (NpyIter_Deallocate(iter) != NPY_SUCCEED) {
+        Py_DECREF(ret);
+        return NULL;
+    }
+
+    return (PyObject*)ret;
+}
+
+static PyObject *
+roll_1d(PyObject *Py_UNUSED(m), PyObject *args)
+{
+    PyArrayObject *array;
+    int shift;
+
+    if (!PyArg_ParseTuple(args, "O!i:roll_1d", &PyArray_Type, &array, &shift))
+    {
+        return NULL;
+    }
+
+    // Must be signed in order for modulo to work properly for negative shift values;
+    // npy_intp rather than int so that very large arrays are not truncated.
+    npy_intp size = PyArray_SIZE(array);
+    if (size != 0) {
+        shift = (int)(shift % size); // magnitude never exceeds that of shift
+    }
+    if ((size == 0) || (shift == 0)) {
+        return PyArray_Copy(array); // a NULL result is propagated to the caller
+    }
+
+    // _roll_1d copies whole chunks with memcpy, so it needs a contiguous buffer
+    PyArrayObject *contiguous = (PyArrayObject *)PyArray_GETCONTIGUOUS(array);
+    if (!contiguous) {
+        return NULL;
+    }
+    PyObject *post = _roll_1d(contiguous, shift);
+    Py_DECREF(contiguous);
+    return post;
+}
+
+
 //------------------------------------------------------------------------------
 // ArrayGO
 //------------------------------------------------------------------------------
@@ -772,6 +892,7 @@ static PyMethodDef arraykit_methods[] = {
     {"resolve_dtype_iter", resolve_dtype_iter, METH_O, NULL},
     {"isna_element", isna_element, METH_O, NULL},
     {"dtype_from_element", dtype_from_element, METH_O, NULL},
+    {"roll_1d", roll_1d, METH_VARARGS, NULL},
     {NULL},
 };
 
diff --git a/test/test_util.py b/test/test_util.py
index dcdc1c24..19dd4a8d 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -16,6 +16,7 @@
 from arraykit import array_deepcopy
 from arraykit import isna_element
 from arraykit import dtype_from_element
+from arraykit import roll_1d
 
 from performance.reference.util import mloc as mloc_ref
 
@@ -368,6 +369,30 @@ def test_dtype_from_element_str_and_bytes_dtypes(self) -> None:
             self.assertEqual(np.dtype(f'|S{size}'), dtype_from_element(bytes(size)))
             self.assertEqual(np.dtype(f'<U{size}'), dtype_from_element('x' * size))
 
+    def test_roll_1d_a(self) -> None:
+        a1 = np.arange(12, dtype=float)
+
+        for i in range(len(a1) + 1):
+            post = roll_1d(a1, i)
+            self.assertEqual(post.tolist(), np.roll(a1, i).tolist())
+
+            post = roll_1d(a1, -i)
+            self.assertEqual(post.tolist(), np.roll(a1, -i).tolist())
+
+    def test_roll_1d_b(self) -> None:
+        post = roll_1d(np.array([]), -4)
+        self.assertEqual([], post.tolist())
+
+    def test_roll_1d_c(self) -> None:
+        a1 = np.array([3, 4, 5, 6])
+        self.assertEqual(roll_1d(a1, 1).tolist(), [6, 3, 4, 5])
+        self.assertEqual(roll_1d(a1, -1).tolist(), [4, 5, 6, 3])
+
+    def test_roll_1d_d(self) -> None:
+        # a strided (non-contiguous) view must roll its logical elements
+        a1 = np.arange(12)[::2]
+        self.assertEqual(roll_1d(a1, 2).tolist(), np.roll(a1, 2).tolist())
+
 
 if __name__ == '__main__':
     unittest.main()