You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: setup.py
+1-1
Original file line number
Diff line number
Diff line change
@@ -5,7 +5,7 @@
5
5
fromsetuptoolsimportsetup
6
6
frompathlibimportPath
7
7
8
-
AK_VERSION='0.6.2'
8
+
AK_VERSION='0.6.3'
9
9
10
10
defget_long_description() ->str:
11
11
return'''The ArrayKit library provides utilities for creating and transforming NumPy arrays, implementing performance-critical StaticFrame operations as Python C extensions.
// indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);\
3543
-
// if (indices == NULL) { \
3544
-
// return NULL; \
3545
-
// } \
3546
-
// } \
3547
-
// indices[count++] = p - p_start; \
3548
-
// } \
3549
-
3550
-
// #define NONZERO_APPEND_INDEX_ABSOLUTE { \
3551
-
// if (AK_UNLIKELY(count == capacity)) { \
3552
-
// capacity <<= 1; \
3553
-
// indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);\
3554
-
// if (indices == NULL) { \
3555
-
// return NULL; \
3556
-
// } \
3557
-
// } \
3558
-
// indices[count++] = i; \
3559
-
// } \
3560
-
3561
-
3562
-
3563
-
// // Given a Boolean, contiguous 1D array, return the index positions in an int64 array. Through experimentation it has been verified that doing full-size allocation of memory does not permit outperforming NumPy at 10_000_000 scale; but doing fewer optimizations does help. Using bit masks does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expected number.
3564
-
// static inline PyObject*
3565
-
// AK_nonzero_1d(PyArrayObject* array) {
3566
-
// // the maximum number of indices we could return is the size of the array; if this is under a certain number, probably better to just allocate that rather than reallocate
3567
-
// PyObject* final;
3568
-
// npy_intp count_max = PyArray_SIZE(array);
3569
-
3570
-
// if (count_max == 0) { // return empty array
3571
-
// npy_intp dims = {count_max};
3572
-
// final = PyArray_SimpleNew(1, &dims, NPY_INT64);
// lldiv_t size_div = lldiv((long long)count_max, 8); // quot, rem
3577
-
3578
-
// Py_ssize_t count = 0;
3579
-
// // the maximum number of collected integers is equal to or less than count_max; for small count_max, we can just set that value; for large size, we set it to half the size
// Given a Boolean, contiguous 1D array, return the index positions in an int64 array. Through experimentation it has been verified that doing full-size allocation of memory provides the best performance at all scales. Using NpyIter, or using bit masks, does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expected number of indices.
0 commit comments