From a3a3aa693c7caf15c7e316b17d2a777c6bd161d6 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 29 Oct 2024 18:17:52 -0700 Subject: [PATCH 1/3] moved AK_dt_unit_from_array to utilities --- src/tri_map.c | 16 ++++++++-------- src/utilities.h | 11 +++++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/tri_map.c b/src/tri_map.c index 48c41c47..959cac45 100644 --- a/src/tri_map.c +++ b/src/tri_map.c @@ -11,14 +11,14 @@ # include "tri_map.h" # include "utilities.h" -static inline NPY_DATETIMEUNIT -AK_dt_unit_from_array(PyArrayObject* a) { - // This is based on get_datetime_metadata_from_dtype in the NumPy source, but that function is private. This does not check that the dtype is of the appropriate type. - PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref - PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(dt))->meta); - // PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyArray_DESCR(a)->c_metadata)->meta); - return dma->base; -} +// static inline NPY_DATETIMEUNIT +// AK_dt_unit_from_array(PyArrayObject* a) { +// // This is based on get_datetime_metadata_from_dtype in the NumPy source, but that function is private. This does not check that the dtype is of the appropriate type. 
+// PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref +// PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(dt))->meta); +// // PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyArray_DESCR(a)->c_metadata)->meta); +// return dma->base; +// } typedef struct TriMapOne { Py_ssize_t from; // signed diff --git a/src/utilities.h b/src/utilities.h index 9b85a198..d6c45075 100644 --- a/src/utilities.h +++ b/src/utilities.h @@ -222,6 +222,17 @@ AK_slice_to_ascending_slice(PyObject* slice, Py_ssize_t size) -step); } + +static inline NPY_DATETIMEUNIT +AK_dt_unit_from_array(PyArrayObject* a) { + // This is based on get_datetime_metadata_from_dtype in the NumPy source, but that function is private. This does not check that the dtype is of the appropriate type. + PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref + PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(dt))->meta); + // PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyArray_DESCR(a)->c_metadata)->meta); + return dma->base; +} + + // Given a Boolean, contiguous 1D array, return the index positions in an int64 array. Through experimentation it has been verified that doing full-size allocation of memory provides the best performance at all scales. Using NpyIter, or using, bit masks does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expexted number of indices. 
static inline PyObject * AK_nonzero_1d(PyArrayObject* array) { From 6f38224812a8c3cd1388293ba190d78694e6503f Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 29 Oct 2024 18:26:31 -0700 Subject: [PATCH 2/3] sketch case --- src/utilities.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/utilities.h b/src/utilities.h index d6c45075..6df0909e 100644 --- a/src/utilities.h +++ b/src/utilities.h @@ -232,6 +232,18 @@ AK_dt_unit_from_array(PyArrayObject* a) { return dma->base; } +// Given a dt64 array, determine if it can be cast to an object without data loss. +static inline bool +AK_is_objectable_dt64(PyArrayObject* a) +{ + NPY_DATETIMEUNIT unit = AK_dt_unit_from_array(a); + swith (unit) { + case NPY_FR_Y: + case NPY_FR_M; + case NPY_FR_W; + } +} + // Given a Boolean, contiguous 1D array, return the index positions in an int64 array. Through experimentation it has been verified that doing full-size allocation of memory provides the best performance at all scales. Using NpyIter, or using, bit masks does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expexted number of indices. 
static inline PyObject * From d216f09482ea70993b66bd1810925f206d903163 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Tue, 29 Oct 2024 20:04:03 -0700 Subject: [PATCH 3/3] preliminary objectable test --- src/__init__.py | 1 + src/__init__.pyi | 1 + src/_arraykit.c | 1 + src/methods.c | 9 +++++++++ src/methods.h | 3 +++ src/utilities.h | 43 ++++++++++++++++++++++++++++++++++++++--- test/test_objectable.py | 14 ++++++++++++++ 7 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 test/test_objectable.py diff --git a/src/__init__.py b/src/__init__.py index 8c85b5e5..7bf835b5 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -31,3 +31,4 @@ from ._arraykit import array_to_tuple_array as array_to_tuple_array from ._arraykit import array_to_tuple_iter as array_to_tuple_iter from ._arraykit import nonzero_1d as nonzero_1d +from ._arraykit import is_objectable_dt64 as is_objectable_dt64 diff --git a/src/__init__.pyi b/src/__init__.pyi index 25a763c7..ced18cc9 100644 --- a/src/__init__.pyi +++ b/src/__init__.pyi @@ -164,6 +164,7 @@ def get_new_indexers_and_screen(indexers: np.ndarray, positions: np.ndarray) -> def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ... def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ... def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ... +def is_objectable_dt64(__array: np.ndarray, /) -> np.ndarray: ... def slice_to_ascending_slice(__slice: slice, __size: int) -> slice: ... def array_to_tuple_array(__array: np.ndarray) -> np.ndarray: ... def array_to_tuple_iter(__array: np.ndarray) -> tp.Iterator[tp.Tuple[tp.Any, ...]]: ... 
\ No newline at end of file diff --git a/src/_arraykit.c b/src/_arraykit.c index 5ceacac0..eb5b9e98 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -51,6 +51,7 @@ static PyMethodDef arraykit_methods[] = { NULL}, {"count_iteration", count_iteration, METH_O, NULL}, {"nonzero_1d", nonzero_1d, METH_O, NULL}, + {"is_objectable_dt64", is_objectable_dt64, METH_O, NULL}, {"isna_element", (PyCFunction)isna_element, METH_VARARGS | METH_KEYWORDS, diff --git a/src/methods.c b/src/methods.c index 21bcbeb4..3016d496 100644 --- a/src/methods.c +++ b/src/methods.c @@ -201,6 +201,15 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) { return AK_nonzero_1d(array); } +PyObject * +is_objectable_dt64(PyObject *Py_UNUSED(m), PyObject *a) { + AK_CHECK_NUMPY_ARRAY(a); + PyArrayObject* array = (PyArrayObject*)a; + AK_is_objectable_dt64(array); + Py_RETURN_FALSE; +} + + static char *first_true_1d_kwarg_names[] = { "array", "forward", diff --git a/src/methods.h b/src/methods.h index 751ccf85..340ef705 100644 --- a/src/methods.h +++ b/src/methods.h @@ -47,6 +47,9 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg); PyObject * nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a); +PyObject * +is_objectable_dt64(PyObject *Py_UNUSED(m), PyObject *a); + PyObject * first_true_1d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs); diff --git a/src/utilities.h b/src/utilities.h index 6df0909e..49be1e1b 100644 --- a/src/utilities.h +++ b/src/utilities.h @@ -237,14 +237,51 @@ static inline bool AK_is_objectable_dt64(PyArrayObject* a) { NPY_DATETIMEUNIT unit = AK_dt_unit_from_array(a); - swith (unit) { + switch (unit) { + case NPY_FR_ERROR: case NPY_FR_Y: - case NPY_FR_M; - case NPY_FR_W; + case NPY_FR_M: + case NPY_FR_W: + return false; + case NPY_FR_D: + case NPY_FR_h: + case NPY_FR_m: + case NPY_FR_s: + case NPY_FR_ms: + case NPY_FR_us: + break; + case NPY_FR_ns: + case NPY_FR_ps: + case NPY_FR_fs: + case NPY_FR_as: + case NPY_FR_GENERIC: + return false; } + + PyArray_Descr* dt_year 
= PyArray_DescrFromType(NPY_DATETIME); + if (dt_year == NULL) { + return NULL; + } + // TODO: not sure how to do this + // dt_year->metadata = Py_BuildValue("{s:i}", "unit", NPY_FR_Y); + PyObject* a_year = PyArray_CastToType(a, dt_year, 0); + Py_DECREF(dt_year); + + Py_DECREF(a_year); + return false; + + // years = array[~np.isnat(array)].astype(DT64_YEAR).astype(DTYPE_INT_DEFAULT) + 1970 + // if np.any(years < datetime.MINYEAR): + // return False + // if np.any(years > datetime.MAXYEAR): + // return False + // return True + } + + // Given a Boolean, contiguous 1D array, return the index positions in an int64 array. Through experimentation it has been verified that doing full-size allocation of memory provides the best performance at all scales. Using NpyIter, or using, bit masks does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expexted number of indices. static inline PyObject * AK_nonzero_1d(PyArrayObject* array) { diff --git a/test/test_objectable.py b/test/test_objectable.py new file mode 100644 index 00000000..ba2b8a2e --- /dev/null +++ b/test/test_objectable.py @@ -0,0 +1,14 @@ +import unittest + +import numpy as np + +from arraykit import is_objectable_dt64 + +class TestUnit(unittest.TestCase): + + def test_is_objectable_dt64_a(self) -> None: + a1 = np.array(['2022-01-04', '1954-04-12'], dtype=np.datetime64) + self.assertFalse(is_objectable_dt64(a1)) + + +