Skip to content

Objectable filter, astype_array #183

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@
from ._arraykit import array_to_tuple_array as array_to_tuple_array
from ._arraykit import array_to_tuple_iter as array_to_tuple_iter
from ._arraykit import nonzero_1d as nonzero_1d
from ._arraykit import is_objectable_dt64 as is_objectable_dt64
1 change: 1 addition & 0 deletions src/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ def get_new_indexers_and_screen(indexers: np.ndarray, positions: np.ndarray) ->
def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ...
def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ...
def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ...
def is_objectable_dt64(__array: np.ndarray, /) -> bool: ...
def slice_to_ascending_slice(__slice: slice, __size: int) -> slice: ...
def array_to_tuple_array(__array: np.ndarray) -> np.ndarray: ...
def array_to_tuple_iter(__array: np.ndarray) -> tp.Iterator[tp.Tuple[tp.Any, ...]]: ...
1 change: 1 addition & 0 deletions src/_arraykit.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ static PyMethodDef arraykit_methods[] = {
NULL},
{"count_iteration", count_iteration, METH_O, NULL},
{"nonzero_1d", nonzero_1d, METH_O, NULL},
{"is_objectable_dt64", is_objectable_dt64, METH_O, NULL},
{"isna_element",
(PyCFunction)isna_element,
METH_VARARGS | METH_KEYWORDS,
Expand Down
9 changes: 9 additions & 0 deletions src/methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,15 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) {
return AK_nonzero_1d(array);
}

// Python-facing wrapper: report whether a datetime64 array can be converted to an object array without data loss, as decided by AK_is_objectable_dt64. Returns a new reference to True or False, or NULL with an exception set.
PyObject *
is_objectable_dt64(PyObject *Py_UNUSED(m), PyObject *a) {
    // NOTE(review): AK_CHECK_NUMPY_ARRAY is defined elsewhere; presumably it returns NULL with an exception when `a` is not an ndarray -- confirm.
    AK_CHECK_NUMPY_ARRAY(a);
    PyArrayObject* array = (PyArrayObject*)a;
    // The unit lookup reads datetime-specific dtype metadata, so reject any other dtype before that metadata is dereferenced.
    if (PyArray_TYPE(array) != NPY_DATETIME) {
        PyErr_SetString(PyExc_TypeError, "Array must be of dtype datetime64");
        return NULL;
    }
    int objectable = AK_is_objectable_dt64(array);
    // The helper returns a plain bool and cannot signal failure itself; surface any exception it left set.
    if (PyErr_Occurred()) {
        return NULL;
    }
    return PyBool_FromLong(objectable);
}


static char *first_true_1d_kwarg_names[] = {
"array",
"forward",
Expand Down
3 changes: 3 additions & 0 deletions src/methods.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg);
PyObject *
nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a);

PyObject *
is_objectable_dt64(PyObject *Py_UNUSED(m), PyObject *a);

PyObject *
first_true_1d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs);

Expand Down
16 changes: 8 additions & 8 deletions src/tri_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
# include "tri_map.h"
# include "utilities.h"

static inline NPY_DATETIMEUNIT
AK_dt_unit_from_array(PyArrayObject* a) {
    // Modeled on NumPy's private get_datetime_metadata_from_dtype. No validation is performed here: the caller must ensure the array's dtype carries datetime metadata.
    PyArray_DatetimeDTypeMetaData* c_meta =
            (PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(PyArray_DESCR(a)); // descr is a borrowed ref
    return c_meta->meta.base;
}
// static inline NPY_DATETIMEUNIT
// AK_dt_unit_from_array(PyArrayObject* a) {
// // This is based on get_datetime_metadata_from_dtype in the NumPy source, but that function is private. This does not check that the dtype is of the appropriate type.
// PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref
// PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(dt))->meta);
// // PyArray_DatetimeMetaData* dma = &(((PyArray_DatetimeDTypeMetaData *)PyArray_DESCR(a)->c_metadata)->meta);
// return dma->base;
// }

typedef struct TriMapOne {
Py_ssize_t from; // signed
Expand Down
60 changes: 60 additions & 0 deletions src/utilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,66 @@ AK_slice_to_ascending_slice(PyObject* slice, Py_ssize_t size)
-step);
}


static inline NPY_DATETIMEUNIT
AK_dt_unit_from_array(PyArrayObject* a) {
    // Modeled on NumPy's private get_datetime_metadata_from_dtype. No validation is performed here: the caller must ensure the array's dtype carries datetime metadata.
    PyArray_DatetimeDTypeMetaData* c_meta =
            (PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(PyArray_DESCR(a)); // descr is a borrowed ref
    return c_meta->meta.base;
}

// Given a dt64 array, determine if it can be cast to an object array without data loss.
// The array's dtype must be datetime64; this is not checked here.
// NOTE: the year-range check is not yet implemented, so this currently returns false for every input.
// If a NumPy call fails, false is returned with a Python exception left set; callers should check PyErr_Occurred().
static inline bool
AK_is_objectable_dt64(PyArrayObject* a)
{
    NPY_DATETIMEUNIT unit = AK_dt_unit_from_array(a);
    switch (unit) {
        // Only day through microsecond units continue on to the (pending) year-range check.
        case NPY_FR_D:
        case NPY_FR_h:
        case NPY_FR_m:
        case NPY_FR_s:
        case NPY_FR_ms:
        case NPY_FR_us:
            break;
        // Every other unit is reported as not objectable.
        case NPY_FR_ERROR:
        case NPY_FR_Y:
        case NPY_FR_M:
        case NPY_FR_W:
        case NPY_FR_ns:
        case NPY_FR_ps:
        case NPY_FR_fs:
        case NPY_FR_as:
        case NPY_FR_GENERIC:
        default:
            return false;
    }

    PyArray_Descr* dt_year = PyArray_DescrFromType(NPY_DATETIME);
    if (dt_year == NULL) {
        return false; // exception set by NumPy
    }
    // TODO: dt_year still needs its unit set to years; not sure how to do this.
    // dt_year->metadata = Py_BuildValue("{s:i}", "unit", NPY_FR_Y);

    // PyArray_CastToType steals the reference to dt_year, so it must not be decref'd afterwards.
    PyObject* a_year = PyArray_CastToType(a, dt_year, 0);
    if (a_year == NULL) {
        return false; // exception set by NumPy
    }

    // TODO: implement the range check sketched below (Python pseudocode):
    // years = array[~np.isnat(array)].astype(DT64_YEAR).astype(DTYPE_INT_DEFAULT) + 1970
    // if np.any(years < datetime.MINYEAR):
    //     return False
    // if np.any(years > datetime.MAXYEAR):
    //     return False
    // return True

    Py_DECREF(a_year);
    return false;
}




// Given a Boolean, contiguous 1D array, return the index positions in an int64 array. Through experimentation it has been verified that doing full-size allocation of memory provides the best performance at all scales. Using NpyIter, or using bit masks, does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expected number of indices.
static inline PyObject *
AK_nonzero_1d(PyArrayObject* array) {
Expand Down
14 changes: 14 additions & 0 deletions test/test_objectable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import unittest

import numpy as np

from arraykit import is_objectable_dt64

class TestUnit(unittest.TestCase):
    """Tests for is_objectable_dt64."""

    def test_is_objectable_dt64_a(self) -> None:
        """A two-element datetime64 array built from date strings is currently reported as False."""
        # NOTE(review): both dates look representable as Python dates; presumably this expectation flips to True once the implementation is complete -- confirm.
        date_strings = ['2022-01-04', '1954-04-12']
        dates = np.array(date_strings, dtype=np.datetime64)
        result = is_objectable_dt64(dates)
        self.assertFalse(result)



Loading