From 23242eff14b1e17eb0df19b3370c0107af8721f2 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Mon, 26 Apr 2021 15:33:56 -0700 Subject: [PATCH 01/19] Adds C impl for is_gen_copy_values. 1.3x faster. --- arraykit.c | 107 ++++++++++++++++++++++++++++++++++ arraykit.pyi | 1 + performance/main.py | 28 +++++++++ performance/reference/util.py | 24 +++++++- setup.py | 2 +- test/test_util.py | 21 ++++++- 6 files changed, 179 insertions(+), 4 deletions(-) diff --git a/arraykit.c b/arraykit.c index ce30a0d5..d8123d58 100644 --- a/arraykit.c +++ b/arraykit.c @@ -1,5 +1,6 @@ # include "Python.h" # include "structmember.h" +# include # define PY_ARRAY_UNIQUE_SYMBOL AK_ARRAY_API # define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION @@ -349,6 +350,110 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg) return (PyObject *)AK_ResolveDTypeIter(arg); } +typedef enum IsGenCopyValues { + ERR, + IS_GEN, + NOT_GEN_COPY, + NOT_GEN_NO_COPY +} IsGenCopyValues; + +static IsGenCopyValues +AK_is_gen_copy_values(PyObject *arg) +{ + /* + Returns: + -1: for failure + 0: is a generator + 1: a non-generator that needs to be copied + 2: a non-generator that does not needs to be copied + */ + if(PyObject_HasAttrString(arg, "__len__")) { + if (PySet_Check(arg) || PyDict_Check(arg) || + PyDictValues_Check(arg) || PyDictKeys_Check(arg)) + { + return NOT_GEN_COPY; + } + + PyObject *automap = PyImport_ImportModule("automap"); + if (!automap) { + return ERR; + } + + PyObject *frozenAutoMap = PyObject_GetAttrString(automap, "FrozenAutoMap"); + Py_DECREF(automap); + if (!frozenAutoMap) { + return ERR; + } + + int is_frozen_automap = PyObject_IsInstance(arg, frozenAutoMap); + Py_DECREF(frozenAutoMap); + + if (is_frozen_automap) { + return NOT_GEN_COPY; + } + + return NOT_GEN_NO_COPY; + } + + return IS_GEN; +} + +static PyObject* +is_gen_copy_values(PyObject *Py_UNUSED(m), PyObject *arg) +{ + // This only exists to allow `AK_is_gen_copy_values` to be tested + + PyObject *is_gen = Py_False; + PyObject *needs_copy = Py_False; + + switch (AK_is_gen_copy_values(arg)) + { + case ERR: + return NULL; + case IS_GEN: + is_gen = Py_True; + needs_copy = Py_True; + break; + case NOT_GEN_COPY: + needs_copy = Py_True; + break; + case NOT_GEN_NO_COPY: + break; + } + + Py_INCREF(is_gen); + Py_INCREF(needs_copy); + + PyObject *ret = PyTuple_Pack(2, is_gen, needs_copy); + if (!ret) { + Py_DECREF(is_gen); + Py_DECREF(needs_copy); + return NULL; + } + + return ret; +} + +// static IsGenCopyValues +// prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *arg) +// { +// bool is_gen = false; +// bool needs_copy = false; + +// switch (AK_is_gen_copy_values(arg)) +// { +// case ERR: +// return NULL; +// case IS_GEN: +// is_gen = true; +// needs_copy = true; +// break; +// case NOT_GEN_COPY: +// needs_copy = true; +// } + +// } + //------------------------------------------------------------------------------ // ArrayGO //------------------------------------------------------------------------------ @@ -626,6 +731,8 @@ static PyMethodDef arraykit_methods[] = { {"array_deepcopy", array_deepcopy, METH_VARARGS, NULL}, {"resolve_dtype", resolve_dtype, METH_VARARGS, NULL}, {"resolve_dtype_iter", resolve_dtype_iter, METH_O, NULL}, + {"is_gen_copy_values", is_gen_copy_values, METH_O, NULL}, + //{"prepare_iter_for_array", prepare_iter_for_array, METH_O, NULL}, {NULL}, }; diff --git a/arraykit.pyi b/arraykit.pyi index b56e49c2..078d3c59 100644 --- a/arraykit.pyi +++ b/arraykit.pyi @@ -28,3 +28,4 @@ def row_1d_filter(__array: np.array) -> np.ndarray: ... def array_deepcopy(__array: np.array, memo: tp.Dict[int, tp.Any]) -> np.ndarray: ... def resolve_dtype(__d1: np.dtype, __d2: np.dtype) -> np.dtype: ... def resolve_dtype_iter(__dtypes: tp.Iterable[np.dtype]) -> np.dtype: ... +def is_gen_copy_values(__values: tp.Iterable[tp.Any]) -> tp.Tuple[bool, bool]: ... diff --git a/performance/main.py b/performance/main.py index c7ca120a..682b36fa 100644 --- a/performance/main.py +++ b/performance/main.py @@ -4,6 +4,7 @@ import timeit import argparse +from automap import FrozenAutoMap import numpy as np from performance.reference.util import mloc as mloc_ref @@ -16,6 +17,7 @@ from performance.reference.util import resolve_dtype as resolve_dtype_ref from performance.reference.util import resolve_dtype_iter as resolve_dtype_iter_ref from performance.reference.util import array_deepcopy as array_deepcopy_ref +from performance.reference.util import is_gen_copy_values as is_gen_copy_values_ref from performance.reference.array_go import ArrayGO as ArrayGOREF @@ -29,6 +31,7 @@ from arraykit import resolve_dtype as resolve_dtype_ak from arraykit import resolve_dtype_iter as resolve_dtype_iter_ak from arraykit import array_deepcopy as array_deepcopy_ak +from arraykit import is_gen_copy_values as is_gen_copy_values_ak from arraykit import ArrayGO as ArrayGOAK @@ -251,6 +254,31 @@ class ArrayGOPerfREF(ArrayGOPerf): #------------------------------------------------------------------------------- +class IsGenCopyValues(Perf): + NUMBER = 1000 + + def pre(self): + self.objects = [ + [1, 2, 3], + (1, 2, 3), + FrozenAutoMap((1, 2, 3)), + {1, 2, 3}, + {1:1, 2:2, 3:3}, + ] + + def main(self): + for _ in range(100): + for obj in self.objects: + self.entry(obj) + +class IsGenCopyValuesAK(IsGenCopyValues): + entry = staticmethod(is_gen_copy_values_ak) + +class IsGenCopyValuesREF(IsGenCopyValues): + entry = staticmethod(is_gen_copy_values_ref) + +#------------------------------------------------------------------------------- + def get_arg_parser(): diff --git a/performance/reference/util.py b/performance/reference/util.py index 6d437b28..5279f0a7 100644 --- a/performance/reference/util.py +++ b/performance/reference/util.py @@ -1,5 +1,7 @@ import typing as tp from copy import deepcopy +from collections import abc +from automap import FrozenAutoMap # pylint: disable = E0611 import numpy as np @@ -25,6 +27,11 @@ DTYPES_BOOL = (DTYPE_BOOL,) DTYPES_INEXACT = (DTYPE_FLOAT_DEFAULT, DTYPE_COMPLEX_DEFAULT) +DICTLIKE_TYPES = (abc.Set, dict, FrozenAutoMap) + +# iterables that cannot be used in NP array constructors; asumes that dictlike types have already been identified +INVALID_ITERABLE_FOR_ARRAY = (abc.ValuesView, abc.KeysView) + def mloc(array: np.ndarray) -> int: '''Return the memory location of an array. @@ -158,7 +165,6 @@ def resolve_dtype_iter(dtypes: tp.Iterable[np.dtype]) -> np.dtype: return dt_resolve - def array_deepcopy( array: np.ndarray, memo: tp.Optional[tp.Dict[int, tp.Any]], @@ -181,3 +187,19 @@ def array_deepcopy( if memo is not None: memo[ident] = post return post + + +def is_gen_copy_values(values: tp.Iterable[tp.Any]) -> tp.Tuple[bool, bool]: + ''' + Returns: + copy_values: True if values cannot be used in an np.array constructor.` + ''' + if hasattr(values, '__len__'): + if isinstance(values, DICTLIKE_TYPES + INVALID_ITERABLE_FOR_ARRAY): + # Dict-like iterables need copies + return False, True + + return False, False + + # We are a generator and all generators need copies + return True, True diff --git a/setup.py b/setup.py index 6db31dec..370f7449 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ def get_long_description() -> str: 'Programming Language :: Python :: 3.8', ], keywords='numpy array', - packages=[''], + packages=[], package_data={'': ['arraykit.pyi', 'py.typed']}, include_package_data=True, ext_modules=[ diff --git a/test/test_util.py b/test/test_util.py index c44d7ab9..44c6247e 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,5 +1,6 @@ import unittest +from automap import FrozenAutoMap import numpy as np # type: ignore from arraykit import resolve_dtype @@ -11,6 +12,7 @@ from arraykit import mloc from arraykit import immutable_filter from arraykit import array_deepcopy +from arraykit import is_gen_copy_values from performance.reference.util import mloc as mloc_ref @@ -224,9 +226,24 @@ def test_array_deepcopy_c2(self) -> None: self.assertFalse(a2.flags.writeable) self.assertIn(id(a1), memo) + def test_is_gen_copy_values(self) -> None: + self.assertEqual((True, True), is_gen_copy_values((x for x in range(3)))) -if __name__ == '__main__': - unittest.main() + l = [1, 2, 3] + t = (1, 2, 3) + fam = FrozenAutoMap((1, 2, 3)) + s = {1, 2, 3} + d = {1:1, 2:2, 3:3} + + self.assertEqual((False, False), is_gen_copy_values(l)) + self.assertEqual((False, False), is_gen_copy_values(t)) + self.assertEqual((False, True), is_gen_copy_values(fam)) + self.assertEqual((False, True), is_gen_copy_values(d.keys())) + self.assertEqual((False, True), is_gen_copy_values(d.values())) + self.assertEqual((False, True), is_gen_copy_values(d)) + self.assertEqual((False, True), is_gen_copy_values(s)) +if __name__ == '__main__': + unittest.main() From 16375c63e80872b450dd82c7be1bc263ee1dca60 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Mon, 26 Apr 2021 16:11:22 -0700 Subject: [PATCH 02/19] Adds initial framework for prepare_iter_for_array --- arraykit.c | 76 +++++++++++----- arraykit.pyi | 4 + performance/reference/util.py | 93 +++++++++++++++++++- test/test_util.py | 159 ++++++++++++++++++++++++++++++++++ 4 files changed, 311 insertions(+), 21 deletions(-) diff --git a/arraykit.c b/arraykit.c index d8123d58..98c0921a 100644 --- a/arraykit.c +++ b/arraykit.c @@ -434,25 +434,61 @@ is_gen_copy_values(PyObject *Py_UNUSED(m), PyObject *arg) return ret; } -// static IsGenCopyValues -// prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *arg) -// { -// bool is_gen = false; -// bool needs_copy = false; - -// switch (AK_is_gen_copy_values(arg)) -// { -// case ERR: -// return NULL; -// case IS_GEN: -// is_gen = true; -// needs_copy = true; -// break; -// case NOT_GEN_COPY: -// needs_copy = true; -// } - -// } +static PyObject* +prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) +{ + PyObject *values; + int restrict_copy; + + static char *kwlist[] = {"values", "restrict_copy", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi:prepare_iter_for_array", + kwlist, + &values, + &restrict_copy)) + { + return NULL; + } + + bool is_gen = false; + bool needs_copy = false; + + switch (AK_is_gen_copy_values(values)) + { + case ERR: + return NULL; + case IS_GEN: + is_gen = true; + needs_copy = true; + break; + case NOT_GEN_COPY: + needs_copy = true; + break; + case NOT_GEN_NO_COPY: + break; + } + + if (!is_gen) { + ssize_t len = PyObject_Length(values); + if (len == -1) { + return NULL; + } + if (len == 0) { + Py_INCREF(Py_None); + Py_INCREF(Py_False); + PyObject *ret = PyTuple_Pack(3, Py_None, Py_False, values); + if (!ret) { + Py_DECREF(Py_None); + Py_DECREF(Py_False); + return NULL; + } + return ret; + } + } + + Py_INCREF(Py_True); + return Py_True; +} //------------------------------------------------------------------------------ // ArrayGO @@ -732,7 +768,7 @@ static PyMethodDef arraykit_methods[] = { {"resolve_dtype", resolve_dtype, METH_VARARGS, NULL}, {"resolve_dtype_iter", resolve_dtype_iter, METH_O, NULL}, {"is_gen_copy_values", is_gen_copy_values, METH_O, NULL}, - //{"prepare_iter_for_array", prepare_iter_for_array, METH_O, NULL}, + {"prepare_iter_for_array", (PyCFunction)prepare_iter_for_array, METH_VARARGS | METH_KEYWORDS, NULL}, {NULL}, }; diff --git a/arraykit.pyi b/arraykit.pyi index 078d3c59..2f500e38 100644 --- a/arraykit.pyi +++ b/arraykit.pyi @@ -29,3 +29,7 @@ def array_deepcopy(__array: np.array, memo: tp.Dict[int, tp.Any]) -> np.ndarray: def resolve_dtype(__d1: np.dtype, __d2: np.dtype) -> np.dtype: ... def resolve_dtype_iter(__dtypes: tp.Iterable[np.dtype]) -> np.dtype: ... def is_gen_copy_values(__values: tp.Iterable[tp.Any]) -> tp.Tuple[bool, bool]: ... +def prepare_iter_for_array( + __values: tp.Iterable[tp.Any], + __restrict_copy: bool = False + ) -> tp.Tuple[DtypeSpecifier, bool, tp.Sequence[tp.Any]]: ... diff --git a/performance/reference/util.py b/performance/reference/util.py index 5279f0a7..d819eb30 100644 --- a/performance/reference/util.py +++ b/performance/reference/util.py @@ -1,10 +1,13 @@ import typing as tp from copy import deepcopy from collections import abc +from enum import Enum from automap import FrozenAutoMap # pylint: disable = E0611 import numpy as np +DtypeSpecifier = tp.Optional[tp.Union[str, np.dtype, type]] + DTYPE_DATETIME_KIND = 'M' DTYPE_TIMEDELTA_KIND = 'm' DTYPE_COMPLEX_KIND = 'c' @@ -27,11 +30,19 @@ DTYPES_BOOL = (DTYPE_BOOL,) DTYPES_INEXACT = (DTYPE_FLOAT_DEFAULT, DTYPE_COMPLEX_DEFAULT) +INEXACT_TYPES = (float, complex, np.inexact) # inexact matches floating, complexfloating + DICTLIKE_TYPES = (abc.Set, dict, FrozenAutoMap) -# iterables that cannot be used in NP array constructors; asumes that dictlike types have already been identified +# iterables that cannot be used in NP array constructors; asumes that dictlike +# types have already been identified INVALID_ITERABLE_FOR_ARRAY = (abc.ValuesView, abc.KeysView) +# integers above this value will occasionally, once coerced to a float (64 or 128) +# in an NP array, will not match a hash lookup as a key in a dictionary; +# an NP array of int or object will work +INT_MAX_COERCIBLE_TO_FLOAT = 1_000_000_000_000_000 + def mloc(array: np.ndarray) -> int: '''Return the memory location of an array. @@ -203,3 +214,83 @@ def is_gen_copy_values(values: tp.Iterable[tp.Any]) -> tp.Tuple[bool, bool]: # We are a generator and all generators need copies return True, True + + +def prepare_iter_for_array( + values: tp.Iterable[tp.Any], + restrict_copy: bool = False + ) -> tp.Tuple[DtypeSpecifier, bool, tp.Sequence[tp.Any]]: + ''' + Determine an appropriate DtypeSpecifier for values in an iterable. + This does not try to determine the actual dtype, but instead, if the DtypeSpecifier needs to be + object rather than None (which lets NumPy auto detect). + This is expected to only operate on 1D data. + + Args: + values: can be a generator that will be exhausted in processing; + if a generator, a copy will be made and returned as values. + restrict_copy: if True, reject making a copy, even if a generator is given + Returns: + resolved, has_tuple, values + ''' + + is_gen, copy_values = is_gen_copy_values(values) + + if not is_gen and len(values) == 0: #type: ignore + return None, False, values #type: ignore + + if restrict_copy: + copy_values = False + + v_iter = iter(values) + + if copy_values: + values_post = [] + + resolved = None # None is valid specifier if the type is not ambiguous + has_tuple = False + has_str = False + has_enum = False + has_non_str = False + has_inexact = False + has_big_int = False + + for v in v_iter: + if copy_values: + # if a generator, have to make a copy while iterating + # for array construction, cannot use dictlike, so must convert to list + values_post.append(v) + + if resolved != object: + value_type = type(v) + + # need to get tuple subclasses, like NamedTuple + if isinstance(v, (tuple, list)) or hasattr(v, '__slots__'): + # identify SF types by if they have __slots__ defined; they also must be assigned after array creation, so we treat them like tuples + has_tuple = True + elif isinstance(v, Enum): + # must check isinstance, as Enum types are always derived from Enum + has_enum = True + elif value_type == str or value_type == np.str_: + # must compare to both string types + has_str = True + else: + has_non_str = True + if value_type in INEXACT_TYPES: + has_inexact = True + elif value_type == int and abs(v) > INT_MAX_COERCIBLE_TO_FLOAT: + has_big_int = True + + if has_tuple or has_enum or (has_str and has_non_str): + resolved = object + elif has_big_int and has_inexact: + resolved = object + else: # resolved is object, can exit + if copy_values: + values_post.extend(v_iter) + break + + # NOTE: we break before finding a tuple, but our treatment of object types, downstream, will always assign them in the appropriate way + if copy_values: + return resolved, has_tuple, values_post + return resolved, has_tuple, values #type: ignore diff --git a/test/test_util.py b/test/test_util.py index 44c6247e..f377175f 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,3 +1,4 @@ +import typing as tp import unittest from automap import FrozenAutoMap @@ -13,8 +14,10 @@ from arraykit import immutable_filter from arraykit import array_deepcopy from arraykit import is_gen_copy_values +from arraykit import prepare_iter_for_array from performance.reference.util import mloc as mloc_ref +#from performance.reference.util import prepare_iter_for_array class TestUnit(unittest.TestCase): @@ -244,6 +247,162 @@ def test_is_gen_copy_values(self) -> None: self.assertEqual((False, True), is_gen_copy_values(d)) self.assertEqual((False, True), is_gen_copy_values(s)) + @unittest.skip('Not ready') + def test_resolve_type_iter_a(self) -> None: + v1 = ('a', 'b', 'c') + resolved, has_tuple, values = prepare_iter_for_array(v1) + self.assertEqual(resolved, None) + + v22 = ('a', 'b', 3) + resolved, has_tuple, values = prepare_iter_for_array(v22) + self.assertEqual(resolved, object) + + v3 = ('a', 'b', (1, 2)) + resolved, has_tuple, values = prepare_iter_for_array(v3) + self.assertEqual(resolved, object) + self.assertTrue(has_tuple) + + v4 = (1, 2, 4.3, 2) + resolved, has_tuple, values = prepare_iter_for_array(v4) + self.assertEqual(resolved, None) + + v5 = (1, 2, 4.3, 2, None) + resolved, has_tuple, values = prepare_iter_for_array(v5) + self.assertEqual(resolved, None) + + v6 = (1, 2, 4.3, 2, 'g') + resolved, has_tuple, values = prepare_iter_for_array(v6) + self.assertEqual(resolved, object) + + v7 = () + resolved, has_tuple, values = prepare_iter_for_array(v7) + self.assertEqual(resolved, None) + + @unittest.skip('Not ready') + def test_resolve_type_iter_b(self) -> None: + v1 = iter(('a', 'b', 'c')) + resolved, has_tuple, values = prepare_iter_for_array(v1) + self.assertEqual(resolved, None) + + v2 = iter(('a', 'b', 3)) + resolved, has_tuple, values = prepare_iter_for_array(v2) + self.assertEqual(resolved, object) + + v3 = iter(('a', 'b', (1, 2))) + resolved, has_tuple, values = prepare_iter_for_array(v3) + self.assertEqual(resolved, object) + self.assertTrue(has_tuple) + + v4 = range(4) + resolved, has_tuple, values = prepare_iter_for_array(v4) + self.assertEqual(resolved, None) + + @unittest.skip('Not ready') + def test_resolve_type_iter_c(self) -> None: + a = [True, False, True] + resolved, has_tuple, values = prepare_iter_for_array(a) + self.assertEqual(id(a), id(values)) + + resolved, has_tuple, values = prepare_iter_for_array(iter(a)) + self.assertNotEqual(id(a), id(values)) + + self.assertEqual(resolved, None) + self.assertEqual(has_tuple, False) + + @unittest.skip('Not ready') + def test_resolve_type_iter_d(self) -> None: + a = [3, 2, (3,4)] + resolved, has_tuple, values = prepare_iter_for_array(a) + self.assertEqual(id(a), id(values)) + self.assertTrue(has_tuple) + + resolved, has_tuple, values = prepare_iter_for_array(iter(a)) + self.assertNotEqual(id(a), id(values)) + + self.assertEqual(resolved, object) + self.assertEqual(has_tuple, True) + + @unittest.skip('Not ready') + def test_resolve_type_iter_e(self) -> None: + a = [300000000000000002, 5000000000000000001] + resolved, has_tuple, values = prepare_iter_for_array(a) + self.assertEqual(id(a), id(values)) + + resolved, has_tuple, values = prepare_iter_for_array(iter(a)) + self.assertNotEqual(id(a), id(values)) + self.assertEqual(resolved, None) + self.assertEqual(has_tuple, False) + + @unittest.skip('Not ready') + def test_resolve_type_iter_f(self) -> None: + + def a() -> tp.Iterator[tp.Any]: + for i in range(3): + yield i + yield None + + resolved, has_tuple, values = prepare_iter_for_array(a()) + self.assertEqual(values, [0, 1, 2, None]) + self.assertEqual(resolved, None) + self.assertEqual(has_tuple, False) + + @unittest.skip('Not ready') + def test_resolve_type_iter_g(self) -> None: + + def a() -> tp.Iterator[tp.Any]: + yield None + for i in range(3): + yield i + + resolved, has_tuple, values = prepare_iter_for_array(a()) + self.assertEqual(values, [None, 0, 1, 2]) + self.assertEqual(resolved, None) + self.assertEqual(has_tuple, False) + + @unittest.skip('Not ready') + def test_resolve_type_iter_h(self) -> None: + + def a() -> tp.Iterator[tp.Any]: + yield 10 + yield None + for i in range(3): + yield i + yield (3,4) + + resolved, has_tuple, values = prepare_iter_for_array(a()) + self.assertEqual(values, [10, None, 0, 1, 2, (3,4)]) + self.assertEqual(resolved, object) + # we stop evaluation after finding object + self.assertEqual(has_tuple, True) + + @unittest.skip('Not ready') + def test_resolve_type_iter_i(self) -> None: + a0 = range(3, 7) + resolved, has_tuple, values = prepare_iter_for_array(a0) + # a copy is not made + self.assertEqual(id(a0), id(values)) + self.assertEqual(resolved, None) + + @unittest.skip('Not ready') + def test_resolve_type_iter_j(self) -> None: + # this case was found through hypothesis + a0 = [0.0, 36_028_797_018_963_969] + resolved, has_tuple, values = prepare_iter_for_array(a0) + self.assertEqual(resolved, object) + + @unittest.skip('Not ready') + def test_resolve_type_iter_k(self) -> None: + resolved, has_tuple, values = prepare_iter_for_array((x for x in ())) #type: ignore + self.assertEqual(resolved, None) + self.assertEqual(len(values), 0) + self.assertEqual(has_tuple, False) + + def test_resolve_type_iter_l(self) -> None: + self.assertEqual((None, False, []), prepare_iter_for_array([], True)) + self.assertEqual((None, False, ()), prepare_iter_for_array((), True)) + self.assertEqual((None, False, {}), prepare_iter_for_array({}, True)) + self.assertEqual((None, False, set()), prepare_iter_for_array(set(), True)) + if __name__ == '__main__': unittest.main() From 783b47c2352c7a64885026a231e19bbcece869ea Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Mon, 26 Apr 2021 18:52:36 -0700 Subject: [PATCH 03/19] Initial iteration on prepare_iter_for_array. --- .gitignore | 1 + arraykit.c | 195 +++++++++++++++++++++++++++++----- arraykit.pyi | 2 +- performance/main.py | 73 ++++++++++++- performance/reference/util.py | 62 +++++------ test/test_util.py | 64 +++++------ 6 files changed, 302 insertions(+), 95 deletions(-) diff --git a/.gitignore b/.gitignore index 300033e8..e55b9691 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ .vscode __pycache__ build +*.diff diff --git a/arraykit.c b/arraykit.c index 98c0921a..ede9038c 100644 --- a/arraykit.c +++ b/arraykit.c @@ -357,6 +357,26 @@ typedef enum IsGenCopyValues { NOT_GEN_NO_COPY } IsGenCopyValues; +static int +AK_obj_is_instance_of(PyObject *obj, const char *module_name, const char *module_type_name) +{ + PyObject *module = PyImport_ImportModule(module_name); + if (!module) { + return -1; + } + + PyObject *type = PyObject_GetAttrString(module, module_type_name); + Py_DECREF(module); + if (!type) { + return -1; + } + + int is_of_type = PyObject_IsInstance(obj, type); + Py_DECREF(type); + + return is_of_type; +} + static IsGenCopyValues AK_is_gen_copy_values(PyObject *arg) { @@ -374,25 +394,17 @@ AK_is_gen_copy_values(PyObject *arg) return NOT_GEN_COPY; } - PyObject *automap = PyImport_ImportModule("automap"); - if (!automap) { - return ERR; - } - - PyObject *frozenAutoMap = PyObject_GetAttrString(automap, "FrozenAutoMap"); - Py_DECREF(automap); - if (!frozenAutoMap) { + switch (AK_obj_is_instance_of(arg, "automap", "FrozenAutoMap")) + { + case -1: return ERR; - } - int is_frozen_automap = PyObject_IsInstance(arg, frozenAutoMap); - Py_DECREF(frozenAutoMap); + case 0: + return NOT_GEN_NO_COPY; - if (is_frozen_automap) { + default: return NOT_GEN_COPY; } - - return NOT_GEN_NO_COPY; } return IS_GEN; @@ -438,11 +450,11 @@ static PyObject* prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) { PyObject *values; - int restrict_copy; + int restrict_copy = 0; // False by default static char *kwlist[] = {"values", "restrict_copy", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi:prepare_iter_for_array", + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|p:prepare_iter_for_array", kwlist, &values, &restrict_copy)) @@ -451,7 +463,7 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) } bool is_gen = false; - bool needs_copy = false; + bool copy_values = false; switch (AK_is_gen_copy_values(values)) { @@ -459,26 +471,28 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) return NULL; case IS_GEN: is_gen = true; - needs_copy = true; + copy_values = true; break; case NOT_GEN_COPY: - needs_copy = true; + copy_values = true; break; case NOT_GEN_NO_COPY: break; } + ssize_t len; + if (!is_gen) { - ssize_t len = PyObject_Length(values); + len = PyObject_Size(values); if (len == -1) { return NULL; } if (len == 0) { - Py_INCREF(Py_None); Py_INCREF(Py_False); - PyObject *ret = PyTuple_Pack(3, Py_None, Py_False, values); + Py_INCREF(Py_False); + PyObject *ret = PyTuple_Pack(3, Py_False, Py_False, values); if (!ret) { - Py_DECREF(Py_None); + Py_DECREF(Py_False); Py_DECREF(Py_False); return NULL; } @@ -486,8 +500,139 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) } } - Py_INCREF(Py_True); - return Py_True; + PyObject *copied_values = NULL; + + if (restrict_copy) { + copy_values = false; + } + else if (copy_values) { + copied_values = PyList_New(0); + if (!copied_values) { + return NULL; + } + } + + len = 0; + bool is_object = false; + bool has_tuple = false; + bool has_str = false; + bool has_enum = false; + bool has_non_str = false; + bool has_inexact = false; + bool has_big_int = false; + + PyObject *int_max_coercible_to_float = PyLong_FromLong(1000000000000000); + if (!int_max_coercible_to_float) { + goto failure; + } + + PyObject *iterator = PyObject_GetIter(values); + if (!iterator) { + Py_DECREF(int_max_coercible_to_float); + goto failure; + } + PyObject *item = NULL; + + while ((item = PyIter_Next(iterator))) { + if (copy_values) { + if (-1 == PyList_Append(copied_values, item)) { + goto iteration_failure; + } + ++len; + } + int enum_success = AK_obj_is_instance_of(item, "enum", "Enum"); + if (enum_success == -1) { + goto iteration_failure; + } + + // These three API calls always succeed. + if (PyList_Check(item) || PyTuple_Check(item) || PyObject_HasAttrString(item, "__slots__")) { + has_tuple = true; + } + else if (enum_success) { + has_enum = true; + } + // This API call always succeeds + else if (PyUnicode_Check(item)) { + has_str = true; + } + else { + has_non_str = true; + + // These two API calls always succeed + if (PyFloat_Check(item) || PyComplex_Check(item)) { + has_inexact = true; + } + else if PyLong_Check(item) { + PyObject *abs_v = PyNumber_Absolute(item); + if (!abs_v) { + goto iteration_failure; + } + + int success = PyObject_RichCompareBool(abs_v, int_max_coercible_to_float, Py_GT); + Py_DECREF(abs_v); + if (success == -1) { + goto iteration_failure; + } + + has_big_int |= success; + } + } + + if (has_tuple | has_enum | (has_str & has_non_str)) { + is_object = true; + } + else if (has_big_int & has_inexact) { + is_object = true; + } + + if (is_object) { + if (copy_values) { + if (-1 == PyList_SetSlice(copied_values, len, len, iterator)) { + goto iteration_failure; + } + } + Py_DECREF(item); + break; + } + + Py_DECREF(item); + } + + Py_DECREF(iterator); + Py_DECREF(int_max_coercible_to_float); + + if (PyErr_Occurred()) { + goto failure; + } + + PyObject *is_object_obj = PyBool_FromLong(is_object); + if (!is_object_obj) { + goto failure; + } + + PyObject *is_tuple_obj = PyBool_FromLong(has_tuple); + if (!is_tuple_obj) { + Py_DECREF(is_object_obj); + goto failure; + } + + if (copy_values) { + PyObject *ret = PyTuple_Pack(3, is_object_obj, is_tuple_obj, copied_values); + Py_DECREF(copied_values); + return ret; + } + + return PyTuple_Pack(3, is_object_obj, is_tuple_obj, values); + +iteration_failure: + Py_DECREF(int_max_coercible_to_float); + Py_DECREF(iterator); + Py_DECREF(item); + +failure: + Py_XDECREF(copied_values); + return NULL; } //------------------------------------------------------------------------------ diff --git a/arraykit.pyi b/arraykit.pyi index 2f500e38..3c187692 100644 --- a/arraykit.pyi +++ b/arraykit.pyi @@ -31,5 +31,5 @@ def resolve_dtype_iter(__dtypes: tp.Iterable[np.dtype]) -> np.dtype: ... def is_gen_copy_values(__values: tp.Iterable[tp.Any]) -> tp.Tuple[bool, bool]: ... def prepare_iter_for_array( __values: tp.Iterable[tp.Any], - __restrict_copy: bool = False + restrict_copy: bool = ..., ) -> tp.Tuple[DtypeSpecifier, bool, tp.Sequence[tp.Any]]: ... diff --git a/performance/main.py b/performance/main.py index 682b36fa..e74f1f82 100644 --- a/performance/main.py +++ b/performance/main.py @@ -1,6 +1,4 @@ - - - +import typing as tp import timeit import argparse @@ -18,6 +16,7 @@ from performance.reference.util import resolve_dtype_iter as resolve_dtype_iter_ref from performance.reference.util import array_deepcopy as array_deepcopy_ref from performance.reference.util import is_gen_copy_values as is_gen_copy_values_ref +from performance.reference.util import prepare_iter_for_array as prepare_iter_for_array_ref from performance.reference.array_go import ArrayGO as ArrayGOREF @@ -32,6 +31,7 @@ from arraykit import resolve_dtype_iter as resolve_dtype_iter_ak from arraykit import array_deepcopy as array_deepcopy_ak from arraykit import is_gen_copy_values as is_gen_copy_values_ak +from arraykit import prepare_iter_for_array as prepare_iter_for_array_ak from arraykit import ArrayGO as ArrayGOAK @@ -277,6 +277,73 @@ class IsGenCopyValuesAK(IsGenCopyValues): class IsGenCopyValuesREF(IsGenCopyValues): entry = staticmethod(is_gen_copy_values_ref) + +#------------------------------------------------------------------------------- +class PrepareIterForArray(Perf): + NUMBER = 10000 + + def pre(self): + def a() -> tp.Iterator[tp.Any]: + for i in range(3): + yield i + yield None + + def b() -> tp.Iterator[tp.Any]: + yield None + for i in range(3): + yield i + + def c() -> tp.Iterator[tp.Any]: + yield 10 + yield None + for i in range(3): + yield i + yield (3,4) + + self.iterables = [ + #('a', 'b', 'c'), + # iter(('a', 'b', 'c') * 500), + + #('a', 'b', 3), + # iter(('a', 'b', 3)), + + # ('a', 'b', (1, 2)), + # iter(('a', 'b', (1, 2))), + + #[True, False, True], + # iter([True, False, True]), + + # (1, 2, 4.3, 2), + # (1, 2, 4.3, 2, None), + # (1, 2, 4.3, 2, 'g'), + # range(4), + # [3, 2, (3,4)], + # iter([3, 2, (3,4)]), + # [300000000000000002, 5000000000000000001], + # iter([300000000000000002, 5000000000000000001]), + # a(), + # b(), + # c(), + # range(3, 7), + #[0.0, 36_028_797_018_963_969], + #(x for x in ()), + list(), + tuple(), + dict(), + set(), + ] + + def main(self): + for restrict_copy in (True, False): + for iterable in self.iterables: + self.entry(iterable, restrict_copy=restrict_copy) + +class PrepareIterForArrayAK(PrepareIterForArray): + entry = staticmethod(prepare_iter_for_array_ak) + +class PrepareIterForArrayREF(PrepareIterForArray): + entry = staticmethod(prepare_iter_for_array_ref) + #------------------------------------------------------------------------------- diff --git a/performance/reference/util.py b/performance/reference/util.py index d819eb30..e9fee98a 100644 --- a/performance/reference/util.py +++ b/performance/reference/util.py @@ -230,14 +230,15 @@ def prepare_iter_for_array( values: can be a generator that will be exhausted in processing; if a generator, a copy will be made and returned as values. restrict_copy: if True, reject making a copy, even if a generator is given + Returns: - resolved, has_tuple, values + is_object, has_tuple, values ''' is_gen, copy_values = is_gen_copy_values(values) if not is_gen and len(values) == 0: #type: ignore - return None, False, values #type: ignore + return False, False, values #type: ignore if restrict_copy: copy_values = False @@ -247,7 +248,7 @@ def prepare_iter_for_array( if copy_values: values_post = [] - resolved = None # None is valid specifier if the type is not ambiguous + is_object = False has_tuple = False has_str = False has_enum = False @@ -261,36 +262,37 @@ def prepare_iter_for_array( # for array construction, cannot use dictlike, so must convert to list values_post.append(v) - if resolved != object: - value_type = type(v) - - # need to get tuple subclasses, like NamedTuple - if isinstance(v, (tuple, list)) or hasattr(v, '__slots__'): - # identify SF types by if they have __slots__ defined; they also must be assigned after array creation, so we treat them like tuples - has_tuple = True - elif isinstance(v, Enum): - # must check isinstance, as Enum types are always derived from Enum - has_enum = True - elif value_type == str or value_type == np.str_: - # must compare to both string types - has_str = True - else: - has_non_str = True - if value_type in INEXACT_TYPES: - has_inexact = True - elif value_type == int and abs(v) > INT_MAX_COERCIBLE_TO_FLOAT: - has_big_int = True - - if has_tuple or has_enum or (has_str and has_non_str): - resolved = object - elif has_big_int and has_inexact: - resolved = object - else: # resolved is object, can exit + value_type = type(v) + + # need to get tuple subclasses, like NamedTuple + if isinstance(v, (tuple, list)) or hasattr(v, '__slots__'): + # identify SF types by if they have __slots__ defined; they also must be assigned after array creation, so we treat them like tuples + has_tuple = True + elif isinstance(v, Enum): + # must check isinstance, as Enum types are always derived from Enum + has_enum = True + elif value_type == str or value_type == np.str_: + # must compare to both string types + has_str = True + else: + has_non_str = True + if value_type in INEXACT_TYPES: + has_inexact = True + elif value_type == int and abs(v) > INT_MAX_COERCIBLE_TO_FLOAT: + has_big_int = True + + if has_tuple or has_enum or (has_str and has_non_str): + is_object = True + + elif has_big_int and has_inexact: + is_object = True + + if is_object: if copy_values: values_post.extend(v_iter) break # NOTE: we break before finding a tuple, but our treatment of object types, downstream, will always assign them in the appropriate way if copy_values: - return resolved, has_tuple, values_post - return resolved, has_tuple, values #type: ignore + return is_object, has_tuple, values_post + return is_object, has_tuple, values #type: ignore diff --git a/test/test_util.py b/test/test_util.py index f377175f..b462af23 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -14,9 +14,10 @@ from arraykit import immutable_filter from arraykit import array_deepcopy from arraykit import is_gen_copy_values -from arraykit import prepare_iter_for_array from performance.reference.util import mloc as mloc_ref + +from arraykit import prepare_iter_for_array #from performance.reference.util import prepare_iter_for_array @@ -247,57 +248,56 @@ def test_is_gen_copy_values(self) -> None: self.assertEqual((False, True), is_gen_copy_values(d)) self.assertEqual((False, True), is_gen_copy_values(s)) - @unittest.skip('Not ready') + +class TestPrepareIterUnit(unittest.TestCase): def test_resolve_type_iter_a(self) -> None: v1 = ('a', 'b', 'c') resolved, has_tuple, values = prepare_iter_for_array(v1) - self.assertEqual(resolved, None) + self.assertEqual(resolved, False) v22 = ('a', 'b', 3) resolved, has_tuple, values = prepare_iter_for_array(v22) - self.assertEqual(resolved, object) + self.assertEqual(resolved, True) v3 = ('a', 'b', (1, 2)) resolved, has_tuple, values = prepare_iter_for_array(v3) - self.assertEqual(resolved, object) + self.assertEqual(resolved, True) self.assertTrue(has_tuple) v4 = (1, 2, 4.3, 2) resolved, has_tuple, values = prepare_iter_for_array(v4) - self.assertEqual(resolved, None) + self.assertEqual(resolved, False) v5 = (1, 2, 4.3, 2, None) resolved, has_tuple, values = prepare_iter_for_array(v5) - self.assertEqual(resolved, None) + self.assertEqual(resolved, False) v6 = (1, 2, 4.3, 2, 'g') resolved, has_tuple, values = prepare_iter_for_array(v6) - self.assertEqual(resolved, object) + self.assertEqual(resolved, True) v7 = () resolved, has_tuple, values = prepare_iter_for_array(v7) - self.assertEqual(resolved, None) + self.assertEqual(resolved, False) - @unittest.skip('Not ready') def test_resolve_type_iter_b(self) -> None: v1 = iter(('a', 'b', 'c')) resolved, has_tuple, values = prepare_iter_for_array(v1) - self.assertEqual(resolved, None) + self.assertEqual(resolved, False) v2 = iter(('a', 'b', 3)) resolved, has_tuple, values = prepare_iter_for_array(v2) - self.assertEqual(resolved, object) + self.assertEqual(resolved, True) v3 = iter(('a', 'b', (1, 2))) resolved, has_tuple, values = prepare_iter_for_array(v3) - self.assertEqual(resolved, object) + self.assertEqual(resolved, True) self.assertTrue(has_tuple) v4 = range(4) resolved, has_tuple, values = prepare_iter_for_array(v4) - self.assertEqual(resolved, None) + self.assertEqual(resolved, False) - @unittest.skip('Not ready') def test_resolve_type_iter_c(self) -> None: a = [True, False, True] resolved, has_tuple, values = prepare_iter_for_array(a) @@ -306,10 +306,9 @@ def test_resolve_type_iter_c(self) -> None: resolved, has_tuple, values = prepare_iter_for_array(iter(a)) self.assertNotEqual(id(a), id(values)) - self.assertEqual(resolved, None) + self.assertEqual(resolved, False) self.assertEqual(has_tuple, False) - @unittest.skip('Not ready') def test_resolve_type_iter_d(self) -> None: a = [3, 2, (3,4)] resolved, has_tuple, values = prepare_iter_for_array(a) @@ -319,10 +318,9 @@ def test_resolve_type_iter_d(self) -> None: resolved, has_tuple, values = prepare_iter_for_array(iter(a)) self.assertNotEqual(id(a), id(values)) - self.assertEqual(resolved, object) + self.assertEqual(resolved, True) self.assertEqual(has_tuple, True) - @unittest.skip('Not ready') def test_resolve_type_iter_e(self) -> None: a = [300000000000000002, 5000000000000000001] resolved, has_tuple, values = prepare_iter_for_array(a) @@ -330,10 +328,9 @@ def test_resolve_type_iter_e(self) -> None: resolved, has_tuple, values = prepare_iter_for_array(iter(a)) self.assertNotEqual(id(a), id(values)) - self.assertEqual(resolved, None) + self.assertEqual(resolved, False) self.assertEqual(has_tuple, False) - @unittest.skip('Not ready') def test_resolve_type_iter_f(self) -> None: def a() -> tp.Iterator[tp.Any]: @@ -343,10 +340,9 @@ def a() -> tp.Iterator[tp.Any]: resolved, has_tuple, values = prepare_iter_for_array(a()) self.assertEqual(values, [0, 1, 2, None]) - self.assertEqual(resolved, None) + self.assertEqual(resolved, False) self.assertEqual(has_tuple, False) - @unittest.skip('Not ready') def test_resolve_type_iter_g(self) -> None: def a() -> tp.Iterator[tp.Any]: @@ -356,10 +352,9 @@ def a() -> tp.Iterator[tp.Any]: resolved, has_tuple, values = prepare_iter_for_array(a()) self.assertEqual(values, [None, 0, 1, 2]) - self.assertEqual(resolved, None) + self.assertEqual(resolved, False) self.assertEqual(has_tuple, False) - @unittest.skip('Not ready') def test_resolve_type_iter_h(self) -> None: def a() -> tp.Iterator[tp.Any]: @@ -371,37 +366,34 @@ def a() -> tp.Iterator[tp.Any]: resolved, has_tuple, values = prepare_iter_for_array(a()) self.assertEqual(values, [10, None, 0, 1, 2, (3,4)]) - self.assertEqual(resolved, object) + self.assertEqual(resolved, True) # we stop evaluation after finding object self.assertEqual(has_tuple, True) - @unittest.skip('Not ready') def test_resolve_type_iter_i(self) -> None: a0 = range(3, 7) resolved, has_tuple, values = prepare_iter_for_array(a0) # a copy is not made self.assertEqual(id(a0), id(values)) - self.assertEqual(resolved, None) + self.assertEqual(resolved, False) - @unittest.skip('Not ready') def test_resolve_type_iter_j(self) -> None: # this case was found through hypothesis a0 = [0.0, 36_028_797_018_963_969] resolved, has_tuple, values = prepare_iter_for_array(a0) - self.assertEqual(resolved, object) + self.assertEqual(resolved, True) - @unittest.skip('Not ready') def test_resolve_type_iter_k(self) -> None: resolved, has_tuple, values = prepare_iter_for_array((x for x in ())) #type: ignore - self.assertEqual(resolved, None) + self.assertEqual(resolved, False) self.assertEqual(len(values), 0) self.assertEqual(has_tuple, False) def test_resolve_type_iter_l(self) -> None: - self.assertEqual((None, False, []), prepare_iter_for_array([], True)) - self.assertEqual((None, False, ()), prepare_iter_for_array((), True)) - self.assertEqual((None, False, {}), prepare_iter_for_array({}, True)) - self.assertEqual((None, False, set()), prepare_iter_for_array(set(), True)) + self.assertEqual((False, False, []), prepare_iter_for_array([], True)) + self.assertEqual((False, False, ()), prepare_iter_for_array((), True)) + self.assertEqual((False, False, {}), prepare_iter_for_array({}, True)) + self.assertEqual((False, False, set()), prepare_iter_for_array(set(), True)) if __name__ == '__main__': From 4ab7a7c3182b7c586dc297c86a1b67fd983ddf87 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Wed, 28 Apr 2021 09:33:58 -0700 Subject: [PATCH 04/19] Initial effort to clean up isinstance checks. Cleans up some ref counting. --- arraykit.c | 141 +++++++++++++++++++++----------------------- performance/main.py | 52 ++++++++-------- 2 files changed, 94 insertions(+), 99 deletions(-) diff --git a/arraykit.c b/arraykit.c index ede9038c..9220886c 100644 --- a/arraykit.c +++ b/arraykit.c @@ -357,36 +357,9 @@ typedef enum IsGenCopyValues { NOT_GEN_NO_COPY } IsGenCopyValues; -static int -AK_obj_is_instance_of(PyObject *obj, const char *module_name, const char *module_type_name) -{ - PyObject *module = PyImport_ImportModule(module_name); - if (!module) { - return -1; - } - - PyObject *type = PyObject_GetAttrString(module, module_type_name); - Py_DECREF(module); - if (!type) { - return -1; - } - - int is_of_type = PyObject_IsInstance(obj, type); - Py_DECREF(type); - - return is_of_type; -} - static IsGenCopyValues -AK_is_gen_copy_values(PyObject *arg) +AK_is_gen_copy_values(PyObject *arg, PyObject *frozen_automap_type) { - /* - Returns: - -1: for failure - 0: is a generator - 1: a non-generator that needs to be copied - 2: a non-generator that does not needs to be copied - */ if(PyObject_HasAttrString(arg, "__len__")) { if (PySet_Check(arg) || PyDict_Check(arg) || PyDictValues_Check(arg) || PyDictKeys_Check(arg)) @@ -394,7 +367,7 @@ AK_is_gen_copy_values(PyObject *arg) return NOT_GEN_COPY; } - switch (AK_obj_is_instance_of(arg, "automap", "FrozenAutoMap")) + switch (PyObject_IsInstance(arg, frozen_automap_type)) { case -1: return ERR; @@ -416,34 +389,34 @@ is_gen_copy_values(PyObject *Py_UNUSED(m), PyObject *arg) // This only exists to allow `AK_is_gen_copy_values` to be tested PyObject *is_gen = Py_False; - PyObject *needs_copy = Py_False; + PyObject *copy_values = Py_False; - switch (AK_is_gen_copy_values(arg)) - { - case ERR: + PyObject *automap_module = PyImport_ImportModule("automap"); + if (!automap_module) { return NULL; - case IS_GEN: + } + + PyObject *frozen_automap_type = PyObject_GetAttrString(automap_module, "FrozenAutoMap"); + Py_DECREF(automap_module); + if (!frozen_automap_type) { + return NULL; + } + + IsGenCopyValues is_gen_ret = AK_is_gen_copy_values(arg, frozen_automap_type); + Py_DECREF(frozen_automap_type); + + if (is_gen_ret == IS_GEN) { is_gen = Py_True; - needs_copy = Py_True; - break; - case NOT_GEN_COPY: - needs_copy = Py_True; - break; - case NOT_GEN_NO_COPY: - break; - } - - Py_INCREF(is_gen); - Py_INCREF(needs_copy); - - PyObject *ret = PyTuple_Pack(2, is_gen, needs_copy); - if (!ret) { - Py_DECREF(is_gen); - Py_DECREF(needs_copy); + copy_values = Py_True; + } + else if (is_gen_ret == NOT_GEN_COPY) { + copy_values = Py_True; + } + else if (is_gen_ret == ERR) { return NULL; } - return ret; + return PyTuple_Pack(2, is_gen, copy_values); } static PyObject* @@ -462,22 +435,32 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) return NULL; } + PyObject *automap_module = PyImport_ImportModule("automap"); + if (!automap_module) { + return NULL; + } + + PyObject *frozen_automap_type = PyObject_GetAttrString(automap_module, "FrozenAutoMap"); + Py_DECREF(automap_module); + if (!frozen_automap_type) { + return NULL; + } + bool is_gen = false; bool copy_values = false; - switch (AK_is_gen_copy_values(values)) - { - case ERR: - return NULL; - case IS_GEN: + IsGenCopyValues is_gen_ret = AK_is_gen_copy_values(values, frozen_automap_type); + Py_DECREF(frozen_automap_type); + + if (is_gen_ret == IS_GEN) { is_gen = true; copy_values = true; - break; - case NOT_GEN_COPY: + } + else if (is_gen_ret == NOT_GEN_COPY) { copy_values = true; - break; - case NOT_GEN_NO_COPY: - break; + } + else if (is_gen_ret == ERR) { + return NULL; } ssize_t len; @@ -488,15 +471,7 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) return NULL; } if (len == 0) { - Py_INCREF(Py_False); - Py_INCREF(Py_False); - PyObject *ret = PyTuple_Pack(3, Py_False, Py_False, values); - if (!ret) { - Py_DECREF(Py_False); - Py_DECREF(Py_False); - return NULL; - } - return ret; + return PyTuple_Pack(3, Py_False, Py_False, values); } } @@ -526,9 +501,21 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) goto failure; } + PyObject *enum_module = PyImport_ImportModule("enum"); + if (!enum_module) { + goto failure; + } + + PyObject *enum_type = PyObject_GetAttrString(enum_module, "Enum"); + Py_DECREF(enum_module); + if (!enum_type) { + goto failure; + } + PyObject *iterator = PyObject_GetIter(values); if (!iterator) { Py_DECREF(int_max_coercible_to_float); + Py_DECREF(enum_type); goto failure; } PyObject *item = NULL; @@ -540,7 +527,9 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) } ++len; } - int enum_success = AK_obj_is_instance_of(item, "enum", "Enum"); + + // Clean this up + int enum_success = 0;//PyObject_IsInstance(item, enum_type); if (enum_success == -1) { goto iteration_failure; } @@ -559,7 +548,7 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) else { has_non_str = true; - // These two API calls always succeed + // These two API calls always succeed. 96% sure this catches np types if (PyFloat_Check(item) || PyComplex_Check(item)) { has_inexact = true; } @@ -619,13 +608,19 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) if (copy_values) { PyObject *ret = PyTuple_Pack(3, is_object_obj, is_tuple_obj, copied_values); + Py_DECREF(is_object_obj); + Py_DECREF(is_tuple_obj); Py_DECREF(copied_values); return ret; } - return PyTuple_Pack(3, is_object_obj, is_tuple_obj, values); + PyObject *ret = PyTuple_Pack(3, is_object_obj, is_tuple_obj, values); + Py_DECREF(is_object_obj); + Py_DECREF(is_tuple_obj); + return ret; iteration_failure: + Py_DECREF(enum_type); Py_DECREF(int_max_coercible_to_float); Py_DECREF(iterator); Py_DECREF(item); diff --git a/performance/main.py b/performance/main.py index e74f1f82..32800aa1 100644 --- a/performance/main.py +++ b/performance/main.py @@ -301,32 +301,32 @@ def c() -> tp.Iterator[tp.Any]: yield (3,4) self.iterables = [ - #('a', 'b', 'c'), - # iter(('a', 'b', 'c') * 500), - - #('a', 'b', 3), - # iter(('a', 'b', 3)), - - # ('a', 'b', (1, 2)), - # iter(('a', 'b', (1, 2))), - - #[True, False, True], - # iter([True, False, True]), - - # (1, 2, 4.3, 2), - # (1, 2, 4.3, 2, None), - # (1, 2, 4.3, 2, 'g'), - # range(4), - # [3, 2, (3,4)], - # iter([3, 2, (3,4)]), - # [300000000000000002, 5000000000000000001], - # iter([300000000000000002, 5000000000000000001]), - # a(), - # b(), - # c(), - # range(3, 7), - #[0.0, 36_028_797_018_963_969], - #(x for x in ()), + ('a', 'b', 'c'), + iter(('a', 'b', 'c') * 500), + + ('a', 'b', 3), + iter(('a', 'b', 3)), + + ('a', 'b', (1, 2)), + iter(('a', 'b', (1, 2))), + + [True, False, True], + iter([True, False, True]), + + (1, 2, 4.3, 2), + (1, 2, 4.3, 2, None), + (1, 2, 4.3, 2, 'g'), + range(4), + [3, 2, (3,4)], + iter([3, 2, (3,4)]), + [300000000000000002, 5000000000000000001], + iter([300000000000000002, 5000000000000000001]), + a(), + b(), + c(), + range(3, 7), + [0.0, 36_028_797_018_963_969], + (x for x in ()), list(), tuple(), dict(), From 3613f359daa47f8f45e720e73c298bfddcebf4aa Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Wed, 28 Apr 2021 10:44:20 -0700 Subject: [PATCH 05/19] Adds FrozenAutoMap to module to avoid imports. Cleans up perf test and enum type check. --- arraykit.c | 79 ++++++++++++++++++--------------------------- performance/main.py | 60 +++++++++++++++++++++++----------- 2 files changed, 73 insertions(+), 66 deletions(-) diff --git a/arraykit.c b/arraykit.c index 9220886c..da6e2243 100644 --- a/arraykit.c +++ b/arraykit.c @@ -358,7 +358,7 @@ typedef enum IsGenCopyValues { } IsGenCopyValues; static IsGenCopyValues -AK_is_gen_copy_values(PyObject *arg, PyObject *frozen_automap_type) +AK_is_gen_copy_values(PyObject *arg, PyObject *frozenAutoMap) { if(PyObject_HasAttrString(arg, "__len__")) { if (PySet_Check(arg) || PyDict_Check(arg) || @@ -367,7 +367,7 @@ AK_is_gen_copy_values(PyObject *arg, PyObject *frozen_automap_type) return NOT_GEN_COPY; } - switch (PyObject_IsInstance(arg, frozen_automap_type)) + switch (PyObject_IsInstance(arg, frozenAutoMap)) { case -1: return ERR; @@ -384,26 +384,20 @@ AK_is_gen_copy_values(PyObject *arg, PyObject *frozen_automap_type) } static PyObject* -is_gen_copy_values(PyObject *Py_UNUSED(m), PyObject *arg) +is_gen_copy_values(PyObject *m, PyObject *arg) { // This only exists to allow `AK_is_gen_copy_values` to be tested - PyObject *is_gen = Py_False; - PyObject *copy_values = Py_False; - - PyObject *automap_module = PyImport_ImportModule("automap"); - if (!automap_module) { + PyObject *frozenAutoMap = PyObject_GetAttrString(m, "FrozenAutoMap"); + if (!frozenAutoMap) { return NULL; } - PyObject *frozen_automap_type = PyObject_GetAttrString(automap_module, "FrozenAutoMap"); - Py_DECREF(automap_module); - if (!frozen_automap_type) { - return NULL; - } + PyObject *is_gen = Py_False; + PyObject *copy_values = Py_False; - IsGenCopyValues is_gen_ret = AK_is_gen_copy_values(arg, frozen_automap_type); - Py_DECREF(frozen_automap_type); + IsGenCopyValues is_gen_ret = AK_is_gen_copy_values(arg, frozenAutoMap); + Py_DECREF(frozenAutoMap); if (is_gen_ret == IS_GEN) { is_gen = Py_True; @@ -420,7 +414,7 @@ is_gen_copy_values(PyObject *Py_UNUSED(m), PyObject *arg) } static PyObject* -prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) +prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) { PyObject *values; int restrict_copy = 0; // False by default @@ -435,22 +429,16 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) return NULL; } - PyObject *automap_module = PyImport_ImportModule("automap"); - if (!automap_module) { - return NULL; - } - - PyObject *frozen_automap_type = PyObject_GetAttrString(automap_module, "FrozenAutoMap"); - Py_DECREF(automap_module); - if (!frozen_automap_type) { + PyObject *frozenAutoMap = PyObject_GetAttrString(m, "FrozenAutoMap"); + if (!frozenAutoMap) { return NULL; } bool is_gen = false; bool copy_values = false; - IsGenCopyValues is_gen_ret = AK_is_gen_copy_values(values, frozen_automap_type); - Py_DECREF(frozen_automap_type); + IsGenCopyValues is_gen_ret = AK_is_gen_copy_values(values, frozenAutoMap); + Py_DECREF(frozenAutoMap); if (is_gen_ret == IS_GEN) { is_gen = true; @@ -501,21 +489,9 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) goto failure; } - PyObject *enum_module = PyImport_ImportModule("enum"); - if (!enum_module) { - goto failure; - } - - PyObject *enum_type = PyObject_GetAttrString(enum_module, "Enum"); - Py_DECREF(enum_module); - if (!enum_type) { - goto failure; - } - PyObject *iterator = PyObject_GetIter(values); if (!iterator) { Py_DECREF(int_max_coercible_to_float); - Py_DECREF(enum_type); goto failure; } PyObject *item = NULL; @@ -528,17 +504,11 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) ++len; } - // Clean this up - int enum_success = 0;//PyObject_IsInstance(item, enum_type); - if (enum_success == -1) { - goto iteration_failure; - } - // These three API calls always succeed. if (PyList_Check(item) || PyTuple_Check(item) || PyObject_HasAttrString(item, "__slots__")) { has_tuple = true; } - else if (enum_success) { + else if (PyObject_TypeCheck(item, &PyEnum_Type)) { has_enum = true; } // This API call always succeeds @@ -620,7 +590,6 @@ prepare_iter_for_array(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs) return ret; iteration_failure: - Py_DECREF(enum_type); Py_DECREF(int_max_coercible_to_float); Py_DECREF(iterator); Py_DECREF(item); @@ -921,11 +890,27 @@ PyInit_arraykit(void) { import_array(); PyObject *m = PyModule_Create(&arraykit_module); + + PyObject *automap_module = PyImport_ImportModule("automap"); + if (!automap_module) { + Py_XDECREF(m); + return NULL; + } + + PyObject *frozenAutoMap = PyObject_GetAttrString(automap_module, "FrozenAutoMap"); + Py_DECREF(automap_module); + if (!frozenAutoMap) { + Py_XDECREF(m); + return NULL; + } + if (!m || PyModule_AddStringConstant(m, "__version__", Py_STRINGIFY(AK_VERSION)) || PyType_Ready(&ArrayGOType) || - PyModule_AddObject(m, "ArrayGO", (PyObject *) &ArrayGOType)) + PyModule_AddObject(m, "ArrayGO", (PyObject *) &ArrayGOType) || + PyModule_AddObject(m, "FrozenAutoMap", frozenAutoMap)) { + Py_DECREF(frozenAutoMap); Py_XDECREF(m); return NULL; } diff --git a/performance/main.py b/performance/main.py index 32800aa1..088b0b3b 100644 --- a/performance/main.py +++ b/performance/main.py @@ -1,6 +1,7 @@ import typing as tp import timeit import argparse +from enum import Enum from automap import FrozenAutoMap import numpy as np @@ -280,7 +281,8 @@ class IsGenCopyValuesREF(IsGenCopyValues): #------------------------------------------------------------------------------- class PrepareIterForArray(Perf): - NUMBER = 10000 + NUMBER = 5 + FUNCTIONS = ('iter_small', 'iter_large') def pre(self): def a() -> tp.Iterator[tp.Any]: @@ -300,30 +302,22 @@ def c() -> tp.Iterator[tp.Any]: yield i yield (3,4) - self.iterables = [ - ('a', 'b', 'c'), - iter(('a', 'b', 'c') * 500), + class E(Enum): + A = 1 + B = 2 + C = 3 + self.small_iterables = [ + ('a', 'b', 'c'), ('a', 'b', 3), - iter(('a', 'b', 3)), - ('a', 'b', (1, 2)), - iter(('a', 'b', (1, 2))), - [True, False, True], - iter([True, False, True]), - (1, 2, 4.3, 2), (1, 2, 4.3, 2, None), (1, 2, 4.3, 2, 'g'), range(4), [3, 2, (3,4)], - iter([3, 2, (3,4)]), [300000000000000002, 5000000000000000001], - iter([300000000000000002, 5000000000000000001]), - a(), - b(), - c(), range(3, 7), [0.0, 36_028_797_018_963_969], (x for x in ()), @@ -331,11 +325,40 @@ def c() -> tp.Iterator[tp.Any]: tuple(), dict(), set(), + FrozenAutoMap((1, 2, 3, 4, 5, 6)), + [E.A, E.B, E.C], ] - def main(self): + self.small_iterables.extend([iter(iterable) for iterable in self.small_iterables]) + self.small_iterables.extend((a(), b(), c())) + + self.large_iterables = [ + ('a', 'b', 'c') * 10000, + ('a', 'b', 'c') * 10000 + (1, ), + ('a', 'b', 'c') * 10000 + ((1, 2), ), + [True, False, True] * 10000, + (1, 2, 4.3, 2) * 10000, + (1, 2, 4.3, 2) * 10000 + (None, ), + (1, 2, 4.3, 2) * 10000 + ('g', ), + range(10000), + [3, 2, 1] * 10000 + [(3,4)], + [300000000000000002] * 20000 + [5000000000000000001], + range(30000, 40000), + [0.0] * 20000 + [36_028_797_018_963_969], + FrozenAutoMap(range(10000)), + [E.A, E.B, E.C] * 10000, + ] + self.large_iterables.extend([iter(iterable) for iterable in self.large_iterables]) + + def iter_small(self): + for _ in range(2000): + for restrict_copy in (True, False): + for iterable in self.small_iterables: + self.entry(iterable, restrict_copy=restrict_copy) + + def iter_large(self): for restrict_copy in (True, False): - for iterable in self.iterables: + for iterable in self.large_iterables: self.entry(iterable, restrict_copy=restrict_copy) class PrepareIterForArrayAK(PrepareIterForArray): @@ -364,7 +387,6 @@ def main(): records = [('cls', 'func', 'ak', 'ref', 'ref/ak')] for cls_perf in Perf.__subclasses__(): # only get one level - print(cls_perf) cls_map = {} if match and cls_perf.__name__ not in match: continue @@ -385,7 +407,7 @@ def main(): number=cls_runner.NUMBER) records.append((cls_perf.__name__, func_attr, results['ak'], results['ref'], results['ref'] / results['ak'])) - width = 24 + width = 32 for record in records: print(''.join( (r.ljust(width) if isinstance(r, str) else str(round(r, 8)).ljust(width)) for r in record From 5d56ad772598c74708a81ea00640871bc65edc49 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Wed, 28 Apr 2021 11:57:36 -0700 Subject: [PATCH 06/19] Adds automap to requirements. --- requirements-test.txt | 2 +- requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements-test.txt b/requirements-test.txt index 559d1003..82a65547 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -3,4 +3,4 @@ numpy==1.16.5 pytest==3.8.0 pylint==2.7.4 invoke==1.4.0 - +automap==0.4.8 diff --git a/requirements.txt b/requirements.txt index 1afd8367..7bc49d05 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ numpy==1.16.5 +automap==0.4.8 From fb762954272b72f55e478014aeae18c3924094e1 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Wed, 28 Apr 2021 12:09:57 -0700 Subject: [PATCH 07/19] Simplifies and cleans up unit test. --- test/test_util.py | 97 +++++++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 54 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index b462af23..cf39dd5c 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -251,84 +251,73 @@ def test_is_gen_copy_values(self) -> None: class TestPrepareIterUnit(unittest.TestCase): def test_resolve_type_iter_a(self) -> None: - v1 = ('a', 'b', 'c') - resolved, has_tuple, values = prepare_iter_for_array(v1) - self.assertEqual(resolved, False) + is_object, has_tuple, values = prepare_iter_for_array(('a', 'b', 'c')) + self.assertEqual(is_object, False) - v22 = ('a', 'b', 3) - resolved, has_tuple, values = prepare_iter_for_array(v22) - self.assertEqual(resolved, True) + is_object, has_tuple, values = prepare_iter_for_array(('a', 'b', 3)) + self.assertEqual(is_object, True) - v3 = ('a', 'b', (1, 2)) - resolved, has_tuple, values = prepare_iter_for_array(v3) - self.assertEqual(resolved, True) + is_object, has_tuple, values = prepare_iter_for_array(('a', 'b', (1, 2))) + self.assertEqual(is_object, True) self.assertTrue(has_tuple) - v4 = (1, 2, 4.3, 2) - resolved, has_tuple, values = prepare_iter_for_array(v4) - self.assertEqual(resolved, False) + is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2)) + self.assertEqual(is_object, False) - v5 = (1, 2, 4.3, 2, None) - resolved, has_tuple, values = prepare_iter_for_array(v5) - self.assertEqual(resolved, False) + is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, None)) + self.assertEqual(is_object, False) - v6 = (1, 2, 4.3, 2, 'g') - resolved, has_tuple, values = prepare_iter_for_array(v6) - self.assertEqual(resolved, True) + is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, 'g')) + self.assertEqual(is_object, True) - v7 = () - resolved, has_tuple, values = prepare_iter_for_array(v7) - self.assertEqual(resolved, False) + is_object, has_tuple, values = prepare_iter_for_array(()) + self.assertEqual(is_object, False) def test_resolve_type_iter_b(self) -> None: - v1 = iter(('a', 'b', 'c')) - resolved, has_tuple, values = prepare_iter_for_array(v1) - self.assertEqual(resolved, False) + is_object, has_tuple, values = prepare_iter_for_array(iter(('a', 'b', 'c'))) + self.assertEqual(is_object, False) - v2 = iter(('a', 'b', 3)) - resolved, has_tuple, values = prepare_iter_for_array(v2) - self.assertEqual(resolved, True) + is_object, has_tuple, values = prepare_iter_for_array(iter(('a', 'b', 3))) + self.assertEqual(is_object, True) - v3 = iter(('a', 'b', (1, 2))) - resolved, has_tuple, values = prepare_iter_for_array(v3) - self.assertEqual(resolved, True) + is_object, has_tuple, values = prepare_iter_for_array(iter(('a', 'b', (1, 2)))) + self.assertEqual(is_object, True) self.assertTrue(has_tuple) - v4 = range(4) - resolved, has_tuple, values = prepare_iter_for_array(v4) - self.assertEqual(resolved, False) + is_object, has_tuple, values = prepare_iter_for_array(range(4)) + self.assertEqual(is_object, False) def test_resolve_type_iter_c(self) -> None: a = [True, False, True] - resolved, has_tuple, values = prepare_iter_for_array(a) + is_object, has_tuple, values = prepare_iter_for_array(a) self.assertEqual(id(a), id(values)) - resolved, has_tuple, values = prepare_iter_for_array(iter(a)) + is_object, has_tuple, values = prepare_iter_for_array(iter(a)) self.assertNotEqual(id(a), id(values)) - self.assertEqual(resolved, False) + self.assertEqual(is_object, False) self.assertEqual(has_tuple, False) def test_resolve_type_iter_d(self) -> None: a = [3, 2, (3,4)] - resolved, has_tuple, values = prepare_iter_for_array(a) + is_object, has_tuple, values = prepare_iter_for_array(a) self.assertEqual(id(a), id(values)) self.assertTrue(has_tuple) - resolved, has_tuple, values = prepare_iter_for_array(iter(a)) + is_object, has_tuple, values = prepare_iter_for_array(iter(a)) self.assertNotEqual(id(a), id(values)) - self.assertEqual(resolved, True) + self.assertEqual(is_object, True) self.assertEqual(has_tuple, True) def test_resolve_type_iter_e(self) -> None: a = [300000000000000002, 5000000000000000001] - resolved, has_tuple, values = prepare_iter_for_array(a) + is_object, has_tuple, values = prepare_iter_for_array(a) self.assertEqual(id(a), id(values)) - resolved, has_tuple, values = prepare_iter_for_array(iter(a)) + is_object, has_tuple, values = prepare_iter_for_array(iter(a)) self.assertNotEqual(id(a), id(values)) - self.assertEqual(resolved, False) + self.assertEqual(is_object, False) self.assertEqual(has_tuple, False) def test_resolve_type_iter_f(self) -> None: @@ -338,9 +327,9 @@ def a() -> tp.Iterator[tp.Any]: yield i yield None - resolved, has_tuple, values = prepare_iter_for_array(a()) + is_object, has_tuple, values = prepare_iter_for_array(a()) self.assertEqual(values, [0, 1, 2, None]) - self.assertEqual(resolved, False) + self.assertEqual(is_object, False) self.assertEqual(has_tuple, False) def test_resolve_type_iter_g(self) -> None: @@ -350,9 +339,9 @@ def a() -> tp.Iterator[tp.Any]: for i in range(3): yield i - resolved, has_tuple, values = prepare_iter_for_array(a()) + is_object, has_tuple, values = prepare_iter_for_array(a()) self.assertEqual(values, [None, 0, 1, 2]) - self.assertEqual(resolved, False) + self.assertEqual(is_object, False) self.assertEqual(has_tuple, False) def test_resolve_type_iter_h(self) -> None: @@ -364,28 +353,28 @@ def a() -> tp.Iterator[tp.Any]: yield i yield (3,4) - resolved, has_tuple, values = prepare_iter_for_array(a()) + is_object, has_tuple, values = prepare_iter_for_array(a()) self.assertEqual(values, [10, None, 0, 1, 2, (3,4)]) - self.assertEqual(resolved, True) + self.assertEqual(is_object, True) # we stop evaluation after finding object self.assertEqual(has_tuple, True) def test_resolve_type_iter_i(self) -> None: a0 = range(3, 7) - resolved, has_tuple, values = prepare_iter_for_array(a0) + is_object, has_tuple, values = prepare_iter_for_array(a0) # a copy is not made self.assertEqual(id(a0), id(values)) - self.assertEqual(resolved, False) + self.assertEqual(is_object, False) def test_resolve_type_iter_j(self) -> None: # this case was found through hypothesis a0 = [0.0, 36_028_797_018_963_969] - resolved, has_tuple, values = prepare_iter_for_array(a0) - self.assertEqual(resolved, True) + is_object, has_tuple, values = prepare_iter_for_array(a0) + self.assertEqual(is_object, True) def test_resolve_type_iter_k(self) -> None: - resolved, has_tuple, values = prepare_iter_for_array((x for x in ())) #type: ignore - self.assertEqual(resolved, False) + is_object, has_tuple, values = prepare_iter_for_array((x for x in ())) #type: ignore + self.assertEqual(is_object, False) self.assertEqual(len(values), 0) self.assertEqual(has_tuple, False) From e1914d3677e190ac6efda49a625cfc370040f88b Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Wed, 28 Apr 2021 12:28:16 -0700 Subject: [PATCH 08/19] Comments out a failing test temporarily. --- test/test_util.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index cf39dd5c..97b7c1b9 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -261,8 +261,10 @@ def test_resolve_type_iter_a(self) -> None: self.assertEqual(is_object, True) self.assertTrue(has_tuple) - is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2)) - self.assertEqual(is_object, False) + # TODO: This inexplicably fails on Python 3.6 -> why!! + # https://github.com/InvestmentSystems/arraykit/pull/43/checks?check_run_id=2460697121#step:7:612 + #is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2)) + #self.assertEqual(is_object, False) is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, None)) self.assertEqual(is_object, False) From 590ca5ff725c6b0d15cc493c26981052f3b98959 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Wed, 28 Apr 2021 12:33:20 -0700 Subject: [PATCH 09/19] Comments out another failing test temporarily. --- test/test_util.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index 97b7c1b9..bdd11e06 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -261,13 +261,15 @@ def test_resolve_type_iter_a(self) -> None: self.assertEqual(is_object, True) self.assertTrue(has_tuple) - # TODO: This inexplicably fails on Python 3.6 -> why!! + # TODO: These tests inexplicably fails on Python 3.6 -> why!! # https://github.com/InvestmentSystems/arraykit/pull/43/checks?check_run_id=2460697121#step:7:612 #is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2)) #self.assertEqual(is_object, False) - is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, None)) - self.assertEqual(is_object, False) + # TODO: These tests inexplicably fails on Python 3.6 -> why!! + # https://github.com/InvestmentSystems/arraykit/pull/43/checks?check_run_id=2460828552#step:7:613 + #is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, None)) + #self.assertEqual(is_object, False) is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, 'g')) self.assertEqual(is_object, True) From 8d4665d45f35fbe9afaa871f7744fef68e48adf9 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Wed, 28 Apr 2021 16:17:40 -0700 Subject: [PATCH 10/19] Uncomments the failing tests. --- test/test_util.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index bdd11e06..0e941f3c 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -263,13 +263,13 @@ def test_resolve_type_iter_a(self) -> None: # TODO: These tests inexplicably fails on Python 3.6 -> why!! # https://github.com/InvestmentSystems/arraykit/pull/43/checks?check_run_id=2460697121#step:7:612 - #is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2)) - #self.assertEqual(is_object, False) + is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2)) + self.assertEqual(is_object, False) # TODO: These tests inexplicably fails on Python 3.6 -> why!! # https://github.com/InvestmentSystems/arraykit/pull/43/checks?check_run_id=2460828552#step:7:613 - #is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, None)) - #self.assertEqual(is_object, False) + is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, None)) + self.assertEqual(is_object, False) is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, 'g')) self.assertEqual(is_object, True) From e5d3b4971f3219a5da7d1e7049e99b4104ca25a7 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Wed, 28 Apr 2021 16:51:02 -0700 Subject: [PATCH 11/19] Fixes incorrect enum type check logic - address 32 bit issue with int_max_coercible_to_float. --- arraykit.c | 56 +++++++++++++++++++++++++++++++++++++---------- test/test_util.py | 4 ---- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/arraykit.c b/arraykit.c index da6e2243..45aa5e3e 100644 --- a/arraykit.c +++ b/arraykit.c @@ -484,13 +484,20 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) bool has_inexact = false; bool has_big_int = false; - PyObject *int_max_coercible_to_float = PyLong_FromLong(1000000000000000); + PyObject *enumClass = PyObject_GetAttrString(m, "Enum"); + if (!enumClass) { + goto failure; + } + + PyObject *int_max_coercible_to_float = PyLong_FromLongLong(1000000000000000ll); if (!int_max_coercible_to_float) { + Py_DECREF(enumClass); goto failure; } PyObject *iterator = PyObject_GetIter(values); if (!iterator) { + Py_DECREF(enumClass); Py_DECREF(int_max_coercible_to_float); goto failure; } @@ -504,11 +511,16 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) ++len; } + int enum_success = PyObject_IsInstance(item, enumClass); + if (enum_success == -1) { + goto iteration_failure; + } + // These three API calls always succeed. if (PyList_Check(item) || PyTuple_Check(item) || PyObject_HasAttrString(item, "__slots__")) { has_tuple = true; } - else if (PyObject_TypeCheck(item, &PyEnum_Type)) { + else if (enum_success) { has_enum = true; } // This API call always succeeds @@ -559,6 +571,7 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) } Py_DECREF(iterator); + Py_DECREF(enumClass); Py_DECREF(int_max_coercible_to_float); if (PyErr_Occurred()) { @@ -590,6 +603,7 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) return ret; iteration_failure: + Py_DECREF(enumClass); Py_DECREF(int_max_coercible_to_float); Py_DECREF(iterator); Py_DECREF(item); @@ -890,28 +904,48 @@ PyInit_arraykit(void) { import_array(); PyObject *m = PyModule_Create(&arraykit_module); + if (!m) { + return NULL; + } PyObject *automap_module = PyImport_ImportModule("automap"); if (!automap_module) { - Py_XDECREF(m); + Py_DECREF(m); return NULL; } - PyObject *frozenAutoMap = PyObject_GetAttrString(automap_module, "FrozenAutoMap"); + PyObject *frozenAutoMapClass = PyObject_GetAttrString(automap_module, "FrozenAutoMap"); Py_DECREF(automap_module); - if (!frozenAutoMap) { - Py_XDECREF(m); + if (!frozenAutoMapClass) { + Py_DECREF(m); + return NULL; + } + + PyObject *enum_module = PyImport_ImportModule("enum"); + if (!enum_module) { + Py_DECREF(frozenAutoMapClass); + Py_DECREF(m); + return NULL; + } + + PyObject *enumClass = PyObject_GetAttrString(enum_module, "Enum"); + Py_DECREF(enum_module); + if (!enumClass) { + Py_DECREF(frozenAutoMapClass); + Py_DECREF(enumClass); + Py_DECREF(m); return NULL; } - if (!m || - PyModule_AddStringConstant(m, "__version__", Py_STRINGIFY(AK_VERSION)) || + if (PyModule_AddStringConstant(m, "__version__", Py_STRINGIFY(AK_VERSION)) || PyType_Ready(&ArrayGOType) || PyModule_AddObject(m, "ArrayGO", (PyObject *) &ArrayGOType) || - PyModule_AddObject(m, "FrozenAutoMap", frozenAutoMap)) + PyModule_AddObject(m, "FrozenAutoMap", frozenAutoMapClass) || + PyModule_AddObject(m, "Enum", enumClass)) { - Py_DECREF(frozenAutoMap); - Py_XDECREF(m); + Py_DECREF(frozenAutoMapClass); + Py_DECREF(enum_module); + Py_DECREF(m); return NULL; } return m; diff --git a/test/test_util.py b/test/test_util.py index 0e941f3c..cf39dd5c 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -261,13 +261,9 @@ def test_resolve_type_iter_a(self) -> None: self.assertEqual(is_object, True) self.assertTrue(has_tuple) - # TODO: These tests inexplicably fails on Python 3.6 -> why!! - # https://github.com/InvestmentSystems/arraykit/pull/43/checks?check_run_id=2460697121#step:7:612 is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2)) self.assertEqual(is_object, False) - # TODO: These tests inexplicably fails on Python 3.6 -> why!! - # https://github.com/InvestmentSystems/arraykit/pull/43/checks?check_run_id=2460828552#step:7:613 is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, None)) self.assertEqual(is_object, False) From 5eb35b5fc1a5929ea202215457627224db0dba9e Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Tue, 25 May 2021 17:25:29 -0700 Subject: [PATCH 12/19] Addresses pyi failures. --- src/__init__.pyi | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/__init__.pyi b/src/__init__.pyi index c5c2aa48..b16f88ec 100644 --- a/src/__init__.pyi +++ b/src/__init__.pyi @@ -1,8 +1,11 @@ import typing as tp +from enum import Enum # type: ignore +from automap import FrozenAutoMap import numpy as np # type: ignore _T = tp.TypeVar('_T') +_DtypeSpecifier = tp.Optional[tp.Union[str, np.dtype, type]] __version__: str @@ -35,4 +38,4 @@ def is_gen_copy_values(__values: tp.Iterable[tp.Any]) -> tp.Tuple[bool, bool]: . def prepare_iter_for_array( __values: tp.Iterable[tp.Any], restrict_copy: bool = ..., - ) -> tp.Tuple[DtypeSpecifier, bool, tp.Sequence[tp.Any]]: ... + ) -> tp.Tuple[_DtypeSpecifier, bool, tp.Sequence[tp.Any]]: ... From d344602e1b6f0cdd8df158a5e8efae3bef465b48 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Tue, 25 May 2021 17:39:38 -0700 Subject: [PATCH 13/19] Updates reference python to reflect new changes. --- performance/reference/util.py | 52 +++++++++---------- test/test_util.py | 98 +++++++++++++++++------------------ 2 files changed, 74 insertions(+), 76 deletions(-) diff --git a/performance/reference/util.py b/performance/reference/util.py index 2b787007..647ba0f0 100644 --- a/performance/reference/util.py +++ b/performance/reference/util.py @@ -251,26 +251,25 @@ def prepare_iter_for_array( restrict_copy: if True, reject making a copy, even if a generator is given Returns: - is_object, has_tuple, values + resolved_dtype, has_tuple, values ''' - is_gen, copy_values = is_gen_copy_values(values) if not is_gen and len(values) == 0: #type: ignore - return False, False, values #type: ignore + return None, False, values #type: ignore if restrict_copy: copy_values = False - v_iter = iter(values) + v_iter = values if is_gen else iter(values) if copy_values: values_post = [] - is_object = False + resolved = None # None is valid specifier if the type is not ambiguous + has_tuple = False has_str = False - has_enum = False has_non_str = False has_inexact = False has_big_int = False @@ -278,40 +277,39 @@ def prepare_iter_for_array( for v in v_iter: if copy_values: # if a generator, have to make a copy while iterating - # for array construction, cannot use dictlike, so must convert to list values_post.append(v) value_type = type(v) - # need to get tuple subclasses, like NamedTuple - if isinstance(v, (tuple, list)) or hasattr(v, '__slots__'): - # identify SF types by if they have __slots__ defined; they also must be assigned after array creation, so we treat them like tuples + if (value_type is str + or value_type is np.str_ + or value_type is bytes + or value_type is np.bytes_): + # must compare to both string types + has_str = True + elif hasattr(v, '__len__'): + # identify SF types by if they have STATIC attr they also must be assigned after array creation, so we treat them like tuples has_tuple = True + resolved = object + break elif isinstance(v, Enum): # must check isinstance, as Enum types are always derived from Enum - has_enum = True - elif value_type == str or value_type == np.str_: - # must compare to both string types - has_str = True + resolved = object + break else: has_non_str = True if value_type in INEXACT_TYPES: has_inexact = True - elif value_type == int and abs(v) > INT_MAX_COERCIBLE_TO_FLOAT: + elif value_type is int and abs(v) > INT_MAX_COERCIBLE_TO_FLOAT: has_big_int = True - if has_tuple or has_enum or (has_str and has_non_str): - is_object = True - - elif has_big_int and has_inexact: - is_object = True - - if is_object: - if copy_values: - values_post.extend(v_iter) + if (has_str and has_non_str) or (has_big_int and has_inexact): + resolved = object break - # NOTE: we break before finding a tuple, but our treatment of object types, downstream, will always assign them in the appropriate way if copy_values: - return is_object, has_tuple, values_post - return is_object, has_tuple, values #type: ignore + # v_iter is an iter, we need to finish it + values_post.extend(v_iter) + return resolved, has_tuple, values_post + + return resolved, has_tuple, values #type: ignore \ No newline at end of file diff --git a/test/test_util.py b/test/test_util.py index 26204a36..ad3abd0d 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -20,8 +20,8 @@ from performance.reference.util import mloc as mloc_ref -from arraykit import prepare_iter_for_array -#from performance.reference.util import prepare_iter_for_array +#from arraykit import prepare_iter_for_array +from performance.reference.util import prepare_iter_for_array class TestUnit(unittest.TestCase): @@ -326,73 +326,73 @@ def test_is_gen_copy_values(self) -> None: class TestPrepareIterUnit(unittest.TestCase): def test_resolve_type_iter_a(self) -> None: - is_object, has_tuple, values = prepare_iter_for_array(('a', 'b', 'c')) - self.assertEqual(is_object, False) + resolved, has_tuple, values = prepare_iter_for_array(('a', 'b', 'c')) + self.assertIsNone(resolved) - is_object, has_tuple, values = prepare_iter_for_array(('a', 'b', 3)) - self.assertEqual(is_object, True) + resolved, has_tuple, values = prepare_iter_for_array(('a', 'b', 3)) + self.assertIsNotNone(resolved) - is_object, has_tuple, values = prepare_iter_for_array(('a', 'b', (1, 2))) - self.assertEqual(is_object, True) + resolved, has_tuple, values = prepare_iter_for_array(('a', 'b', (1, 2))) + self.assertIsNotNone(resolved) self.assertTrue(has_tuple) - is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2)) - self.assertEqual(is_object, False) + resolved, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2)) + self.assertIsNone(resolved) - is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, None)) - self.assertEqual(is_object, False) + resolved, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, None)) + self.assertIsNone(resolved) - is_object, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, 'g')) - self.assertEqual(is_object, True) + resolved, has_tuple, values = prepare_iter_for_array((1, 2, 4.3, 2, 'g')) + self.assertIsNotNone(resolved) - is_object, has_tuple, values = prepare_iter_for_array(()) - self.assertEqual(is_object, False) + resolved, has_tuple, values = prepare_iter_for_array(()) + self.assertIsNone(resolved) def test_resolve_type_iter_b(self) -> None: - is_object, has_tuple, values = prepare_iter_for_array(iter(('a', 'b', 'c'))) - self.assertEqual(is_object, False) + resolved, has_tuple, values = prepare_iter_for_array(iter(('a', 'b', 'c'))) + self.assertIsNone(resolved) - is_object, has_tuple, values = prepare_iter_for_array(iter(('a', 'b', 3))) - self.assertEqual(is_object, True) + resolved, has_tuple, values = prepare_iter_for_array(iter(('a', 'b', 3))) + self.assertIsNotNone(resolved) - is_object, has_tuple, values = prepare_iter_for_array(iter(('a', 'b', (1, 2)))) - self.assertEqual(is_object, True) + resolved, has_tuple, values = prepare_iter_for_array(iter(('a', 'b', (1, 2)))) + self.assertIsNotNone(resolved) self.assertTrue(has_tuple) - is_object, has_tuple, values = prepare_iter_for_array(range(4)) - self.assertEqual(is_object, False) + resolved, has_tuple, values = prepare_iter_for_array(range(4)) + self.assertIsNone(resolved) def test_resolve_type_iter_c(self) -> None: a = [True, False, True] - is_object, has_tuple, values = prepare_iter_for_array(a) + resolved, has_tuple, values = prepare_iter_for_array(a) self.assertEqual(id(a), id(values)) - is_object, has_tuple, values = prepare_iter_for_array(iter(a)) + resolved, has_tuple, values = prepare_iter_for_array(iter(a)) self.assertNotEqual(id(a), id(values)) - self.assertEqual(is_object, False) + self.assertIsNone(resolved) self.assertEqual(has_tuple, False) def test_resolve_type_iter_d(self) -> None: a = [3, 2, (3,4)] - is_object, has_tuple, values = prepare_iter_for_array(a) + resolved, has_tuple, values = prepare_iter_for_array(a) self.assertEqual(id(a), id(values)) self.assertTrue(has_tuple) - is_object, has_tuple, values = prepare_iter_for_array(iter(a)) + resolved, has_tuple, values = prepare_iter_for_array(iter(a)) self.assertNotEqual(id(a), id(values)) - self.assertEqual(is_object, True) + self.assertIsNotNone(resolved) self.assertEqual(has_tuple, True) def test_resolve_type_iter_e(self) -> None: a = [300000000000000002, 5000000000000000001] - is_object, has_tuple, values = prepare_iter_for_array(a) + resolved, has_tuple, values = prepare_iter_for_array(a) self.assertEqual(id(a), id(values)) - is_object, has_tuple, values = prepare_iter_for_array(iter(a)) + resolved, has_tuple, values = prepare_iter_for_array(iter(a)) self.assertNotEqual(id(a), id(values)) - self.assertEqual(is_object, False) + self.assertIsNone(resolved) self.assertEqual(has_tuple, False) def test_resolve_type_iter_f(self) -> None: @@ -402,9 +402,9 @@ def a() -> tp.Iterator[tp.Any]: yield i yield None - is_object, has_tuple, values = prepare_iter_for_array(a()) + resolved, has_tuple, values = prepare_iter_for_array(a()) self.assertEqual(values, [0, 1, 2, None]) - self.assertEqual(is_object, False) + self.assertIsNone(resolved) self.assertEqual(has_tuple, False) def test_resolve_type_iter_g(self) -> None: @@ -414,9 +414,9 @@ def a() -> tp.Iterator[tp.Any]: for i in range(3): yield i - is_object, has_tuple, values = prepare_iter_for_array(a()) + resolved, has_tuple, values = prepare_iter_for_array(a()) self.assertEqual(values, [None, 0, 1, 2]) - self.assertEqual(is_object, False) + self.assertIsNone(resolved) self.assertEqual(has_tuple, False) def test_resolve_type_iter_h(self) -> None: @@ -428,36 +428,36 @@ def a() -> tp.Iterator[tp.Any]: yield i yield (3,4) - is_object, has_tuple, values = prepare_iter_for_array(a()) + resolved, has_tuple, values = prepare_iter_for_array(a()) self.assertEqual(values, [10, None, 0, 1, 2, (3,4)]) - self.assertEqual(is_object, True) + self.assertIsNotNone(resolved) # we stop evaluation after finding object self.assertEqual(has_tuple, True) def test_resolve_type_iter_i(self) -> None: a0 = range(3, 7) - is_object, has_tuple, values = prepare_iter_for_array(a0) + resolved, has_tuple, values = prepare_iter_for_array(a0) # a copy is not made self.assertEqual(id(a0), id(values)) - self.assertEqual(is_object, False) + self.assertIsNone(resolved) def test_resolve_type_iter_j(self) -> None: # this case was found through hypothesis a0 = [0.0, 36_028_797_018_963_969] - is_object, has_tuple, values = prepare_iter_for_array(a0) - self.assertEqual(is_object, True) + resolved, has_tuple, values = prepare_iter_for_array(a0) + self.assertIsNotNone(resolved) def test_resolve_type_iter_k(self) -> None: - is_object, has_tuple, values = prepare_iter_for_array((x for x in ())) #type: ignore - self.assertEqual(is_object, False) + resolved, has_tuple, values = prepare_iter_for_array((x for x in ())) #type: ignore + self.assertIsNone(resolved) self.assertEqual(len(values), 0) self.assertEqual(has_tuple, False) def test_resolve_type_iter_l(self) -> None: - self.assertEqual((False, False, []), prepare_iter_for_array([], True)) - self.assertEqual((False, False, ()), prepare_iter_for_array((), True)) - self.assertEqual((False, False, {}), prepare_iter_for_array({}, True)) - self.assertEqual((False, False, set()), prepare_iter_for_array(set(), True)) + self.assertEqual((None, False, []), prepare_iter_for_array([], True)) + self.assertEqual((None, False, ()), prepare_iter_for_array((), True)) + self.assertEqual((None, False, {}), prepare_iter_for_array({}, True)) + self.assertEqual((None, False, set()), prepare_iter_for_array(set(), True)) if __name__ == '__main__': From 0e89909680cf77abffbfc3e54b589ddab45fa4bd Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Tue, 25 May 2021 17:53:39 -0700 Subject: [PATCH 14/19] Updates c source to return object/None instead of a boolean. --- src/_arraykit.c | 21 ++++++++++++--------- test/test_util.py | 3 +-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/_arraykit.c b/src/_arraykit.c index 45d437c3..7430b43e 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -472,7 +472,7 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) return NULL; } if (len == 0) { - return PyTuple_Pack(3, Py_False, Py_False, values); + return PyTuple_Pack(3, Py_None, Py_False, values); } } @@ -591,27 +591,30 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) goto failure; } - PyObject *is_object_obj = PyBool_FromLong(is_object); - if (!is_object_obj) { - goto failure; + PyObject *resolved = NULL; + if (is_object) { + resolved = PyArray_DescrFromType(NPY_OBJECT); + } else { + resolved = Py_None; } + Py_INCREF(resolved); PyObject *is_tuple_obj = PyBool_FromLong(has_tuple); if (!is_tuple_obj) { - Py_DECREF(is_object_obj); + Py_DECREF(resolved); goto failure; } if (copy_values) { - PyObject *ret = PyTuple_Pack(3, is_object_obj, is_tuple_obj, copied_values); - Py_DECREF(is_object_obj); + PyObject *ret = PyTuple_Pack(3, resolved, is_tuple_obj, copied_values); + Py_DECREF(resolved); Py_DECREF(is_tuple_obj); Py_DECREF(copied_values); return ret; } - PyObject *ret = PyTuple_Pack(3, is_object_obj, is_tuple_obj, values); - Py_DECREF(is_object_obj); + PyObject *ret = PyTuple_Pack(3, resolved, is_tuple_obj, values); + Py_DECREF(resolved); Py_DECREF(is_tuple_obj); return ret; diff --git a/test/test_util.py b/test/test_util.py index ad3abd0d..9bb9628f 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -20,8 +20,7 @@ from performance.reference.util import mloc as mloc_ref -#from arraykit import prepare_iter_for_array -from performance.reference.util import prepare_iter_for_array +from arraykit import prepare_iter_for_array class TestUnit(unittest.TestCase): From bf342e839bf74647f4d09a17ce39f12b8d877f63 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Tue, 25 May 2021 18:17:07 -0700 Subject: [PATCH 15/19] Aligns c closer to python source. --- src/_arraykit.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/_arraykit.c b/src/_arraykit.c index 7430b43e..4a6aba17 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -371,16 +371,16 @@ typedef enum IsGenCopyValues { } IsGenCopyValues; static IsGenCopyValues -AK_is_gen_copy_values(PyObject *arg, PyObject *frozenAutoMap) +AK_is_gen_copy_values(PyObject *values, PyObject *frozenAutoMap) { - if(PyObject_HasAttrString(arg, "__len__")) { - if (PySet_Check(arg) || PyDict_Check(arg) || - PyDictValues_Check(arg) || PyDictKeys_Check(arg)) + if(PyObject_HasAttrString(values, "__len__")) { + if (PySet_Check(values) || PyDict_Check(values) || + PyDictValues_Check(values) || PyDictKeys_Check(values)) { return NOT_GEN_COPY; } - switch (PyObject_IsInstance(arg, frozenAutoMap)) + switch (PyObject_IsInstance(values, frozenAutoMap)) { case -1: return ERR; @@ -492,7 +492,6 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) bool is_object = false; bool has_tuple = false; bool has_str = false; - bool has_enum = false; bool has_non_str = false; bool has_inexact = false; bool has_big_int = false; @@ -525,16 +524,17 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) } int enum_success = PyObject_IsInstance(item, enumClass); - if (enum_success == -1) { + if (-1 == enum_success) { goto iteration_failure; } // These three API calls always succeed. if (PyList_Check(item) || PyTuple_Check(item) || PyObject_HasAttrString(item, "__slots__")) { has_tuple = true; + is_object = true; } else if (enum_success) { - has_enum = true; + is_object = true; } // This API call always succeeds else if (PyUnicode_Check(item)) { @@ -563,10 +563,7 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) } } - if (has_tuple | has_enum | (has_str & has_non_str)) { - is_object = true; - } - else if (has_big_int & has_inexact) { + if ((has_str & has_non_str) | (has_big_int & has_inexact)) { is_object = true; } From de763125b41add2bea4102afd1c749854843496a Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Fri, 28 May 2021 15:12:04 -0700 Subject: [PATCH 16/19] Optimizes prepare_iter_to_array and cleans up big_int logic. --- performance/__main__.py | 4 +- performance/reference/util.py | 8 ++-- src/_arraykit.c | 81 +++++++++++++++-------------------- test/test_util.py | 9 +++- 4 files changed, 48 insertions(+), 54 deletions(-) diff --git a/performance/__main__.py b/performance/__main__.py index 44c31efc..acbe8282 100644 --- a/performance/__main__.py +++ b/performance/__main__.py @@ -326,7 +326,7 @@ class IsNaElementPerfREF(IsNaElementPerf): #------------------------------------------------------------------------------- class IsGenCopyValues(Perf): - NUMBER = 1000 + NUMBER = 2500 def pre(self): self.objects = [ @@ -338,7 +338,7 @@ def pre(self): ] def main(self): - for _ in range(100): + for _ in range(200): for obj in self.objects: self.entry(obj) diff --git a/performance/reference/util.py b/performance/reference/util.py index 647ba0f0..d3e15c7d 100644 --- a/performance/reference/util.py +++ b/performance/reference/util.py @@ -38,10 +38,8 @@ # types have already been identified INVALID_ITERABLE_FOR_ARRAY = (abc.ValuesView, abc.KeysView) -# integers above this value will occasionally, once coerced to a float (64 or 128) -# in an NP array, will not match a hash lookup as a key in a dictionary; -# an NP array of int or object will work -INT_MAX_COERCIBLE_TO_FLOAT = 1_000_000_000_000_000 +# integers above this value will lose precision when coerced to a float +INT_MAX_COERCIBLE_TO_FLOAT = 9_007_199_256_349_108 def mloc(array: np.ndarray) -> int: @@ -312,4 +310,4 @@ def prepare_iter_for_array( values_post.extend(v_iter) return resolved, has_tuple, values_post - return resolved, has_tuple, values #type: ignore \ No newline at end of file + return resolved, has_tuple, values #type: ignore diff --git a/src/_arraykit.c b/src/_arraykit.c index 4a6aba17..141c8cdf 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -64,6 +64,8 @@ fprintf(stderr, #msg); \ _AK_DEBUG_END() +# define AK_INT_MAX_COERCIBLE_TO_FLOAT 9007199256349108l + # if defined __GNUC__ || defined __clang__ # define AK_LIKELY(X) __builtin_expect(!!(X), 1) # define AK_UNLIKELY(X) __builtin_expect(!!(X), 0) @@ -388,7 +390,7 @@ AK_is_gen_copy_values(PyObject *values, PyObject *frozenAutoMap) case 0: return NOT_GEN_NO_COPY; - default: + default: // 1 return NOT_GEN_COPY; } } @@ -464,7 +466,7 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) return NULL; } - ssize_t len; + Py_ssize_t len; if (!is_gen) { len = PyObject_Size(values); @@ -501,16 +503,9 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) goto failure; } - PyObject *int_max_coercible_to_float = PyLong_FromLongLong(1000000000000000ll); - if (!int_max_coercible_to_float) { - Py_DECREF(enumClass); - goto failure; - } - PyObject *iterator = PyObject_GetIter(values); if (!iterator) { Py_DECREF(enumClass); - Py_DECREF(int_max_coercible_to_float); goto failure; } PyObject *item = NULL; @@ -518,59 +513,55 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) while ((item = PyIter_Next(iterator))) { if (copy_values) { if (-1 == PyList_Append(copied_values, item)) { - goto iteration_failure; + goto failure_during_iteration; } ++len; } - int enum_success = PyObject_IsInstance(item, enumClass); - if (-1 == enum_success) { - goto iteration_failure; + // This API call always succeeds. + if (PyUnicode_Check(item)) { + has_str = true; } - - // These three API calls always succeed. - if (PyList_Check(item) || PyTuple_Check(item) || PyObject_HasAttrString(item, "__slots__")) { + else if (PyObject_HasAttrString(item, "__len__")) { has_tuple = true; is_object = true; } - else if (enum_success) { - is_object = true; - } - // This API call always succeeds - else if (PyUnicode_Check(item)) { - has_str = true; - } else { - has_non_str = true; - - // These two API calls always succeed. 96% sure this catches np types - if (PyFloat_Check(item) || PyComplex_Check(item)) { - has_inexact = true; + int enum_success = PyObject_IsInstance(item, enumClass); + if (-1 == enum_success) { + goto failure_during_iteration; } - else if PyLong_Check(item) { - PyObject *abs_v = PyNumber_Absolute(item); - if (!abs_v) { - goto iteration_failure; - } + else if (enum_success) { + is_object = true; + } + else { + has_non_str = true; - int success = PyObject_RichCompareBool(abs_v, int_max_coercible_to_float, Py_GT); - Py_DECREF(abs_v); - if (success == -1) { - goto iteration_failure; + // These two API calls always succeed. 96% sure this catches np types + if (!has_inexact && (PyFloat_Check(item) || PyComplex_Check(item))) { + has_inexact = true; + } + else if (!has_big_int && PyLong_Check(item)) { + int overflow; + npy_int64 item_val = PyLong_AsLongLongAndOverflow(item, &overflow); + if (-1 == item_val) { + if (PyErr_Occurred()) { + goto failure_during_iteration; + } + } + else { + has_big_int |= (overflow != 0 || Py_ABS(item_val) > AK_INT_MAX_COERCIBLE_TO_FLOAT); + } } - - has_big_int |= success; } } - if ((has_str & has_non_str) | (has_big_int & has_inexact)) { - is_object = true; - } + is_object |= (has_str && has_non_str) || (has_big_int && has_inexact); if (is_object) { if (copy_values) { if (-1 == PyList_SetSlice(copied_values, len, len, iterator)) { - goto iteration_failure; + goto failure_during_iteration; } } Py_DECREF(item); @@ -582,7 +573,6 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) Py_DECREF(iterator); Py_DECREF(enumClass); - Py_DECREF(int_max_coercible_to_float); if (PyErr_Occurred()) { goto failure; @@ -615,9 +605,8 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) Py_DECREF(is_tuple_obj); return ret; -iteration_failure: +failure_during_iteration: Py_DECREF(enumClass); - Py_DECREF(int_max_coercible_to_float); Py_DECREF(iterator); Py_DECREF(item); diff --git a/test/test_util.py b/test/test_util.py index 9bb9628f..b0fc93ff 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,7 +1,6 @@ import typing as tp import datetime import unittest -import itertools from automap import FrozenAutoMap import numpy as np # type: ignore @@ -446,6 +445,14 @@ def test_resolve_type_iter_j(self) -> None: resolved, has_tuple, values = prepare_iter_for_array(a0) self.assertIsNotNone(resolved) + a1 = [0.0, 9_007_199_256_349_109] + resolved, has_tuple, values = prepare_iter_for_array(a1) + self.assertIsNotNone(resolved) + + a2 = [0.0, 9_007_199_256_349_108] + resolved, has_tuple, values = prepare_iter_for_array(a2) + self.assertIsNone(resolved) + def test_resolve_type_iter_k(self) -> None: resolved, has_tuple, values = prepare_iter_for_array((x for x in ())) #type: ignore self.assertIsNone(resolved) From 0201b0033cd64d7ef8f32d0cd26fde9d04f4a8ca Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Fri, 28 May 2021 15:32:03 -0700 Subject: [PATCH 17/19] Adds in a missing cast. --- src/_arraykit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/_arraykit.c b/src/_arraykit.c index 141c8cdf..299ed93c 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -580,7 +580,7 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) PyObject *resolved = NULL; if (is_object) { - resolved = PyArray_DescrFromType(NPY_OBJECT); + resolved = (PyObject*)PyArray_DescrFromType(NPY_OBJECT); } else { resolved = Py_None; } From 8368df60b0d3b45a490f801aa49ca7cfb89d7ef6 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Tue, 1 Jun 2021 13:47:10 -0700 Subject: [PATCH 18/19] Fixes __init__.py --- src/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/__init__.py b/src/__init__.py index 7f49afaa..34128fe5 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -3,6 +3,8 @@ # pylint: disable=C0414 from ._arraykit import __version__ +from ._arraykit import Enum as Enum +from ._arraykit import FrozenAutoMap as FrozenAutoMap from ._arraykit import ArrayGO as ArrayGO from ._arraykit import immutable_filter as immutable_filter from ._arraykit import mloc as mloc @@ -15,3 +17,5 @@ from ._arraykit import resolve_dtype as resolve_dtype from ._arraykit import resolve_dtype_iter as resolve_dtype_iter from ._arraykit import isna_element as isna_element +from ._arraykit import is_gen_copy_values as is_gen_copy_values +from ._arraykit import prepare_iter_for_array as prepare_iter_for_array From 5239d534e3654f4d8b39a1d9b4a4a804122f2467 Mon Sep 17 00:00:00 2001 From: Charles Burkland Date: Tue, 1 Jun 2021 14:37:02 -0700 Subject: [PATCH 19/19] Removes unnecessary enum isinstance check. --- performance/reference/util.py | 5 --- src/__init__.py | 1 - src/__init__.pyi | 1 - src/_arraykit.c | 65 ++++++++--------------------------- 4 files changed, 15 insertions(+), 57 deletions(-) diff --git a/performance/reference/util.py b/performance/reference/util.py index d3e15c7d..936096b6 100644 --- a/performance/reference/util.py +++ b/performance/reference/util.py @@ -1,7 +1,6 @@ import typing as tp from copy import deepcopy from collections import abc -from enum import Enum from automap import FrozenAutoMap # pylint: disable = E0611 import numpy as np @@ -290,10 +289,6 @@ def prepare_iter_for_array( has_tuple = True resolved = object break - elif isinstance(v, Enum): - # must check isinstance, as Enum types are always derived from Enum - resolved = object - break else: has_non_str = True if value_type in INEXACT_TYPES: diff --git a/src/__init__.py b/src/__init__.py index 34128fe5..20e835c4 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -3,7 +3,6 @@ # pylint: disable=C0414 from ._arraykit import __version__ -from ._arraykit import Enum as Enum from ._arraykit import FrozenAutoMap as FrozenAutoMap from ._arraykit import ArrayGO as ArrayGO from ._arraykit import immutable_filter as immutable_filter diff --git a/src/__init__.pyi b/src/__init__.pyi index b16f88ec..d9960bb9 100644 --- a/src/__init__.pyi +++ b/src/__init__.pyi @@ -1,5 +1,4 @@ import typing as tp -from enum import Enum # type: ignore from automap import FrozenAutoMap import numpy as np # type: ignore diff --git a/src/_arraykit.c b/src/_arraykit.c index 299ed93c..cac0fc74 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -498,14 +498,8 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) bool has_inexact = false; bool has_big_int = false; - PyObject *enumClass = PyObject_GetAttrString(m, "Enum"); - if (!enumClass) { - goto failure; - } - PyObject *iterator = PyObject_GetIter(values); if (!iterator) { - Py_DECREF(enumClass); goto failure; } PyObject *item = NULL; @@ -527,32 +521,23 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) is_object = true; } else { - int enum_success = PyObject_IsInstance(item, enumClass); - if (-1 == enum_success) { - goto failure_during_iteration; - } - else if (enum_success) { - is_object = true; - } - else { - has_non_str = true; + has_non_str = true; - // These two API calls always succeed. 96% sure this catches np types - if (!has_inexact && (PyFloat_Check(item) || PyComplex_Check(item))) { - has_inexact = true; - } - else if (!has_big_int && PyLong_Check(item)) { - int overflow; - npy_int64 item_val = PyLong_AsLongLongAndOverflow(item, &overflow); - if (-1 == item_val) { - if (PyErr_Occurred()) { - goto failure_during_iteration; - } - } - else { - has_big_int |= (overflow != 0 || Py_ABS(item_val) > AK_INT_MAX_COERCIBLE_TO_FLOAT); + // These two API calls always succeed. 96% sure this catches np types + if (!has_inexact && (PyFloat_Check(item) || PyComplex_Check(item))) { + has_inexact = true; + } + else if (!has_big_int && PyLong_Check(item)) { + int overflow; + npy_int64 item_val = PyLong_AsLongLongAndOverflow(item, &overflow); + if (-1 == item_val) { + if (PyErr_Occurred()) { + goto failure_during_iteration; } } + else { + has_big_int |= (overflow != 0 || Py_ABS(item_val) > AK_INT_MAX_COERCIBLE_TO_FLOAT); + } } } @@ -572,7 +557,6 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) } Py_DECREF(iterator); - Py_DECREF(enumClass); if (PyErr_Occurred()) { goto failure; @@ -606,7 +590,6 @@ prepare_iter_for_array(PyObject *m, PyObject *args, PyObject *kwargs) return ret; failure_during_iteration: - Py_DECREF(enumClass); Py_DECREF(iterator); Py_DECREF(item); @@ -991,30 +974,12 @@ PyInit__arraykit(void) return NULL; } - PyObject *enum_module = PyImport_ImportModule("enum"); - if (!enum_module) { - Py_DECREF(frozenAutoMapClass); - Py_DECREF(m); - return NULL; - } - - PyObject *enumClass = PyObject_GetAttrString(enum_module, "Enum"); - Py_DECREF(enum_module); - if (!enumClass) { - Py_DECREF(frozenAutoMapClass); - Py_DECREF(enumClass); - Py_DECREF(m); - return NULL; - } - if (PyModule_AddStringConstant(m, "__version__", Py_STRINGIFY(AK_VERSION)) || PyType_Ready(&ArrayGOType) || PyModule_AddObject(m, "ArrayGO", (PyObject *) &ArrayGOType) || - PyModule_AddObject(m, "FrozenAutoMap", frozenAutoMapClass) || - PyModule_AddObject(m, "Enum", enumClass)) + PyModule_AddObject(m, "FrozenAutoMap", frozenAutoMapClass)) { Py_DECREF(frozenAutoMapClass); - Py_DECREF(enum_module); Py_DECREF(m); return NULL; }