-
-
Notifications
You must be signed in to change notification settings - Fork 13
FEAT: Implementing is_integer and as_integer_ratio for QuadPrecision
#221
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
bbdd72c
cb3243d
b8a60a6
9f95d21
72b2023
652c35f
9c54981
798e503
9ffcbfd
476ac4d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -22,6 +22,18 @@ | |||||||
| // src: https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format | ||||||||
| #define SLEEF_QUAD_DECIMAL_DIG 36 | ||||||||
|
|
||||||||
| #if PY_VERSION_HEX < 0x30d00b3 | ||||||||
| static PyThread_type_lock sleef_lock; | ||||||||
| #define LOCK_SLEEF PyThread_acquire_lock(sleef_lock, WAIT_LOCK) | ||||||||
| #define UNLOCK_SLEEF PyThread_release_lock(sleef_lock) | ||||||||
| #else | ||||||||
| static PyMutex sleef_lock = {0}; | ||||||||
| #define LOCK_SLEEF PyMutex_Lock(&sleef_lock) | ||||||||
| #define UNLOCK_SLEEF PyMutex_Unlock(&sleef_lock) | ||||||||
| #endif | ||||||||
|
|
||||||||
|
|
||||||||
|
|
||||||||
|
|
||||||||
| QuadPrecisionObject * | ||||||||
| QuadPrecision_raw_new(QuadBackendType backend) | ||||||||
|
|
@@ -383,6 +395,188 @@ QuadPrecision_get_imag(QuadPrecisionObject *self, void *closure) | |||||||
| return (PyObject *)QuadPrecision_raw_new(self->backend); | ||||||||
| } | ||||||||
|
|
||||||||
| // Method implementations for float compatibility | ||||||||
| static PyObject * | ||||||||
| QuadPrecision_is_integer(QuadPrecisionObject *self, PyObject *Py_UNUSED(ignored)) | ||||||||
| { | ||||||||
| Sleef_quad value; | ||||||||
|
|
||||||||
| if (self->backend == BACKEND_SLEEF) { | ||||||||
| value = self->value.sleef_value; | ||||||||
| } | ||||||||
| else { | ||||||||
| // lets also tackle ld from sleef functions as well | ||||||||
| value = Sleef_cast_from_doubleq1((double)self->value.longdouble_value); | ||||||||
| } | ||||||||
|
|
||||||||
| if(Sleef_iunordq1(value, value)) { | ||||||||
| Py_RETURN_FALSE; | ||||||||
| } | ||||||||
|
|
||||||||
| // Check if value is finite (not inf or nan) | ||||||||
| Sleef_quad abs_value = Sleef_fabsq1(value); | ||||||||
| Sleef_quad pos_inf = sleef_q(+0x1000000000000LL, 0x0000000000000000ULL, 16384); | ||||||||
| int32_t is_finite = Sleef_icmpltq1(abs_value, pos_inf); | ||||||||
|
|
||||||||
| if (!is_finite) { | ||||||||
| Py_RETURN_FALSE; | ||||||||
| } | ||||||||
|
|
||||||||
| // Check if value equals its truncated version | ||||||||
| Sleef_quad truncated = Sleef_truncq1(value); | ||||||||
| int32_t is_equal = Sleef_icmpeqq1(value, truncated); | ||||||||
|
|
||||||||
| if (is_equal) { | ||||||||
| Py_RETURN_TRUE; | ||||||||
| } | ||||||||
| else { | ||||||||
| Py_RETURN_FALSE; | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| PyObject* quad_to_pylong(Sleef_quad value) | ||||||||
| { | ||||||||
| char buffer[128]; | ||||||||
|
|
||||||||
| // Sleef_snprintf call is thread-unsafe | ||||||||
| LOCK_SLEEF; | ||||||||
| // Format as integer (%.0Qf gives integer with no decimal places) | ||||||||
| // Q modifier means pass Sleef_quad by value | ||||||||
| int written = Sleef_snprintf(buffer, sizeof(buffer), "%.0Qf", value); | ||||||||
| UNLOCK_SLEEF; | ||||||||
| if (written < 0 || written >= sizeof(buffer)) { | ||||||||
| PyErr_SetString(PyExc_RuntimeError, "Failed to convert quad to string"); | ||||||||
| return NULL; | ||||||||
| } | ||||||||
|
|
||||||||
| PyObject *result = PyLong_FromString(buffer, NULL, 10); | ||||||||
|
|
||||||||
| if (result == NULL) { | ||||||||
| PyErr_SetString(PyExc_RuntimeError, "Failed to parse integer string"); | ||||||||
| return NULL; | ||||||||
| } | ||||||||
|
|
||||||||
| return result; | ||||||||
| } | ||||||||
|
|
||||||||
| // inspired by the CPython implementation | ||||||||
| // https://github.com/python/cpython/blob/ac1ffd77858b62d169a08040c08aa5de26e145ac/Objects/floatobject.c#L1503C1-L1572C2 | ||||||||
| // NOTE: a 128-bit | ||||||||
| static PyObject * | ||||||||
| QuadPrecision_as_integer_ratio(QuadPrecisionObject *self, PyObject *Py_UNUSED(ignored)) | ||||||||
| { | ||||||||
|
|
||||||||
| Sleef_quad value; | ||||||||
| Sleef_quad pos_inf = sleef_q(+0x1000000000000LL, 0x0000000000000000ULL, 16384); | ||||||||
| const int FLOAT128_PRECISION = 113; | ||||||||
|
|
||||||||
| if (self->backend == BACKEND_SLEEF) { | ||||||||
| value = self->value.sleef_value; | ||||||||
| } | ||||||||
| else { | ||||||||
| // lets also tackle ld from sleef functions as well | ||||||||
| value = Sleef_cast_from_doubleq1((double)self->value.longdouble_value); | ||||||||
| } | ||||||||
|
|
||||||||
| if(Sleef_iunordq1(value, value)) { | ||||||||
| PyErr_SetString(PyExc_ValueError, "Cannot convert NaN to integer ratio"); | ||||||||
| return NULL; | ||||||||
| } | ||||||||
| if(Sleef_icmpgeq1(Sleef_fabsq1(value), pos_inf)) { | ||||||||
| PyErr_SetString(PyExc_OverflowError, "Cannot convert infinite value to integer ratio"); | ||||||||
| return NULL; | ||||||||
| } | ||||||||
|
|
||||||||
| // Sleef_value == float_part * 2**exponent exactly | ||||||||
| int exponent; | ||||||||
| Sleef_quad mantissa = Sleef_frexpq1(value, &exponent); // within [0.5, 1.0) | ||||||||
|
|
||||||||
| /* | ||||||||
| CPython loops for 300 (some huge number) to make sure | ||||||||
| float_part gets converted to the floor(float_part) i.e. near integer as | ||||||||
|
|
||||||||
| for (i=0; i<300 && float_part != floor(float_part) ; i++) { | ||||||||
| float_part *= 2.0; | ||||||||
| exponent--; | ||||||||
| } | ||||||||
|
|
||||||||
| It seems highly inefficient from performance perspective, maybe they pick 300 for future-proof | ||||||||
| or If FLT_RADIX != 2, the 300 steps may leave a tiny fractional part | ||||||||
|
|
||||||||
| Another way can be doing as: | ||||||||
| ``` | ||||||||
| mantissa = ldexpq(mantissa, FLOAT128_PRECISION); | ||||||||
| exponent -= FLOAT128_PRECISION; | ||||||||
| ``` | ||||||||
SwayamInSync marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
| This should work but give non-simplified, huge integers (although they also come down to same representation) | ||||||||
| We can also do gcd to find simplified values, but it'll add more O(log(N)) | ||||||||
| For the sake of simplicity and fixed 128-bit nature, we will loop till 113 only | ||||||||
| */ | ||||||||
|
|
||||||||
| for (int i = 0; i < FLOAT128_PRECISION && !Sleef_icmpeqq1(mantissa, Sleef_floorq1(mantissa)); i++) { | ||||||||
| mantissa = Sleef_mulq1_u05(mantissa, Sleef_cast_from_doubleq1(2.0)); | ||||||||
| exponent--; | ||||||||
| } | ||||||||
|
|
||||||||
| // numerator and denominators can't fit in int | ||||||||
| // convert items to PyLongObject from string instead | ||||||||
|
|
||||||||
| PyObject *py_exp = PyLong_FromLongLong(Py_ABS(exponent)); | ||||||||
| if(py_exp == NULL) | ||||||||
| { | ||||||||
| return NULL; | ||||||||
| } | ||||||||
|
|
||||||||
| PyObject *numerator = quad_to_pylong(mantissa); | ||||||||
| if(numerator == NULL) | ||||||||
| { | ||||||||
| Py_DECREF(numerator); | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. While it is annoying, testing error cases catches stuff like this...
Suggested change
|
||||||||
| return NULL; | ||||||||
| } | ||||||||
| PyObject *denominator = PyLong_FromLong(1); | ||||||||
| if (denominator == NULL) { | ||||||||
| Py_DECREF(numerator); | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider adding a
Suggested change
|
||||||||
| return NULL; | ||||||||
| } | ||||||||
|
|
||||||||
| // fold in 2**exponent | ||||||||
| if(exponent > 0) | ||||||||
| { | ||||||||
| PyObject *new_num = PyNumber_Lshift(numerator, py_exp); | ||||||||
| Py_DECREF(numerator); | ||||||||
| if(new_num == NULL) | ||||||||
| { | ||||||||
| Py_DECREF(denominator); | ||||||||
| Py_DECREF(py_exp); | ||||||||
| return NULL; | ||||||||
| } | ||||||||
| numerator = new_num; | ||||||||
| } | ||||||||
| else | ||||||||
| { | ||||||||
| PyObject *new_denom = PyNumber_Lshift(denominator, py_exp); | ||||||||
| Py_DECREF(denominator); | ||||||||
| if(new_denom == NULL) | ||||||||
| { | ||||||||
| Py_DECREF(numerator); | ||||||||
| Py_DECREF(py_exp); | ||||||||
| return NULL; | ||||||||
| } | ||||||||
| denominator = new_denom; | ||||||||
| } | ||||||||
|
|
||||||||
| Py_DECREF(py_exp); | ||||||||
| return PyTuple_Pack(2, numerator, denominator); | ||||||||
| } | ||||||||
|
|
||||||||
| static PyMethodDef QuadPrecision_methods[] = { | ||||||||
| {"is_integer", (PyCFunction)QuadPrecision_is_integer, METH_NOARGS, | ||||||||
| "Return True if the value is an integer."}, | ||||||||
| {"as_integer_ratio", (PyCFunction)QuadPrecision_as_integer_ratio, METH_NOARGS, | ||||||||
| "Return a pair of integers whose ratio is exactly equal to the original value."}, | ||||||||
| {NULL, NULL, 0, NULL} /* Sentinel */ | ||||||||
| }; | ||||||||
|
|
||||||||
| static PyGetSetDef QuadPrecision_getset[] = { | ||||||||
| {"real", (getter)QuadPrecision_get_real, NULL, "Real part of the scalar", NULL}, | ||||||||
| {"imag", (getter)QuadPrecision_get_imag, NULL, "Imaginary part of the scalar (always 0 for real types)", NULL}, | ||||||||
|
|
@@ -400,12 +594,20 @@ PyTypeObject QuadPrecision_Type = { | |||||||
| .tp_as_number = &quad_as_scalar, | ||||||||
| .tp_as_buffer = &QuadPrecision_as_buffer, | ||||||||
| .tp_richcompare = (richcmpfunc)quad_richcompare, | ||||||||
| .tp_methods = QuadPrecision_methods, | ||||||||
| .tp_getset = QuadPrecision_getset, | ||||||||
| }; | ||||||||
|
|
||||||||
| int | ||||||||
| init_quadprecision_scalar(void) | ||||||||
| { | ||||||||
| #if PY_VERSION_HEX < 0x30d00b3 | ||||||||
| sleef_lock = PyThread_allocate_lock(); | ||||||||
| if (sleef_lock == NULL) { | ||||||||
| PyErr_NoMemory(); | ||||||||
| return -1; | ||||||||
| } | ||||||||
| #endif | ||||||||
| QuadPrecision_Type.tp_base = &PyFloatingArrType_Type; | ||||||||
| return PyType_Ready(&QuadPrecision_Type); | ||||||||
| } | ||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,7 +8,11 @@ if [ -d "build/" ]; then | |
| rm -rf subprojects/sleef | ||
| fi | ||
|
|
||
| # export CFLAGS="-g -O0" | ||
| # export CXXFLAGS="-g -O0" | ||
| python -m pip uninstall -y numpy_quaddtype | ||
| python -m pip install . -vv --no-build-isolation 2>&1 | tee build_log.txt | ||
|
|
||
| # for debugging and TSAN builds, comment the above line and uncomment all below: | ||
| # export CFLAGS="-fsanitize=thread -g -O0" | ||
| # export CXXFLAGS="-fsanitize=thread -g -O0" | ||
| # export LDFLAGS="-fsanitize=thread" | ||
| # python -m pip install . -vv --no-build-isolation -Csetup-args=-Db_sanitize=thread 2>&1 | tee build_log.txt | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you also need to pass A somewhat more verbose but perhaps clearer way to do this is to explicitly install all the build deps, then build quaddtype with # either on the cpython_sanity docker image or
# after building and installing a TSan Python build
python -m pip install meson meson-python wheel ninja
python -m pip install "numpy @ git+https://github.com/numpy/numpy" -C'setup-args=-Db_sanitize=thread'
python -m pip install . --no-build-isolation -C'setup-args=-Db_sanitize=thread'No need to pass any special arguments for pure-python dependencies. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After I figured out how to compile SLEEF with TSan instrumentation, I got this race report in the new test you added: I think the locking that's going to be needed is probably not limited to |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,7 @@ if host_machine.system() == 'windows' | |
| parallel_flag = [] | ||
| endif | ||
|
|
||
| # uncomment below lines for TSAN builds (in case compiler flags are not picked up from meson) | ||
| sleef_configure = run_command([ | ||
| cmake, | ||
| '-S', meson.current_source_dir(), | ||
|
|
@@ -22,6 +23,10 @@ sleef_configure = run_command([ | |
| '-DSLEEF_BUILD_TESTS=OFF', | ||
| '-DSLEEF_BUILD_INLINE_HEADERS=OFF', | ||
| '-DCMAKE_POSITION_INDEPENDENT_CODE=ON', | ||
| # '-DCMAKE_C_FLAGS=-fsanitize=thread -g', | ||
| # '-DCMAKE_CXX_FLAGS=-fsanitize=thread -g', | ||
| # '-DCMAKE_EXE_LINKER_FLAGS=-fsanitize=thread', | ||
| # '-DCMAKE_SHARED_LINKER_FLAGS=-fsanitize=thread', | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I poked a little at this. It looks like this doesn't get passed down into the subproject by default so this manual patching is necessary. Too bad... Also maybe worth mentioning in the comment above that you also need to delete the sleef subproject completely for this change to have any impact unless you're starting from a fresh clone of the repo. At least that's what I had to do for this change to have any effect. |
||
| '-DCMAKE_INSTALL_PREFIX=' + meson.current_build_dir() / sleef_install_dir | ||
| ], check: false, capture: true) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| import concurrent.futures | ||
| import threading | ||
|
|
||
| import pytest | ||
|
|
||
| import numpy as np | ||
| from numpy._core import _rational_tests | ||
| from numpy._core.tests.test_stringdtype import random_unicode_string_list | ||
| from numpy.testing import IS_64BIT, IS_WASM | ||
| from numpy.testing._private.utils import run_threaded | ||
|
|
||
| if IS_WASM: | ||
| pytest.skip(allow_module_level=True, reason="no threading support in wasm") | ||
|
|
||
| from numpy_quaddtype import * | ||
|
|
||
|
|
||
| def test_as_integer_ratio_reconstruction(): | ||
| """Multi-threaded test that as_integer_ratio() can reconstruct the original value.""" | ||
|
|
||
| def test(barrier): | ||
| barrier.wait() # All threads start simultaneously | ||
| values = ["3.14", "0.1", "1.414213562373095", "2.718281828459045", | ||
| "-1.23456789", "1000.001", "0.0001", "1e20", "1.23e15", "1e-30", pi] | ||
| for val in values: | ||
| quad_val = QuadPrecision(val) | ||
| num, denom = quad_val.as_integer_ratio() | ||
| # todo: can remove str converstion after merging PR #213 | ||
| reconstructed = QuadPrecision(str(num)) / QuadPrecision(str(denom)) | ||
| assert reconstructed == quad_val | ||
|
|
||
| run_threaded(test, pass_barrier=True, max_workers=64, outer_iterations=100) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why the explicit import? We use
boolwithout thebuiltins.boolelsewhere in the stubsThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Copied from the NumPy's stubs, I think both are just same.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
in numpy it's needed because
boolis shadowed bynp.bool. But that shouldn't be problem here, so no need for thebuiltins._