Skip to content

Commit 76bc5e4

Browse files
committed
divmod impl
1 parent 4c2863d commit 76bc5e4

File tree

4 files changed

+423
-2
lines changed

4 files changed

+423
-2
lines changed

quaddtype/numpy_quaddtype/src/ops.hpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,8 @@ ld_isnan(const long double *op)
442442

443443
// Binary Quad operations
444444
typedef Sleef_quad (*binary_op_quad_def)(const Sleef_quad *, const Sleef_quad *);
445+
// Binary Quad operations with 2 outputs (for divmod, modf, frexp)
446+
typedef void (*binary_op_2out_quad_def)(const Sleef_quad *, const Sleef_quad *, Sleef_quad *, Sleef_quad *);
445447

446448
static inline Sleef_quad
447449
quad_add(const Sleef_quad *in1, const Sleef_quad *in2)
@@ -593,6 +595,14 @@ quad_fmod(const Sleef_quad *a, const Sleef_quad *b)
593595
return result;
594596
}
595597

598+
static inline void
599+
quad_divmod(const Sleef_quad *a, const Sleef_quad *b,
600+
Sleef_quad *out_quotient, Sleef_quad *out_remainder)
601+
{
602+
*out_quotient = quad_floor_divide(a, b);
603+
*out_remainder = quad_mod(a, b);
604+
}
605+
596606
static inline Sleef_quad
597607
quad_minimum(const Sleef_quad *in1, const Sleef_quad *in2)
598608
{
@@ -744,6 +754,8 @@ quad_logaddexp2(const Sleef_quad *x, const Sleef_quad *y)
744754

745755
// Binary long double operations
746756
typedef long double (*binary_op_longdouble_def)(const long double *, const long double *);
757+
// Binary long double operations with 2 outputs (for divmod, modf, frexp)
758+
typedef void (*binary_op_2out_longdouble_def)(const long double *, const long double *, long double *, long double *);
747759

748760
static inline long double
749761
ld_add(const long double *in1, const long double *in2)
@@ -871,6 +883,14 @@ ld_fmod(const long double *a, const long double *b)
871883
return result;
872884
}
873885

886+
static inline void
887+
ld_divmod(const long double *a, const long double *b,
888+
long double *out_quotient, long double *out_remainder)
889+
{
890+
*out_quotient = ld_floor_divide(a, b);
891+
*out_remainder = ld_mod(a, b);
892+
}
893+
874894
static inline long double
875895
ld_minimum(const long double *in1, const long double *in2)
876896
{

quaddtype/numpy_quaddtype/src/umath/binary_ops.cpp

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,201 @@ quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *co
145145
}
146146

147147

148+
149+
// Resolve descriptors for binary ops with 2 outputs (2 inputs, 2 outputs)
150+
static NPY_CASTING
151+
quad_binary_op_2out_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[],
152+
PyArray_Descr *const given_descrs[],
153+
PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset))
154+
{
155+
QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
156+
QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
157+
QuadBackendType target_backend;
158+
159+
// Determine target backend and if casting is needed
160+
NPY_CASTING casting = NPY_NO_CASTING;
161+
if (descr_in1->backend != descr_in2->backend) {
162+
target_backend = BACKEND_LONGDOUBLE;
163+
casting = NPY_SAFE_CASTING;
164+
}
165+
else {
166+
target_backend = descr_in1->backend;
167+
}
168+
169+
// Set up input descriptors, casting if necessary
170+
for (int i = 0; i < 2; i++) {
171+
if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) {
172+
loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
173+
if (!loop_descrs[i]) {
174+
return (NPY_CASTING)-1;
175+
}
176+
}
177+
else {
178+
Py_INCREF(given_descrs[i]);
179+
loop_descrs[i] = given_descrs[i];
180+
}
181+
}
182+
183+
// Set up output descriptors (2 outputs for divmod)
184+
for (int i = 2; i < 4; i++) {
185+
if (given_descrs[i] == NULL) {
186+
loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
187+
if (!loop_descrs[i]) {
188+
return (NPY_CASTING)-1;
189+
}
190+
}
191+
else {
192+
QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[i];
193+
if (descr_out->backend != target_backend) {
194+
loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
195+
if (!loop_descrs[i]) {
196+
return (NPY_CASTING)-1;
197+
}
198+
}
199+
else {
200+
Py_INCREF(given_descrs[i]);
201+
loop_descrs[i] = given_descrs[i];
202+
}
203+
}
204+
}
205+
return casting;
206+
}
207+
208+
// Strided loop for binary ops with 2 outputs (unaligned)
209+
template <binary_op_2out_quad_def sleef_op, binary_op_2out_longdouble_def longdouble_op>
210+
int
211+
quad_generic_binop_2out_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
212+
npy_intp const dimensions[], npy_intp const strides[],
213+
NpyAuxData *auxdata)
214+
{
215+
npy_intp N = dimensions[0];
216+
char *in1_ptr = data[0], *in2_ptr = data[1];
217+
char *out1_ptr = data[2], *out2_ptr = data[3];
218+
npy_intp in1_stride = strides[0];
219+
npy_intp in2_stride = strides[1];
220+
npy_intp out1_stride = strides[2];
221+
npy_intp out2_stride = strides[3];
222+
223+
QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
224+
QuadBackendType backend = descr->backend;
225+
size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
226+
227+
quad_value in1, in2, out1, out2;
228+
while (N--) {
229+
memcpy(&in1, in1_ptr, elem_size);
230+
memcpy(&in2, in2_ptr, elem_size);
231+
if (backend == BACKEND_SLEEF) {
232+
sleef_op(&in1.sleef_value, &in2.sleef_value, &out1.sleef_value, &out2.sleef_value);
233+
}
234+
else {
235+
longdouble_op(&in1.longdouble_value, &in2.longdouble_value,
236+
&out1.longdouble_value, &out2.longdouble_value);
237+
}
238+
memcpy(out1_ptr, &out1, elem_size);
239+
memcpy(out2_ptr, &out2, elem_size);
240+
241+
in1_ptr += in1_stride;
242+
in2_ptr += in2_stride;
243+
out1_ptr += out1_stride;
244+
out2_ptr += out2_stride;
245+
}
246+
return 0;
247+
}
248+
249+
// Strided loop for binary ops with 2 outputs (aligned)
250+
template <binary_op_2out_quad_def sleef_op, binary_op_2out_longdouble_def longdouble_op>
251+
int
252+
quad_generic_binop_2out_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
253+
npy_intp const dimensions[], npy_intp const strides[],
254+
NpyAuxData *auxdata)
255+
{
256+
npy_intp N = dimensions[0];
257+
char *in1_ptr = data[0], *in2_ptr = data[1];
258+
char *out1_ptr = data[2], *out2_ptr = data[3];
259+
npy_intp in1_stride = strides[0];
260+
npy_intp in2_stride = strides[1];
261+
npy_intp out1_stride = strides[2];
262+
npy_intp out2_stride = strides[3];
263+
264+
QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
265+
QuadBackendType backend = descr->backend;
266+
267+
while (N--) {
268+
if (backend == BACKEND_SLEEF) {
269+
sleef_op((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr,
270+
(Sleef_quad *)out1_ptr, (Sleef_quad *)out2_ptr);
271+
}
272+
else {
273+
longdouble_op((long double *)in1_ptr, (long double *)in2_ptr,
274+
(long double *)out1_ptr, (long double *)out2_ptr);
275+
}
276+
277+
in1_ptr += in1_stride;
278+
in2_ptr += in2_stride;
279+
out1_ptr += out1_stride;
280+
out2_ptr += out2_stride;
281+
}
282+
return 0;
283+
}
284+
285+
// Create binary ufunc with 2 outputs (generic for divmod, modf, frexp, etc.)
286+
template <binary_op_2out_quad_def sleef_op, binary_op_2out_longdouble_def longdouble_op>
287+
int
288+
create_quad_binary_2out_ufunc(PyObject *numpy, const char *ufunc_name)
289+
{
290+
PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name);
291+
if (ufunc == NULL) {
292+
return -1;
293+
}
294+
295+
// 2 inputs, 2 outputs
296+
PyArray_DTypeMeta *dtypes[4] = {&QuadPrecDType, &QuadPrecDType, &QuadPrecDType, &QuadPrecDType};
297+
298+
PyType_Slot slots[] = {
299+
{NPY_METH_resolve_descriptors, (void *)&quad_binary_op_2out_resolve_descriptors},
300+
{NPY_METH_strided_loop,
301+
(void *)&quad_generic_binop_2out_strided_loop_aligned<sleef_op, longdouble_op>},
302+
{NPY_METH_unaligned_strided_loop,
303+
(void *)&quad_generic_binop_2out_strided_loop_unaligned<sleef_op, longdouble_op>},
304+
{0, NULL}};
305+
306+
PyArrayMethod_Spec Spec = {
307+
.name = "quad_binop_2out",
308+
.nin = 2,
309+
.nout = 2,
310+
.casting = NPY_NO_CASTING,
311+
.flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE),
312+
.dtypes = dtypes,
313+
.slots = slots,
314+
};
315+
316+
if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) {
317+
return -1;
318+
}
319+
320+
PyObject *promoter_capsule =
321+
PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL);
322+
if (promoter_capsule == NULL) {
323+
return -1;
324+
}
325+
326+
PyObject *DTypes = PyTuple_Pack(4, &PyArrayDescr_Type, &PyArrayDescr_Type,
327+
&PyArrayDescr_Type, &PyArrayDescr_Type);
328+
if (DTypes == 0) {
329+
Py_DECREF(promoter_capsule);
330+
return -1;
331+
}
332+
333+
if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) {
334+
Py_DECREF(promoter_capsule);
335+
Py_DECREF(DTypes);
336+
return -1;
337+
}
338+
Py_DECREF(promoter_capsule);
339+
Py_DECREF(DTypes);
340+
return 0;
341+
}
342+
148343
template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
149344
int
150345
create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name)
@@ -259,5 +454,8 @@ init_quad_binary_ops(PyObject *numpy)
259454
if (create_quad_binary_ufunc<quad_logaddexp2, ld_logaddexp2>(numpy, "logaddexp2") < 0) {
260455
return -1;
261456
}
457+
if (create_quad_binary_2out_ufunc<quad_divmod, ld_divmod>(numpy, "divmod") < 0) {
458+
return -1;
459+
}
262460
return 0;
263461
}

quaddtype/release_tracker.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Plan for `numpy-quaddtype` v1.0.0
22

3-
- [ ] High-Endian System support
3+
- [x] High-Endian System support
44
- [ ] Complete Documentation
55

66
| ufunc name | Added | Edge Cases Tested\* |
@@ -21,7 +21,7 @@
2121
| remainder |||
2222
| mod |||
2323
| fmod |||
24-
| divmod | | |
24+
| divmod | | |
2525
| absolute |||
2626
| fabs | | |
2727
| rint |||

0 commit comments

Comments
 (0)