@@ -145,6 +145,201 @@ quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *co
145145}
146146
147147
148+
149+ // Resolve descriptors for binary ops with 2 outputs (2 inputs, 2 outputs)
150+ static NPY_CASTING
151+ quad_binary_op_2out_resolve_descriptors (PyObject *self, PyArray_DTypeMeta *const dtypes[],
152+ PyArray_Descr *const given_descrs[],
153+ PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED (view_offset))
154+ {
155+ QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0 ];
156+ QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1 ];
157+ QuadBackendType target_backend;
158+
159+ // Determine target backend and if casting is needed
160+ NPY_CASTING casting = NPY_NO_CASTING;
161+ if (descr_in1->backend != descr_in2->backend ) {
162+ target_backend = BACKEND_LONGDOUBLE;
163+ casting = NPY_SAFE_CASTING;
164+ }
165+ else {
166+ target_backend = descr_in1->backend ;
167+ }
168+
169+ // Set up input descriptors, casting if necessary
170+ for (int i = 0 ; i < 2 ; i++) {
171+ if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) {
172+ loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance (target_backend);
173+ if (!loop_descrs[i]) {
174+ return (NPY_CASTING)-1 ;
175+ }
176+ }
177+ else {
178+ Py_INCREF (given_descrs[i]);
179+ loop_descrs[i] = given_descrs[i];
180+ }
181+ }
182+
183+ // Set up output descriptors (2 outputs for divmod)
184+ for (int i = 2 ; i < 4 ; i++) {
185+ if (given_descrs[i] == NULL ) {
186+ loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance (target_backend);
187+ if (!loop_descrs[i]) {
188+ return (NPY_CASTING)-1 ;
189+ }
190+ }
191+ else {
192+ QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[i];
193+ if (descr_out->backend != target_backend) {
194+ loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance (target_backend);
195+ if (!loop_descrs[i]) {
196+ return (NPY_CASTING)-1 ;
197+ }
198+ }
199+ else {
200+ Py_INCREF (given_descrs[i]);
201+ loop_descrs[i] = given_descrs[i];
202+ }
203+ }
204+ }
205+ return casting;
206+ }
207+
208+ // Strided loop for binary ops with 2 outputs (unaligned)
209+ template <binary_op_2out_quad_def sleef_op, binary_op_2out_longdouble_def longdouble_op>
210+ int
211+ quad_generic_binop_2out_strided_loop_unaligned (PyArrayMethod_Context *context, char *const data[],
212+ npy_intp const dimensions[], npy_intp const strides[],
213+ NpyAuxData *auxdata)
214+ {
215+ npy_intp N = dimensions[0 ];
216+ char *in1_ptr = data[0 ], *in2_ptr = data[1 ];
217+ char *out1_ptr = data[2 ], *out2_ptr = data[3 ];
218+ npy_intp in1_stride = strides[0 ];
219+ npy_intp in2_stride = strides[1 ];
220+ npy_intp out1_stride = strides[2 ];
221+ npy_intp out2_stride = strides[3 ];
222+
223+ QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors [0 ];
224+ QuadBackendType backend = descr->backend ;
225+ size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof (Sleef_quad) : sizeof (long double );
226+
227+ quad_value in1, in2, out1, out2;
228+ while (N--) {
229+ memcpy (&in1, in1_ptr, elem_size);
230+ memcpy (&in2, in2_ptr, elem_size);
231+ if (backend == BACKEND_SLEEF) {
232+ sleef_op (&in1.sleef_value , &in2.sleef_value , &out1.sleef_value , &out2.sleef_value );
233+ }
234+ else {
235+ longdouble_op (&in1.longdouble_value , &in2.longdouble_value ,
236+ &out1.longdouble_value , &out2.longdouble_value );
237+ }
238+ memcpy (out1_ptr, &out1, elem_size);
239+ memcpy (out2_ptr, &out2, elem_size);
240+
241+ in1_ptr += in1_stride;
242+ in2_ptr += in2_stride;
243+ out1_ptr += out1_stride;
244+ out2_ptr += out2_stride;
245+ }
246+ return 0 ;
247+ }
248+
249+ // Strided loop for binary ops with 2 outputs (aligned)
250+ template <binary_op_2out_quad_def sleef_op, binary_op_2out_longdouble_def longdouble_op>
251+ int
252+ quad_generic_binop_2out_strided_loop_aligned (PyArrayMethod_Context *context, char *const data[],
253+ npy_intp const dimensions[], npy_intp const strides[],
254+ NpyAuxData *auxdata)
255+ {
256+ npy_intp N = dimensions[0 ];
257+ char *in1_ptr = data[0 ], *in2_ptr = data[1 ];
258+ char *out1_ptr = data[2 ], *out2_ptr = data[3 ];
259+ npy_intp in1_stride = strides[0 ];
260+ npy_intp in2_stride = strides[1 ];
261+ npy_intp out1_stride = strides[2 ];
262+ npy_intp out2_stride = strides[3 ];
263+
264+ QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors [0 ];
265+ QuadBackendType backend = descr->backend ;
266+
267+ while (N--) {
268+ if (backend == BACKEND_SLEEF) {
269+ sleef_op ((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr,
270+ (Sleef_quad *)out1_ptr, (Sleef_quad *)out2_ptr);
271+ }
272+ else {
273+ longdouble_op ((long double *)in1_ptr, (long double *)in2_ptr,
274+ (long double *)out1_ptr, (long double *)out2_ptr);
275+ }
276+
277+ in1_ptr += in1_stride;
278+ in2_ptr += in2_stride;
279+ out1_ptr += out1_stride;
280+ out2_ptr += out2_stride;
281+ }
282+ return 0 ;
283+ }
284+
285+ // Create binary ufunc with 2 outputs (generic for divmod, modf, frexp, etc.)
286+ template <binary_op_2out_quad_def sleef_op, binary_op_2out_longdouble_def longdouble_op>
287+ int
288+ create_quad_binary_2out_ufunc (PyObject *numpy, const char *ufunc_name)
289+ {
290+ PyObject *ufunc = PyObject_GetAttrString (numpy, ufunc_name);
291+ if (ufunc == NULL ) {
292+ return -1 ;
293+ }
294+
295+ // 2 inputs, 2 outputs
296+ PyArray_DTypeMeta *dtypes[4 ] = {&QuadPrecDType, &QuadPrecDType, &QuadPrecDType, &QuadPrecDType};
297+
298+ PyType_Slot slots[] = {
299+ {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_2out_resolve_descriptors},
300+ {NPY_METH_strided_loop,
301+ (void *)&quad_generic_binop_2out_strided_loop_aligned<sleef_op, longdouble_op>},
302+ {NPY_METH_unaligned_strided_loop,
303+ (void *)&quad_generic_binop_2out_strided_loop_unaligned<sleef_op, longdouble_op>},
304+ {0 , NULL }};
305+
306+ PyArrayMethod_Spec Spec = {
307+ .name = " quad_binop_2out" ,
308+ .nin = 2 ,
309+ .nout = 2 ,
310+ .casting = NPY_NO_CASTING,
311+ .flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE),
312+ .dtypes = dtypes,
313+ .slots = slots,
314+ };
315+
316+ if (PyUFunc_AddLoopFromSpec (ufunc, &Spec) < 0 ) {
317+ return -1 ;
318+ }
319+
320+ PyObject *promoter_capsule =
321+ PyCapsule_New ((void *)&quad_ufunc_promoter, " numpy._ufunc_promoter" , NULL );
322+ if (promoter_capsule == NULL ) {
323+ return -1 ;
324+ }
325+
326+ PyObject *DTypes = PyTuple_Pack (4 , &PyArrayDescr_Type, &PyArrayDescr_Type,
327+ &PyArrayDescr_Type, &PyArrayDescr_Type);
328+ if (DTypes == 0 ) {
329+ Py_DECREF (promoter_capsule);
330+ return -1 ;
331+ }
332+
333+ if (PyUFunc_AddPromoter (ufunc, DTypes, promoter_capsule) < 0 ) {
334+ Py_DECREF (promoter_capsule);
335+ Py_DECREF (DTypes);
336+ return -1 ;
337+ }
338+ Py_DECREF (promoter_capsule);
339+ Py_DECREF (DTypes);
340+ return 0 ;
341+ }
342+
148343template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
149344int
150345create_quad_binary_ufunc (PyObject *numpy, const char *ufunc_name)
@@ -259,5 +454,8 @@ init_quad_binary_ops(PyObject *numpy)
259454 if (create_quad_binary_ufunc<quad_logaddexp2, ld_logaddexp2>(numpy, " logaddexp2" ) < 0 ) {
260455 return -1 ;
261456 }
457+ if (create_quad_binary_2out_ufunc<quad_divmod, ld_divmod>(numpy, " divmod" ) < 0 ) {
458+ return -1 ;
459+ }
262460 return 0 ;
263461}
0 commit comments