From d7ed1a6fa1c5d7ec01a0f4b1438ab0b941eea744 Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Tue, 29 Apr 2025 15:44:51 -0700 Subject: [PATCH 1/2] fix warnings generated at compile time --- CMakeLists.txt | 2 -- mkl_umath/src/mkl_umath_loops.c.src | 29 ++++++++++++++++------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c421df7..af4e480 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,6 @@ if(WIN32) string(CONCAT PRECISION_FLAGS "/fp:fast=2 " "/Qimf-precision=high " - "/Qprec-sqrt " "/Qprotect-parens " ) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox ${WARNING_FLAGS} ${SDL_FLAGS} ${PRECISION_FLAGS}") @@ -82,7 +81,6 @@ elseif(UNIX) "${SDL_FLAGS}" ) string(CONCAT PRECISION_FLAGS - "-prec-sqrt " "-fprotect-parens " "-fimf-precision=high " "-fp-model fast=2 " diff --git a/mkl_umath/src/mkl_umath_loops.c.src b/mkl_umath/src/mkl_umath_loops.c.src index baa34a2..8e7509b 100644 --- a/mkl_umath/src/mkl_umath_loops.c.src +++ b/mkl_umath/src/mkl_umath_loops.c.src @@ -74,10 +74,11 @@ type *in1p = (type *) (in1); \ type *op1p = (type *) (op1); \ while (_n_ > _chunk_size) { \ - vml_func((MKL_INT) _chunk_size, in1p, op1p); \ - _n_ -= _chunk_size; \ - in1p += _chunk_size; \ - op1p += _chunk_size; \ + npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ + vml_func((MKL_INT) _current_chunk, in1p, op1p); \ + _n_ -= _current_chunk; \ + in1p += _current_chunk; \ + op1p += _current_chunk; \ } \ if (_n_) { \ vml_func((MKL_INT) _n_, in1p, op1p); \ @@ -92,11 +93,12 @@ type *in2p = (type *) (in2); \ type *op1p = (type *) (op1); \ while (_n_ > _chunk_size) { \ - vml_func((MKL_INT) _chunk_size, in1p, in2p, op1p); \ - _n_ -= _chunk_size; \ - in1p += _chunk_size; \ - in2p += _chunk_size; \ - op1p += _chunk_size; \ + npy_intp _current_chunk = (_n_ > _chunk_size) ? 
_chunk_size : _n_; \ + vml_func((MKL_INT) _current_chunk, in1p, in2p, op1p); \ + _n_ -= _current_chunk; \ + in1p += _current_chunk; \ + in2p += _current_chunk; \ + op1p += _current_chunk; \ } \ if (_n_) { \ vml_func((MKL_INT)_n_, in1p, in2p, op1p); \ @@ -115,10 +117,11 @@ const type _scaleB = (scaleB); \ const type _shiftB = (shiftB); \ while (_n_ > _chunk_size) { \ - vml_func(_chunk_size, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ - _n_ -= _chunk_size; \ - in1p += _chunk_size; \ - op1p += _chunk_size; \ + npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ + vml_func(_current_chunk, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ + _n_ -= _current_chunk; \ + in1p += _current_chunk; \ + op1p += _current_chunk; \ } \ if (_n_) { \ vml_func((MKL_INT)_n_, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ From 675b5f23830ce8c14a5f5edb7c084795f27ee4bf Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Wed, 30 Apr 2025 10:56:39 -0700 Subject: [PATCH 2/2] complete the modification for while loop logic --- mkl_umath/src/mkl_umath_loops.c.src | 77 +++++++++++++---------------- 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/mkl_umath/src/mkl_umath_loops.c.src b/mkl_umath/src/mkl_umath_loops.c.src index 8e7509b..5f2a978 100644 --- a/mkl_umath/src/mkl_umath_loops.c.src +++ b/mkl_umath/src/mkl_umath_loops.c.src @@ -67,42 +67,36 @@ #define MKL_INT_MAX ((npy_intp) ((~((MKL_UINT) 0)) >> 1)) -#define CHUNKED_VML_CALL2(vml_func, n, type, in1, op1) \ - do { \ - npy_intp _n_ = (n); \ - const npy_intp _chunk_size = MKL_INT_MAX; \ - type *in1p = (type *) (in1); \ - type *op1p = (type *) (op1); \ - while (_n_ > _chunk_size) { \ - npy_intp _current_chunk = (_n_ > _chunk_size) ? 
_chunk_size : _n_; \ - vml_func((MKL_INT) _current_chunk, in1p, op1p); \ - _n_ -= _current_chunk; \ - in1p += _current_chunk; \ - op1p += _current_chunk; \ - } \ - if (_n_) { \ - vml_func((MKL_INT) _n_, in1p, op1p); \ - } \ +#define CHUNKED_VML_CALL2(vml_func, n, type, in1, op1) \ + do { \ + npy_intp _n_ = (n); \ + const npy_intp _chunk_size = MKL_INT_MAX; \ + type *in1p = (type *) (in1); \ + type *op1p = (type *) (op1); \ + while (_n_ > 0) { \ + npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ + vml_func((MKL_INT) _current_chunk, in1p, op1p); \ + _n_ -= _current_chunk; \ + in1p += _current_chunk; \ + op1p += _current_chunk; \ + } \ } while (0) -#define CHUNKED_VML_CALL3(vml_func, n, type, in1, in2, op1) \ - do { \ - npy_intp _n_ = (n); \ - const npy_intp _chunk_size = MKL_INT_MAX; \ - type *in1p = (type *) (in1); \ - type *in2p = (type *) (in2); \ - type *op1p = (type *) (op1); \ - while (_n_ > _chunk_size) { \ - npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ - vml_func((MKL_INT) _current_chunk, in1p, in2p, op1p); \ - _n_ -= _current_chunk; \ - in1p += _current_chunk; \ - in2p += _current_chunk; \ - op1p += _current_chunk; \ - } \ - if (_n_) { \ - vml_func((MKL_INT)_n_, in1p, in2p, op1p); \ - } \ +#define CHUNKED_VML_CALL3(vml_func, n, type, in1, in2, op1) \ + do { \ + npy_intp _n_ = (n); \ + const npy_intp _chunk_size = MKL_INT_MAX; \ + type *in1p = (type *) (in1); \ + type *in2p = (type *) (in2); \ + type *op1p = (type *) (op1); \ + while (_n_ > 0) { \ + npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ + vml_func((MKL_INT) _current_chunk, in1p, in2p, op1p); \ + _n_ -= _current_chunk; \ + in1p += _current_chunk; \ + in2p += _current_chunk; \ + op1p += _current_chunk; \ + } \ } while(0) @@ -116,15 +110,12 @@ const type _shiftA = (shiftA); \ const type _scaleB = (scaleB); \ const type _shiftB = (shiftB); \ - while (_n_ > _chunk_size) { \ - npy_intp _current_chunk = (_n_ > _chunk_size) ? 
_chunk_size : _n_; \ - vml_func(_current_chunk, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ - _n_ -= _current_chunk; \ - in1p += _current_chunk; \ - op1p += _current_chunk; \ - } \ - if (_n_) { \ - vml_func((MKL_INT)_n_, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ + while (_n_ > 0) { \ + npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ + vml_func(_current_chunk, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ + _n_ -= _current_chunk; \ + in1p += _current_chunk; \ + op1p += _current_chunk; \ } \ } while(0)