diff --git a/CMakeLists.txt b/CMakeLists.txt
index c421df7..af4e480 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,7 +45,6 @@ if(WIN32)
     string(CONCAT PRECISION_FLAGS
         "/fp:fast=2 "
         "/Qimf-precision=high "
-        "/Qprec-sqrt "
         "/Qprotect-parens "
     )
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox ${WARNING_FLAGS} ${SDL_FLAGS} ${PRECISION_FLAGS}")
@@ -82,7 +81,6 @@ elseif(UNIX)
         "${SDL_FLAGS}"
     )
     string(CONCAT PRECISION_FLAGS
-        "-prec-sqrt "
         "-fprotect-parens "
         "-fimf-precision=high "
         "-fp-model fast=2 "
diff --git a/mkl_umath/src/mkl_umath_loops.c.src b/mkl_umath/src/mkl_umath_loops.c.src
index 050bdd0..6728999 100644
--- a/mkl_umath/src/mkl_umath_loops.c.src
+++ b/mkl_umath/src/mkl_umath_loops.c.src
@@ -73,40 +73,36 @@
 #define MKL_INT_MAX ((npy_intp) ((~((MKL_UINT) 0)) >> 1))
 
-#define CHUNKED_VML_CALL2(vml_func, n, type, in1, op1) \
-    do { \
-        npy_intp _n_ = (n); \
-        const npy_intp _chunk_size = MKL_INT_MAX; \
-        type *in1p = (type *) (in1); \
-        type *op1p = (type *) (op1); \
-        while (_n_ > _chunk_size) { \
-            vml_func((MKL_INT) _chunk_size, in1p, op1p); \
-            _n_ -= _chunk_size; \
-            in1p += _chunk_size; \
-            op1p += _chunk_size; \
-        } \
-        if (_n_) { \
-            vml_func((MKL_INT) _n_, in1p, op1p); \
-        } \
+#define CHUNKED_VML_CALL2(vml_func, n, type, in1, op1) \
+    do { \
+        npy_intp _n_ = (n); \
+        const npy_intp _chunk_size = MKL_INT_MAX; \
+        type *in1p = (type *) (in1); \
+        type *op1p = (type *) (op1); \
+        while (_n_ > 0) { \
+            npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \
+            vml_func((MKL_INT) _current_chunk, in1p, op1p); \
+            _n_ -= _current_chunk; \
+            in1p += _current_chunk; \
+            op1p += _current_chunk; \
+        } \
     } while (0)
 
-#define CHUNKED_VML_CALL3(vml_func, n, type, in1, in2, op1) \
-    do { \
-        npy_intp _n_ = (n); \
-        const npy_intp _chunk_size = MKL_INT_MAX; \
-        type *in1p = (type *) (in1); \
-        type *in2p = (type *) (in2); \
-        type *op1p = (type *) (op1); \
-        while (_n_ > _chunk_size) { \
-            vml_func((MKL_INT) _chunk_size, in1p, in2p, op1p); \
-            _n_ -= _chunk_size; \
-            in1p += _chunk_size; \
-            in2p += _chunk_size; \
-            op1p += _chunk_size; \
-        } \
-        if (_n_) { \
-            vml_func((MKL_INT)_n_, in1p, in2p, op1p); \
-        } \
+#define CHUNKED_VML_CALL3(vml_func, n, type, in1, in2, op1) \
+    do { \
+        npy_intp _n_ = (n); \
+        const npy_intp _chunk_size = MKL_INT_MAX; \
+        type *in1p = (type *) (in1); \
+        type *in2p = (type *) (in2); \
+        type *op1p = (type *) (op1); \
+        while (_n_ > 0) { \
+            npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \
+            vml_func((MKL_INT) _current_chunk, in1p, in2p, op1p); \
+            _n_ -= _current_chunk; \
+            in1p += _current_chunk; \
+            in2p += _current_chunk; \
+            op1p += _current_chunk; \
+        } \
     } while(0)
@@ -120,14 +116,12 @@
         const type _shiftA = (shiftA); \
         const type _scaleB = (scaleB); \
         const type _shiftB = (shiftB); \
-        while (_n_ > _chunk_size) { \
-            vml_func(_chunk_size, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \
-            _n_ -= _chunk_size; \
-            in1p += _chunk_size; \
-            op1p += _chunk_size; \
-        } \
-        if (_n_) { \
-            vml_func((MKL_INT)_n_, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \
+        while (_n_ > 0) { \
+            npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \
+            vml_func(_current_chunk, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \
+            _n_ -= _current_chunk; \
+            in1p += _current_chunk; \
+            op1p += _current_chunk; \
         } \
     } while(0)
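
For context, the rewritten macros fold the former main-loop/remainder structure into a single loop that clamps every VML call to at most MKL_INT_MAX elements, so element counts wider than MKL_INT are processed in several calls. Below is a minimal, self-contained C sketch of that chunking pattern (not part of the patch); MKL_INT, npy_intp, the MKL_INT_MAX limit, and the VML-style callee are stand-ins assumed here so the example builds without MKL or NumPy headers.

/*
 * Illustrative sketch of the clamp-and-advance chunking used by the
 * rewritten CHUNKED_VML_CALL2 macro.  All types and limits below are
 * stand-ins, not the real MKL/NumPy definitions.
 */
#include <stdint.h>
#include <stdio.h>

typedef int64_t npy_intp;   /* stand-in for NumPy's npy_intp            */
typedef int32_t MKL_INT;    /* stand-in for a 32-bit (LP64) MKL_INT     */

/* Largest count a single VML-style call can accept with a 32-bit MKL_INT. */
#define MKL_INT_MAX_STUB ((npy_intp) INT32_MAX)

/* Stub with the 2-operand VML shape: out[i] = |in[i]| for i < n. */
static void vml_like_abs(MKL_INT n, const double *in, double *out)
{
    for (MKL_INT i = 0; i < n; i++) {
        out[i] = (in[i] < 0.0) ? -in[i] : in[i];
    }
}

/* Same control flow as the new macro body: each iteration issues one call
 * of at most MKL_INT_MAX_STUB elements and advances both pointers, so a
 * count larger than MKL_INT can hold is never passed to the callee. */
static void chunked_abs(npy_intp n, const double *in, double *out)
{
    const npy_intp chunk_size = MKL_INT_MAX_STUB;
    while (n > 0) {
        npy_intp current_chunk = (n > chunk_size) ? chunk_size : n;
        vml_like_abs((MKL_INT) current_chunk, in, out);
        n   -= current_chunk;
        in  += current_chunk;
        out += current_chunk;
    }
}

int main(void)
{
    double in[4] = {-1.0, 2.0, -3.0, 4.0};
    double out[4];
    chunked_abs(4, in, out);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
    return 0;
}

CHUNKED_VML_CALL3 and the scale/shift variant in the patch apply the same clamp-and-advance loop, only with additional input pointers stepped by the same chunk size.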