|
73 | 73 |
|
74 | 74 | #define MKL_INT_MAX ((npy_intp) ((~((MKL_UINT) 0)) >> 1)) |
75 | 75 |
|
76 | | -#define CHUNKED_VML_CALL2(vml_func, n, type, in1, op1) \ |
77 | | - do { \ |
78 | | - npy_intp _n_ = (n); \ |
79 | | - const npy_intp _chunk_size = MKL_INT_MAX; \ |
80 | | - type *in1p = (type *) (in1); \ |
81 | | - type *op1p = (type *) (op1); \ |
82 | | - while (_n_ > _chunk_size) { \ |
83 | | - vml_func((MKL_INT) _chunk_size, in1p, op1p); \ |
84 | | - _n_ -= _chunk_size; \ |
85 | | - in1p += _chunk_size; \ |
86 | | - op1p += _chunk_size; \ |
87 | | - } \ |
88 | | - if (_n_) { \ |
89 | | - vml_func((MKL_INT) _n_, in1p, op1p); \ |
90 | | - } \ |
| 76 | +#define CHUNKED_VML_CALL2(vml_func, n, type, in1, op1) \ |
| 77 | + do { \ |
| 78 | + npy_intp _n_ = (n); \ |
| 79 | + const npy_intp _chunk_size = MKL_INT_MAX; \ |
| 80 | + type *in1p = (type *) (in1); \ |
| 81 | + type *op1p = (type *) (op1); \ |
| 82 | + while (_n_ > 0) { \ |
| 83 | + npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ |
| 84 | + vml_func((MKL_INT) _current_chunk, in1p, op1p); \ |
| 85 | + _n_ -= _current_chunk; \ |
| 86 | + in1p += _current_chunk; \ |
| 87 | + op1p += _current_chunk; \ |
| 88 | + } \ |
91 | 89 | } while (0) |
92 | 90 |
|
93 | | -#define CHUNKED_VML_CALL3(vml_func, n, type, in1, in2, op1) \ |
94 | | - do { \ |
95 | | - npy_intp _n_ = (n); \ |
96 | | - const npy_intp _chunk_size = MKL_INT_MAX; \ |
97 | | - type *in1p = (type *) (in1); \ |
98 | | - type *in2p = (type *) (in2); \ |
99 | | - type *op1p = (type *) (op1); \ |
100 | | - while (_n_ > _chunk_size) { \ |
101 | | - vml_func((MKL_INT) _chunk_size, in1p, in2p, op1p); \ |
102 | | - _n_ -= _chunk_size; \ |
103 | | - in1p += _chunk_size; \ |
104 | | - in2p += _chunk_size; \ |
105 | | - op1p += _chunk_size; \ |
106 | | - } \ |
107 | | - if (_n_) { \ |
108 | | - vml_func((MKL_INT)_n_, in1p, in2p, op1p); \ |
109 | | - } \ |
| 91 | +#define CHUNKED_VML_CALL3(vml_func, n, type, in1, in2, op1) \ |
| 92 | + do { \ |
| 93 | + npy_intp _n_ = (n); \ |
| 94 | + const npy_intp _chunk_size = MKL_INT_MAX; \ |
| 95 | + type *in1p = (type *) (in1); \ |
| 96 | + type *in2p = (type *) (in2); \ |
| 97 | + type *op1p = (type *) (op1); \ |
| 98 | + while (_n_ > 0) { \ |
| 99 | + npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ |
| 100 | + vml_func((MKL_INT) _current_chunk, in1p, in2p, op1p); \ |
| 101 | + _n_ -= _current_chunk; \ |
| 102 | + in1p += _current_chunk; \ |
| 103 | + in2p += _current_chunk; \ |
| 104 | + op1p += _current_chunk; \ |
| 105 | + } \ |
110 | 106 | } while(0) |
111 | 107 |
|
112 | 108 |
|
|
120 | 116 | const type _shiftA = (shiftA); \ |
121 | 117 | const type _scaleB = (scaleB); \ |
122 | 118 | const type _shiftB = (shiftB); \ |
123 | | - while (_n_ > _chunk_size) { \ |
124 | | - vml_func(_chunk_size, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ |
125 | | - _n_ -= _chunk_size; \ |
126 | | - in1p += _chunk_size; \ |
127 | | - op1p += _chunk_size; \ |
128 | | - } \ |
129 | | - if (_n_) { \ |
130 | | - vml_func((MKL_INT)_n_, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ |
| 119 | + while (_n_ > 0) { \ |
| 120 | + npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ |
| 121 | + vml_func(_current_chunk, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ |
| 122 | + _n_ -= _current_chunk; \ |
| 123 | + in1p += _current_chunk; \ |
| 124 | + op1p += _current_chunk; \ |
131 | 125 | } \ |
132 | 126 | } while(0) |
133 | 127 |
|
|
0 commit comments