|
73 | 73 |
|
/*
 * Upper bound used as the per-call element limit by the chunked VML macros.
 * Built by setting every bit of an MKL_UINT, shifting right by one, and
 * converting the result to npy_intp.
 * NOTE(review): presumably equals the largest positive MKL_INT; this assumes
 * MKL_UINT is the unsigned counterpart of MKL_INT -- confirm.
 */
#define MKL_INT_MAX ((npy_intp) ((~((MKL_UINT) 0)) >> 1))
|
75 | 75 |
|
/*
 * CHUNKED_VML_CALL2(vml_func, n, type, in1, op1)
 *
 * Invoke vml_func(count, in, out) over n elements, split into consecutive
 * pieces of at most MKL_INT_MAX elements each. The count handed to vml_func
 * is cast to MKL_INT, so no single call ever receives more than MKL_INT_MAX.
 *
 *   vml_func  callable taking (MKL_INT count, type *in, type *out)
 *   n         total element count; converted to npy_intp once
 *   type      element type used to cast and advance the pointers
 *   in1       input buffer, cast to (type *)
 *   op1       output buffer, cast to (type *)
 *
 * When n is zero or negative, vml_func is not called at all. Each argument
 * is evaluated exactly once. The locals _n_, _chunk_size, _current_chunk,
 * in1p and op1p live only inside the do/while block.
 */
#define CHUNKED_VML_CALL2(vml_func, n, type, in1, op1)                       \
    do {                                                                    \
        npy_intp _n_ = (n);                                                 \
        const npy_intp _chunk_size = MKL_INT_MAX;                           \
        type *in1p = (type *) (in1);                                        \
        type *op1p = (type *) (op1);                                        \
        while (_n_ > 0) {                                                   \
            /* clamp this piece to what fits in an MKL_INT */               \
            npy_intp _current_chunk =                                       \
                (_n_ > _chunk_size) ? _chunk_size : _n_;                    \
            vml_func((MKL_INT) _current_chunk, in1p, op1p);                 \
            _n_ -= _current_chunk;                                          \
            in1p += _current_chunk;                                         \
            op1p += _current_chunk;                                         \
        }                                                                   \
    } while (0)
|
92 | 90 |
|
/*
 * CHUNKED_VML_CALL3(vml_func, n, type, in1, in2, op1)
 *
 * Two-input counterpart of the chunked call: invoke
 * vml_func(count, in_a, in_b, out) over n elements, split into consecutive
 * pieces of at most MKL_INT_MAX elements each. The count handed to vml_func
 * is cast to MKL_INT, so no single call ever receives more than MKL_INT_MAX.
 *
 *   vml_func  callable taking (MKL_INT count, type *a, type *b, type *out)
 *   n         total element count; converted to npy_intp once
 *   type      element type used to cast and advance the pointers
 *   in1, in2  input buffers, cast to (type *)
 *   op1       output buffer, cast to (type *)
 *
 * When n is zero or negative, vml_func is not called at all. Each argument
 * is evaluated exactly once. All three pointers advance in lockstep by the
 * size of the piece just processed.
 */
#define CHUNKED_VML_CALL3(vml_func, n, type, in1, in2, op1)                  \
    do {                                                                    \
        npy_intp _n_ = (n);                                                 \
        const npy_intp _chunk_size = MKL_INT_MAX;                           \
        type *in1p = (type *) (in1);                                        \
        type *in2p = (type *) (in2);                                        \
        type *op1p = (type *) (op1);                                        \
        while (_n_ > 0) {                                                   \
            /* clamp this piece to what fits in an MKL_INT */               \
            npy_intp _current_chunk =                                       \
                (_n_ > _chunk_size) ? _chunk_size : _n_;                    \
            vml_func((MKL_INT) _current_chunk, in1p, in2p, op1p);           \
            _n_ -= _current_chunk;                                          \
            in1p += _current_chunk;                                         \
            in2p += _current_chunk;                                         \
            op1p += _current_chunk;                                         \
        }                                                                   \
    } while (0)
|
111 | 107 |
|
112 | 108 |
|
|
120 | 116 | const type _shiftA = (shiftA); \
|
121 | 117 | const type _scaleB = (scaleB); \
|
122 | 118 | const type _shiftB = (shiftB); \
|
123 |
| - while (_n_ > _chunk_size) { \ |
124 |
| - vml_func(_chunk_size, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ |
125 |
| - _n_ -= _chunk_size; \ |
126 |
| - in1p += _chunk_size; \ |
127 |
| - op1p += _chunk_size; \ |
128 |
| - } \ |
129 |
| - if (_n_) { \ |
130 |
| - vml_func((MKL_INT)_n_, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ |
| 119 | + while (_n_ > 0) { \ |
| 120 | + npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \ |
| 121 | + vml_func(_current_chunk, in1p, in1p, _scaleA, _shiftA, _scaleB, _shiftB, op1p); \ |
| 122 | + _n_ -= _current_chunk; \ |
| 123 | + in1p += _current_chunk; \ |
| 124 | + op1p += _current_chunk; \ |
131 | 125 | } \
|
132 | 126 | } while(0)
|
133 | 127 |
|
|
0 commit comments