15
15
16
16
namespace xsimd
17
17
{
18
- template <class I1 , class I2 , class O1 , class UF >
19
- void transform (I1 first, I2 last, O1 out_first, UF&& f)
18
+ template <class I1 , class I2 , class O1 , class UF , class UFB >
19
+ void transform_batch (I1 first, I2 last, O1 out_first, UF&& f, UFB&& fb )
20
20
{
21
21
using value_type = typename std::decay<decltype (*first)>::type;
22
22
using traits = simd_traits<value_type>;
@@ -43,7 +43,7 @@ namespace xsimd
43
43
for (std::size_t i = align_begin; i < align_end; i += simd_size)
44
44
{
45
45
xsimd::load_aligned (&first[i], batch);
46
- xsimd::store_aligned (&out_first[i], f (batch));
46
+ xsimd::store_aligned (&out_first[i], fb (batch));
47
47
}
48
48
49
49
for (std::size_t i = align_end; i < size; ++i)
@@ -62,7 +62,7 @@ namespace xsimd
62
62
for (std::size_t i = align_begin; i < align_end; i += simd_size)
63
63
{
64
64
xsimd::load_aligned (&first[i], batch);
65
- xsimd::store_unaligned (&out_first[i], f (batch));
65
+ xsimd::store_unaligned (&out_first[i], fb (batch));
66
66
}
67
67
68
68
for (std::size_t i = align_end; i < size; ++i)
@@ -72,8 +72,14 @@ namespace xsimd
72
72
}
73
73
}
74
74
75
- template <class I1 , class I2 , class I3 , class O1 , class UF >
76
- void transform (I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
75
/**
 * Unary transform over the range [first, last), writing to out_first.
 *
 * Convenience overload for a functor that serves both roles expected by
 * transform_batch: the single callable @p f is handed over twice, once as
 * the scalar functor and once as the batch functor.
 *
 * @param first     beginning of the input range
 * @param last      end of the input range
 * @param out_first beginning of the output range
 * @param f         callable used for both the scalar and the batch slot
 */
template <class I1, class I2, class O1, class UF>
void transform(I1 first, I2 last, O1 out_first, UF&& f)
{
    // The same object fills both functor slots, so it is bound once as an
    // lvalue and never forwarded (forwarding twice could move from it).
    auto& op = f;
    transform_batch(first, last, out_first, op, op);
}
80
+
81
+ template <class I1 , class I2 , class I3 , class O1 , class UF , class UFB >
82
+ void transform_batch (I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f, UFB&& fb)
77
83
{
78
84
using value_type = typename std::decay<decltype (*first_1)>::type;
79
85
using traits = simd_traits<value_type>;
@@ -102,7 +108,7 @@ namespace xsimd
102
108
{ \
103
109
xsimd::A1 (&first_1[i], batch_1); \
104
110
xsimd::A2 (&first_2[i], batch_2); \
105
- xsimd::A3 (&out_first[i], f (batch_1, batch_2)); \
111
+ xsimd::A3 (&out_first[i], fb (batch_1, batch_2)); \
106
112
} \
107
113
\
108
114
for (std::size_t i = align_end; i < size; ++i) \
@@ -130,6 +136,11 @@ namespace xsimd
130
136
#undef XSIMD_LOOP_MACRO
131
137
}
132
138
139
/**
 * Binary transform over two input ranges, writing to out_first.
 *
 * Convenience overload for a functor that serves both roles expected by
 * the two-input transform_batch: the single callable @p f is handed over
 * twice, once as the scalar functor and once as the batch functor.
 *
 * @param first_1   beginning of the first input range
 * @param last_1    end of the first input range
 * @param first_2   beginning of the second input range
 * @param out_first beginning of the output range
 * @param f         callable used for both the scalar and the batch slot
 */
template <class I1, class I2, class I3, class O1, class UF>
void transform(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
{
    // The same object fills both functor slots, so it is bound once as an
    // lvalue and never forwarded (forwarding twice could move from it).
    auto& op = f;
    transform_batch(first_1, last_1, first_2, out_first, op, op);
}
133
144
134
145
// TODO: Remove this once we drop C++11 support
135
146
namespace detail
@@ -141,9 +152,8 @@ namespace xsimd
141
152
};
142
153
}
143
154
144
-
145
- template <class Iterator1 , class Iterator2 , class Init , class BinaryFunction = detail::plus>
146
- Init reduce (Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
155
+ template <class Iterator1 , class Iterator2 , class Init , class BinaryFunction , class BinaryFunctionBatch >
156
+ Init reduce_batch (Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun, BinaryFunctionBatch&& binfun_batch)
147
157
{
148
158
using value_type = typename std::decay<decltype (*first)>::type;
149
159
using traits = simd_traits<value_type>;
@@ -180,7 +190,7 @@ namespace xsimd
180
190
for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
181
191
{
182
192
xsimd::load_aligned (ptr, batch);
183
- batch_init = binfun (batch_init, batch);
193
+ batch_init = binfun_batch (batch_init, batch);
184
194
}
185
195
186
196
// reduce across batch
@@ -197,6 +207,93 @@ namespace xsimd
197
207
return init;
198
208
}
199
209
210
+ template <class Iterator1 , class Iterator2 , class Init , class BinaryFunction = detail::plus>
211
+ Init reduce (Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
212
+ {
213
+ return reduce_batch (first, last, init, binfun, binfun);
214
+ }
215
+
216
+ namespace detail
217
+ {
218
+ template <class T >
219
+ struct count_batch
220
+ {
221
+ count_batch (T value)
222
+ : value(value)
223
+ {}
224
+
225
+ count_batch (const count_batch<T>&) = default ;
226
+ count_batch (count_batch<T>&&) = default ;
227
+
228
+ template <class B >
229
+ std::size_t operator ()(const B& b)
230
+ {
231
+ static auto zero = B (T (0 ));
232
+ static auto one = B (T (1 ));
233
+ return static_cast <std::size_t >(xsimd::hadd (xsimd::select (b == value, one, zero)));
234
+ }
235
+
236
+ private:
237
+ T value;
238
+ };
239
+ }
240
+
241
+ template <class Iterator1 , class Iterator2 , class UnaryPredicate , class UnaryPredicateBatch >
242
+ std::size_t count_if (Iterator1 first, Iterator2 last, UnaryPredicate&& predicate, UnaryPredicateBatch&& predicate_batch)
243
+ {
244
+ using value_type = typename std::decay<decltype (*first)>::type;
245
+ using traits = simd_traits<value_type>;
246
+ using batch_type = typename traits::type;
247
+
248
+ std::size_t size = static_cast <std::size_t >(std::distance (first, last));
249
+ constexpr std::size_t simd_size = traits::size;
250
+
251
+ std::size_t counter (0 );
252
+ if (size < simd_size)
253
+ {
254
+ while (first != last)
255
+ {
256
+ counter += predicate (*first++);
257
+ }
258
+ return counter;
259
+ }
260
+
261
+ const auto * const ptr_begin = &(*first);
262
+
263
+ std::size_t align_begin = xsimd::get_alignment_offset (ptr_begin, size, simd_size);
264
+ std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1 ));
265
+
266
+ // reduce initial unaligned part
267
+ for (std::size_t i = 0 ; i < align_begin; ++i)
268
+ {
269
+ counter += predicate (first[i]);
270
+ }
271
+
272
+ // reduce aligned part
273
+ batch_type batch;
274
+ auto ptr = ptr_begin + align_begin;
275
+ for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
276
+ {
277
+ xsimd::load_aligned (ptr, batch);
278
+ counter += predicate_batch (batch);
279
+ }
280
+
281
+ // reduce final unaligned part
282
+ for (std::size_t i = align_end; i < size; ++i)
283
+ {
284
+ counter += predicate (first[i]);
285
+ }
286
+
287
+ return counter;
288
+ }
289
+
290
+ template <class Iterator1 , class Iterator2 , class T >
291
+ std::size_t count (Iterator1 first, Iterator2 last, const T& value)
292
+ {
293
+ return count_if (first, last,
294
+ [&value](const T& x) { return value == x; }, detail::count_batch<T>{value});
295
+ }
296
+
200
297
}
201
298
202
299
#endif
0 commit comments