Skip to content

Commit a2cc837

Browse files
committed
Add new Algorithms using explicit batch type
1 parent e845404 commit a2cc837

File tree

3 files changed

+274
-12
lines changed

3 files changed

+274
-12
lines changed

Diff for: README.md

+40
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,46 @@ void mean(const vector_type& a, const vector_type& b, vector_type& res)
151151
}
152152
```
153153
154+
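Algorithms such as `xsimd::reduce` and `xsimd::transform` also come in explicit-batch variants (`xsimd::reduce_batch`, `xsimd::transform_batch`), which take one function for scalar values and a separate function for batches: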
155+
156+
```cpp
157+
template <class C, class T = typename std::decay<decltype(*C().begin())>::type>
158+
T nansum(const C& v)
159+
{
160+
return xsimd::reduce_batch(v.begin(), v.end(), 0.0,
161+
[](auto x, auto y) {
162+
return (std::isnan(x) ? 0.0 : x) + (std::isnan(y) ? 0.0 : y);
163+
},
164+
[](auto x, auto y) {
165+
static decltype(x) zero(0.0);
166+
auto xnan = xsimd::isnan(x);
167+
auto ynan = xsimd::isnan(y);
168+
auto xs = xsimd::select(xnan, zero, x);
169+
auto ys = xsimd::select(ynan, zero, y);
170+
return xs + ys;
171+
});
172+
}
173+
```
174+
175+
To switch from `std::count_if` to `xsimd::count_if`:
176+
177+
```cpp
178+
// v is an aligned vector of int type
179+
auto count_expected = std::count_if(v.begin(), v.end(),
180+
[](auto x) {
181+
return x >= 50 && x <= 70 ? 1 : 0;
182+
});
183+
auto count = xsimd::count_if(v.begin(), v.end(),
184+
[](auto x) {
185+
return x >= 50 && x <= 70 ? 1 : 0;
186+
},
187+
[](auto b) {
188+
static decltype(b) zero(0);
189+
static decltype(b) one(1);
190+
return xsimd::hadd(xsimd::select(b >= 50 && b <= 70, one, zero));
191+
});
192+
assert(count_expected == count);
193+
```
154194
155195
## Building and Running the Tests
156196

Diff for: include/xsimd/stl/algorithms.hpp

+108-11
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515

1616
namespace xsimd
1717
{
18-
template <class I1, class I2, class O1, class UF>
19-
void transform(I1 first, I2 last, O1 out_first, UF&& f)
18+
template <class I1, class I2, class O1, class UF, class UFB>
19+
void transform_batch(I1 first, I2 last, O1 out_first, UF&& f, UFB&& fb)
2020
{
2121
using value_type = typename std::decay<decltype(*first)>::type;
2222
using traits = simd_traits<value_type>;
@@ -43,7 +43,7 @@ namespace xsimd
4343
for (std::size_t i = align_begin; i < align_end; i += simd_size)
4444
{
4545
xsimd::load_aligned(&first[i], batch);
46-
xsimd::store_aligned(&out_first[i], f(batch));
46+
xsimd::store_aligned(&out_first[i], fb(batch));
4747
}
4848

4949
for (std::size_t i = align_end; i < size; ++i)
@@ -62,7 +62,7 @@ namespace xsimd
6262
for (std::size_t i = align_begin; i < align_end; i += simd_size)
6363
{
6464
xsimd::load_aligned(&first[i], batch);
65-
xsimd::store_unaligned(&out_first[i], f(batch));
65+
xsimd::store_unaligned(&out_first[i], fb(batch));
6666
}
6767

6868
for (std::size_t i = align_end; i < size; ++i)
@@ -72,8 +72,14 @@ namespace xsimd
7272
}
7373
}
7474

75-
template <class I1, class I2, class I3, class O1, class UF>
76-
void transform(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
75+
// Unary transform taking a single callable.
// Delegates to transform_batch, passing `f` as both the scalar function
// and the batch function.
template <class I1, class I2, class O1, class UF>
void transform(I1 first, I2 last, O1 out_first, UF&& f)
{
    return transform_batch(first, last, out_first, f, f);
}
80+
81+
template <class I1, class I2, class I3, class O1, class UF, class UFB>
82+
void transform_batch(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f, UFB&& fb)
7783
{
7884
using value_type = typename std::decay<decltype(*first_1)>::type;
7985
using traits = simd_traits<value_type>;
@@ -102,7 +108,7 @@ namespace xsimd
102108
{ \
103109
xsimd::A1(&first_1[i], batch_1); \
104110
xsimd::A2(&first_2[i], batch_2); \
105-
xsimd::A3(&out_first[i], f(batch_1, batch_2)); \
111+
xsimd::A3(&out_first[i], fb(batch_1, batch_2)); \
106112
} \
107113
\
108114
for (std::size_t i = align_end; i < size; ++i) \
@@ -130,6 +136,11 @@ namespace xsimd
130136
#undef XSIMD_LOOP_MACRO
131137
}
132138

139+
// Binary transform taking a single callable.
// Delegates to the two-input transform_batch, passing `f` as both the
// scalar function and the batch function.
template <class I1, class I2, class I3, class O1, class UF>
void transform(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
{
    return transform_batch(first_1, last_1, first_2, out_first, f, f);
}
133144

134145
// TODO: Remove this once we drop C++11 support
135146
namespace detail
@@ -141,9 +152,8 @@ namespace xsimd
141152
};
142153
}
143154

144-
145-
template <class Iterator1, class Iterator2, class Init, class BinaryFunction = detail::plus>
146-
Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
155+
template <class Iterator1, class Iterator2, class Init, class BinaryFunction, class BinaryFunctionBatch>
156+
Init reduce_batch(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun, BinaryFunctionBatch&& binfun_batch)
147157
{
148158
using value_type = typename std::decay<decltype(*first)>::type;
149159
using traits = simd_traits<value_type>;
@@ -180,7 +190,7 @@ namespace xsimd
180190
for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
181191
{
182192
xsimd::load_aligned(ptr, batch);
183-
batch_init = binfun(batch_init, batch);
193+
batch_init = binfun_batch(batch_init, batch);
184194
}
185195

186196
// reduce across batch
@@ -197,6 +207,93 @@ namespace xsimd
197207
return init;
198208
}
199209

210+
template <class Iterator1, class Iterator2, class Init, class BinaryFunction = detail::plus>
211+
Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
212+
{
213+
return reduce_batch(first, last, init, binfun, binfun);
214+
}
215+
216+
namespace detail
217+
{
218+
template <class T>
219+
struct count_batch
220+
{
221+
count_batch(T value)
222+
: value(value)
223+
{}
224+
225+
count_batch(const count_batch<T>&) = default;
226+
count_batch(count_batch<T>&&) = default;
227+
228+
template <class B>
229+
std::size_t operator()(const B& b)
230+
{
231+
static auto zero = B(T(0));
232+
static auto one = B(T(1));
233+
return static_cast<std::size_t>(xsimd::hadd(xsimd::select(b == value, one, zero)));
234+
}
235+
236+
private:
237+
T value;
238+
};
239+
}
240+
241+
template <class Iterator1, class Iterator2, class UnaryPredicate, class UnaryPredicateBatch>
242+
std::size_t count_if(Iterator1 first, Iterator2 last, UnaryPredicate&& predicate, UnaryPredicateBatch&& predicate_batch)
243+
{
244+
using value_type = typename std::decay<decltype(*first)>::type;
245+
using traits = simd_traits<value_type>;
246+
using batch_type = typename traits::type;
247+
248+
std::size_t size = static_cast<std::size_t>(std::distance(first, last));
249+
constexpr std::size_t simd_size = traits::size;
250+
251+
std::size_t counter(0);
252+
if(size < simd_size)
253+
{
254+
while(first != last)
255+
{
256+
counter += predicate(*first++);
257+
}
258+
return counter;
259+
}
260+
261+
const auto* const ptr_begin = &(*first);
262+
263+
std::size_t align_begin = xsimd::get_alignment_offset(ptr_begin, size, simd_size);
264+
std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1));
265+
266+
// reduce initial unaligned part
267+
for (std::size_t i = 0; i < align_begin; ++i)
268+
{
269+
counter += predicate(first[i]);
270+
}
271+
272+
// reduce aligned part
273+
batch_type batch;
274+
auto ptr = ptr_begin + align_begin;
275+
for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
276+
{
277+
xsimd::load_aligned(ptr, batch);
278+
counter += predicate_batch(batch);
279+
}
280+
281+
// reduce final unaligned part
282+
for (std::size_t i = align_end; i < size; ++i)
283+
{
284+
counter += predicate(first[i]);
285+
}
286+
287+
return counter;
288+
}
289+
290+
template <class Iterator1, class Iterator2, class T>
291+
std::size_t count(Iterator1 first, Iterator2 last, const T& value)
292+
{
293+
return count_if(first, last,
294+
[&value](const T& x) { return value == x; }, detail::count_batch<T>{value});
295+
}
296+
200297
}
201298

202299
#endif

Diff for: test/test_algorithms.cpp

+126-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,62 @@ template <class T>
3737
using test_allocator_type = std::allocator<T>;
3838
#endif
3939

40+
template <class C>
41+
struct types {
42+
using value_type = typename std::decay<decltype(*C().begin())>::type;
43+
using traits = xsimd::simd_traits<value_type>;
44+
using batch_type = typename traits::type;
45+
};
46+
47+
// Checks that the unary xsimd::transform_batch (separate scalar and batch
// functions) gives the same result as std::transform: even values are kept,
// odd values are squared.
TEST(algorithms, unary_transform_batch)
{
    using vector_type = std::vector<int, test_allocator_type<int>>;
    using batch_type = types<vector_type>::batch_type;

    vector_type actual(42, 0);
    std::iota(actual.begin(), actual.end(), 1);

    // Reference result computed with the standard algorithm.
    auto keep_even_square_odd = [](int x) {
        return !(x % 2) ? x : x*x;
    };
    vector_type expected = actual;
    std::transform(expected.begin(), expected.end(), expected.begin(), keep_even_square_odd);

    xsimd::transform_batch(actual.begin(), actual.end(), actual.begin(),
                           // NOTE: with C++14 a generic `[](auto x)` lambda would be simpler
                           [](int x) {
                               return !(x % 2) ? x : x*x;
                           },
                           // NOTE: with C++14 a generic `[](auto b)` lambda would be simpler
                           [](batch_type b) {
                               return xsimd::select(!(b % 2), b, b*b);
                           });

    EXPECT_TRUE(std::equal(expected.begin(), expected.end(), actual.begin()) && expected.size() == actual.size());
}
70+
71+
// Checks that the binary xsimd::transform_batch (separate scalar and batch
// functions) gives the same result as std::transform.
// For each pair: x + y when the sum is even, x*x + y*y otherwise.
TEST(algorithms, binary_transform_batch)
{
    using vector_type = std::vector<int, test_allocator_type<int>>;
    using batch_type = types<vector_type>::batch_type;
    auto flip_flop_a = vector_type(42, 0);
    auto flip_flop_b = vector_type(42, 0);
    std::iota(flip_flop_a.begin(), flip_flop_a.end(), 1);
    std::iota(flip_flop_b.begin(), flip_flop_b.end(), 3);
    // With plain iota inputs x + y is always even, so only the first branch
    // would ever run. Shift every other element of b by one so that both
    // branches are exercised.
    for (std::size_t i = 0; i < flip_flop_b.size(); i += 2)
    {
        flip_flop_b[i] += 1;
    }

    auto combine = [](int x, int y) {
        return !((x + y) % 2) ? x + y : x*x + y*y;
    };
    auto flip_flop_expected = flip_flop_a;
    std::transform(flip_flop_a.begin(), flip_flop_a.end(), flip_flop_b.begin(), flip_flop_expected.begin(), combine);

    auto flip_flop_result = vector_type(flip_flop_expected.size());
    xsimd::transform_batch(flip_flop_a.begin(), flip_flop_a.end(), flip_flop_b.begin(), flip_flop_result.begin(),
                           [](int x, int y) {
                               return !((x + y) % 2) ? x + y : x*x + y*y;
                           },
                           [](batch_type bx, batch_type by) {
                               // Fixed: the second term was `by+by`; it must be `by*by`
                               // to match the scalar function.
                               return xsimd::select(!((bx + by) % 2), bx + by, bx*bx + by*by);
                           });
    EXPECT_TRUE(std::equal(flip_flop_expected.begin(), flip_flop_expected.end(), flip_flop_result.begin()) && flip_flop_expected.size() == flip_flop_result.size());
}
95+
4096
TEST(algorithms, binary_transform)
4197
{
4298
std::vector<double> expected(93);
@@ -83,7 +139,6 @@ TEST(algorithms, binary_transform)
83139
std::fill(ca.begin(), ca.end(), -1); // erase
84140
}
85141

86-
87142
TEST(algorithms, unary_transform)
88143
{
89144
std::vector<double> expected(93);
@@ -216,6 +271,76 @@ TEST_F(xsimd_reduce, using_custom_binary_function)
216271
}
217272
}
218273

274+
// Checks xsimd::reduce_batch with a NaN-ignoring sum: the result must match
// std::accumulate using the same scalar function.
TEST(algorithms, reduce_batch)
{
    const double nan = std::numeric_limits<double>::quiet_NaN();
    using vector_type = std::vector<double, test_allocator_type<double>>;
    using batch_type = types<vector_type>::batch_type;
    auto vector_with_nan = vector_type(100, 0);
    std::iota(vector_with_nan.begin(), vector_with_nan.end(), 3.14);
    // Put a NaN at every odd index. The previous lambda tested a counter
    // that was never incremented, so no NaN was ever written.
    for (std::size_t idx = 1; idx < vector_with_nan.size(); idx += 2)
    {
        vector_with_nan[idx] = nan;
    }

    auto nansum_expected = std::accumulate(vector_with_nan.begin(), vector_with_nan.end(), 0.0,
                                           [](double x, double y) {
                                               return (std::isnan(x) ? 0.0 : x) + (std::isnan(y) ? 0.0 : y);
                                           });

    auto nansum = xsimd::reduce_batch(vector_with_nan.begin(), vector_with_nan.end(), 0.0,
                                      [](double x, double y) {
                                          return (std::isnan(x) ? 0.0 : x) + (std::isnan(y) ? 0.0 : y);
                                      },
                                      [](batch_type x, batch_type y) {
                                          static batch_type zero(0.0);
                                          // Zero out NaN lanes on both operands before adding.
                                          auto xnan = xsimd::isnan(x);
                                          auto ynan = xsimd::isnan(y);
                                          auto xs = xsimd::select(xnan, zero, x);
                                          auto ys = xsimd::select(ynan, zero, y);
                                          return xs + ys;
                                      });

    EXPECT_NEAR(nansum_expected, nansum, 1e-6);
}
307+
308+
// Checks that xsimd::count finds the three elements set to 123.321.
TEST(algorithms, count)
{
    using vector_type = std::vector<double, test_allocator_type<double>>;
    auto v = vector_type(100, 0);
    std::iota(v.begin(), v.end(), 3.14);
    v[12] = 123.321;
    v[42] = 123.321;
    v[93] = 123.321;

    // xsimd::count returns std::size_t; compare against the same type to
    // avoid a signed/unsigned comparison inside EXPECT_EQ.
    EXPECT_EQ(std::size_t(3), xsimd::count(v.begin(), v.end(), 123.321));
}
319+
320+
// Checks that xsimd::count_if (scalar predicate plus batch predicate) agrees
// with std::count_if for "value lies in [50, 70]".
TEST(algorithms, count_if)
{
    using vector_type = std::vector<int, test_allocator_type<int>>;
    using batch_type = types<vector_type>::batch_type;
    auto v = vector_type(100, 0);
    std::iota(v.begin(), v.end(), 1);

    // One scalar predicate shared by both algorithms, returning bool.
    const auto in_range = [](int x) {
        return x >= 50 && x <= 70;
    };

    // std::count_if returns a signed difference_type; convert it so both
    // sides of EXPECT_EQ have the same unsigned type.
    const auto count_expected = static_cast<std::size_t>(std::count_if(v.begin(), v.end(), in_range));

    const std::size_t count = xsimd::count_if(v.begin(), v.end(), in_range,
                                              [](batch_type b) {
                                                  static batch_type zero(0);
                                                  static batch_type one(1);
                                                  // In-range lanes become 1, others 0; hadd sums the lanes.
                                                  return xsimd::hadd(xsimd::select(b >= 50 && b <= 70, one, zero));
                                              });
    EXPECT_EQ(count_expected, count);
}
343+
219344
#if XSIMD_X86_INSTR_SET > XSIMD_VERSION_NUMBER_NOT_AVAILABLE || XSIMD_ARM_INSTR_SET > XSIMD_VERSION_NUMBER_NOT_AVAILABLE
220345
TEST(algorithms, iterator)
221346
{

0 commit comments

Comments
 (0)