Skip to content

Commit e286519

Browse files
* warn --
* add binary flavor of transform reduce bench
1 parent 5977ee9 commit e286519

File tree

9 files changed

+575
-18
lines changed

9 files changed

+575
-18
lines changed

bench/insn/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ set(TEST_BENCH_SOURCES
88

99
set(BENCH_INSN_ARCH_SOURCES
1010
algorithm/transform_unary.cc
11+
algorithm/transform_binary.cc
1112
algorithm/reduce_unary.cc
13+
algorithm/reduce_binary.cc
1214
load_store.cc
1315
)
1416

@@ -23,7 +25,7 @@ foreach(ARCH ${COMPILABLE_ARCHS}})
2325
if(SIMDPP_MSVC)
2426
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
2527
# enable _vectorcall on i386 builds (only works on MSVC 2013)
26-
set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "/Gv")
28+
#set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "/Gv")
2729
endif()
2830
elseif(SIMDPP_MSVC_INTEL)
2931
set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "/Qstd=c++11")

bench/insn/algorithm/reduce_binary.cc

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
/* Copyright (C) 2018 Povilas Kanapickas <[email protected]>
2+
Copyright (C) 2018 Thomas Retornaz <[email protected]>
3+
4+
Distributed under the Boost Software License, Version 1.0.
5+
(See accompanying file LICENSE_1_0.txt or copy at
6+
http://www.boost.org/LICENSE_1_0.txt)
7+
*/
8+
9+
#include "benchmark/benchmark.h"
10+
#include <vector>
11+
#include <numeric>
12+
#include <iterator>
13+
#include <simdpp/simd.h>
14+
//algorithm
15+
#include <simdpp/algorithm/reduce.h>
16+
17+
18+
namespace {
19+
20+
template< typename T>
21+
struct BinaryOpPlus
22+
{
23+
public:
24+
BinaryOpPlus() {}
25+
SIMDPP_INL T operator()(T const &a0, T const &a1) const SIMDPP_NOEXCEPT
26+
{
27+
return a0 + a1;
28+
}
29+
30+
template<typename U>
31+
SIMDPP_INL U operator()(U const &a0, U const &a1) const SIMDPP_NOEXCEPT
32+
{
33+
return a0 + a1;
34+
}
35+
};
36+
37+
// Generator functor that yields the same stored value on every call.
template <typename T>
struct GeneratorConstant
{
    // Remember the value to hand out.
    GeneratorConstant(T constant) : m_constant(constant) {}

    // Return the stored value; the functor keeps no other state.
    T operator()() { return m_constant; }

    T m_constant;
};
44+
45+
46+
template<typename T, class Generator>
47+
std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>> DataGenerator(std::size_t size, Generator gen)
48+
{
49+
50+
using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
51+
vector_aligned_t input(size);
52+
std::generate(input.begin(), input.end(), gen);
53+
return input;
54+
}
55+
56+
/*********************BINARY****************************/
57+
58+
template<typename T>
59+
class ReduceBinaryFixture : public ::benchmark::Fixture {
60+
public:
61+
void SetUp(const ::benchmark::State& st)
62+
{
63+
m_inputvect = DataGenerator<T, GeneratorConstant<T>>((size_t)st.range(0), GeneratorConstant<T>(1));
64+
}
65+
void TearDown(const ::benchmark::State&)
66+
{
67+
m_inputvect.clear();
68+
}
69+
using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
70+
vector_aligned_t m_inputvect;
71+
};
72+
73+
//UINT64_T
74+
// Benchmark: sum the fixture's uint64_t buffer with simdpp::reduce.
// The buffer length of each run is the Arg() value registered below.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
{
    uint64_t init = 0;     // starting value of the sum
    uint64_t neutral = 0;  // neutral element passed to simdpp::reduce
    auto opPlus = BinaryOpPlus<uint64_t>();
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the result.
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
86+
87+
// Baseline benchmark: sum the fixture's uint64_t buffer with std::accumulate.
// The buffer length of each run is the Arg() value registered below.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
{
    uint64_t init = 0;  // starting value of the sum
    auto opPlus = BinaryOpPlus<uint64_t>();
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the result.
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
98+
99+
100+
//FLOAT
101+
// Benchmark: sum the fixture's float buffer with simdpp::reduce.
// The buffer length of each run is the Arg() value registered below.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryFLOAT_SIMD_Test, float)(benchmark::State& st)
{
    float init = 0.0f;     // starting value of the sum
    float neutral = 0.0f;  // neutral element passed to simdpp::reduce
    auto opPlus = BinaryOpPlus<float>();
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the result.
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryFLOAT_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
113+
114+
// Baseline benchmark: sum the fixture's float buffer with std::accumulate.
// The buffer length of each run is the Arg() value registered below.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryFLOAT_STD_Test, float)(benchmark::State& st)
{
    float init = 0.0f;  // starting value of the sum
    // Instantiate the functor for the element type of this benchmark so the
    // non-template float overload is the one std::accumulate invokes.
    auto opPlus = BinaryOpPlus<float>();
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the result.
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryFLOAT_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
125+
126+
127+
//DOUBLE
128+
// Benchmark: sum the fixture's double buffer with simdpp::reduce.
// The buffer length of each run is the Arg() value registered below.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryDOUBLE_SIMD_Test, double)(benchmark::State& st)
{
    double init = 0.0;     // starting value of the sum
    double neutral = 0.0;  // neutral element passed to simdpp::reduce
    auto opPlus = BinaryOpPlus<double>();
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the result.
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryDOUBLE_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
140+
141+
// Baseline benchmark: sum the fixture's double buffer with std::accumulate.
// The buffer length of each run is the Arg() value registered below.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryDOUBLE_STD_Test, double)(benchmark::State& st)
{
    double init = 0.0;  // starting value of the sum
    // Instantiate the functor for the element type of this benchmark so the
    // non-template double overload is the one std::accumulate invokes.
    auto opPlus = BinaryOpPlus<double>();
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the result.
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryDOUBLE_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
152+
153+
154+
155+
} // namespace
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
/* Copyright (C) 2018 Povilas Kanapickas <[email protected]>
2+
Copyright (C) 2018 Thomas Retornaz <[email protected]>
3+
4+
Distributed under the Boost Software License, Version 1.0.
5+
(See accompanying file LICENSE_1_0.txt or copy at
6+
http://www.boost.org/LICENSE_1_0.txt)
7+
*/
8+
9+
#include "benchmark/benchmark.h"
10+
#include <vector>
11+
#include <algorithm>
12+
#include <iterator>
13+
#include <simdpp/simd.h>
14+
//algorithm
15+
#include <simdpp/algorithm/transform.h>
16+
17+
18+
namespace {
19+
20+
template< typename T>
21+
struct BinaryOpAdd
22+
{
23+
public:
24+
BinaryOpAdd() {}
25+
SIMDPP_INL T operator()(T const &a0, T const &a1) const SIMDPP_NOEXCEPT
26+
{
27+
return a0 + a1;
28+
}
29+
30+
template<typename U>
31+
SIMDPP_INL U operator()(U const &a0, U const &a1) const SIMDPP_NOEXCEPT
32+
{
33+
using namespace simdpp;
34+
return a0 + a1;
35+
}
36+
};
37+
38+
39+
// Generator functor that yields the same stored value on every call.
template <typename T>
struct GeneratorConstant
{
    // Remember the value to hand out.
    GeneratorConstant(T constant) : m_constant(constant) {}

    // Return the stored value; the functor keeps no other state.
    T operator()() { return m_constant; }

    T m_constant;
};
46+
47+
48+
template<typename T, class Generator>
49+
std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>> DataGenerator(std::size_t size, Generator gen)
50+
{
51+
52+
using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
53+
vector_aligned_t input(size);
54+
std::generate(input.begin(), input.end(), gen);
55+
return input;
56+
}
57+
58+
/*********************Binary****************************/
59+
60+
template<typename T>
61+
class TransformBinaryFixture : public ::benchmark::Fixture {
62+
public:
63+
void SetUp(const ::benchmark::State& st)
64+
{
65+
m_inputvect = DataGenerator<T, GeneratorConstant<T>>((size_t)st.range(0), GeneratorConstant<T>(42));
66+
m_inputvect2 = DataGenerator<T, GeneratorConstant<T>>((size_t)st.range(0), GeneratorConstant<T>(42));
67+
m_outputvect.resize((size_t)st.range(0));
68+
}
69+
void TearDown(const ::benchmark::State&)
70+
{
71+
m_inputvect.clear();
72+
m_inputvect2.clear();
73+
m_outputvect.clear();
74+
}
75+
using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
76+
vector_aligned_t m_inputvect;
77+
vector_aligned_t m_inputvect2;
78+
vector_aligned_t m_outputvect;
79+
BinaryOpAdd<T> opPlus= BinaryOpAdd<T>();
80+
};
81+
82+
//UINT8_T
83+
// Benchmark: element-wise add of the two uint8_t input buffers into the
// output buffer with simdpp::transform. Buffer length is the Arg() value.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT8_SIMD_Test, uint8_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the call.
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT8_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
92+
93+
// Baseline benchmark: element-wise add of the two uint8_t input buffers into
// the output buffer with std::transform. Buffer length is the Arg() value.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT8_STD_Test, uint8_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // The second range is m_inputvect2, matching the operands of the
        // SIMD benchmark so both measure the same memory traffic.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT8_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
102+
103+
//UINT16_T
104+
// Benchmark: element-wise add of the two uint16_t input buffers into the
// output buffer with simdpp::transform. Buffer length is the Arg() value.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT16_SIMD_Test, uint16_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the call.
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT16_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
113+
114+
// Baseline benchmark: element-wise add of the two uint16_t input buffers into
// the output buffer with std::transform. Buffer length is the Arg() value.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT16_STD_Test, uint16_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // The second range is m_inputvect2, matching the operands of the
        // SIMD benchmark so both measure the same memory traffic.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT16_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
123+
124+
//UINT32_T
125+
// Benchmark: element-wise add of the two uint32_t input buffers into the
// output buffer with simdpp::transform. Buffer length is the Arg() value.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT32_SIMD_Test, uint32_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the call.
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT32_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
134+
135+
// Baseline benchmark: element-wise add of the two uint32_t input buffers into
// the output buffer with std::transform. Buffer length is the Arg() value.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT32_STD_Test, uint32_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // The second range is m_inputvect2, matching the operands of the
        // SIMD benchmark so both measure the same memory traffic.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
    }
}
// Sizes mirror the SIMD registration (32, not 31) so results line up row by row.
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT32_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
144+
145+
//UINT64_T
146+
// Benchmark: element-wise add of the two uint64_t input buffers into the
// output buffer with simdpp::transform. Buffer length is the Arg() value.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the call.
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
155+
156+
// Baseline benchmark: element-wise add of the two uint64_t input buffers into
// the output buffer with std::transform. Buffer length is the Arg() value.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // The second range is m_inputvect2, matching the operands of the
        // SIMD benchmark so both measure the same memory traffic.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
165+
166+
//FLOAT
167+
// Benchmark: element-wise add of the two float input buffers into the
// output buffer with simdpp::transform. Buffer length is the Arg() value.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryFloat_SIMD_Test, float)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the call.
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryFloat_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
176+
177+
178+
// Baseline benchmark: element-wise add of the two float input buffers into
// the output buffer with std::transform. Buffer length is the Arg() value.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryFloat_STD_Test, float)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // The second range is m_inputvect2, matching the operands of the
        // SIMD benchmark so both measure the same memory traffic.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryFloat_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
187+
188+
//DOUBLE
189+
// Benchmark: element-wise add of the two double input buffers into the
// output buffer with simdpp::transform. Buffer length is the Arg() value.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryDouble_SIMD_Test, double)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the call.
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryDouble_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
198+
199+
// Baseline benchmark: element-wise add of the two double input buffers into
// the output buffer with std::transform. Buffer length is the Arg() value.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryDouble_STD_Test, double)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // The second range is m_inputvect2, matching the operands of the
        // SIMD benchmark so both measure the same memory traffic.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryDouble_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
208+
209+
} // namespace

0 commit comments

Comments
 (0)