Skip to content

Commit 73d9f7b

Browse files
authored
Merge pull request #1 from ThomasRetornaz/dev
Changes by @ThomasRetornaz
2 parents 9dac213 + e286519 commit 73d9f7b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+6982
-195
lines changed

CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,19 @@ install(FILES
4848
DESTINATION "${SIMDPP_PKGCONFIGDIR}"
4949
)
5050

51+
install(FILES
52+
"${CMAKE_CURRENT_SOURCE_DIR}/README.md"
53+
"${CMAKE_CURRENT_SOURCE_DIR}/CONTRIBUTING.md"
54+
"${CMAKE_CURRENT_SOURCE_DIR}/LICENSE_1_0.txt"
55+
DESTINATION "${SIMDPP_DOCDIR}"
56+
)
5157

5258
enable_testing()
5359

60+
option(ENABLE_BENCH "Set to on in order to compile bench suite, work only in release mode" OFF)
61+
5462
add_subdirectory(simdpp)
5563
add_subdirectory(test)
64+
if(ENABLE_BENCH)
65+
add_subdirectory(bench)
66+
endif()

bench/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Third-party dependencies are configured before the benchmark sources.
# NOTE(review): presumably thirdparty/ provides Google Benchmark, which insn/
# includes and links against - confirm against thirdparty/CMakeLists.txt.
add_subdirectory(thirdparty)
2+
# Per-architecture benchmark executables (bench_insn_<suffix>).
add_subdirectory(insn)

bench/insn/CMakeLists.txt

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Builds one benchmark executable per compilable SIMD architecture.
include_directories(${libsimdpp_SOURCE_DIR})
include_directories(${GOOGLE_BENCHMARK_INCLUDE_DIRS})

# Sources compiled into every benchmark executable.
set(TEST_BENCH_SOURCES
    main.cc
    main.h
)

# Benchmark translation units, built once per architecture.
set(BENCH_INSN_ARCH_SOURCES
    algorithm/transform_unary.cc
    algorithm/transform_binary.cc
    algorithm/reduce_unary.cc
    algorithm/reduce_binary.cc
    load_store.cc
)

set_property(GLOBAL PROPERTY USE_FOLDERS ON)
# Note: the list must be expanded as ${COMPILABLE_ARCHS}; a stray closing brace
# would be appended to the last architecture string and corrupt it.
foreach(ARCH ${COMPILABLE_ARCHS})
    simdpp_get_arch_info(CXX_FLAGS DEFINES_LIST SUFFIX ${ARCH})
    #message("Create benchmark for arch : ${SUFFIX} with flags: ${CXX_FLAGS} with defines ${DEFINES_LIST}")
    set(exename "bench_insn_${SUFFIX}")
    add_executable(${exename} ${BENCH_INSN_ARCH_SOURCES} ${TEST_BENCH_SOURCES})
    set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "${CXX_FLAGS}")
    set_target_properties(${exename} PROPERTIES FOLDER bench)
    # Setting COMPILE_FLAGS replaces the previous value, so every branch below
    # must repeat ${CXX_FLAGS}; otherwise the architecture-specific flags are
    # lost and all executables are built for the default instruction set.
    if(SIMDPP_MSVC)
        if(CMAKE_SIZEOF_VOID_P EQUAL 4)
            # enable _vectorcall on i386 builds (only works on MSVC 2013)
            #set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "/Gv")
        endif()
    elseif(SIMDPP_MSVC_INTEL)
        set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "${CXX_FLAGS} /Qstd=c++11")
    else()
        # Xcode clang linker spends very long time in deduplication pass when
        # linking the test executable unless -fvisibility-inlines-hidden is passed.
        set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "${CXX_FLAGS} -std=c++11 -O2 -Wall -Wextra -fvisibility-inlines-hidden")
    endif()
    if(WIN32)
        target_link_libraries(${exename}
            PUBLIC benchmark
            PUBLIC shlwapi.lib
        )
    else()
        target_link_libraries(${exename}
            PUBLIC benchmark
            PUBLIC pthread
        )
        # NOTE(review): the dependency on ${GOOGLE_BENCHMARK} is only declared
        # for non-Windows builds - confirm whether WIN32 needs it as well.
        add_dependencies(${exename} ${GOOGLE_BENCHMARK})
    endif()
endforeach()
50+
51+

bench/insn/algorithm/reduce_binary.cc

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
/* Copyright (C) 2018 Povilas Kanapickas <[email protected]>
2+
Copyright (C) 2018 Thomas Retornaz <[email protected]>
3+
4+
Distributed under the Boost Software License, Version 1.0.
5+
(See accompanying file LICENSE_1_0.txt or copy at
6+
http://www.boost.org/LICENSE_1_0.txt)
7+
*/
8+
9+
#include "benchmark/benchmark.h"
10+
#include <vector>
11+
#include <numeric>
12+
#include <iterator>
13+
#include <simdpp/simd.h>
14+
//algorithm
15+
#include <simdpp/algorithm/reduce.h>
16+
17+
18+
namespace {
19+
20+
template< typename T>
21+
struct BinaryOpPlus
22+
{
23+
public:
24+
BinaryOpPlus() {}
25+
SIMDPP_INL T operator()(T const &a0, T const &a1) const SIMDPP_NOEXCEPT
26+
{
27+
return a0 + a1;
28+
}
29+
30+
template<typename U>
31+
SIMDPP_INL U operator()(U const &a0, U const &a1) const SIMDPP_NOEXCEPT
32+
{
33+
return a0 + a1;
34+
}
35+
};
36+
37+
template <typename T>
38+
struct GeneratorConstant
39+
{
40+
GeneratorConstant(T constant) { m_constant = constant; }
41+
T operator()() { return m_constant; }
42+
T m_constant;
43+
};
44+
45+
46+
template<typename T, class Generator>
47+
std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>> DataGenerator(std::size_t size, Generator gen)
48+
{
49+
50+
using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
51+
vector_aligned_t input(size);
52+
std::generate(input.begin(), input.end(), gen);
53+
return input;
54+
}
55+
56+
/*********************BINARY****************************/
57+
58+
template<typename T>
59+
class ReduceBinaryFixture : public ::benchmark::Fixture {
60+
public:
61+
void SetUp(const ::benchmark::State& st)
62+
{
63+
m_inputvect = DataGenerator<T, GeneratorConstant<T>>((size_t)st.range(0), GeneratorConstant<T>(1));
64+
}
65+
void TearDown(const ::benchmark::State&)
66+
{
67+
m_inputvect.clear();
68+
}
69+
using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
70+
vector_aligned_t m_inputvect;
71+
};
72+
73+
//UINT64_T
74+
// Times simdpp::reduce over the fixture's uint64_t input, using BinaryOpPlus
// as the combining operation.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
{
    uint64_t init = 0;
    auto opPlus = BinaryOpPlus<uint64_t>();
    uint64_t neutral = 0;
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the unused result.
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
86+
87+
// Baseline for the SIMD variant: times std::accumulate over the fixture's
// uint64_t input with the same BinaryOpPlus functor.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
{
    uint64_t init = 0;
    auto opPlus = BinaryOpPlus<uint64_t>();
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the unused result.
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
98+
99+
100+
//FLOAT
101+
// Times simdpp::reduce over the fixture's float input, using BinaryOpPlus as
// the combining operation.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryFLOAT_SIMD_Test, float)(benchmark::State& st)
{
    float init = 0.0f;
    auto opPlus = BinaryOpPlus<float>();
    float neutral = 0.0f;
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the unused result.
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryFLOAT_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
113+
114+
// Baseline for the SIMD variant: times std::accumulate over the fixture's
// float input with the BinaryOpPlus functor.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryFLOAT_STD_Test, float)(benchmark::State& st)
{
    float init = 0.0f;
    // Instantiated for float so the functor matches the element type, as in
    // the SIMD counterpart, instead of relying on the generic overload.
    auto opPlus = BinaryOpPlus<float>();
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the unused result.
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryFLOAT_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
125+
126+
127+
//DOUBLE
128+
// Times simdpp::reduce over the fixture's double input, using BinaryOpPlus as
// the combining operation.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryDOUBLE_SIMD_Test, double)(benchmark::State& st)
{
    double init = 0.0;
    auto opPlus = BinaryOpPlus<double>();
    double neutral = 0.0;
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the unused result.
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryDOUBLE_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
140+
141+
// Baseline for the SIMD variant: times std::accumulate over the fixture's
// double input with the BinaryOpPlus functor.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryDOUBLE_STD_Test, double)(benchmark::State& st)
{
    double init = 0.0;
    // Instantiated for double so the functor matches the element type, as in
    // the SIMD counterpart, instead of relying on the generic overload.
    auto opPlus = BinaryOpPlus<double>();
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the unused result.
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryDOUBLE_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
152+
153+
154+
155+
} // namespace

bench/insn/algorithm/reduce_unary.cc

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/* Copyright (C) 2018 Povilas Kanapickas <[email protected]>
2+
Copyright (C) 2018 Thomas Retornaz <[email protected]>
3+
4+
Distributed under the Boost Software License, Version 1.0.
5+
(See accompanying file LICENSE_1_0.txt or copy at
6+
http://www.boost.org/LICENSE_1_0.txt)
7+
*/
8+
9+
#include "benchmark/benchmark.h"
10+
#include <vector>
11+
#include <numeric>
12+
#include <iterator>
13+
#include <simdpp/simd.h>
14+
//algorithm
15+
#include <simdpp/algorithm/reduce.h>
16+
17+
18+
namespace {
19+
20+
/// Generator functor that yields the same value on every call.
///
/// The value is stored through the member initializer list, so T does not need
/// to be default-constructible, and operator() is const so the generator can
/// also be invoked through a const object.
template <typename T>
struct GeneratorConstant
{
    /// @param constant value returned by every invocation of operator().
    explicit GeneratorConstant(T constant) : m_constant(constant) {}

    /// @return a copy of the stored constant.
    T operator()() const { return m_constant; }

    T m_constant;
};
27+
28+
29+
template<typename T, class Generator>
30+
std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>> DataGenerator(std::size_t size, Generator gen)
31+
{
32+
33+
using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
34+
vector_aligned_t input(size);
35+
std::generate(input.begin(), input.end(), gen);
36+
return input;
37+
}
38+
39+
/*********************UNARY****************************/
40+
41+
template<typename T>
42+
class ReduceUnaryFixture : public ::benchmark::Fixture {
43+
public:
44+
void SetUp(const ::benchmark::State& st)
45+
{
46+
m_inputvect = DataGenerator<T, GeneratorConstant<T>>((size_t)st.range(0), GeneratorConstant<T>(1));
47+
}
48+
void TearDown(const ::benchmark::State&)
49+
{
50+
m_inputvect.clear();
51+
}
52+
using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
53+
vector_aligned_t m_inputvect;
54+
};
55+
56+
//UINT64_T
57+
// Times simdpp::reduce (initial-value overload) over the fixture's uint64_t input.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the unused result.
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), static_cast<uint64_t>(0)));
    }
}
BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
66+
67+
// Baseline for the SIMD variant: times std::accumulate over the fixture's
// uint64_t input.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the unused result.
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.begin(), m_inputvect.end(), static_cast<uint64_t>(0)));
    }
}
BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
76+
77+
78+
//FLOAT
79+
// Times simdpp::reduce (initial-value overload) over the fixture's float input.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryFLOAT_SIMD_Test, float)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the unused result.
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), 0.0f));
    }
}
BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryFLOAT_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
88+
89+
// Baseline for the SIMD variant: times std::accumulate over the fixture's
// float input.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryFLOAT_STD_Test, float)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the unused result.
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.begin(), m_inputvect.end(), 0.0f));
    }
}
BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryFLOAT_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
98+
99+
//DOUBLE
100+
// Times simdpp::reduce (initial-value overload) over the fixture's double input.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryDOUBLE_SIMD_Test, double)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the unused result.
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), 0.0));
    }
}
BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryDOUBLE_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
109+
110+
// Baseline for the SIMD variant: times std::accumulate over the fixture's
// double input.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryDOUBLE_STD_Test, double)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the compiler from discarding the unused result.
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.begin(), m_inputvect.end(), 0.0));
    }
}
BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryDOUBLE_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
119+
120+
121+
122+
} // namespace

0 commit comments

Comments
 (0)