Skip to content

Commit 556a5a7

Browse files
committed
Refactor and improvements in current ParallelSTL code
* Refactored files and namespaces to clearly differentiate between the standard Parallel STL implementation and the SYCL policy * Improved documentation and usage of Doxygen * Added all variants of sort, for_each, for_each_n and transform * Unit tests for all the algorithms * Multiple improvements on the build systems * Added basic benchmarking infrastructure
1 parent 14d60c7 commit 556a5a7

34 files changed

+4794
-360
lines changed

.clang-format

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
---
2+
Language: Cpp
3+
# BasedOnStyle: Google
4+
AccessModifierOffset: -1
5+
ConstructorInitializerIndentWidth: 4
6+
AlignEscapedNewlinesLeft: true
7+
AlignTrailingComments: true
8+
AllowAllParametersOfDeclarationOnNextLine: true
9+
AllowShortBlocksOnASingleLine: false
10+
AllowShortCaseLabelsOnASingleLine: false
11+
AllowShortIfStatementsOnASingleLine: true
12+
AllowShortLoopsOnASingleLine: true
13+
AllowShortFunctionsOnASingleLine: All
14+
AlwaysBreakAfterDefinitionReturnType: false
15+
AlwaysBreakTemplateDeclarations: true
16+
AlwaysBreakBeforeMultilineStrings: true
17+
BreakBeforeBinaryOperators: None
18+
BreakBeforeTernaryOperators: true
19+
BreakConstructorInitializersBeforeComma: false
20+
BinPackParameters: true
21+
BinPackArguments: true
22+
ColumnLimit: 80
23+
ConstructorInitializerAllOnOneLineOrOnePerLine: true
24+
DerivePointerAlignment: true
25+
ExperimentalAutoDetectBinPacking: false
26+
IndentCaseLabels: true
27+
IndentWrappedFunctionNames: false
28+
IndentFunctionDeclarationAfterType: false
29+
MaxEmptyLinesToKeep: 1
30+
KeepEmptyLinesAtTheStartOfBlocks: false
31+
NamespaceIndentation: None
32+
ObjCSpaceAfterProperty: false
33+
ObjCSpaceBeforeProtocolList: false
34+
PenaltyBreakBeforeFirstCallParameter: 1
35+
PenaltyBreakComment: 300
36+
PenaltyBreakString: 1000
37+
PenaltyBreakFirstLessLess: 120
38+
PenaltyExcessCharacter: 1000000
39+
PenaltyReturnTypeOnItsOwnLine: 200
40+
PointerAlignment: Left
41+
SpacesBeforeTrailingComments: 2
42+
Cpp11BracedListStyle: true
43+
Standard: Auto
44+
IndentWidth: 2
45+
TabWidth: 8
46+
UseTab: Never
47+
BreakBeforeBraces: Attach
48+
SpacesInParentheses: false
49+
SpacesInSquareBrackets: false
50+
SpacesInAngles: false
51+
SpaceInEmptyParentheses: false
52+
SpacesInCStyleCastParentheses: false
53+
SpaceAfterCStyleCast: false
54+
SpacesInContainerLiterals: true
55+
SpaceBeforeAssignmentOperators: true
56+
ContinuationIndentWidth: 4
57+
CommentPragmas: '^ IWYU pragma:'
58+
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
59+
SpaceBeforeParens: ControlStatements
60+
DisableFormat: false
61+
...
62+

CMakeLists.txt

+45-8
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
cmake_minimum_required (VERSION 2.8.12.2)
22
project (SyclSTL)
33

4+
enable_testing()  # Turn on CTest support for this directory and everything below it
5+
6+
option(PARALLEL_STL_BENCHMARKS "Build the internal benchmarks" OFF)  # Off by default; gates add_subdirectory(benchmarks) further down
7+
48
if(HEADER_ONLY)
59
message(WARNING "The SYCL implementation is header-only")
610
else(HEADER_ONLY)
@@ -9,13 +13,13 @@ else(HEADER_ONLY)
913
find_library(SYCL_LIBRARY
1014
NAMES SYCL SYCL_d
1115
HINTS ${SYCL_PATH}
12-
PATH_SUFFIXES lib
16+
PATH_SUFFIXES lib
1317
)
1418

1519
find_library(OPENCL_LIBRARY
16-
NAMES OpenCL
20+
NAMES OpenCL
1721
HINTS ${OPENCL_ROOT_DIR}
18-
PATH_SUFFIXES lib lib64
22+
PATH_SUFFIXES lib lib64
1923
)
2024

2125
if(NOT DEFINED OPENCL_LIBRARY)
@@ -39,8 +43,8 @@ else(HEADER_ONLY)
3943
if (UNIX)
4044
# TODO(Ruyman) Figure out the linux distribution
4145
# Fix for device compiler not finding specific C++ headers with Ubuntu
42-
set(CMAKE_INCLUDE_PATH "${CMAKE_INCLUDE_PATH}"
43-
"/usr/include/c++/4.8.2/"
46+
set(CMAKE_INCLUDE_PATH "${CMAKE_INCLUDE_PATH}"
47+
"/usr/include/c++/4.8.2/"
4448
"/usr/include/x86_64-linux-gnu/c++/4.8/")
4549
endif()
4650

@@ -51,7 +55,7 @@ else(HEADER_ONLY)
5155
set(DEVICE_COMPILER_NAME "compute++")
5256
endif(WIN32)
5357
endif (NOT DEFINED DEVICE_COMPILER_NAME)
54-
set(DEVICE_COMPILER "${DEVICE_COMPILER_PATH}/${DEVICE_COMPILER_NAME}"
58+
set(DEVICE_COMPILER "${DEVICE_COMPILER_PATH}/${DEVICE_COMPILER_NAME}"
5559
CACHE FILEPATH "device compiler")
5660
add_custom_target(compiler)
5761

@@ -65,14 +69,47 @@ endif(HEADER_ONLY)
6569
include_directories("${SYCL_PATH}/include")
6670
include_directories("${SYCL_PATH}/include/image_library")
6771

68-
72+
#####################
#  add_sycl_target
#
#  Adds a source file containing SYCL code as a CMake executable target
#  linked against SyclSTL.
#  Unless HEADER_ONLY is set, it also creates the dependency to the device
#  compiler (through build_spir) and force-includes the generated
#  <file name>.sycl file when the source is compiled by the host compiler.
#
#  exe_name   : Final name of the executable (also used as the target name)
#  file       : Name of the source file
#  binary_dir : Binary directory to output the .sycl files; the generated
#               file is force-included from this same directory
#
function(add_sycl_target exe_name file binary_dir)
  # Only the file name (without its directory) is used to name the .sycl file.
  get_filename_component(filename ${file} NAME)
  add_executable (${exe_name} ${file})
  target_link_libraries(${exe_name} SyclSTL)

  # Request C++11 through target properties. These properties only exist in
  # CMake >= 3.1, so the explicit -std=c++11 flag below is still needed for
  # the minimum CMake version this project declares.
  set_property(TARGET ${exe_name} PROPERTY CXX_STANDARD "11")
  set_property(TARGET ${exe_name} PROPERTY CXX_STANDARD_REQUIRED True)

  if (NOT HEADER_ONLY)
    build_spir(${exe_name} ${file} ${binary_dir})

    # Build the path from binary_dir (not CMAKE_CURRENT_BINARY_DIR) so that it
    # always matches the directory handed to build_spir above.
    # NOTE(review): assumes build_spir writes <binary_dir>/<file name>.sycl,
    # as the binary_dir documentation states - confirm against build_spir.
    set(sycl_header "${binary_dir}/${filename}.sycl")

    if (MSVC)
      # /FI is the MSVC forced-include switch. -std=c++11 is not an MSVC
      # option (it is ignored with a warning), so it is not passed here.
      set_property(TARGET ${exe_name} PROPERTY COMPILE_FLAGS
        "/FI ${sycl_header} ${CMAKE_CXX_FLAGS}")
    else(MSVC)
      set_property(TARGET ${exe_name} PROPERTY COMPILE_FLAGS
        "-std=c++11 -include ${sycl_header} ${CMAKE_CXX_FLAGS}")
    endif(MSVC)
  endif (NOT HEADER_ONLY)

endfunction(add_sycl_target)
69103

70104
# Build the parallel stl
71105

72106
include_directories("include")
73107

74108
add_subdirectory (src)
75109
add_subdirectory (examples)
76-
add_subdirectory (tests)
77110

111+
add_subdirectory (tests)
78112

113+
if (PARALLEL_STL_BENCHMARKS)
114+
add_subdirectory (benchmarks)
115+
endif()

README.md

+48-26
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ using the Khronos SYCL standard.
77
What is Parallel STL
88
-----------------------
99

10-
Parallel STL is an implementation of the Technical Specification for C++
11-
Extensions for Parallelism, current document number
12-
[N4409](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4409.pdf).
10+
Parallel STL is an implementation of the Technical Specification for C++
11+
Extensions for Parallelism, current document number
12+
[N4507](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4507.pdf).
1313
This technical specification describes _a set of requirements for
1414
implementations of an interface that computer programs written in
1515
C++ programming language may use to invoke algorithms with parallel
@@ -19,27 +19,25 @@ offers the opportunity to users to specify _execution policies_ to
1919
traditional STL algorithms, which will enable the execution of
2020
those algorithms in parallel.
2121
The various policies can specify different kinds of parallel execution.
22-
For example,
23-
24-
```c++
25-
std::vector<int> v = ...
26-
// Traditional sequential sort
27-
std::sort(vec.begin(), vec.end());
28-
// Explicit sequential sort
29-
std::sort(seq, vec.begin(), vec.end());
30-
// Explicit parallel sort
31-
std::sort(par, vec.begin(), vec.end());
32-
```
22+
For example,
23+
24+
std::vector<int> vec = ...
25+
// Traditional sequential sort
26+
std::sort(vec.begin(), vec.end());
27+
// Explicit sequential sort
28+
std::sort(seq, vec.begin(), vec.end());
29+
// Explicit parallel sort
30+
std::sort(par, vec.begin(), vec.end());
31+
3332

3433
What is SYCL
3534
----------------------
3635

37-
[SYCL](https://www.khronos.org/opencl/sycl) is a royalty-free,
36+
[SYCL](https://www.khronos.org/opencl/sycl) is a royalty-free,
3837
cross-platform C++ abstraction layer that builds on top of OpenCL.
3938
SYCL enables single-source development of OpenCL applications in C++ whilst
4039
enabling traditional host compilers to produce standard C++ code.
4140

42-
4341
The SyclSTL
4442
---------------------
4543

@@ -50,11 +48,11 @@ Currently, the following STL algorithms are implemented:
5048
* sort : Bitonic sort for ranges whose size is a power of two, sequential sort
5149
otherwise.
5250
* transform : Parallel iteration (one thread per element) on the device.
53-
* for_each : Parallel iteration (one thread per element) on the device.
51+
* for\_each : Parallel iteration (one thread per element) on the device.
5452

5553
Some optimizations are implemented, for example, the ability of passing
5654
iterators to buffers rather than STL containers to reduce the amount of
57-
information copied in and out, and the ability of specifying a queue
55+
information copied in and out, and the ability of specifying a queue
5856
to the SYCL policy so that queue is used for the various kernels (potentially
5957
enabling asynchronous execution of the calls).
6058

@@ -64,14 +62,38 @@ Building the project
6462
The project uses CMake in order to produce build files.
6563
Simply create a build directory and run CMake as follows:
6664

67-
```
68-
$ mkdir build
69-
$ cd build
70-
$ cmake ../ -DSYCL_PATH=/path/to/sycl \
71-
-DOPENCL_ROOT_DIR=/path/to/opencl/dir
72-
$ make
73-
```
65+
$ mkdir build
66+
$ cd build
67+
$ cmake ../ -DSYCL_PATH=/path/to/sycl \
68+
-DOPENCL_ROOT_DIR=/path/to/opencl/dir
69+
$ make
70+
7471
Usual CMake options are available (e.g. building debug or release).
7572

76-
If Google Mock is found in external/gmock then the unit tests are build.
73+
If Google Mock is found in external/gmock, a set of unit tests is built.
74+
Unit tests can be run by running CTest in the binary directory.
75+
76+
Building the documentation
77+
----------------------------
78+
79+
Source code is documented using Doxygen.
80+
To build the documentation as an HTML file, navigate to the doc
81+
directory and run doxygen from there.
82+
83+
$ cd doc
84+
$ doxygen
85+
86+
This will generate the html pages inside the doc\_output directory.
87+
88+
Limitations
89+
------------
90+
91+
The Lambda functions that you can pass to the algorithms have the same
92+
restrictions as any SYCL kernel.
93+
94+
While using lambda functions, the compiler needs to find a name for that lambda
95+
function. To provide a lambda name, the user must do the following:
7796

97+
cl::sycl::queue q;
98+
sycl::sycl_execution_policy<class SortAlgorithm3> snp(q);
99+
sort(snp, v.begin(), v.end(), [=](int a, int b) { return a >= b; });

TODO.md

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Parallel STL TODO List
2+
==============================
3+
4+
Next tasks to be done
5+
-----------------------
6+
7+
Include the specialization of commonly templated std functions, such as
8+
std::is_equals and similar ones.

benchmarks/CMakeLists.txt

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Every .cpp file found in this directory is built as its own benchmark
# executable through add_sycl_target.
file(GLOB BENCHMARK_SOURCES ./*.cpp)

foreach(file IN LISTS BENCHMARK_SOURCES)
  # The target is named after the source file name with ".exe" appended
  # (for example basic.cpp becomes basic.cpp.exe).
  get_filename_component(filename ${file} NAME)
  set(exe_name "${filename}.exe")

  message(STATUS "File ${file} Filename ${filename} Exename ${exe_name}")
  add_sycl_target(${exe_name} ${file} ${CMAKE_CURRENT_BINARY_DIR})
endforeach()

benchmarks/basic.cpp

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/* Copyright (c) 2015 The Khronos Group Inc.
2+
3+
Permission is hereby granted, free of charge, to any person obtaining a
4+
copy of this software and/or associated documentation files (the
5+
"Materials"), to deal in the Materials without restriction, including
6+
without limitation the rights to use, copy, modify, merge, publish,
7+
distribute, sublicense, and/or sell copies of the Materials, and to
8+
permit persons to whom the Materials are furnished to do so, subject to
9+
the following conditions:
10+
11+
The above copyright notice and this permission notice shall be included
12+
in all copies or substantial portions of the Materials.
13+
14+
MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
15+
KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
16+
SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
17+
https://www.khronos.org/registry/
18+
19+
THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25+
MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
26+
27+
*/
28+
29+
#include <algorithm>
30+
#include <vector>
31+
#include <string>
32+
#include <iostream>
33+
34+
#include <experimental/algorithm>
35+
#include <sycl/execution_policy>
36+
37+
#include "benchmark.h"
38+
39+
using namespace sycl::helpers;
40+
41+
/** benchmark_sort
 * @brief Times the parallel sort under a SYCL execution policy on a vector
 *        of ints filled in descending order (num_elems, ..., 1).
 * @param numReps   Repetition count forwarded to benchmark<>::duration
 * @param num_elems Number of elements placed in the vector
 * @return The value produced by benchmark<>::duration
 */
benchmark<>::time_units_t benchmark_sort(const unsigned numReps,
                                         const unsigned num_elems) {
  std::vector<int> data;

  // Fill with num_elems, num_elems - 1, ..., 1.
  int next = num_elems;
  while (next > 0) {
    data.push_back(next);
    --next;
  }

  cl::sycl::queue deviceQueue;
  // The class named in the template argument gives the SYCL kernel its name.
  sycl::sycl_execution_policy<class SortAlgorithm1> policy(deviceQueue);

  using policy_t = decltype(policy);
  using iterator_t = decltype(begin(data));

  // Explicit template arguments select one concrete sort overload so it can
  // be passed to duration() as a callable.
  return benchmark<>::duration(
      numReps, std::experimental::parallel::sort<policy_t, iterator_t>,
      policy, begin(data), end(data));
}
59+
60+
// Registers benchmark_sort under the name "BENCH_SORT".
// NOTE(review): BENCHMARK_MAIN is not defined in this file (presumably it comes
// from benchmark.h); the trailing arguments look like the element-count range
// (2u up to 33554432u == 2^25) and a repetition count (10) - confirm against
// the macro definition.
BENCHMARK_MAIN("BENCH_SORT", benchmark_sort, 2u, 33554432u, 10);

0 commit comments

Comments
 (0)