diff --git a/dash/include/dash/halo/HaloMatrixWrapper.h b/dash/include/dash/halo/HaloMatrixWrapper.h index 1b88b9822..ac00b4131 100644 --- a/dash/include/dash/halo/HaloMatrixWrapper.h +++ b/dash/include/dash/halo/HaloMatrixWrapper.h @@ -49,7 +49,9 @@ namespace halo { * halo region 3 '- halo region 7 */ -template + + +template class HaloMatrixWrapper { private: using Pattern_t = typename MatrixT::pattern_type; @@ -65,7 +67,7 @@ class HaloMatrixWrapper { using GlobBoundSpec_t = GlobalBoundarySpec; using HaloBlock_t = HaloBlock; using HaloMemory_t = HaloMemory; - using HaloUpdateEnv_t = HaloUpdateEnv; + using HaloUpdateEnv_t = HaloUpdateEnv; using ElementCoords_t = std::array; using region_index_t = internal::region_index_t; using stencil_dist_t = internal::spoint_value_t; diff --git a/dash/include/dash/halo/HaloMemory.h b/dash/include/dash/halo/HaloMemory.h index f718c78bf..8b64d7360 100644 --- a/dash/include/dash/halo/HaloMemory.h +++ b/dash/include/dash/halo/HaloMemory.h @@ -6,6 +6,8 @@ #include #include +#include + namespace dash { namespace halo { @@ -355,10 +357,16 @@ class SignalEnv { SignalHandles_t _signal_ready_handles; }; +template +struct BufferOffset { + ElementT* block_pos; + ElementT* buffer_pos; +}; + template struct PackMetaData { bool needs_packing{false}; - std::vector block_pos{}; + std::vector> block_data{}; LengthSizeT block_len{0}; ElementT* buffer_pos{nullptr}; std::function pack_func = [](){}; @@ -373,7 +381,7 @@ std::ostream& operator<<(std::ostream& os, const PackMetaData +template class PackEnv { static constexpr auto NumDimensions = HaloBlockT::ndim(); static constexpr auto RegionsMax = NumRegionsMax; @@ -510,22 +518,39 @@ class PackEnv { pattern_size_t num_blocks = view_pack.size() / num_elems_block; pack_md.block_len = num_elems_block; - pack_md.block_pos.resize(num_blocks); + pack_md.block_data.resize(num_blocks); auto it_region = region->begin(); decltype(it_region) it_pack_data(&(it_region.globmem()), it_region.pattern(), view_pack); - for(auto& pos : pack_md.block_pos) { - pos = _local_memory + it_pack_data.lpos().index; + auto buffer_offset = pack_md.buffer_pos; + for(auto& pos : pack_md.block_data) { + pos.block_pos = _local_memory + it_pack_data.lpos().index; + pos.buffer_pos = buffer_offset; it_pack_data += num_elems_block; + buffer_offset += pack_md.block_len; } auto pack = &pack_md; - pack_md.pack_func = [pack](){ - auto buffer_offset = pack->buffer_pos; - for(auto& pos : pack->block_pos) { - std::copy(pos, pos + pack->block_len, buffer_offset); - buffer_offset += pack->block_len; - } - }; + if(SHARED_TYPE == SharedType::STL) { + pack_md.pack_func = [pack](){ + std::for_each(std::execution::par, pack->block_data.begin(), pack->block_data.end(), [pack](const auto& block) { + std::copy(block.block_pos, block.block_pos + pack->block_len, block.buffer_pos); + }); + }; + } else if(SHARED_TYPE == SharedType::OMP) { + pack_md.pack_func = [pack](){ + #pragma omp parallel for + for(auto i = 0; i < pack->block_data.size(); ++i) { + const auto& block = pack->block_data[i]; + std::copy(block.block_pos, block.block_pos + pack->block_len, block.buffer_pos); + } + }; + } else { + pack_md.pack_func = [pack](){ + for(auto& block : pack->block_data) { + std::copy(block.block_pos, block.block_pos + pack->block_len, block.buffer_pos); + } + }; + } } } @@ -536,7 +561,7 @@ class PackEnv { PackMDataAll_t _pack_md_all; }; -template +template class HaloUpdateEnv { struct UpdateData { std::function get_halos; @@ -552,7 +577,7 @@ class HaloUpdateEnv { using Pattern_t = typename HaloBlockT::Pattern_t; using BlockEnv_t = BlockEnvironment; using SignalEnv_t = SignalEnv; - using PackEnv_t = PackEnv; + using PackEnv_t = PackEnv; diff --git a/dash/include/dash/halo/StencilOperator.h b/dash/include/dash/halo/StencilOperator.h index 31d8c0818..0c9262315 100644 --- a/dash/include/dash/halo/StencilOperator.h +++ b/dash/include/dash/halo/StencilOperator.h @@ -19,6 +19,12 @@ struct replace { using namespace internal; + template + struct Iterator_Range { + IteratorT begin; + IteratorT end; + }; + // Forward declaration template class StencilOperator; @@ -33,6 +39,8 @@ class StencilOperatorInner { static constexpr auto NumStencilPoints = StencilOperatorT::num_stencil_points(); static constexpr auto NumDimensions = StencilOperatorT::ndim(); + using CoordsIdxManagerInner_t = typename StencilOperatorT::CoordsIdxManagerInner_t; + public: using Element_t = typename StencilOperatorT::Element_t; using ViewSpec_t = typename StencilOperatorT::ViewSpec_t; @@ -41,6 +49,7 @@ class StencilOperatorInner { using const_iterator = const iterator; using StencilOffsets_t = typename StencilOperatorT::StencilOffsets_t; + using Iterator_Range_t = Iterator_Range; public: StencilOperatorInner(StencilOperatorT* stencil_op) @@ -71,6 +80,26 @@ class StencilOperatorInner { */ const ViewSpec_t& view() const { return _stencil_op->_spec_views.inner(); } + Iterator_Range_t sub_iterator(const ViewSpec_t* sub_view) const { + auto& inner_view = _stencil_op->_spec_views.inner(); + auto& inner_offsets = inner_view.offsets(); + auto& inner_extents = inner_view.extents(); + auto& sub_offsets = sub_view->offsets(); + auto& sub_extents = sub_view->extents(); + for(dim_t d = 0; d < NumDimensions; ++d) { + auto inner_last_elem = inner_offsets[d] + inner_extents[d]; + auto sub_last_elem = sub_offsets[d] + sub_extents[d]; + if(sub_offsets[d] < inner_offsets[d] || sub_last_elem > inner_last_elem) { + DASH_LOG_ERROR("Sub view doesn't fit into inner view."); + + return {end(), end()}; + } + } + + return { iterator(CoordsIdxManagerInner_t(*_stencil_op, 0, sub_view)), + iterator(CoordsIdxManagerInner_t(*_stencil_op, sub_view->size(), sub_view))}; + } + /** * Modifies all stencil point elements and the center within the inner view. * The stencil points are multiplied with their coefficent (\ref StencilPoint) @@ -407,6 +436,7 @@ class StencilOperatorBoundary { using const_iterator = const iterator; using BoundaryViews_t = typename StencilSpecViews_t::BoundaryViews_t; using RegionCoords_t = RegionCoords; + using Iterator_Range_t = Iterator_Range; public: StencilOperatorBoundary(const StencilOperatorT* stencil_op) @@ -498,27 +528,40 @@ class StencilOperatorBoundary { * Using all iterators for all dimensions and \ref RegionPos has the same * effect as using bbegin and bend. */ - std::pair iterator_at(dim_t dim, RegionPos pos) { - DASH_ASSERT_LT(dim, NumDimensions, "Given dimension to great"); - const auto& bnd_views = _stencil_op->_spec_views.boundary_views(); - uindex_t offset = 0; - auto it_views = std::begin(bnd_views); - for(dim_t d = 0; d < dim; ++d, ++it_views) - offset += it_views->size() + (++it_views)->size(); + Iterator_Range_t iterator_at(dim_t dim, RegionPos pos) const { + DASH_ASSERT_LT(dim, NumDimensions, "Given dimension to high"); + + const auto& bnd_views = _stencil_op->_spec_views.boundary_views(); + auto num_regions_dim = ce::pow(REGION_INDEX_BASE, static_cast::type>(NumDimensions-1)); + region_index_t start_region = 0; + for(dim_t d = 1; d <= dim; ++d) { + start_region += num_regions_dim; + num_regions_dim = ce::pow(REGION_INDEX_BASE, static_cast::type>(NumDimensions-d-1)); + } if(pos == RegionPos::POST) { - offset += it_views->size(); - ++it_views; + start_region = NumRegionsMax - start_region - num_regions_dim; + } + + uindex_t offset = 0; + for(region_index_t r = 0; r < start_region; ++r) { + offset += bnd_views[r].size(); } + uindex_t size = 0; + for(region_index_t r = start_region; r < start_region + num_regions_dim; ++r) { + size += bnd_views[r].size(); + } + + auto it_begin = _stencil_op->_bbegin + offset; - return std::make_pair(it_begin, it_begin + it_views->size()); + return {it_begin, it_begin + size}; } - std::pair iterator_at(region_index_t index) { + Iterator_Range_t iterator_at(region_index_t index) const { DASH_ASSERT_LT(index, NumRegionsMax, "Given index out of range"); - const auto& bnd_views = _stencil_op->_spec_views.boundary_views(); + const auto& bnd_views = _stencil_op->_spec_views.boundary_views(); uindex_t offset = 0; for(region_index_t r = 0; r < index; ++r) { offset += bnd_views[r].size(); @@ -526,7 +569,7 @@ class StencilOperatorBoundary { auto it_begin = _stencil_op->_bbegin + offset; - return std::make_pair(it_begin, it_begin + bnd_views[index].size()); + return {it_begin, it_begin + bnd_views[index].size()}; } @@ -812,6 +855,8 @@ class StencilOperator { return _stencil_offsets[pos]; } + + /** * Returns the local memory offset for a given coordinate */ diff --git a/dash/include/dash/halo/Types.h b/dash/include/dash/halo/Types.h index 9b3ed84e2..48771f878 100644 --- a/dash/include/dash/halo/Types.h +++ b/dash/include/dash/halo/Types.h @@ -68,6 +68,10 @@ enum class BoundaryProp : uint8_t { CUSTOM }; +enum class SharedType : uint8_t { + STL, OMP, NONE +}; + inline std::ostream& operator<<(std::ostream& os, const BoundaryProp& prop) { if(prop == BoundaryProp::NONE) os << "NONE"; diff --git a/dash/include/dash/halo/iterator/StencilIterator.h b/dash/include/dash/halo/iterator/StencilIterator.h index dec4d0198..add76b1c1 100644 --- a/dash/include/dash/halo/iterator/StencilIterator.h +++ b/dash/include/dash/halo/iterator/StencilIterator.h @@ -152,7 +152,6 @@ class CoordsIdxManagerInner { } } - private: void init_ranges() { for(dim_t d = 0; d < NumDimensions; ++d) { @@ -790,9 +789,7 @@ class StencilIteratorTest { } Self_t& operator+=(index_t n) { - auto index = _coords_mng.index() + n; - //if(index < _coords_mng.size()) - _coords_mng.set(index); + _coords_mng.set(_coords_mng.index() + n); return *this; } diff --git a/dash/test/halo/HaloTest.cc b/dash/test/halo/HaloTest.cc index 3b922e635..059360014 100644 --- a/dash/test/halo/HaloTest.cc +++ b/dash/test/halo/HaloTest.cc @@ -450,9 +450,7 @@ unsigned long calc_sum_halo(HaloWrapperT& halo_wrapper, StencilOpT stencil_op, b if(region_wise) { for( auto r = 0; r < NumRegionsMax<3>; ++r) { auto it_bnd = stencil_op.boundary.iterator_at(r); - if(it_bnd.first == it_bnd.second) - continue; - for(auto it = it_bnd.first; it != it_bnd.second; ++it) { + for(auto it = it_bnd.begin; it != it_bnd.end; ++it) { for(auto i = 0; i < num_stencil_points; ++i) *sum_local += it.value_at(i);