Skip to content
6 changes: 4 additions & 2 deletions dash/include/dash/halo/HaloMatrixWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ namespace halo {
* halo region 3 '- halo region 7
*/

template <typename MatrixT, SignalReady SigReady = SignalReady::OFF>


template <typename MatrixT, SharedType SHARED_TYPE = SharedType::NONE, SignalReady SigReady = SignalReady::OFF>
class HaloMatrixWrapper {
private:
using Pattern_t = typename MatrixT::pattern_type;
Expand All @@ -65,7 +67,7 @@ class HaloMatrixWrapper {
using GlobBoundSpec_t = GlobalBoundarySpec<NumDimensions>;
using HaloBlock_t = HaloBlock<Element_t, Pattern_t, GlobMem_t>;
using HaloMemory_t = HaloMemory<HaloBlock_t>;
using HaloUpdateEnv_t = HaloUpdateEnv<HaloBlock_t, SigReady>;
using HaloUpdateEnv_t = HaloUpdateEnv<HaloBlock_t, SHARED_TYPE, SigReady>;
using ElementCoords_t = std::array<pattern_index_t, NumDimensions>;
using region_index_t = internal::region_index_t;
using stencil_dist_t = internal::spoint_value_t;
Expand Down
53 changes: 39 additions & 14 deletions dash/include/dash/halo/HaloMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <dash/halo/Halo.h>
#include <dash/Array.h>

#include <execution>

namespace dash {

namespace halo {
Expand Down Expand Up @@ -355,10 +357,16 @@ class SignalEnv {
SignalHandles_t _signal_ready_handles;
};

/**
 * Pairs the source position of one contiguous block of elements with the
 * position in the pack buffer the block is copied to.
 *
 * One entry is created per block when the pack meta data is set up; the
 * pack functions then copy \c block_len elements from \c block_pos to
 * \c buffer_pos for every entry.
 *
 * Both pointers default to \c nullptr so that a default-initialized entry
 * never carries an indeterminate address (consistent with the default
 * member initializers used by PackMetaData).
 */
template<typename ElementT>
struct BufferOffset {
  /// First element of the block in local memory (copy source).
  ElementT* block_pos{nullptr};
  /// First element of the block's slot in the pack buffer (copy target).
  ElementT* buffer_pos{nullptr};
};

template<typename ElementT, typename LengthSizeT>
struct PackMetaData {
bool needs_packing{false};
std::vector<ElementT*> block_pos{};
std::vector<BufferOffset<ElementT>> block_data{};
LengthSizeT block_len{0};
ElementT* buffer_pos{nullptr};
std::function<void()> pack_func = [](){};
Expand All @@ -373,7 +381,7 @@ std::ostream& operator<<(std::ostream& os, const PackMetaData<ElementT, LengthSi
return os;
}

template<typename HaloBlockT>
template<typename HaloBlockT, SharedType SHARED_TYPE>
class PackEnv {
static constexpr auto NumDimensions = HaloBlockT::ndim();
static constexpr auto RegionsMax = NumRegionsMax<NumDimensions>;
Expand Down Expand Up @@ -510,22 +518,39 @@ class PackEnv {
pattern_size_t num_blocks = view_pack.size() / num_elems_block;

pack_md.block_len = num_elems_block;
pack_md.block_pos.resize(num_blocks);
pack_md.block_data.resize(num_blocks);

auto it_region = region->begin();
decltype(it_region) it_pack_data(&(it_region.globmem()), it_region.pattern(), view_pack);
for(auto& pos : pack_md.block_pos) {
pos = _local_memory + it_pack_data.lpos().index;
auto buffer_offset = pack_md.buffer_pos;
for(auto& pos : pack_md.block_data) {
pos.block_pos = _local_memory + it_pack_data.lpos().index;
pos.buffer_pos = buffer_offset;
it_pack_data += num_elems_block;
buffer_offset += pack_md.block_len;
}
auto pack = &pack_md;
pack_md.pack_func = [pack](){
auto buffer_offset = pack->buffer_pos;
for(auto& pos : pack->block_pos) {
std::copy(pos, pos + pack->block_len, buffer_offset);
buffer_offset += pack->block_len;
}
};
if(SHARED_TYPE == SharedType::STL) {
pack_md.pack_func = [pack](){
std::for_each(std::execution::par, pack->block_data.begin(), pack->block_data.end(), [pack](const auto& block) {
std::copy(block.block_pos, block.block_pos + pack->block_len, block.buffer_pos);
});
};
} else if(SHARED_TYPE == SharedType::OMP) {
pack_md.pack_func = [pack](){
#pragma omp parallel for
for(auto i = 0; i < pack->block_data.size(); ++i) {
const auto& block = pack->block_data[i];
std::copy(block.block_pos, block.block_pos + pack->block_len, block.buffer_pos);
}
};
} else {
pack_md.pack_func = [pack](){
for(auto& block : pack->block_data) {
std::copy(block.block_pos, block.block_pos + pack->block_len, block.buffer_pos);
}
};
}
}
}

Expand All @@ -536,7 +561,7 @@ class PackEnv {
PackMDataAll_t _pack_md_all;
};

template <typename HaloBlockT, SignalReady SigReady>
template <typename HaloBlockT, SharedType SHARED_TYPE, SignalReady SigReady>
class HaloUpdateEnv {
struct UpdateData {
std::function<void(dart_handle_t&)> get_halos;
Expand All @@ -552,7 +577,7 @@ class HaloUpdateEnv {
using Pattern_t = typename HaloBlockT::Pattern_t;
using BlockEnv_t = BlockEnvironment<Pattern_t>;
using SignalEnv_t = SignalEnv<HaloBlockT>;
using PackEnv_t = PackEnv<HaloBlockT>;
using PackEnv_t = PackEnv<HaloBlockT, SHARED_TYPE>;



Expand Down
71 changes: 58 additions & 13 deletions dash/include/dash/halo/StencilOperator.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ struct replace {

using namespace internal;

/**
 * Simple aggregate describing a half-open iterator range.
 *
 * Used as return type for functions handing out a begin/end iterator pair,
 * so callers can write \c range.begin and \c range.end.
 */
template <class IteratorT>
struct Iterator_Range {
  /// Iterator referring to the first element of the range.
  IteratorT begin;
  /// Iterator referring to the position past the last element of the range.
  IteratorT end;
};

// Forward declaration
template <typename HaloBlockT, typename StencilSpecT>
class StencilOperator;
Expand All @@ -33,6 +39,8 @@ class StencilOperatorInner {
static constexpr auto NumStencilPoints = StencilOperatorT::num_stencil_points();
static constexpr auto NumDimensions = StencilOperatorT::ndim();

using CoordsIdxManagerInner_t = typename StencilOperatorT::CoordsIdxManagerInner_t;

public:
using Element_t = typename StencilOperatorT::Element_t;
using ViewSpec_t = typename StencilOperatorT::ViewSpec_t;
Expand All @@ -41,6 +49,7 @@ class StencilOperatorInner {
using const_iterator = const iterator;

using StencilOffsets_t = typename StencilOperatorT::StencilOffsets_t;
using Iterator_Range_t = Iterator_Range<iterator>;

public:
StencilOperatorInner(StencilOperatorT* stencil_op)
Expand Down Expand Up @@ -71,6 +80,26 @@ class StencilOperatorInner {
*/
const ViewSpec_t& view() const { return _stencil_op->_spec_views.inner(); }

/**
 * Returns an iterator range covering the elements of a sub view of the
 * inner view.
 *
 * The sub view has to lie completely within the inner view in every
 * dimension. If it does not, an error is logged and an empty range
 * (end(), end()) is returned.
 *
 * \param sub_view view to iterate over; it is dereferenced unconditionally
 *                 and therefore must not be a null pointer
 * \return iterators for index 0 and index \c sub_view->size() of the sub view
 */
Iterator_Range_t sub_iterator(const ViewSpec_t* sub_view) const {
  const auto& inner = _stencil_op->_spec_views.inner();

  for(dim_t dim = 0; dim < NumDimensions; ++dim) {
    const auto inner_first = inner.offsets()[dim];
    const auto inner_limit = inner_first + inner.extents()[dim];
    const auto sub_first   = sub_view->offsets()[dim];
    const auto sub_limit   = sub_first + sub_view->extents()[dim];

    // The sub view must neither start before nor reach beyond the inner view.
    const bool fits = !(sub_first < inner_first) && !(sub_limit > inner_limit);
    if(!fits) {
      DASH_LOG_ERROR("Sub view doesn't fit into inner view.");

      return {end(), end()};
    }
  }

  return Iterator_Range_t{
    iterator(CoordsIdxManagerInner_t(*_stencil_op, 0, sub_view)),
    iterator(CoordsIdxManagerInner_t(*_stencil_op, sub_view->size(), sub_view))};
}

/**
* Modifies all stencil point elements and the center within the inner view.
* The stencil points are multiplied with their coefficient (\ref StencilPoint)
Expand Down Expand Up @@ -407,6 +436,7 @@ class StencilOperatorBoundary {
using const_iterator = const iterator;
using BoundaryViews_t = typename StencilSpecViews_t::BoundaryViews_t;
using RegionCoords_t = RegionCoords<NumDimensions>;
using Iterator_Range_t = Iterator_Range<iterator>;

public:
StencilOperatorBoundary(const StencilOperatorT* stencil_op)
Expand Down Expand Up @@ -498,35 +528,48 @@ class StencilOperatorBoundary {
* Using all iterators for all dimensions and \ref RegionPos has the same
* effect as using bbegin and bend.
*/
std::pair<iterator, iterator> iterator_at(dim_t dim, RegionPos pos) {
DASH_ASSERT_LT(dim, NumDimensions, "Given dimension to great");
const auto& bnd_views = _stencil_op->_spec_views.boundary_views();
uindex_t offset = 0;
auto it_views = std::begin(bnd_views);
for(dim_t d = 0; d < dim; ++d, ++it_views)
offset += it_views->size() + (++it_views)->size();
Iterator_Range_t iterator_at(dim_t dim, RegionPos pos) const {
DASH_ASSERT_LT(dim, NumDimensions, "Given dimension to high");

const auto& bnd_views = _stencil_op->_spec_views.boundary_views();
auto num_regions_dim = ce::pow(REGION_INDEX_BASE, static_cast<std::make_unsigned<dim_t>::type>(NumDimensions-1));
region_index_t start_region = 0;
for(dim_t d = 1; d <= dim; ++d) {
start_region += num_regions_dim;
num_regions_dim = ce::pow(REGION_INDEX_BASE, static_cast<std::make_unsigned<dim_t>::type>(NumDimensions-d-1));
}

if(pos == RegionPos::POST) {
offset += it_views->size();
++it_views;
start_region = NumRegionsMax<NumDimensions> - start_region - num_regions_dim;
}

uindex_t offset = 0;
for(region_index_t r = 0; r < start_region; ++r) {
offset += bnd_views[r].size();
}

uindex_t size = 0;
for(region_index_t r = start_region; r < start_region + num_regions_dim; ++r) {
size += bnd_views[r].size();
}


auto it_begin = _stencil_op->_bbegin + offset;

return std::make_pair(it_begin, it_begin + it_views->size());
return {it_begin, it_begin + size};
}

std::pair<iterator, iterator> iterator_at(region_index_t index) {
Iterator_Range_t iterator_at(region_index_t index) const {
DASH_ASSERT_LT(index, NumRegionsMax<NumDimensions>, "Given index out of range");
const auto& bnd_views = _stencil_op->_spec_views.boundary_views();
const auto& bnd_views = _stencil_op->_spec_views.boundary_views();
uindex_t offset = 0;
for(region_index_t r = 0; r < index; ++r) {
offset += bnd_views[r].size();
}

auto it_begin = _stencil_op->_bbegin + offset;

return std::make_pair(it_begin, it_begin + bnd_views[index].size());
return {it_begin, it_begin + bnd_views[index].size()};
}


Expand Down Expand Up @@ -812,6 +855,8 @@ class StencilOperator {
return _stencil_offsets[pos];
}



/**
* Returns the local memory offset for a given coordinate
*/
Expand Down
4 changes: 4 additions & 0 deletions dash/include/dash/halo/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ enum class BoundaryProp : uint8_t {
CUSTOM
};

/**
 * Selects how the halo pack function copies its blocks into the pack buffer.
 */
enum class SharedType : uint8_t {
  /// Blocks are copied via std::for_each with std::execution::par.
  STL,
  /// Blocks are copied within an OpenMP "parallel for" loop.
  OMP,
  /// Blocks are copied one after another in a plain loop.
  NONE
};

inline std::ostream& operator<<(std::ostream& os, const BoundaryProp& prop) {
if(prop == BoundaryProp::NONE)
os << "NONE";
Expand Down
5 changes: 1 addition & 4 deletions dash/include/dash/halo/iterator/StencilIterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,6 @@ class CoordsIdxManagerInner {
}
}


private:
void init_ranges() {
for(dim_t d = 0; d < NumDimensions; ++d) {
Expand Down Expand Up @@ -790,9 +789,7 @@ class StencilIteratorTest {
}

Self_t& operator+=(index_t n) {
auto index = _coords_mng.index() + n;
//if(index < _coords_mng.size())
_coords_mng.set(index);
_coords_mng.set(_coords_mng.index() + n);

return *this;
}
Expand Down
4 changes: 1 addition & 3 deletions dash/test/halo/HaloTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -450,9 +450,7 @@ unsigned long calc_sum_halo(HaloWrapperT& halo_wrapper, StencilOpT stencil_op, b
if(region_wise) {
for( auto r = 0; r < NumRegionsMax<3>; ++r) {
auto it_bnd = stencil_op.boundary.iterator_at(r);
if(it_bnd.first == it_bnd.second)
continue;
for(auto it = it_bnd.first; it != it_bnd.second; ++it) {
for(auto it = it_bnd.begin; it != it_bnd.end; ++it) {
for(auto i = 0; i < num_stencil_points; ++i)
*sum_local += it.value_at(i);

Expand Down