Skip to content

Commit c150a41

Browse files
committed
[RFC] dash::copy: Implement global-to-global
Active team selection is now done by tag struct argument.
1 parent ecb9c0d commit c150a41

File tree

1 file changed

+137
-8
lines changed
  • dash/include/dash/algorithm

1 file changed

+137
-8
lines changed

dash/include/dash/algorithm/Copy.h

+137-8
Original file line numberDiff line numberDiff line change
@@ -624,25 +624,154 @@ copy_async(
624624
}
625625
#endif
626626

627+
/**
 * Tag type passed as last argument to \c dash::copy to select the
 * global-to-global overload in which the transfer is driven from the
 * destination side of the copy. Carries no state.
 */
struct ActiveDestination {
};
628+
/**
 * Tag type passed as last argument to \c dash::copy to select the
 * global-to-global overload in which the transfer is driven from the
 * source side of the copy. Carries no state.
 */
struct ActiveSource {
};
629+
630+
/**
631+
* Specialization of \c dash::copy as global-to-global blocking copy
632+
* operation.
633+
*
634+
* \ingroup DashAlgorithms
635+
*/
636+
template <
637+
class GlobInputIt,
638+
class GlobOutputIt,
639+
typename ValueType = typename GlobInputIt::value_type>
640+
GlobOutputIt copy(
641+
GlobInputIt in_first,
642+
GlobInputIt in_last,
643+
GlobOutputIt out_first,
644+
ActiveDestination /*unused*/)
645+
{
646+
DASH_LOG_TRACE("dash::copy()", "blocking, global to global");
647+
648+
typedef typename GlobInputIt::size_type size_type;
649+
650+
size_type num_elem_total = dash::distance(in_first, in_last);
651+
if (num_elem_total <= 0) {
652+
DASH_LOG_TRACE("dash::copy", "input range empty");
653+
return out_first;
654+
}
655+
656+
auto g_out_first = out_first;
657+
auto g_out_last = g_out_first + num_elem_total;
658+
659+
internal::ContiguousRangeSet<GlobOutputIt> range_set{g_out_first, g_out_last};
660+
661+
const auto & out_team = out_first.team();
662+
out_team.barrier();
663+
664+
std::vector<dart_handle_t> handles;
665+
internal::local_copy_chunks<ValueType> local_chunks;
666+
667+
size_type num_elem_processed = 0;
668+
669+
for (auto range : range_set) {
670+
671+
auto cur_out_first = range.first;
672+
auto num_copy_elem = range.second;
673+
674+
DASH_ASSERT_GT(num_copy_elem, 0,
675+
"Number of elements to copy is 0");
676+
677+
// handle local data only
678+
if (cur_out_first.is_local()) {
679+
auto dest_ptr = cur_out_first.local();
680+
auto src_ptr = in_first + num_elem_processed;
681+
internal::copy_impl(src_ptr,
682+
src_ptr + num_copy_elem,
683+
dest_ptr,
684+
&handles,
685+
local_chunks);
686+
}
687+
num_elem_processed += num_copy_elem;
688+
}
689+
690+
internal::do_local_copies(local_chunks);
691+
692+
if (!handles.empty()) {
693+
DASH_LOG_TRACE("dash::copy", "Waiting for remote transfers to complete,",
694+
"num_handles: ", handles.size());
695+
dart_waitall_local(handles.data(), handles.size());
696+
}
697+
out_team.barrier();
698+
699+
DASH_ASSERT_EQ(num_elem_processed, num_elem_total,
700+
"Failed to find all contiguous subranges in range");
701+
702+
return g_out_last;
703+
}
704+
627705
/**
628706
* Specialization of \c dash::copy as global-to-global blocking copy
629707
* operation.
630708
*
631709
* \ingroup DashAlgorithms
632710
*/
633-
template <typename ValueType, class GlobInputIt, class GlobOutputIt>
711+
template <
712+
class GlobInputIt,
713+
class GlobOutputIt,
714+
typename ValueType = typename GlobInputIt::value_type>
634715
GlobOutputIt copy(
635-
GlobInputIt /*in_first*/,
636-
GlobInputIt /*in_last*/,
637-
GlobOutputIt /*out_first*/)
716+
GlobInputIt in_first,
717+
GlobInputIt in_last,
718+
GlobOutputIt out_first,
719+
ActiveSource /*unused*/)
638720
{
639721
DASH_LOG_TRACE("dash::copy()", "blocking, global to global");
640722

641-
// TODO:
642-
// - Implement adapter for local-to-global dash::copy here
643-
// - Return if global input range has no local sub-range
723+
typedef typename GlobInputIt::size_type size_type;
724+
725+
size_type num_elem_total = dash::distance(in_first, in_last);
726+
if (num_elem_total <= 0) {
727+
DASH_LOG_TRACE("dash::copy", "input range empty");
728+
return out_first;
729+
}
730+
731+
internal::ContiguousRangeSet<GlobOutputIt> range_set{in_first, in_last};
732+
733+
const auto & in_team = in_first.team();
734+
in_team.barrier();
735+
736+
std::vector<dart_handle_t> handles;
737+
internal::local_copy_chunks<ValueType> local_chunks;
738+
739+
size_type num_elem_processed = 0;
740+
741+
for (auto range : range_set) {
742+
743+
auto cur_in_first = range.first;
744+
auto num_copy_elem = range.second;
745+
746+
DASH_ASSERT_GT(num_copy_elem, 0,
747+
"Number of elements to copy is 0");
748+
749+
// handle local data only
750+
if (cur_in_first.is_local()) {
751+
auto src_ptr = cur_in_first.local();
752+
auto dest_ptr = out_first + num_elem_processed;
753+
internal::copy_impl(src_ptr,
754+
src_ptr + num_copy_elem,
755+
dest_ptr,
756+
&handles,
757+
local_chunks);
758+
}
759+
num_elem_processed += num_copy_elem;
760+
}
761+
762+
internal::do_local_copies(local_chunks);
763+
764+
if (!handles.empty()) {
765+
DASH_LOG_TRACE("dash::copy", "Waiting for remote transfers to complete,",
766+
"num_handles: ", handles.size());
767+
dart_waitall(handles.data(), handles.size());
768+
}
769+
in_team.barrier();
770+
771+
DASH_ASSERT_EQ(num_elem_processed, num_elem_total,
772+
"Failed to find all contiguous subranges in range");
644773

645-
return GlobOutputIt();
774+
return out_first + num_elem_total;
646775
}
647776

648777
#endif // DOXYGEN

0 commit comments

Comments
 (0)