diff --git a/graph/unit_test/Test_Graph_rcm.hpp b/graph/unit_test/Test_Graph_rcm.hpp index 096bdb6961..ad3f3d93b6 100644 --- a/graph/unit_test/Test_Graph_rcm.hpp +++ b/graph/unit_test/Test_Graph_rcm.hpp @@ -19,7 +19,7 @@ #include "KokkosGraph_RCM.hpp" #include "KokkosKernels_IOUtils.hpp" -#include "Kokkos_StaticCrsGraph.hpp" +#include "KokkosSparse_StaticCrsGraph.hpp" #include @@ -119,7 +119,7 @@ void test_rcm(const rowmap_t& rowmap, const entries_t& entries, bool expectBandw template void test_rcm_zerorows() { - using graph_t = Kokkos::StaticCrsGraph; + using graph_t = KokkosSparse::StaticCrsGraph; using rowmap_t = typename graph_t::row_map_type::non_const_type; using entries_t = typename graph_t::entries_type::non_const_type; rowmap_t rowmap; @@ -129,7 +129,7 @@ void test_rcm_zerorows() { template void test_rcm_7pt(lno_t gridX, lno_t gridY, lno_t gridZ, bool expectBandwidthReduced) { - using graph_t = Kokkos::StaticCrsGraph; + using graph_t = KokkosSparse::StaticCrsGraph; using rowmap_t = typename graph_t::row_map_type::non_const_type; using entries_t = typename graph_t::entries_type::non_const_type; rowmap_t rowmap; @@ -140,7 +140,7 @@ void test_rcm_7pt(lno_t gridX, lno_t gridY, lno_t gridZ, bool expectBandwidthRed template void test_rcm_4clique() { - using graph_t = Kokkos::StaticCrsGraph; + using graph_t = KokkosSparse::StaticCrsGraph; using rowmap_t = typename graph_t::row_map_type::non_const_type; using entries_t = typename graph_t::entries_type::non_const_type; rowmap_t rowmap("rowmap", 5); @@ -156,7 +156,7 @@ void test_rcm_4clique() { template void test_rcm_multiple_components() { - using graph_t = Kokkos::StaticCrsGraph; + using graph_t = KokkosSparse::StaticCrsGraph; using rowmap_t = typename graph_t::row_map_type::non_const_type; using entries_t = typename graph_t::entries_type::non_const_type; // Generate a single 3D grid first diff --git a/perf_test/graph/KokkosGraph_triangle.cpp b/perf_test/graph/KokkosGraph_triangle.cpp index 84676607d9..3e5b127607 
100644 --- a/perf_test/graph/KokkosGraph_triangle.cpp +++ b/perf_test/graph/KokkosGraph_triangle.cpp @@ -16,6 +16,7 @@ #include #include "KokkosKernels_IOUtils.hpp" #include "KokkosGraph_Triangle.hpp" +#include "KokkosSparse_StaticCrsGraph.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosSparse_IOUtils.hpp" //for read_kokkos_crst_graph #include "KokkosKernels_TestStringUtils.hpp" @@ -232,7 +233,7 @@ void run_experiment(int argc, char **argv, perf_test::CommonInputParams) { using device_t = Kokkos::Device; using lno_t = KokkosKernels::default_lno_t; using size_type = KokkosKernels::default_size_type; - using graph_t = Kokkos::StaticCrsGraph; + using graph_t = StaticCrsGraph; using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle; diff --git a/perf_test/sparse/KokkosSparse_spmv_struct.cpp b/perf_test/sparse/KokkosSparse_spmv_struct.cpp index 1e95bd7de5..9b11ee9737 100644 --- a/perf_test/sparse/KokkosSparse_spmv_struct.cpp +++ b/perf_test/sparse/KokkosSparse_spmv_struct.cpp @@ -244,7 +244,7 @@ int main(int argc, char **argv) { if (check_errors) { h_y_compare = Kokkos::create_mirror(y); - typename matrix_type::StaticCrsGraphType::HostMirror h_graph = Kokkos::create_mirror(A.graph); + typename matrix_type::StaticCrsGraphType::HostMirror h_graph = KokkosSparse::create_mirror(A.graph); typename matrix_type::values_type::HostMirror h_values = Kokkos::create_mirror_view(A.values); // Error Check Gold Values diff --git a/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp b/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp index 3a857fcb23..0dce5ffc8c 100644 --- a/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp +++ b/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp @@ -395,7 +395,7 @@ int main(int argc, char** argv) { if (check_errors) { h_y_compare = Kokkos::create_mirror(y); - typename matrix_type::StaticCrsGraphType::HostMirror h_graph = Kokkos::create_mirror(A.graph); + typename matrix_type::StaticCrsGraphType::HostMirror h_graph = 
KokkosSparse::create_mirror(A.graph); typename matrix_type::values_type::HostMirror h_values = Kokkos::create_mirror_view(A.values); // Error Check Gold Values diff --git a/perf_test/sparse/KokkosSparse_spmv_test.cpp b/perf_test/sparse/KokkosSparse_spmv_test.cpp index eb3b56f790..3db04e0cfc 100644 --- a/perf_test/sparse/KokkosSparse_spmv_test.cpp +++ b/perf_test/sparse/KokkosSparse_spmv_test.cpp @@ -64,7 +64,7 @@ SPMVTestData setup_test(spmv_additional_data* data, SPMVTestData::matrix_type A, test_data.h_y = Kokkos::create_mirror_view(y); test_data.h_y_compare = Kokkos::create_mirror(y); - h_graph_type h_graph = Kokkos::create_mirror(test_data.A.graph); + h_graph_type h_graph = KokkosSparse::create_mirror(test_data.A.graph); h_values_type h_values = Kokkos::create_mirror_view(test_data.A.values); for (int i = 0; i < test_data.numCols; i++) { diff --git a/sparse/src/KokkosSparse_BsrMatrix.hpp b/sparse/src/KokkosSparse_BsrMatrix.hpp index 1fecb3b7b9..722f0372fe 100644 --- a/sparse/src/KokkosSparse_BsrMatrix.hpp +++ b/sparse/src/KokkosSparse_BsrMatrix.hpp @@ -30,7 +30,6 @@ #include #include "Kokkos_Core.hpp" -#include "Kokkos_StaticCrsGraph.hpp" #include "Kokkos_ArithTraits.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosKernels_Error.hpp" @@ -332,11 +331,9 @@ class BsrMatrix { //! Type of a host-memory mirror of the sparse matrix. typedef BsrMatrix HostMirror; //! Type of the graph structure of the sparse matrix. - typedef Kokkos::StaticCrsGraph - StaticCrsGraphType; + typedef StaticCrsGraph StaticCrsGraphType; //! Type of the graph structure of the sparse matrix - consistent with Kokkos. - typedef Kokkos::StaticCrsGraph - staticcrsgraph_type; + typedef StaticCrsGraph staticcrsgraph_type; //! Type of column indices in the sparse matrix. typedef typename staticcrsgraph_type::entries_type index_type; //! Const version of the type of column indices in the sparse matrix. 
@@ -700,7 +697,7 @@ class BsrMatrix { // create_staticcrsgraph takes the frequency of blocks per row // and returns the cum sum pointer row_map with nbrows+1 size, and total // numBlocks in the final entry - graph = Kokkos::create_staticcrsgraph("blockgraph", block_rows); + graph = create_staticcrsgraph("blockgraph", block_rows); typename row_map_type::HostMirror h_row_map = Kokkos::create_mirror_view(graph.row_map); Kokkos::deep_copy(h_row_map, graph.row_map); diff --git a/sparse/src/KokkosSparse_CcsMatrix.hpp b/sparse/src/KokkosSparse_CcsMatrix.hpp index e39ab730e6..a77bd1dedd 100644 --- a/sparse/src/KokkosSparse_CcsMatrix.hpp +++ b/sparse/src/KokkosSparse_CcsMatrix.hpp @@ -29,85 +29,10 @@ #include #include #include "KokkosSparse_findRelOffset.hpp" +#include "KokkosSparse_StaticCcsGraph.hpp" #include "KokkosKernels_default_types.hpp" #include "KokkosKernels_Macros.hpp" -namespace Kokkos { -/// \class StaticCcsGraph -/// \brief Compressed column storage array copied from Kokkos::StaticCrsGraph. -/// -/// \tparam DataType The type of stored entries. If a StaticCcsGraph is -/// used as the graph of a sparse matrix, then this is usually an -/// integer type, the type of the column indices in the sparse -/// matrix. -/// -/// \tparam Arg1Type The second template parameter, corresponding -/// either to the Device type (if there are no more template -/// parameters) or to the Layout type (if there is at least one more -/// template parameter). -/// -/// \tparam Arg2Type The third template parameter, which if provided -/// corresponds to the Device type. -/// -/// \tparam Arg3Type The third template parameter, which if provided -/// corresponds to the MemoryTraits. -/// -/// \tparam SizeType The type of col offsets. Usually the default -/// parameter suffices. 
However, setting a nondefault value is -/// necessary in some cases, for example, if you want to have a -/// sparse matrices with dimensions (and therefore column indices) -/// that fit in \c int, but want to store more than INT_MAX -/// entries in the sparse matrix. -/// -/// A col has a range of entries: -///
<ul>
-/// <li> <code> col_map[i0] <= entry < col_map[i0+1] </code> </li>
-/// <li> <code> 0 <= i1 < col_map[i0+1] - col_map[i0] </code> </li>
-/// <li> <code> entries( entry , i2 , i3 , ... ); </code> </li>
-/// <li> <code> entries( col_map[i0] + i1 , i2 , i3 , ... ); </code> </li>
-/// </ul>
-template ::size_type> -class StaticCcsGraph { - private: - using traits = ViewTraits; - - public: - using data_type = DataType; - using array_layout = typename traits::array_layout; - using execution_space = typename traits::execution_space; - using device_type = typename traits::device_type; - using memory_traits = typename traits::memory_traits; - using size_type = SizeType; - - using col_map_type = View; - using entries_type = View; - using row_block_type = View; - - entries_type entries; - col_map_type col_map; - - //! Construct an empty view. - KOKKOS_INLINE_FUNCTION - StaticCcsGraph() : entries(), col_map() {} - - //! Copy constructor (shallow copy). - KOKKOS_INLINE_FUNCTION - StaticCcsGraph(const StaticCcsGraph& rhs) : entries(rhs.entries), col_map(rhs.col_map) {} - - template - KOKKOS_INLINE_FUNCTION StaticCcsGraph(const EntriesType& entries_, const ColMapType& col_map_) - : entries(entries_), col_map(col_map_) {} - - /** \brief Return number of columns in the graph - */ - KOKKOS_INLINE_FUNCTION - size_type numCols() const { - return (col_map.extent(0) != 0) ? col_map.extent(0) - static_cast(1) : static_cast(0); - } -}; -} // namespace Kokkos - namespace KokkosSparse { /// \class CcsMatrix /// \brief Compressed sparse column implementation of a sparse matrix. @@ -142,7 +67,7 @@ class CcsMatrix { //! Type of each (column) index in the matrix. typedef OrdinalType ordinal_type; //! Type of the graph structure of the sparse matrix - consistent with Kokkos. - typedef Kokkos::StaticCcsGraph + typedef StaticCcsGraph staticccsgraph_type; //! Type of the "column map" (which contains the offset for each column's //! data). 
diff --git a/sparse/src/KokkosSparse_CrsMatrix.hpp b/sparse/src/KokkosSparse_CrsMatrix.hpp index 676dfb64cb..5252f57791 100644 --- a/sparse/src/KokkosSparse_CrsMatrix.hpp +++ b/sparse/src/KokkosSparse_CrsMatrix.hpp @@ -25,11 +25,11 @@ #define KOKKOSSPARSE_CRSMATRIX_HPP_ #include "Kokkos_Core.hpp" -#include "Kokkos_StaticCrsGraph.hpp" #include #include #include #include "KokkosSparse_findRelOffset.hpp" +#include "KokkosSparse_StaticCrsGraph.hpp" #include "KokkosKernels_default_types.hpp" #include "KokkosKernels_Macros.hpp" @@ -344,10 +344,10 @@ class CrsMatrix { //! Type of a host-memory mirror of the sparse matrix. typedef CrsMatrix HostMirror; //! Type of the graph structure of the sparse matrix. - typedef Kokkos::StaticCrsGraph + typedef StaticCrsGraph StaticCrsGraphType; //! Type of the graph structure of the sparse matrix - consistent with Kokkos. - typedef Kokkos::StaticCrsGraph + typedef StaticCrsGraph staticcrsgraph_type; //! Type of column indices in the sparse matrix. typedef typename staticcrsgraph_type::entries_type index_type; @@ -436,13 +436,12 @@ class CrsMatrix { /// \brief Construct with a graph that will be shared. /// - /// Allocate the values array for subsquent fill. + /// Allocate the values array for subsequent fill. template [[deprecated( "Use the constructor that accepts ncols as input " "instead.")]] CrsMatrix(const std::string& label, - const Kokkos::StaticCrsGraph& - graph_) + const StaticCrsGraph& graph_) : graph(graph_.entries, graph_.row_map), values(label, graph_.entries.extent(0)), numCols_(maximum_entry(graph_) + 1) {} @@ -457,7 +456,7 @@ class CrsMatrix { /// \param ncols [in] The number of columns. 
template CrsMatrix(const std::string& label, - const Kokkos::StaticCrsGraph& graph_, + const StaticCrsGraph& graph_, const OrdinalType& ncols) : graph(graph_.entries, graph_.row_map), values(label, graph_.entries.extent(0)), numCols_(ncols) {} @@ -471,11 +470,11 @@ class CrsMatrix { /// \param graph_ The graph for storing the rowmap and col ids. template CrsMatrix(const std::string&, const OrdinalType& ncols, const values_type& vals, - const Kokkos::StaticCrsGraph& graph_) + const StaticCrsGraph& graph_) : graph(graph_.entries, graph_.row_map), values(vals), numCols_(ncols) {} /// \brief Constructor that copies raw arrays of host data in - /// 3-array CRS (compresed row storage) format. + /// 3-array CRS (compressed row storage) format. /// /// On input, the entries must be sorted by row. \c rowmap determines where /// each row begins and ends. For each entry k (0 <= k < annz), \c cols[k] diff --git a/sparse/src/KokkosSparse_StaticCcsGraph.hpp b/sparse/src/KokkosSparse_StaticCcsGraph.hpp new file mode 100644 index 0000000000..a14f97133a --- /dev/null +++ b/sparse/src/KokkosSparse_StaticCcsGraph.hpp @@ -0,0 +1,108 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSSPARSE_STATICCCSGRAPH_HPP_ +#define KOKKOSSPARSE_STATICCCSGRAPH_HPP_ + +#include + +namespace KokkosSparse { + +/// \class StaticCcsGraph +/// \brief Compressed column storage array copied from Kokkos::StaticCrsGraph. +/// +/// \tparam DataType The type of stored entries. 
If a StaticCcsGraph is +/// used as the graph of a sparse matrix, then this is usually an +/// integer type, the type of the column indices in the sparse +/// matrix. +/// +/// \tparam Arg1Type The second template parameter, corresponding +/// either to the Device type (if there are no more template +/// parameters) or to the Layout type (if there is at least one more +/// template parameter). +/// +/// \tparam Arg2Type The third template parameter, which if provided +/// corresponds to the Device type. +/// +/// \tparam Arg3Type The fourth template parameter, which if provided +/// corresponds to the MemoryTraits. +/// +/// \tparam SizeType The type of col offsets. Usually the default +/// parameter suffices. However, setting a nondefault value is +/// necessary in some cases, for example, if you want to have +/// sparse matrices with dimensions (and therefore column indices) +/// that fit in \c int, but want to store more than INT_MAX +/// entries in the sparse matrix. +/// +/// A col has a range of entries: +///
<ul>
+/// <li> <code> col_map[i0] <= entry < col_map[i0+1] </code> </li>
+/// <li> <code> 0 <= i1 < col_map[i0+1] - col_map[i0] </code> </li>
+/// <li> <code> entries( entry , i2 , i3 , ... ); </code> </li>
+/// <li> <code> entries( col_map[i0] + i1 , i2 , i3 , ... ); </code> </li>
+/// </ul>
+template ::size_type> +class StaticCcsGraph { + private: + using traits = Kokkos::ViewTraits; + + public: + using data_type = DataType; + using array_layout = typename traits::array_layout; + using execution_space = typename traits::execution_space; + using device_type = typename traits::device_type; + using memory_traits = typename traits::memory_traits; + using size_type = SizeType; + + using col_map_type = Kokkos::View; + using entries_type = Kokkos::View; + using row_block_type = Kokkos::View; + + entries_type entries; + col_map_type col_map; + + //! Construct an empty view. + KOKKOS_INLINE_FUNCTION + StaticCcsGraph() : entries(), col_map() {} + + //! Copy constructor (shallow copy). + KOKKOS_INLINE_FUNCTION + StaticCcsGraph(const StaticCcsGraph& rhs) : entries(rhs.entries), col_map(rhs.col_map) {} + + template + KOKKOS_INLINE_FUNCTION StaticCcsGraph(const EntriesType& entries_, const ColMapType& col_map_) + : entries(entries_), col_map(col_map_) {} + + /** \brief Return number of columns in the graph + */ + KOKKOS_INLINE_FUNCTION + size_type numCols() const { + return (col_map.extent(0) != 0) ? col_map.extent(0) - static_cast(1) : static_cast(0); + } +}; + +} // namespace KokkosSparse + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 +namespace Kokkos { +template ::size_type> +using StaticCcsGraph = KokkosSparse::StaticCcsGraph; +} +#endif + +#endif diff --git a/sparse/src/KokkosSparse_StaticCrsGraph.hpp b/sparse/src/KokkosSparse_StaticCrsGraph.hpp new file mode 100644 index 0000000000..c0174129c9 --- /dev/null +++ b/sparse/src/KokkosSparse_StaticCrsGraph.hpp @@ -0,0 +1,562 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSSPARSE_STATICCRSGRAPH_HPP_ +#define KOKKOSSPARSE_STATICCRSGRAPH_HPP_ + +#include + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + +#define KOKKOS_IMPL_DO_NOT_WARN_INCLUDE_STATIC_CRS_GRAPH +#include +#undef KOKKOS_IMPL_DO_NOT_WARN_INCLUDE_STATIC_CRS_GRAPH + +namespace KokkosSparse { +using Kokkos::create_staticcrsgraph; +using Kokkos::GraphRowViewConst; +using Kokkos::maximum_entry; +using Kokkos::StaticCrsGraph; +} // namespace KokkosSparse + +#else + +namespace KokkosSparse { + +namespace Impl { +template +struct StaticCrsGraphBalancerFunctor { + using int_type = typename RowOffsetsType::non_const_value_type; + RowOffsetsType row_offsets; + RowBlockOffsetsType row_block_offsets; + + int_type cost_per_row, num_blocks; + + StaticCrsGraphBalancerFunctor(RowOffsetsType row_offsets_, RowBlockOffsetsType row_block_offsets_, + int_type cost_per_row_, int_type num_blocks_) + : row_offsets(row_offsets_), + row_block_offsets(row_block_offsets_), + cost_per_row(cost_per_row_), + num_blocks(num_blocks_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int_type& iRow) const { + const int_type num_rows = row_offsets.extent(0) - 1; + const int_type num_entries = row_offsets(num_rows); + const int_type total_cost = num_entries + num_rows * cost_per_row; + + const double cost_per_workset = 1.0 * total_cost / num_blocks; + + const int_type row_cost = row_offsets(iRow + 1) - row_offsets(iRow) + cost_per_row; + + int_type count = row_offsets(iRow + 1) + cost_per_row * iRow; + + if (iRow == num_rows - 1) row_block_offsets(num_blocks) = num_rows; + + if (true) { + int_type current_block = (count - row_cost - cost_per_row) / cost_per_workset; + int_type end_block = count / cost_per_workset; + + // Handle some corner cases for the last two blocks. 
+ if (current_block >= num_blocks - 2) { + if ((current_block == num_blocks - 2) && (count >= (current_block + 1) * cost_per_workset)) { + int_type row = iRow; + int_type cc = count - row_cost - cost_per_row; + int_type block = cc / cost_per_workset; + while ((block > 0) && (block == current_block)) { + cc = row_offsets(row) + row * cost_per_row; + block = cc / cost_per_workset; + row--; + } + if ((count - cc - row_cost - cost_per_row) < num_entries - row_offsets(iRow + 1)) { + row_block_offsets(current_block + 1) = iRow + 1; + } else { + row_block_offsets(current_block + 1) = iRow; + } + } + } else { + if ((count >= (current_block + 1) * cost_per_workset) || (iRow + 2 == int_type(row_offsets.extent(0)))) { + if (end_block > current_block + 1) { + int_type num_block = end_block - current_block; + row_block_offsets(current_block + 1) = iRow; + for (int_type block = current_block + 2; block <= end_block; block++) + if ((block < current_block + 2 + (num_block - 1) / 2)) + row_block_offsets(block) = iRow; + else + row_block_offsets(block) = iRow + 1; + } else { + row_block_offsets(current_block + 1) = iRow + 1; + } + } + } + } + } +}; +} // namespace Impl + +/// \class GraphRowViewConst +/// \brief View of a row of a sparse graph. +/// \tparam GraphType Sparse graph type, such as (but not limited to) +/// StaticCrsGraph. +/// +/// This class provides a generic view of a row of a sparse graph. +/// We intended this class to view a row of a StaticCrsGraph, but +/// GraphType need not necessarily be CrsMatrix. +/// +/// The row view is suited for computational kernels like sparse +/// matrix-vector multiply, as well as for modifying entries in the +/// sparse matrix. The view is always const as it does not allow graph +/// modification. 
+/// +/// Here is an example loop over the entries in the row: +/// \code +/// using ordinal_type = typename GraphRowViewConst::ordinal_type; +/// +/// GraphRowView G_i = ...; +/// const ordinal_type numEntries = G_i.length; +/// for (ordinal_type k = 0; k < numEntries; ++k) { +/// ordinal_type j = G_i.colidx (k); +/// // ... do something with A_ij and j ... +/// } +/// \endcode +/// +/// GraphType must provide the \c data_type +/// aliases. In addition, it must make sense to use GraphRowViewConst to +/// view a row of GraphType. In particular, column +/// indices of a row must be accessible using the entries +/// resp. colidx arrays given to the constructor of this +/// class, with a constant stride between successive entries. +/// The stride is one for the compressed sparse row storage format (as +/// is used by CrsMatrix), but may be greater than one for other +/// sparse matrix storage formats (e.g., ELLPACK or jagged diagonal). +template +struct GraphRowViewConst { + //! The type of the column indices in the row. + using ordinal_type = const typename GraphType::data_type; + + private: + //! Array of (local) column indices in the row. + ordinal_type* colidx_; + /// \brief Stride between successive entries in the row. + /// + /// For compressed sparse row (CSR) storage, this is always one. + /// This might be greater than one for storage formats like ELLPACK + /// or Jagged Diagonal. Nevertheless, the stride can never be + /// greater than the number of rows or columns in the matrix. Thus, + /// \c ordinal_type is the correct type. + const ordinal_type stride_; + + public: + /// \brief Constructor + /// + /// \param colidx_in [in] Array of the row's column indices. + /// \param stride [in] (Constant) stride between matrix entries in + /// each of the above arrays. + /// \param count [in] Number of entries in the row. 
+ KOKKOS_INLINE_FUNCTION + GraphRowViewConst(ordinal_type* const colidx_in, const ordinal_type& stride, const ordinal_type& count) + : colidx_(colidx_in), stride_(stride), length(count) {} + + /// \brief Constructor with offset into \c colidx array + /// + /// \param colidx_in [in] Array of the row's column indices. + /// \param stride [in] (Constant) stride between matrix entries in + /// each of the above arrays. + /// \param count [in] Number of entries in the row. + /// \param idx [in] Start offset into \c colidx array + /// + /// \tparam OffsetType The type of \c idx (see above). Must be a + /// built-in integer type. This may differ from ordinal_type. + /// For example, the matrix may have dimensions that fit in int, + /// but a number of entries that does not fit in int. + template + KOKKOS_INLINE_FUNCTION GraphRowViewConst(const typename GraphType::entries_type& colidx_in, + const ordinal_type& stride, const ordinal_type& count, const OffsetType& idx, + const std::enable_if_t, int>& = 0) + : colidx_(&colidx_in(idx)), stride_(stride), length(count) {} + + /// \brief Number of entries in the row. + /// + /// This is a public const field rather than a public const method, + /// in order to avoid possible overhead of a method call if the + /// compiler is unable to inline that method call. + /// + /// We assume that rows contain no duplicate entries (i.e., entries + /// with the same column index). Thus, a row may have up to + /// A.numCols() entries. This means that the correct type of + /// 'length' is ordinal_type. + const ordinal_type length; + + /// \brief (Const) reference to the column index of entry i in this + /// row of the sparse matrix. + /// + /// "Entry i" is not necessarily the entry with column index i, nor + /// does i necessarily correspond to the (local) row index. 
+ KOKKOS_INLINE_FUNCTION + ordinal_type& colidx(const ordinal_type& i) const { return colidx_[i * stride_]; } + + /// \brief An alias for colidx + KOKKOS_INLINE_FUNCTION + ordinal_type& operator()(const ordinal_type& i) const { return colidx(i); } +}; + +/// \class StaticCrsGraph +/// \brief Compressed row storage array. +/// +/// \tparam DataType The type of stored entries. If a StaticCrsGraph is +/// used as the graph of a sparse matrix, then this is usually an +/// integer type, the type of the column indices in the sparse +/// matrix. +/// +/// \tparam Arg1Type The second template parameter, corresponding +/// either to the Device type (if there are no more template +/// parameters) or to the Layout type (if there is at least one more +/// template parameter). +/// +/// \tparam Arg2Type The third template parameter, which if provided +/// corresponds to the Device type. +/// +/// \tparam Arg3Type The fourth template parameter, which if provided +/// corresponds to the MemoryTraits. +/// +/// \tparam SizeType The type of row offsets. Usually the default +/// parameter suffices. However, setting a nondefault value is +/// necessary in some cases, for example, if you want to have +/// sparse matrices with dimensions (and therefore column indices) +/// that fit in \c int, but want to store more than INT_MAX +/// entries in the sparse matrix. +/// +/// A row has a range of entries: +///
<ul>
+/// <li> <code> row_map[i0] <= entry < row_map[i0+1] </code> </li>
+/// <li> <code> 0 <= i1 < row_map[i0+1] - row_map[i0] </code> </li>
+/// <li> <code> entries( entry , i2 , i3 , ... ); </code> </li>
+/// <li> <code> entries( row_map[i0] + i1 , i2 , i3 , ... ); </code> </li>
+/// </ul>
+template ::size_type> +class StaticCrsGraph { + private: + using traits = Kokkos::ViewTraits; + + public: + using data_type = DataType; + using array_layout = typename traits::array_layout; + using execution_space = typename traits::execution_space; + using device_type = typename traits::device_type; + using memory_traits = typename traits::memory_traits; + using size_type = SizeType; + + using staticcrsgraph_type = StaticCrsGraph; + using HostMirror = + StaticCrsGraph; + + using row_map_type = Kokkos::View; + using entries_type = Kokkos::View; + using row_block_type = Kokkos::View; + + entries_type entries; + row_map_type row_map; + row_block_type row_block_offsets; + + //! Construct an empty view. + KOKKOS_INLINE_FUNCTION + StaticCrsGraph() : entries(), row_map(), row_block_offsets() {} + + //! Copy constructor (shallow copy). + KOKKOS_INLINE_FUNCTION + StaticCrsGraph(const StaticCrsGraph& rhs) + : entries(rhs.entries), row_map(rhs.row_map), row_block_offsets(rhs.row_block_offsets) {} + + template + KOKKOS_INLINE_FUNCTION StaticCrsGraph(const EntriesType& entries_, const RowMapType& row_map_) + : entries(entries_), row_map(row_map_), row_block_offsets() {} + + /** \brief Assign to a view of the rhs array. + * If the old view is the last view + * then allocated memory is deallocated. + */ + KOKKOS_INLINE_FUNCTION + StaticCrsGraph& operator=(const StaticCrsGraph& rhs) { + entries = rhs.entries; + row_map = rhs.row_map; + row_block_offsets = rhs.row_block_offsets; + return *this; + } + + /** \brief Destroy this view of the array. + * If the last view then allocated memory is deallocated. + */ + KOKKOS_DEFAULTED_FUNCTION + ~StaticCrsGraph() = default; + + /** \brief Return number of rows in the graph + */ + KOKKOS_INLINE_FUNCTION + size_type numRows() const { + return (row_map.extent(0) != 0) ? 
row_map.extent(0) - static_cast(1) : static_cast(0); + } + + KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const { + return (row_map.is_allocated() && entries.is_allocated()); + } + + /// \brief Return a const view of row i of the graph. + /// + /// If row i does not belong to the graph, return an empty view. + /// + /// The returned object \c view implements the following interface: + ///
<ul>
+ /// <li> \c view.length is the number of entries in the row </li>
+ /// <li> \c view.colidx(k) returns a const reference to the
+ /// column index of the k-th entry in the row </li>
+ /// </ul>
+ /// k is not a column index; it just counts from 0 to + /// view.length - 1. + /// + /// Users should not rely on the return type of this method. They + /// should instead assign to 'auto'. That allows compile-time + /// polymorphism for different kinds of sparse matrix formats (e.g., + /// ELLPACK or Jagged Diagonal) that we may wish to support in the + /// future. + KOKKOS_INLINE_FUNCTION + GraphRowViewConst rowConst(const data_type i) const { + const size_type start = row_map(i); + // count is guaranteed to fit in ordinal_type, as long as no row + // has duplicate entries. + const data_type count = static_cast(row_map(i + 1) - start); +/* An empty row yields a view constructed from nullptr with a zero count; otherwise the view is built over entries beginning at offset start. */ + if (count == 0) { + return GraphRowViewConst(nullptr, 1, 0); + } else { + return GraphRowViewConst(entries, 1, count, start); + } + } + + /** \brief Create a row partitioning into a given number of blocks + * balancing non-zeros + a fixed cost per row. The offsets view holds one more entry than num_blocks; fix_cost_per_row defaults to 4 and is forwarded to the balancer functor. + */ + void create_block_partitioning(size_type num_blocks, size_type fix_cost_per_row = 4) { + Kokkos::View block_offsets("StatisCrsGraph::load_balance_offsets", + num_blocks + 1); +/* NOTE(review): the view label above is spelled "StatisCrsGraph" - presumably a typo for "StaticCrsGraph"; left unchanged because it is a runtime string. */ + Impl::StaticCrsGraphBalancerFunctor > partitioner( + row_map, block_offsets, fix_cost_per_row, num_blocks); +/* Run the balancer over the range 0 .. numRows(), then fence the execution space before the offsets are stored. NOTE(review): both labels below still carry the "Kokkos::" prefix although this header closes namespace KokkosSparse - confirm whether that is intentional. */ + Kokkos::parallel_for("Kokkos::StaticCrsGraph::create_block_partitioning", + Kokkos::RangePolicy(0, numRows()), partitioner); + typename device_type::execution_space().fence( + "Kokkos::StaticCrsGraph::create_block_partitioning:: fence after " + "partition"); + + row_block_offsets = block_offsets; + } +}; + +//---------------------------------------------------------------------------- +/* Declarations of the two create_staticcrsgraph overloads defined further down: one takes a flat std::vector, the other a std::vector of std::vector. */ +template +typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(const std::string& label, + const std::vector& input); + +template +typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph( + const std::string& label, const std::vector >& input); + +//---------------------------------------------------------------------------- +/* Declarations of the host-mirror helpers create_mirror_view and create_mirror for StaticCrsGraph. */ +template +typename 
StaticCrsGraph::HostMirror create_mirror_view( + const StaticCrsGraph& input); + +template +typename StaticCrsGraph::HostMirror create_mirror( + const StaticCrsGraph& input); + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +/* create_mirror_view overload that returns the input graph itself without copying. NOTE(review): the enable_if condition is incomplete in this text; presumably this is the case where the graph is already host accessible - confirm. */ +template +inline typename StaticCrsGraph::HostMirror create_mirror_view( + const StaticCrsGraph& view, + std::enable_if_t::is_hostspace>* = 0) { + return view; +} +/* create_mirror: builds a HostMirror graph by creating mirrors of row_map, entries and row_block_offsets and deep-copying all three from the input graph. */ +template +inline typename StaticCrsGraph::HostMirror create_mirror( + const StaticCrsGraph& view) { + // Force copy: + // using alloc = Impl::ViewAssignment; // unused + using staticcrsgraph_type = StaticCrsGraph; + + typename staticcrsgraph_type::HostMirror tmp; + typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map = create_mirror(view.row_map); + typename staticcrsgraph_type::row_block_type::HostMirror tmp_row_block_offsets = + create_mirror(view.row_block_offsets); + + // Allocation to match: + tmp.row_map = tmp_row_map; // Assignment of 'const' from 'non-const' + tmp.entries = create_mirror(view.entries); + tmp.row_block_offsets = tmp_row_block_offsets; // Assignment of 'const' from 'non-const' + + // Deep copy: + deep_copy(tmp_row_map, view.row_map); + deep_copy(tmp.entries, view.entries); + deep_copy(tmp_row_block_offsets, view.row_block_offsets); + + return tmp; +} +/* create_mirror_view overload that always delegates to create_mirror, i.e. returns a copy. NOTE(review): its enable_if condition is likewise incomplete here; presumably this is the non-host case - confirm. */ +template +inline typename StaticCrsGraph::HostMirror create_mirror_view( + const StaticCrsGraph& view, + std::enable_if_t::is_hostspace>* = 0) { + return create_mirror(view); +} +/* create_staticcrsgraph (flat input): input[i] is used as the entry count of row i. The running-sum row map is built on a host mirror and copied into row_work, and entries is allocated with the total count under the given label; the entries themselves are not filled here. */ +template +inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(const std::string& label, + const std::vector& input) { + using output_type = StaticCrsGraphType; + using entries_type = typename output_type::entries_type; + using work_type = Kokkos::View; + + output_type output; + + // Create the row map: + + const size_t length = input.size(); + + { + work_type row_work("tmp", 
length + 1); + + typename work_type::HostMirror row_work_host = create_mirror_view(row_work); + + size_t sum = 0; + row_work_host[0] = 0; + for (size_t i = 0; i < length; ++i) { + row_work_host[i + 1] = sum += input[i]; + } + + deep_copy(row_work, row_work_host); + + output.entries = entries_type(label, sum); + output.row_map = row_work; + } + + return output; +} + +//---------------------------------------------------------------------------- +/* create_staticcrsgraph (nested input): input[i] lists the entries of row i. The row map is the running sum of input[i].size(); the entries are then written row by row into a mirror of output.entries and deep-copied back. A static_assert requires entries_type::rank == 1. */ +template +inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph( + const std::string& label, const std::vector >& input) { + using output_type = StaticCrsGraphType; + using entries_type = typename output_type::entries_type; + + static_assert(entries_type::rank == 1, "Graph entries view must be rank one"); + + using work_type = Kokkos::View; + + output_type output; + + // Create the row map: + + const size_t length = input.size(); + + { + work_type row_work("tmp", length + 1); + + typename work_type::HostMirror row_work_host = create_mirror_view(row_work); + + size_t sum = 0; + row_work_host[0] = 0; + for (size_t i = 0; i < length; ++i) { + row_work_host[i + 1] = sum += input[i].size(); + } + + deep_copy(row_work, row_work_host); + + output.entries = entries_type(label, sum); + output.row_map = row_work; + } + + // Fill in the entries: + { + typename entries_type::HostMirror host_entries = create_mirror_view(output.entries); + + size_t sum = 0; + for (size_t i = 0; i < length; ++i) { + for (size_t j = 0; j < input[i].size(); ++j, ++sum) { + host_entries(sum) = input[i][j]; + } + } + + deep_copy(output.entries, host_entries); + } + + return output; +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Impl { +/* Reduction functor used by maximum_entry: init sets the running value to 0, operator() raises it to entries(i) when that is larger, and join keeps the larger of two partial results. */ +template +struct StaticCrsGraphMaximumEntry { + using execution_space = typename GraphType::execution_space; + using value_type = typename GraphType::data_type; + + const 
typename GraphType::entries_type entries; + + StaticCrsGraphMaximumEntry(const GraphType& graph) : entries(graph.entries) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const unsigned i, value_type& update) const { + if (update < entries(i)) update = entries(i); + } + + KOKKOS_INLINE_FUNCTION + void init(value_type& update) const { update = 0; } + + KOKKOS_INLINE_FUNCTION + void join(value_type& update, const value_type& input) const { + if (update < input) update = input; + } +}; + +} // namespace Impl +/* maximum_entry: runs a parallel_reduce over graph.entries.extent(0) indices with StaticCrsGraphMaximumEntry; result is initialised to 0 and returned after the reduction. */ +template +DataType maximum_entry(const StaticCrsGraph& graph) { + using GraphType = StaticCrsGraph; + using FunctorType = Impl::StaticCrsGraphMaximumEntry; + + DataType result = 0; + Kokkos::parallel_reduce("Kokkos::maximum_entry", graph.entries.extent(0), FunctorType(graph), result); + return result; +} + +} // namespace KokkosSparse + +#endif + +#endif diff --git a/sparse/unit_test/Test_Sparse.hpp b/sparse/unit_test/Test_Sparse.hpp index 3663122e92..08133b54e6 100644 --- a/sparse/unit_test/Test_Sparse.hpp +++ b/sparse/unit_test/Test_Sparse.hpp @@ -43,6 +43,7 @@ #include "Test_Sparse_crs2ccs.hpp" #include "Test_Sparse_removeCrsMatrixZeros.hpp" #include "Test_Sparse_extractCrsDiagonalBlocks.hpp" +#include "Test_Sparse_StaticCrsGraph.hpp" // TPL specific tests, these require // particular pairs of backend and TPL diff --git a/sparse/unit_test/Test_Sparse_StaticCrsGraph.hpp b/sparse/unit_test/Test_Sparse_StaticCrsGraph.hpp new file mode 100644 index 0000000000..263c772031 --- /dev/null +++ b/sparse/unit_test/Test_Sparse_StaticCrsGraph.hpp @@ -0,0 +1,260 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +#include + +#include +#include + +/*--------------------------------------------------------------------------*/ +namespace Test { +namespace TestStaticCrsGraph { +/* run_test_graph: builds a 1000-row input with 8 entries per row (entry j of row i is i plus 3*j), checks is_allocated() on a default-constructed, an assigned, a copy-constructed and an (entries, row_map)-constructed graph, then verifies the host mirror's row_map, entries and rowConst views against the input. */ +template +void run_test_graph() { + using dView = KokkosSparse::StaticCrsGraph; + using hView = typename dView::HostMirror; + + const unsigned LENGTH = 1000; + + std::vector > graph(LENGTH); + + for (size_t i = 0; i < LENGTH; ++i) { + graph[i].reserve(8); + for (size_t j = 0; j < 8; ++j) { + graph[i].push_back(i + j * 3); + } + } + + { + dView d1; + ASSERT_FALSE(d1.is_allocated()); + + d1 = KokkosSparse::create_staticcrsgraph("d1", graph); + + dView d2(d1); + dView d3(d1.entries, d1.row_map); + + ASSERT_TRUE(d1.is_allocated()); + ASSERT_TRUE(d2.is_allocated()); + ASSERT_TRUE(d3.is_allocated()); + } + + dView dx; + hView hx; + + dx = KokkosSparse::create_staticcrsgraph("dx", graph); + hx = create_mirror(dx); + + ASSERT_EQ(hx.row_map.extent(0) - 1, LENGTH); + + for (size_t i = 0; i < LENGTH; ++i) { + const size_t begin = hx.row_map[i]; + const size_t n = hx.row_map[i + 1] - begin; + ASSERT_EQ(n, graph[i].size()); + for (size_t j = 0; j < n; ++j) { + ASSERT_EQ((int)hx.entries(j + begin), graph[i][j]); + } + } + + // Test row view access + for (size_t i = 0; i < LENGTH; ++i) { + auto rowView = hx.rowConst(i); + ASSERT_EQ(rowView.length, graph[i].size()); + for (size_t j = 0; j < rowView.length; ++j) { + ASSERT_EQ(rowView.colidx(j), (size_t)graph[i][j]); + ASSERT_EQ(rowView(j), (size_t)graph[i][j]); + } + } +} +/* run_test_graph2: creates a graph from 10 row sizes (6 plus i mod 4), checks the extents of the device graph and of two host mirrors (extent(1) of entries is expected to be 3), writes three values per entry through the first mirror, and verifies them in the second mirror after copying host to device and device to host. */ +template +void run_test_graph2() { + using dView = KokkosSparse::StaticCrsGraph; + using hView = typename dView::HostMirror; + + const unsigned LENGTH = 10; + + std::vector sizes(LENGTH); + + size_t total_length = 0; + + for (size_t i = 0; i < LENGTH; ++i) { + total_length += (sizes[i] = 6 + i % 4); + } + + dView dx = 
KokkosSparse::create_staticcrsgraph("test", sizes); + hView hx = create_mirror(dx); + hView mx = create_mirror(dx); + + ASSERT_EQ((size_t)dx.row_map.extent(0), (size_t)LENGTH + 1); + ASSERT_EQ((size_t)hx.row_map.extent(0), (size_t)LENGTH + 1); + ASSERT_EQ((size_t)mx.row_map.extent(0), (size_t)LENGTH + 1); + + ASSERT_EQ((size_t)dx.entries.extent(0), (size_t)total_length); + ASSERT_EQ((size_t)hx.entries.extent(0), (size_t)total_length); + ASSERT_EQ((size_t)mx.entries.extent(0), (size_t)total_length); + + ASSERT_EQ((size_t)dx.entries.extent(1), (size_t)3); + ASSERT_EQ((size_t)hx.entries.extent(1), (size_t)3); + ASSERT_EQ((size_t)mx.entries.extent(1), (size_t)3); + + for (size_t i = 0; i < LENGTH; ++i) { + const size_t entry_begin = hx.row_map[i]; + const size_t entry_end = hx.row_map[i + 1]; + for (size_t j = entry_begin; j < entry_end; ++j) { + hx.entries(j, 0) = j + 1; + hx.entries(j, 1) = j + 2; + hx.entries(j, 2) = j + 3; + } + } + + Kokkos::deep_copy(dx.entries, hx.entries); + Kokkos::deep_copy(mx.entries, dx.entries); + + ASSERT_EQ(mx.row_map.extent(0), (size_t)LENGTH + 1); + + for (size_t i = 0; i < LENGTH; ++i) { + const size_t entry_begin = mx.row_map[i]; + const size_t entry_end = mx.row_map[i + 1]; + ASSERT_EQ((entry_end - entry_begin), sizes[i]); + for (size_t j = entry_begin; j < entry_end; ++j) { + ASSERT_EQ((size_t)mx.entries(j, 0), (j + 1)); + ASSERT_EQ((size_t)mx.entries(j, 1), (j + 2)); + ASSERT_EQ((size_t)mx.entries(j, 2), (j + 3)); + } + } +} +/* run_test_graph3: 2000 rows with sizes drawn from rand() % 1000 after srand(10310), rows 1 and 1998 forced to N; partitions the graph into B blocks with fixed row cost C = 0 and asserts that no block holding more than one row exceeds twice the average per-block load. */ +template +void run_test_graph3(size_t B, size_t N) { + srand(10310); + + using dView = KokkosSparse::StaticCrsGraph; + using hView = typename dView::HostMirror; + + const unsigned LENGTH = 2000; + + std::vector sizes(LENGTH); + + for (size_t i = 0; i < LENGTH; ++i) { + sizes[i] = rand() % 1000; + } + + sizes[1] = N; + sizes[1998] = N; + + int C = 0; + dView dx = KokkosSparse::create_staticcrsgraph("test", sizes); + dx.create_block_partitioning(B, C); + hView hx = create_mirror(dx); + + for (size_t i = 0; i 
< B; i++) { + size_t ne = 0; + for (auto j = hx.row_block_offsets(i); j < hx.row_block_offsets(i + 1); j++) + ne += hx.row_map(j + 1) - hx.row_map(j) + C; + + ASSERT_FALSE((ne > 2 * ((hx.row_map(hx.numRows()) + C * hx.numRows()) / B)) && + (hx.row_block_offsets(i + 1) > hx.row_block_offsets(i) + 1)); + } +} +/* run_test_graph4: wraps raw host arrays in unmanaged views, copies them into managed device views, points an unmanaged StaticCrsGraph at the device data, and ends with four std::is_same_v checks. NOTE(review): the is_same_v arguments are not visible in this text, so what exactly is compared cannot be stated here. */ +template +void run_test_graph4() { + using ordinal_type = unsigned int; + using layout_type = Kokkos::LayoutRight; + using space_type = Space; + using memory_traits_type = Kokkos::MemoryUnmanaged; + using dView = KokkosSparse::StaticCrsGraph; + using hView = typename dView::HostMirror; + + dView dx; + + // StaticCrsGraph with Unmanaged trait will contain row_map and entries + // members with the Unmanaged memory trait. Use of such a StaticCrsGraph + // requires an allocation of memory for the unmanaged views to wrap. + // + // In this test, a graph (via raw arrays) resides on the host. + // The pointers are wrapped by unmanaged Views. + // To make use of this on the device, managed device Views are created + // (allocation required), and data from the unmanaged host views is deep + // copied to the device Views. Unmanaged views of the appropriate type wrap the + // device data and are assigned to their corresponding unmanaged view members + // of the unmanaged StaticCrsGraph. + + // Data types for raw pointers storing StaticCrsGraph info + using ptr_row_map_type = typename dView::size_type; + using ptr_entries_type = typename dView::data_type; + + const ordinal_type numRows = 8; + const ordinal_type nnz = 24; + ptr_row_map_type ptrRaw[] = {0, 4, 8, 10, 12, 14, 16, 20, 24}; + ptr_entries_type indRaw[] = {0, 1, 4, 5, 0, 1, 4, 5, 2, 3, 2, 3, 4, 5, 4, 5, 2, 3, 6, 7, 2, 3, 6, 7}; + + // Wrap pointers in unmanaged host views + using local_row_map_type = typename hView::row_map_type; + using local_entries_type = typename hView::entries_type; + local_row_map_type unman_row_map(&(ptrRaw[0]), numRows + 1); + local_entries_type unman_entries(&(indRaw[0]), nnz); + + hView 
hx; + hx = hView(unman_entries, unman_row_map); + + // Create the device Views for copying the host arrays into + // An allocation is needed on the device for the unmanaged StaticCrsGraph to + // wrap the pointer + using d_row_map_view_type = typename Kokkos::View; + using d_entries_view_type = typename Kokkos::View; + + d_row_map_view_type tmp_row_map("tmp_row_map", numRows + 1); + d_entries_view_type tmp_entries("tmp_entries", nnz); + + Kokkos::deep_copy(tmp_row_map, unman_row_map); + Kokkos::deep_copy(tmp_entries, unman_entries); + + // Wrap the pointer in unmanaged View and assign to the corresponding + // StaticCrsGraph member + dx.row_map = typename dView::row_map_type(tmp_row_map.data(), numRows + 1); + dx.entries = typename dView::entries_type(tmp_entries.data(), nnz); + + ASSERT_TRUE((std::is_same_v)); + ASSERT_TRUE((std::is_same_v)); + ASSERT_TRUE((std::is_same_v)); + ASSERT_TRUE((std::is_same_v)); +} + +} /* namespace TestStaticCrsGraph */ +/* Runs all four graph tests; run_test_graph3 is exercised for B in {1, 3, 75} and N in {0, 1000, 10000, 100000}. */ +TEST_F(TestCategory, staticcrsgraph) { + TestStaticCrsGraph::run_test_graph(); + TestStaticCrsGraph::run_test_graph2(); + TestStaticCrsGraph::run_test_graph3(1, 0); + TestStaticCrsGraph::run_test_graph3(1, 1000); + TestStaticCrsGraph::run_test_graph3(1, 10000); + TestStaticCrsGraph::run_test_graph3(1, 100000); + TestStaticCrsGraph::run_test_graph3(3, 0); + TestStaticCrsGraph::run_test_graph3(3, 1000); + TestStaticCrsGraph::run_test_graph3(3, 10000); + TestStaticCrsGraph::run_test_graph3(3, 100000); + TestStaticCrsGraph::run_test_graph3(75, 0); + TestStaticCrsGraph::run_test_graph3(75, 1000); + TestStaticCrsGraph::run_test_graph3(75, 10000); + TestStaticCrsGraph::run_test_graph3(75, 100000); + TestStaticCrsGraph::run_test_graph4(); +} +} // namespace Test