diff --git a/source/algorithms.tex b/source/algorithms.tex index ecfa423aa1..ff4c3b38ed 100644 --- a/source/algorithms.tex +++ b/source/algorithms.tex @@ -297,7 +297,7 @@ by invoking the following functions: \begin{itemize} \item - All operations of the categories of the iterators + All operations of the categories of the iterators or \tcode{mdspan} types that the algorithm is instantiated with. \item Operations on those sequence elements that are required by its specification. @@ -369,7 +369,8 @@ \tcode{UnaryOperation}, \tcode{BinaryOperation}, \tcode{BinaryOperation1}, -\tcode{BinaryOperation2}, and +\tcode{BinaryOperation2}, +\tcode{BinaryDivideOp}, and the operators used by the analogous overloads to these parallel algorithms that are formed by an invocation with the specified default predicate or operation (where applicable) diff --git a/source/back.tex b/source/back.tex index 1107b0c2b4..335f834a90 100644 --- a/source/back.tex +++ b/source/back.tex @@ -1,48 +1,72 @@ %!TEX root = std.tex -\chapter{Bibliography} - -\begin{itemize} -\renewcommand{\labelitemi}{---} +\begin{thebibliography}{99} % ISO documents in numerical order. -\item +\bibitem{iso4217} ISO 4217:2015, \doccite{Codes for the representation of currencies} -\item +\bibitem{iso10967-1} ISO/IEC 10967-1:2012, \doccite{Information technology --- Language independent arithmetic --- Part 1: Integer and floating point arithmetic} -\item +\bibitem{iso18661-3} ISO/IEC TS 18661-3:2015, \doccite{Information Technology --- Programming languages, their environments, and system software interfaces --- Floating-point extensions for C --- Part 3: Interchange and extended types} % Other international standards. -\item +\bibitem{iana-charset} IANA Character Sets Database. Available from:\newline \url{https://www.iana.org/assignments/character-sets/}, 2021-04-01 -\item +\bibitem{iana-tz} IANA Time Zone Database. Available from: \url{https://www.iana.org/time-zones} -\item +\bibitem{unicode-charmap} Unicode Character Mapping Markup Language [online]. Edited by Mark Davis and Markus Scherer. Revision 5.0.1; 2017-05-31 Available from: \url{http://www.unicode.org/reports/tr22/tr22-8.html} % Literature references. -\item +\bibitem{cpp-r} Bjarne Stroustrup, \doccite{The \Cpp{} Programming Language, second edition}, Chapter R\@. Addison-Wesley Publishing Company, ISBN 0-201-53992-6, copyright \copyright 1991 AT\&T -\item +\bibitem{kr} Brian W.\ Kernighan and Dennis M. Ritchie, \doccite{The C Programming Language}, Appendix A\@. Prentice-Hall, 1978, ISBN 0-13-110163-3, copyright \copyright 1978 AT\&T -\item +\bibitem{cpp-lib} P.J.\ Plauger, \doccite{The Draft Standard \Cpp{} Library}. Prentice-Hall, ISBN 0-13-117003-1, copyright \copyright 1995 P.J.\ Plauger -\end{itemize} +\bibitem{linalg-stable} + J.\ Demmel, I.\ Dumitriu, and O.\ Holtz, + \doccite{Fast linear algebra is stable}, + Numerische Mathematik 108 (59--91), 2007. +\bibitem{blas1} + C.\,L.\ Lawson, R.\,J.\ Hanson, D.\ Kincaid, and F.\,T.\ Krogh, + \doccite{Basic linear algebra subprograms for Fortran usage}. + ACM Trans.\ Math.\ Soft., Vol.\ 5, pp.\ 308--323, 1979. +\bibitem{blas2} + Jack J.\ Dongarra, Jeremy Du Croz, Sven Hammarling, and Richard J. Hanson, + \doccite{An Extended Set of FORTRAN Basic Linear Algebra Subprograms}. + ACM Trans.\ Math.\ Soft., Vol.\ 14, No.\ 1, pp.\ 1--17, Mar.\ 1988. +\bibitem{blas3} + Jack J.\ Dongarra, Jeremy Du Croz, Sven Hammarling, and Iain Duff, + \doccite{A Set of Level 3 Basic Linear Algebra Subprograms}. + ACM Trans.\ Math.\ Soft., Vol.\ 16, No.\ 1, pp.\ 1--17, Mar.\ 1990. +\bibitem{lapack} + E.\ Anderson, Z.\ Bai, C.\ Bischof, S.\ Blackford, J.\ Demmel, J.\ Dongarra, + J.\ Du Croz, A.\ Greenbaum, S.\ Hammarling, A.\ McKenney, D.\ Sorensen + \doccite{LAPACK Users' Guide, Third Edition}. + SIAM, Philadelphia, PA, USA, 1999. +\bibitem{blas-std} + L. Susan Blackford, Ames Demmel, Jack Dongarra, Iain Duff, Sven Hammarling, + Greg Henry, Michael Heroux, Linda Kaufman, Andrew Lumbsdaine, Antoine Petitet, + Roldan Pozo, Karin Remington, R. Client Whaley + \doccite{An Updated Set of Basic Linear Algebra Subprograms (BLAS)}. + ACM Trans.\ Math.\ Soft., Vol.\ 28, Issue 2, 2002. +\end{thebibliography} The arithmetic specification described in ISO/IEC 10967-1:2012 is called \defn{LIA-1} in this document. diff --git a/source/compatibility.tex b/source/compatibility.tex index efab68e4b3..4e1948add1 100644 --- a/source/compatibility.tex +++ b/source/compatibility.tex @@ -84,6 +84,7 @@ The following \Cpp{} headers are new: \libheaderref{debugging}, \libheaderrefx{hazard_pointer}{hazard.pointer.syn}, +\libheaderref{linalg}, \libheaderref{rcu}, and \libheaderrefx{text_encoding}{text.encoding.syn}. Valid \CppXXIII{} code that \tcode{\#include}{s} headers with these names may be diff --git a/source/lib-intro.tex b/source/lib-intro.tex index ca4da930fd..0594db6039 100644 --- a/source/lib-intro.tex +++ b/source/lib-intro.tex @@ -1156,6 +1156,7 @@ \tcode{} \\ \tcode{} \\ \tcode{} \\ +\tcode{} \\ \tcode{} \\ \tcode{} \\ \tcode{} \\ diff --git a/source/locales.tex b/source/locales.tex index 962d0acb01..b196816b96 100644 --- a/source/locales.tex +++ b/source/locales.tex @@ -4296,7 +4296,8 @@ \begin{note} For specializations where the second template parameter is \tcode{true}, this is typically four characters long: -a three-letter code as specified by ISO 4217 followed by a space. +a three-letter code as specified by ISO 4217\supercite{iso4217} +followed by a space. \end{note} \end{itemdescr} @@ -4676,7 +4677,7 @@ \pnum The class \tcode{text_encoding} describes an interface -for accessing the IANA Character Sets registry. +for accessing the IANA Character Sets registry\supercite{iana-charset}. \indexlibraryglobal{text_encoding}% \begin{codeblock} @@ -5002,7 +5003,7 @@ \begin{note} This comparison is identical to the ``Charset Alias Matching'' algorithm -described in the Unicode Technical Standard 22. +described in the Unicode Technical Standard 22\supercite{unicode-charmap}. \end{note} \begin{example} diff --git a/source/macros.tex b/source/macros.tex index bcf6c38964..079b61a329 100644 --- a/source/macros.tex +++ b/source/macros.tex @@ -120,6 +120,11 @@ \addtocounter{SectionDepth}{\value{SectionDepthBase}} \Sec{\arabic{SectionDepth}}[#2]{#3}} +%%-------------------------------------------------- +% Bibliography +%%-------------------------------------------------- +\newcommand{\supercite}[1]{\textsuperscript{\cite{#1}}} + %%-------------------------------------------------- % Indexing %%-------------------------------------------------- diff --git a/source/numerics.tex b/source/numerics.tex index 34e3ea944d..61c871082f 100644 --- a/source/numerics.tex +++ b/source/numerics.tex @@ -24,7 +24,8 @@ \ref{numarray} & Numeric arrays & \tcode{} \\ \rowsep \ref{c.math} & Mathematical functions for floating-point types & \tcode{}, \tcode{} \\ \rowsep -\ref{numbers} & Numbers & \tcode{} \\ +\ref{numbers} & Numbers & \tcode{} \\ \rowsep +\ref{linalg} & Linear algebra & \tcode{} \\ \end{libsumtab} \rSec1[numeric.requirements]{Numeric type requirements} @@ -10466,3 +10467,5100 @@ \pnum A program that instantiates a primary template of a mathematical constant variable template is ill-formed. + +\rSec1[linalg]{Basic linear algebra algorithms} + +\rSec2[linalg.overview]{Overview} + +\pnum +Subclause \ref{linalg} defines basic linear algebra algorithms. +The algorithms that access the elements of arrays +view those elements through \tcode{mdspan}\iref{views.multidim}. + +\rSec2[linalg.syn]{Header \tcode{} synopsis} + +\begin{codeblock} +namespace std::linalg { + // \ref{linalg.tags.order}, storage order tags + struct column_major_t; + inline constexpr column_major_t column_major; + struct row_major_t; + inline constexpr row_major_t row_major; + + // \ref{linalg.tags.triangle}, triangle tags + struct upper_triangle_t; + inline constexpr upper_triangle_t upper_triangle; + struct lower_triangle_t; + inline constexpr lower_triangle_t lower_triangle; + + // \ref{linalg.tags.diagonal}, diagonal tags + struct implicit_unit_diagonal_t; + inline constexpr implicit_unit_diagonal_t implicit_unit_diagonal; + struct explicit_diagonal_t; + inline constexpr explicit_diagonal_t explicit_diagonal; + + // \ref{linalg.layout.packed}, class template layout_blas_packed + template + class layout_blas_packed; + + // \ref{linalg.helpers}, exposition-only helpers + + // \ref{linalg.helpers.concepts}, linear algebra argument concepts + template + constexpr bool @\exposid{is-mdspan}@ = @\seebelow@; // \expos + + template + concept @\exposconcept{in-vector}@ = @\seebelow@; // \expos + + template + concept @\exposconcept{out-vector}@ = @\seebelow@; // \expos + + template + concept @\exposconcept{inout-vector}@ = @\seebelow@; // \expos + + template + concept @\exposconcept{in-matrix}@ = @\seebelow@; // \expos + + template + concept @\exposconcept{out-matrix}@ = @\seebelow@; // \expos + + template + concept @\exposconcept{inout-matrix}@ = @\seebelow@; // \expos + + template + concept @\exposconcept{possibly-packed-inout-matrix}@ = @\seebelow@; // \expos + + template + concept @\exposconcept{in-object}@ = @\seebelow@; // \expos + + template + concept @\exposconcept{out-object}@ = @\seebelow@; // \expos + + template + concept @\exposconcept{inout-object}@ = @\seebelow@; // \expos + + // \ref{linalg.scaled}, scaled in-place transformation + + // \ref{linalg.scaled.scaledaccessor}, class template \tcode{scaled_accessor} + template + class scaled_accessor; + + // \ref{linalg.scaled.scaled}, function template \tcode{scaled} + template + constexpr auto scaled(ScalingFactor alpha, mdspan x); + + // \ref{linalg.conj}, conjugated in-place transformation + + // \ref{linalg.conj.conjugatedaccessor}, class template \tcode{conjugated_accessor} + template + class conjugated_accessor; + + // \ref{linalg.conj.conjugated}, function template \tcode{conjugated} + template + constexpr auto conjugated(mdspan a); + + // \ref{linalg.transp}, transpose in-place transformation + + // \ref{linalg.transp.layout.transpose}, class template \tcode{layout_transpose} + template + class layout_transpose; + + // \ref{linalg.transp.transposed}, function template \tcode{transposed} + template + constexpr auto transposed(mdspan a); + + // \ref{linalg.conjtransposed}, conjugated transpose in-place transformation + template + constexpr auto conjugate_transposed(mdspan a); + + // \ref{linalg.algs.blas1}, BLAS 1 algorithms + + // \ref{linalg.algs.blas1.givens}, Givens rotations + + // \ref{linalg.algs.blas1.givens.lartg}, compute Givens rotation + + template + struct setup_givens_rotation_result { + Real c; + Real s; + Real r; + }; + template + struct setup_givens_rotation_result> { + Real c; + complex s; + complex r; + }; + + template + setup_givens_rotation_result setup_givens_rotation(Real a, Real b) noexcept; + + template + setup_givens_rotation_result> + setup_givens_rotation(complex a, complex b) noexcept; + + // \ref{linalg.algs.blas1.givens.rot}, apply computed Givens rotation + template<@\exposconcept{inout-vector}@ InOutVec1, @\exposconcept{inout-vector}@ InOutVec2, class Real> + void apply_givens_rotation(InOutVec1 x, InOutVec2 y, Real c, Real s); + template + void apply_givens_rotation(ExecutionPolicy&& exec, + InOutVec1 x, InOutVec2 y, Real c, Real s); + template<@\exposconcept{inout-vector}@ InOutVec1, @\exposconcept{inout-vector}@ InOutVec2, class Real> + void apply_givens_rotation(InOutVec1 x, InOutVec2 y, Real c, complex s); + template + void apply_givens_rotation(ExecutionPolicy&& exec, + InOutVec1 x, InOutVec2 y, Real c, complex s); + + // \ref{linalg.algs.blas1.swap}, swap elements + template<@\exposconcept{inout-object}@ InOutObj1, @\exposconcept{inout-object}@ InOutObj2> + void swap_elements(InOutObj1 x, InOutObj2 y); + template + void swap_elements(ExecutionPolicy&& exec, + InOutObj1 x, InOutObj2 y); + + // \ref{linalg.algs.blas1.scal}, multiply elements by scalar + template + void scale(Scalar alpha, InOutObj x); + template + void scale(ExecutionPolicy&& exec, + Scalar alpha, InOutObj x); + + // \ref{linalg.algs.blas1.copy}, copy elements + template<@\exposconcept{in-object}@ InObj, @\exposconcept{out-object}@ OutObj> + void copy(InObj x, OutObj y); + template + void copy(ExecutionPolicy&& exec, + InObj x, OutObj y); + + // \ref{linalg.algs.blas1.add}, add elementwise + template<@\exposconcept{in-object}@ InObj1, @\exposconcept{in-object}@ InObj2, @\exposconcept{out-object}@ OutObj> + void add(InObj1 x, InObj2 y, OutObj z); + template + void add(ExecutionPolicy&& exec, + InObj1 x, InObj2 y, OutObj z); + + // \ref{linalg.algs.blas1.dot}, dot product of two vectors + template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, class Scalar> + Scalar dot(InVec1 v1, InVec2 v2, Scalar init); + template + Scalar dot(ExecutionPolicy&& exec, + InVec1 v1, InVec2 v2, Scalar init); + template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2> + auto dot(InVec1 v1, InVec2 v2); + template + auto dot(ExecutionPolicy&& exec, + InVec1 v1, InVec2 v2); + + template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, class Scalar> + Scalar dotc(InVec1 v1, InVec2 v2, Scalar init); + template + Scalar dotc(ExecutionPolicy&& exec, + InVec1 v1, InVec2 v2, Scalar init); + template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2> + auto dotc(InVec1 v1, InVec2 v2); + template + auto dotc(ExecutionPolicy&& exec, + InVec1 v1, InVec2 v2); + + // \ref{linalg.algs.blas1.ssq}, scaled sum of squares of a vector's elements + template + struct sum_of_squares_result { + Scalar scaling_factor; + Scalar scaled_sum_of_squares; + }; + template<@\exposconcept{in-vector}@ InVec, class Scalar> + sum_of_squares_result + vector_sum_of_squares(InVec v, sum_of_squares_result init); + template + sum_of_squares_result + vector_sum_of_squares(ExecutionPolicy&& exec, + InVec v, sum_of_squares_result init); + + // \ref{linalg.algs.blas1.nrm2}, Euclidean norm of a vector + template<@\exposconcept{in-vector}@ InVec, class Scalar> + Scalar vector_two_norm(InVec v, Scalar init); + template + Scalar vector_two_norm(ExecutionPolicy&& exec, InVec v, Scalar init); + template<@\exposconcept{in-vector}@ InVec> + auto vector_two_norm(InVec v); + template + auto vector_two_norm(ExecutionPolicy&& exec, InVec v); + + // \ref{linalg.algs.blas1.asum}, sum of absolute values of vector elements + template<@\exposconcept{in-vector}@ InVec, class Scalar> + Scalar vector_abs_sum(InVec v, Scalar init); + template + Scalar vector_abs_sum(ExecutionPolicy&& exec, InVec v, Scalar init); + template<@\exposconcept{in-vector}@ InVec> + auto vector_abs_sum(InVec v); + template + auto vector_abs_sum(ExecutionPolicy&& exec, InVec v); + + // \ref{linalg.algs.blas1.iamax}, index of maximum absolute value of vector elements + template<@\exposconcept{in-vector}@ InVec> + typename InVec::extents_type vector_idx_abs_max(InVec v); + template + typename InVec::extents_type vector_idx_abs_max(ExecutionPolicy&& exec, InVec v); + + // \ref{linalg.algs.blas1.matfrobnorm}, Frobenius norm of a matrix + template<@\exposconcept{in-matrix}@ InMat, class Scalar> + Scalar matrix_frob_norm(InMat A, Scalar init); + template + Scalar matrix_frob_norm(ExecutionPolicy&& exec, InMat A, Scalar init); + template<@\exposconcept{in-matrix}@ InMat> + auto matrix_frob_norm(InMat A); + template + auto matrix_frob_norm(ExecutionPolicy&& exec, InMat A); + + // \ref{linalg.algs.blas1.matonenorm}, one norm of a matrix + template<@\exposconcept{in-matrix}@ InMat, class Scalar> + Scalar matrix_one_norm(InMat A, Scalar init); + template + Scalar matrix_one_norm(ExecutionPolicy&& exec, InMat A, Scalar init); + template<@\exposconcept{in-matrix}@ InMat> + auto matrix_one_norm(InMat A); + template + auto matrix_one_norm(ExecutionPolicy&& exec, InMat A); + + // \ref{linalg.algs.blas1.matinfnorm}, infinity norm of a matrix + template<@\exposconcept{in-matrix}@ InMat, class Scalar> + Scalar matrix_inf_norm(InMat A, Scalar init); + template + Scalar matrix_inf_norm(ExecutionPolicy&& exec, InMat A, Scalar init); + template<@\exposconcept{in-matrix}@ InMat> + auto matrix_inf_norm(InMat A); + template + auto matrix_inf_norm(ExecutionPolicy&& exec, InMat A); + + // \ref{linalg.algs.blas2}, BLAS 2 algorithms + + // \ref{linalg.algs.blas2.gemv}, general matrix-vector product + template<@\exposconcept{in-matrix}@ InMat, @\exposconcept{in-vector}@ InVec, @\exposconcept{out-vector}@ OutVec> + void matrix_vector_product(InMat A, InVec x, OutVec y); + template + void matrix_vector_product(ExecutionPolicy&& exec, + InMat A, InVec x, OutVec y); + template<@\exposconcept{in-matrix}@ InMat, @\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, @\exposconcept{out-vector}@ OutVec> + void matrix_vector_product(InMat A, InVec1 x, InVec2 y, OutVec z); + template + void matrix_vector_product(ExecutionPolicy&& exec, + InMat A, InVec1 x, InVec2 y, OutVec z); + + // \ref{linalg.algs.blas2.symv}, symmetric matrix-vector product + template<@\exposconcept{in-matrix}@ InMat, class Triangle, @\exposconcept{in-vector}@ InVec, @\exposconcept{out-vector}@ OutVec> + void symmetric_matrix_vector_product(InMat A, Triangle t, InVec x, OutVec y); + template + void symmetric_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, InVec x, OutVec y); + template<@\exposconcept{in-matrix}@ InMat, class Triangle, @\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, + @\exposconcept{out-vector}@ OutVec> + void symmetric_matrix_vector_product(InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z); + template + void symmetric_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z); + + // \ref{linalg.algs.blas2.hemv}, Hermitian matrix-vector product + template<@\exposconcept{in-matrix}@ InMat, class Triangle, @\exposconcept{in-vector}@ InVec, @\exposconcept{out-vector}@ OutVec> + void hermitian_matrix_vector_product(InMat A, Triangle t, InVec x, OutVec y); + template + void hermitian_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, InVec x, OutVec y); + template<@\exposconcept{in-matrix}@ InMat, class Triangle, @\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, + @\exposconcept{out-vector}@ OutVec> + void hermitian_matrix_vector_product(InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z); + template + void hermitian_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z); + + // \ref{linalg.algs.blas2.trmv}, triangular matrix-vector product + + // Overwriting triangular matrix-vector product + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{in-vector}@ InVec, + @\exposconcept{out-vector}@ OutVec> + void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d, InVec x, + OutVec y); + template + void triangular_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, InVec x, + OutVec y); + + // In-place triangular matrix-vector product + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{inout-vector}@ InOutVec> + void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d, InOutVec y); + template + void triangular_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, InOutVec y); + + // Updating triangular matrix-vector product + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, + @\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, @\exposconcept{out-vector}@ OutVec> + void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d, + InVec1 x, InVec2 y, OutVec z); + template + void triangular_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InVec1 x, InVec2 y, OutVec z); + + // \ref{linalg.algs.blas2.trsv}, solve a triangular linear system + + // Solve a triangular linear system, not in place + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, + @\exposconcept{in-vector}@ InVec, @\exposconcept{out-vector}@ OutVec, class BinaryDivideOp> + void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d, + InVec b, OutVec x, BinaryDivideOp divide); + template + void triangular_matrix_vector_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InVec b, OutVec x, BinaryDivideOp divide); + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, + @\exposconcept{in-vector}@ InVec, @\exposconcept{out-vector}@ OutVec> + void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d, + InVec b, OutVec x); + template + void triangular_matrix_vector_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InVec b, OutVec x); + + // Solve a triangular linear system, in place + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, + @\exposconcept{inout-vector}@ InOutVec, class BinaryDivideOp> + void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d, + InOutVec b, BinaryDivideOp divide); + template + void triangular_matrix_vector_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InOutVec b, BinaryDivideOp divide); + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{inout-vector}@ InOutVec> + void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d, InOutVec b); + template + void triangular_matrix_vector_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, InOutVec b); + + // \ref{linalg.algs.blas2.rank1}, nonsymmetric rank-1 matrix update + template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, @\exposconcept{inout-matrix}@ InOutMat> + void matrix_rank_1_update(InVec1 x, InVec2 y, InOutMat A); + template + void matrix_rank_1_update(ExecutionPolicy&& exec, + InVec1 x, InVec2 y, InOutMat A); + + template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, @\exposconcept{inout-matrix}@ InOutMat> + void matrix_rank_1_update_c(InVec1 x, InVec2 y, InOutMat A); + template + void matrix_rank_1_update_c(ExecutionPolicy&& exec, + InVec1 x, InVec2 y, InOutMat A); + + // \ref{linalg.algs.blas2.symherrank1}, symmetric or Hermitian rank-1 matrix update + template + void symmetric_matrix_rank_1_update(Scalar alpha, InVec x, InOutMat A, Triangle t); + template + void symmetric_matrix_rank_1_update(ExecutionPolicy&& exec, + Scalar alpha, InVec x, InOutMat A, Triangle t); + template<@\exposconcept{in-vector}@ InVec, @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void symmetric_matrix_rank_1_update(InVec x, InOutMat A, Triangle t); + template + void symmetric_matrix_rank_1_update(ExecutionPolicy&& exec, + InVec x, InOutMat A, Triangle t); + + template + void hermitian_matrix_rank_1_update(Scalar alpha, InVec x, InOutMat A, Triangle t); + template + void hermitian_matrix_rank_1_update(ExecutionPolicy&& exec, + Scalar alpha, InVec x, InOutMat A, Triangle t); + template<@\exposconcept{in-vector}@ InVec, @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void hermitian_matrix_rank_1_update(InVec x, InOutMat A, Triangle t); + template + void hermitian_matrix_rank_1_update(ExecutionPolicy&& exec, + InVec x, InOutMat A, Triangle t); + + // \ref{linalg.algs.blas2.rank2}, symmetric and Hermitian rank-2 matrix updates + + // symmetric rank-2 matrix update + template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, + @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void symmetric_matrix_rank_2_update(InVec1 x, InVec2 y, InOutMat A, Triangle t); + template + void symmetric_matrix_rank_2_update(ExecutionPolicy&& exec, + InVec1 x, InVec2 y, InOutMat A, Triangle t); + + // Hermitian rank-2 matrix update + template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, + @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void hermitian_matrix_rank_2_update(InVec1 x, InVec2 y, InOutMat A, Triangle t); + template + void hermitian_matrix_rank_2_update(ExecutionPolicy&& exec, + InVec1 x, InVec2 y, InOutMat A, Triangle t); + + // \ref{linalg.algs.blas3}, BLAS 3 algorithms + + // \ref{linalg.algs.blas3.gemm}, general matrix-matrix product + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat> + void matrix_product(InMat1 A, InMat2 B, OutMat C); + template + void matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, OutMat C); + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{in-matrix}@ InMat3, @\exposconcept{out-matrix}@ OutMat> + void matrix_product(InMat1 A, InMat2 B, InMat3 E, OutMat C); + template + void matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, InMat3 E, OutMat C); + + // \ref{linalg.algs.blas3.xxmm}, symmetric, Hermitian, and triangular matrix-matrix product + + template<@\exposconcept{in-matrix}@ InMat1, class Triangle, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat> + void symmetric_matrix_product(InMat1 A, Triangle t, InMat2 B, OutMat C); + template + void symmetric_matrix_product(ExecutionPolicy&& exec, + InMat1 A, Triangle t, InMat2 B, OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, class Triangle, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat> + void hermitian_matrix_product(InMat1 A, Triangle t, InMat2 B, OutMat C); + template + void hermitian_matrix_product(ExecutionPolicy&& exec, + InMat1 A, Triangle t, InMat2 B, OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat> + void triangular_matrix_product(InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, OutMat C); + template + void triangular_matrix_product(ExecutionPolicy&& exec, + InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, class Triangle, @\exposconcept{out-matrix}@ OutMat> + void symmetric_matrix_product(InMat1 A, InMat2 B, Triangle t, OutMat C); + template + void symmetric_matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, Triangle t, OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, class Triangle, @\exposconcept{out-matrix}@ OutMat> + void hermitian_matrix_product(InMat1 A, InMat2 B, Triangle t, OutMat C); + template + void hermitian_matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, Triangle t, OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, class Triangle, class DiagonalStorage, + @\exposconcept{out-matrix}@ OutMat> + void triangular_matrix_product(InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, OutMat C); + template + void triangular_matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, class Triangle, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{in-matrix}@ InMat3, + @\exposconcept{out-matrix}@ OutMat> + void symmetric_matrix_product(InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C); + template + void symmetric_matrix_product(ExecutionPolicy&& exec, + InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, class Triangle, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{in-matrix}@ InMat3, + @\exposconcept{out-matrix}@ OutMat> + void hermitian_matrix_product(InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C); + template + void hermitian_matrix_product(ExecutionPolicy&& exec, + InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat2, @\exposconcept{in-matrix}@ InMat3, @\exposconcept{out-matrix}@ OutMat> + void triangular_matrix_product(InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, InMat3 E, + OutMat C); + template + void triangular_matrix_product(ExecutionPolicy&& exec, + InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, InMat3 E, + OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, class Triangle, @\exposconcept{in-matrix}@ InMat3, + @\exposconcept{out-matrix}@ OutMat> + void symmetric_matrix_product(InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C); + template + void symmetric_matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, class Triangle, @\exposconcept{in-matrix}@ InMat3, + @\exposconcept{out-matrix}@ OutMat> + void hermitian_matrix_product(InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C); + template + void hermitian_matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat3, @\exposconcept{out-matrix}@ OutMat> + void triangular_matrix_product(InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, InMat3 E, + OutMat C); + template + void triangular_matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, InMat3 E, + OutMat C); + + // \ref{linalg.algs.blas3.trmm}, in-place triangular matrix-matrix product + + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{inout-matrix}@ InOutMat> + void triangular_matrix_left_product(InMat A, Triangle t, DiagonalStorage d, InOutMat C); + template + void triangular_matrix_left_product(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, InOutMat C); + + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{inout-matrix}@ InOutMat> + void triangular_matrix_right_product(InMat A, Triangle t, DiagonalStorage d, InOutMat C); + template + void triangular_matrix_right_product(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, InOutMat C); + + // \ref{linalg.algs.blas3.rankk}, rank-k update of a symmetric or Hermitian matrix + + // rank-k symmetric matrix update + template + void symmetric_matrix_rank_k_update(Scalar alpha, InMat A, InOutMat C, Triangle t); + template + void symmetric_matrix_rank_k_update(ExecutionPolicy&& exec, + Scalar alpha, InMat A, InOutMat C, Triangle t); + + template<@\exposconcept{in-matrix}@ InMat, @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void symmetric_matrix_rank_k_update(InMat A, InOutMat C, Triangle t); + template + void symmetric_matrix_rank_k_update(ExecutionPolicy&& exec, + InMat A, InOutMat C, Triangle t); + + // rank-k Hermitian matrix update + template + void hermitian_matrix_rank_k_update(Scalar alpha, InMat A, InOutMat C, Triangle t); + template + void hermitian_matrix_rank_k_update(ExecutionPolicy&& exec, + Scalar alpha, InMat A, InOutMat C, Triangle t); + + template<@\exposconcept{in-matrix}@ InMat, @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void hermitian_matrix_rank_k_update(InMat A, InOutMat C, Triangle t); + template + void hermitian_matrix_rank_k_update(ExecutionPolicy&& exec, + InMat A, InOutMat C, Triangle t); + + // \ref{linalg.algs.blas3.rank2k}, rank-2k update of a symmetric or Hermitian matrix + + // rank-2k symmetric matrix update + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, + @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void symmetric_matrix_rank_2k_update(InMat1 A, InMat2 B, InOutMat C, Triangle t); + template + void symmetric_matrix_rank_2k_update(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, InOutMat C, Triangle t); + + // rank-2k Hermitian matrix update + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, + @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void hermitian_matrix_rank_2k_update(InMat1 A, InMat2 B, InOutMat C, Triangle t); + template + void hermitian_matrix_rank_2k_update(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, InOutMat C, Triangle t); + + // \ref{linalg.algs.blas3.trsm}, solve multiple triangular linear systems + + // solve multiple triangular systems on the left, not-in-place + template<@\exposconcept{in-matrix}@ InMat1, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat, class BinaryDivideOp> + void triangular_matrix_matrix_left_solve(InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X, BinaryDivideOp divide); + template + void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec, + InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X, BinaryDivideOp divide); + template<@\exposconcept{in-matrix}@ InMat1, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat> + void triangular_matrix_matrix_left_solve(InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X); + template + void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec, + InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X); + + // solve multiple triangular systems on the right, not-in-place + template<@\exposconcept{in-matrix}@ InMat1, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat, class BinaryDivideOp> + void triangular_matrix_matrix_right_solve(InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X, BinaryDivideOp divide); + template + void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec, + InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X, BinaryDivideOp divide); + template<@\exposconcept{in-matrix}@ InMat1, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat> + void triangular_matrix_matrix_right_solve(InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X); + template + void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec, + InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X); + + // solve multiple triangular systems on the left, in-place + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, + @\exposconcept{inout-matrix}@ InOutMat, class BinaryDivideOp> + void triangular_matrix_matrix_left_solve(InMat A, Triangle t, DiagonalStorage d, + InOutMat B, BinaryDivideOp divide); + template + void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InOutMat B, BinaryDivideOp divide); + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{inout-matrix}@ InOutMat> + void triangular_matrix_matrix_left_solve(InMat A, Triangle t, DiagonalStorage d, + InOutMat B); + template + void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InOutMat B); + + // solve multiple triangular systems on the right, in-place + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, + @\exposconcept{inout-matrix}@ InOutMat, class BinaryDivideOp> + void triangular_matrix_matrix_right_solve(InMat A, Triangle t, DiagonalStorage d, + InOutMat B, BinaryDivideOp divide); + template + void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InOutMat B, BinaryDivideOp divide); + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{inout-matrix}@ InOutMat> + void triangular_matrix_matrix_right_solve(InMat A, Triangle t, DiagonalStorage d, + InOutMat B); + template + void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InOutMat B); +} +\end{codeblock} + +\rSec2[linalg.general]{General} + +\pnum +For the effects of all functions in \ref{linalg}, +when the effects are described as +``computes $R = E$'' or ``compute $R = E$'' +(for some $R$ and mathematical expression $E$), +the following apply: +\begin{itemize} +\item +$E$ has + the conventional mathematical meaning as written. +\item +The pattern $x^T$ should be read as + ``the transpose of $x$.'' +\item +The pattern $x^H$ should be read as + ``the conjugate transpose of $x$.'' +\item +When $R$ is the same name as a function parameter + whose type is a template parameter with \tcode{Out} in its name, + the intent is that the result of the computation + is written to the elements of the function parameter \tcode{R}. +\end{itemize} + +\pnum +Some of the functions and types in \ref{linalg} distinguish between +the ``rows'' and the ``columns'' of a matrix. +For a matrix \tcode{A} and a multidimensional index \tcode{i, j} in \tcode{A.extents()}, +\begin{itemize} +\item +\term{row} \tcode{i} of \tcode{A} is the set of elements \tcode{A[i, k1]} + for all \tcode{k1} such that \tcode{i, k1} is in \tcode{A.extents()}; and +\item +\term{column} \tcode{j} of \tcode{A} is the set of elements \tcode{A[k0, j]} + for all \tcode{k0} such that \tcode{k0, j} is in \tcode{A.extents()}. +\end{itemize} + +\pnum +Some of the functions in \ref{linalg} distinguish between +the ``upper triangle,'' ``lower triangle,'' and ``diagonal'' of a matrix. +\begin{itemize} +\item +The \defn{diagonal} is the set of all elements of \tcode{A} accessed by + \tcode{A[i,i]} for 0 $\le$ \tcode{i} < min(\tcode{A.extent(0)}, \tcode{A.extent(1)}). +\item +The \defnadj{upper}{triangle} of a matrix \tcode{A} is the set of all elements of + \tcode{A} accessed by \tcode{A[i,j]} with \tcode{i} $\le$ \tcode{j}. It includes the diagonal. +\item +The \defnadj{lower}{triangle} of \tcode{A} is the set of all elements of \tcode{A} + accessed by \tcode{A[i,j]} with \tcode{i} $\ge$ \tcode{j}. It includes the diagonal. +\end{itemize} + +\pnum +For any function \tcode{F} that takes +a parameter named \tcode{t}, +\tcode{t} applies to accesses done through the parameter preceding \tcode{t} in the parameter list of \tcode{F}. +Let \tcode{m} be such an access-modified function parameter. +\tcode{F} will only access the triangle of \tcode{m} specified by \tcode{t}. +For accesses \tcode{m[i, j]} outside the triangle specified by \tcode{t}, +\tcode{F} will use the value +\begin{itemize} +\item +\tcode{\exposid{conj-if-needed}(m[j, i])} if the name of \tcode{F} starts with \tcode{hermitian}, +\item +\tcode{m[j, i]} if the name of \tcode{F} starts with \tcode{symmetric}, or +\item +the additive identity if the name of \tcode{F} starts with \tcode{triangular}. +\end{itemize} +\begin{example} +Small vector product accessing only specified triangle. +It would not be a precondition violation for the non-accessed +matrix element to be non-zero. +\begin{codeblock} +template +void triangular_matrix_vector_2x2_product( + mdspan> m, + Triangle t, + mdspan> x, + mdspan> y) { + + static_assert(is_same_v || + is_same_v); + + if constexpr (is_same_v) { + y[0] = m[0,0] * x[0]; // \tcode{+ 0 * x[1]} + y[1] = m[1,0] * x[0] + m[1,1] * x[1]; + } else { // upper_triangle_t + y[0] = m[0,0] * x[0] + m[0,1] * x[1]; + y[1] = /* 0 * x[0] + */ m[1,1] * x[1]; + } +} +\end{codeblock} +\end{example} + +\pnum +For any function \tcode{F} that takes a parameter named \tcode{d}, +\tcode{d} applies to accesses done through +the previous-of-the-previous parameter of \tcode{d} +in the parameter list of \tcode{F}. +Let \tcode{m} be such an access-modified function parameter. +If \tcode{d} specifies that an implicit unit diagonal is to be assumed, then +\begin{itemize} +\item +\tcode{F} will not access the diagonal of \tcode{m}; and +\item +the algorithm will interpret \tcode{m} as if it has a unit diagonal, that is, +a diagonal each of whose elements behaves as a two-sided multiplicative identity +(even if \tcode{m}'s value type does not have +a two-sided multiplicative identity). +\end{itemize} +Otherwise, +if \tcode{d} specifies that an explicit diagonal is to be assumed, +then \tcode{F} will access the diagonal of \tcode{m}. + +\pnum +Within all the functions in \ref{linalg}, +any calls to \tcode{abs}, \tcode{conj}, \tcode{imag}, and \tcode{real} +are unqualified. + +\pnum +Two \tcode{mdspan} objects \tcode{x} and \tcode{y} \term{alias} each other, +if they have the same extents \tcode{e}, and +for every pack of integers \tcode{i} +which is a multidimensional index in \tcode{e}, +\tcode{x[i...]} and \tcode{y[i...]} refer to the same element. +\begin{note} +This means that +\tcode{x} and \tcode{y} view the same elements in the same order. +\end{note} + +\pnum +Two \tcode{mdspan} objects \tcode{x} and \tcode{y} \term{overlap} each other, +if for some pack of integers \tcode{i} +that is a multidimensional index in \tcode{x.extents()}, +there exists a pack of integers \tcode{j} +that is a multidimensional index in \tcode{y.extents()}, +such that \tcode{x[i....]} and \tcode{y[j...]} refer to the same element. +\begin{note} +Aliasing is a special case of overlapping. +If \tcode{x} and \tcode{y} do not overlap, +then they also do not alias each other. +\end{note} + +\rSec2[linalg.reqs]{Requirements} + +\rSec3[linalg.reqs.val]{Linear algebra value types} + +\pnum +Throughout \ref{linalg}, +the following types are +\defnadjx{linear algebra}{value type}{linear algebra value types}: +\begin{itemize} +\item +the \tcode{value_type} type alias of +any input or output \tcode{mdspan} parameter(s) of +any function in \ref{linalg}; and +\item +the \tcode{Scalar} template parameter (if any) of +any function or class in \ref{linalg}. +\end{itemize} + +\pnum +Linear algebra value types shall model \libconcept{semiregular}. + +\pnum +A value-initialized object of linear algebra value type +shall act as the additive identity. + +\rSec3[linalg.reqs.alg]{Algorithm and class requirements} + +\pnum +\ref{linalg.reqs.alg} lists common requirements for +all algorithms and classes in \ref{linalg}. + +\pnum +All of the following statements presume +that the algorithm's asymptotic complexity requirements, if any, are satisfied. +\begin{itemize} +\item +The function may make arbitrarily many objects of any linear algebra value type, +value-initializing or direct-initializing them +with any existing object of that type. +\item +The \term{triangular solve algorithms} in +\ref{linalg.algs.blas2.trsv}, +\ref{linalg.algs.blas3.trmm}, +\ref{linalg.algs.blas3.trsm}, and +\ref{linalg.algs.blas3.inplacetrsm} +either have +a \tcode{BinaryDi\-videOp} template parameter (see \ref{linalg.algs.reqs}) and +a binary function object parameter \tcode{divide} of that type, +or they have effects equivalent to invoking such an algorithm. +Triangular solve algorithms interpret \tcode{divide(a, b)} as +\tcode{a} times the multiplicative inverse of \tcode{b}. +Each triangular solve algorithm uses a sequence of evaluations of +\tcode{*}, \tcode{*=}, \tcode{divide}, +unary \tcode{+}, binary \tcode{+}, \tcode{+=}, +unary \tcode{-}, binary \tcode{-}, \tcode{-=}, +and \tcode{=} operators +that would produce the result +specified by the algorithm's \Fundescx{Effects} and \Fundescx{Remarks} +when operating on elements of a field with noncommutative multiplication. +It is a precondition of the algorithm that +any addend, +any subtrahend, +any partial sum of addends in any order +(treating any difference as a sum with the second term negated), +any factor, +any partial product of factors respecting their order, +any numerator (first argument of \tcode{divide}), +any denominator (second argument of \tcode{divide}), +and any assignment +is a well-formed expression. +\item +Each function in +\ref{linalg.algs.blas1}, \ref{linalg.algs.blas2}, and \ref{linalg.algs.blas3} +that is not a triangular solve algorithm +will use a sequence of evaluations of +\tcode{*}, \tcode{*=}, \tcode{+}, \tcode{+=}, and \tcode{=} operators +that would produce the result +specified by the algorithm's \Fundescx{Effects} and \Fundescx{Remarks} +when operating on elements of a semiring with noncommutative multiplication. +It is a precondition of the algorithm that +any addend, +any partial sum of addends in any order, +any factor, +any partial product of factors respecting their order, +and any assignment +is a well-formed expression. +\item +If the function has an output \tcode{mdspan}, +then all addends, +subtrahends (for the triangular solve algorithms), +or results of the \tcode{divide} parameter on intermediate terms +(if the function takes a \tcode{divide} parameter) +are assignable and convertible to +the output \tcode{mdspan}'s \tcode{value_type}. +\item +The function may reorder addends and partial sums arbitrarily. +\begin{note} +Factors in each product are not reordered; +multiplication is not necessarily commutative. +\end{note} +\end{itemize} +\begin{note} +The above requirements do not prohibit +implementation approaches and optimization techniques +which are not user-observable. +In particular, if for all input and output arguments +the \tcode{value_type} is a floating-point type, +implementers are free to leverage approximations, +use arithmetic operations not explicitly listed above, and +compute floating point sums in any way that improves their accuracy. +\end{note} + +\pnum +\begin{note} +For all functions in \ref{linalg}, +suppose that all input and output \tcode{mdspan} have as \tcode{value_type} +a floating-point type, and +any \tcode{Scalar} template argument has a floating-point type. +Then, functions can do all of the following: +\begin{itemize} +\item +compute floating-point sums in any way +that improves their accuracy for arbitrary input; +\item +perform additional arithmetic operations +(other than those specified by the function's wording and \ref{linalg.reqs.alg}) +in order to improve performance or accuracy; and +\item +use approximations +(that might not be exact even if computing with real numbers), +instead of computations that would be exact +if it were possible to compute without rounding error; +\end{itemize} +as long as +\begin{itemize} +\item +the function satisfies the complexity requirements; and +\item +the function is logarithmically stable, +as defined in Demmel 2007\supercite{linalg-stable}. +Strassen's algorithm for matrix-matrix multiply +is an example of a logarithmically stable algorithm. +\end{itemize} +\end{note} + +\rSec2[linalg.tags]{Tag classes} + +\rSec3[linalg.tags.order]{Storage order tags} + +\pnum +The storage order tags describe +the order of elements in an \tcode{mdspan} with +\tcode{layout_blas_packed}\iref{linalg.layout.packed} layout. +\begin{itemdecl} +struct column_major_t { + explicit column_major_t() = default; +}; +inline constexpr column_major_t column_major{}; + +struct row_major_t { + explicit row_major_t() = default; +}; +inline constexpr row_major_t row_major{}; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\tcode{column_major_t} indicates a column-major order, +and \tcode{row_major_t} indicates a row-major order. +\end{itemdescr} + +\rSec3[linalg.tags.triangle]{Triangle tags} + +\begin{itemdecl} +struct upper_triangle_t { + explicit upper_triangle_t() = default; +}; +inline constexpr upper_triangle_t upper_triangle{}; + +struct lower_triangle_t { + explicit lower_triangle_t() = default; +}; +inline constexpr lower_triangle_t lower_triangle{}; +\end{itemdecl} + +\begin{itemdescr} +\pnum +These tag classes specify whether +algorithms and other users of a matrix (represented as an \tcode{mdspan}) +access the +upper triangle (\tcode{upper_triangle_t}) or +lower triangle (\tcode{lower_triangle_t}) +of the matrix (see also \ref{linalg.general}). +This is also subject to the restrictions of \tcode{implicit_unit_diagonal_t} +if that tag is also used as a function argument; see below. +\end{itemdescr} + +\rSec3[linalg.tags.diagonal]{Diagonal tags} + +\begin{itemdecl} +struct implicit_unit_diagonal_t { + explicit implicit_unit_diagonal_t() = default; +}; +inline constexpr implicit_unit_diagonal_t + implicit_unit_diagonal{}; + +struct explicit_diagonal_t { + explicit explicit_diagonal_t() = default; +}; +inline constexpr explicit_diagonal_t explicit_diagonal{}; +\end{itemdecl} + +\begin{itemdescr} +\pnum +These tag classes specify whether algorithms +access the matrix's diagonal entries, and if not, +then how algorithms interpret +the matrix's implicitly represented diagonal values. + +\pnum +The \tcode{implicit_unit_diagonal_t} tag indicates that +an implicit unit diagonal is to be assumed\iref{linalg.general}. + +\pnum +The \tcode{explicit_diagonal_t} tag indicates that +an explicit diagonal is used\iref{linalg.general}. +\end{itemdescr} + +\rSec2[linalg.layout.packed]{Layouts for packed matrix types} + +\rSec3[linalg.layout.packed.overview]{Overview} + +\pnum +\tcode{layout_blas_packed} is an \tcode{mdspan} layout mapping policy +that represents a square matrix that stores only the entries in one +triangle, in a packed contiguous format. +Its \tcode{Triangle} template parameter determines +whether an \tcode{mdspan} with this layout +stores the upper or lower triangle of the matrix. +Its \tcode{StorageOrder} template parameter determines +whether the layout packs the matrix's elements +in column-major or row-major order. + +\pnum +A \tcode{StorageOrder} of \tcode{column_major_t} +indicates column-major ordering. +This packs matrix elements +starting with the leftmost (least column index) column, and +proceeding column by column, from the top entry (least row index). + +\pnum +A \tcode{StorageOrder} of \tcode{row_major_t} +indicates row-major ordering. +This packs matrix elements +starting with the topmost (least row index) row, and +proceeding row by row, from the leftmost (least column index) entry. + +\pnum +\begin{note} +\tcode{layout_blas_packed} describes the data layout used by +the BLAS' +Symmetric Packed (SP), Hermitian Packed (HP), and Triangular Packed (TP) +matrix types. +\end{note} +\begin{codeblock} +namespace std::linalg { + template + class layout_blas_packed { + public: + using triangle_type = Triangle; + using storage_order_type = StorageOrder; + + template + struct mapping { + public: + using extents_type = Extents; + using index_type = typename extents_type::index_type; + using size_type = typename extents_type::size_type; + using rank_type = typename extents_type::rank_type; + using layout_type = layout_blas_packed; + + // \ref{linalg.layout.packed.cons}, constructors + constexpr mapping() noexcept = default; + constexpr mapping(const mapping&) noexcept = default; + constexpr mapping(const extents_type&) noexcept; + template + constexpr explicit(!is_convertible_v) + mapping(const mapping& other) noexcept; + + constexpr mapping& operator=(const mapping&) noexcept = default; + + // \ref{linalg.layout.packed.obs}, observers + constexpr const extents_type& extents() const noexcept { return @\exposid{extents_}@; } + + constexpr index_type required_span_size() const noexcept; + + template + constexpr index_type operator() (Index0 ind0, Index1 ind1) const noexcept; + + static constexpr bool is_always_unique() noexcept { + return (extents_type::static_extent(0) != dynamic_extent && + extents_type::static_extent(0) < 2) || + (extents_type::static_extent(1) != dynamic_extent && + extents_type::static_extent(1) < 2); + } + static constexpr bool is_always_exhaustive() noexcept { return true; } + static constexpr bool is_always_strided() noexcept + { return is_always_unique(); } + + constexpr bool is_unique() const noexcept { + return @\exposid{extents_}@.extent(0) < 2; + } + constexpr bool is_exhaustive() const noexcept { return true; } + constexpr bool is_strided() const noexcept { + return @\exposid{extents_}@.extent(0) < 2; + } + + constexpr index_type stride(rank_type) const noexcept; + + template + friend constexpr bool operator==(const mapping&, const mapping&) noexcept; + + private: + extents_type @\exposid{extents_}@{}; // \expos + }; + }; +} +\end{codeblock} + +\begin{itemdescr} +\pnum +\mandates +\begin{itemize} +\item +\tcode{Triangle} is either \tcode{upper_triangle_t} or \tcode{lower_triangle_t}, +\item +\tcode{StorageOrder} is either \tcode{column_major_t} or \tcode{row_major_t}, +\item +\tcode{Extents} is a specialization of \tcode{std::extents}, +\item +\tcode{Extents::rank()} equals 2, +\item +one of +\begin{codeblock} +extents_type::static_extent(0) == dynamic_extent@\textrm{,}@ +extents_type::static_extent(1) == dynamic_extent@\textrm{, or}@ +extents_type::static_extent(0) == extents_type::static_extent(1) +\end{codeblock} +is \tcode{true}, and +\item +if \tcode{Extents::rank_dynamic() == 0} is \tcode{true}, +let $N_s$ be equal to \tcode{Extents::static_extent(0)}; then, +$N_s \times (N_s + 1)$ is representable as a value of type \tcode{index_type}. +\end{itemize} + +\pnum +\tcode{layout_blas_packed::mapping} is a trivially copyable type +that models \libconcept{regular} for each \tcode{T}, \tcode{SO}, and \tcode{E}. +\end{itemdescr} + +\rSec3[linalg.layout.packed.cons]{Constructors} + +\begin{itemdecl} +constexpr mapping(const extents_type& e) noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\expects +\begin{itemize} +\item +Let $N$ be equal to \tcode{e.extent(0)}. +Then, $N \times (N+1)$ is representable as +a value of type \tcode{index_type}\iref{basic.fundamental}. +\item +\tcode{e.extent(0)} equals \tcode{e.extent(1)}. +\end{itemize} + +\pnum +\effects +Direct-non-list-initializes \exposid{extents_} with \tcode{e}. +\end{itemdescr} + +\begin{itemdecl} +template + explicit(!is_convertible_v) + constexpr mapping(const mapping& other) noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\constraints +\tcode{is_constructible_v} is \tcode{true}. + +\pnum +\expects +Let $N$ be \tcode{other.extents().extent(0)}. +Then, $N \times (N+1)$ is representable as +a value of type \tcode{index_type}\iref{basic.fundamental}. + +\pnum +\effects +Direct-non-list-initializes \exposid{extents_} with \tcode{other.extents()}. +\end{itemdescr} + +\rSec3[linalg.layout.packed.obs]{Observers} + +\begin{itemdecl} +constexpr index_type required_span_size() const noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\returns +\tcode{\exposid{extents_}.extent(0) * (\exposid{extents_}.extent(0) + 1)/2}. +\begin{note} +For example, a 5 x 5 packed matrix +only stores 15 matrix elements. +\end{note} +\end{itemdescr} + +\begin{itemdecl} +template + constexpr index_type operator() (Index0 ind0, Index1 ind1) const noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\constraints +\begin{itemize} +\item +\tcode{is_convertible_v} is \tcode{true}, +\item +\tcode{is_convertible_v} is \tcode{true}, +\item +\tcode{is_nothrow_constructible_v} is \tcode{true}, and +\item +\tcode{is_nothrow_constructible_v} is \tcode{true}. +\end{itemize} + +\pnum +Let \tcode{i} be \tcode{extents_type::\exposid{index-cast}(ind0)}, and +let \tcode{j} be \tcode{extents_type::\exposid{index-cast}(ind1)}. + +\pnum +\expects +\tcode{i, j} +is a multidimensional index in \exposid{extents_}\iref{mdspan.overview}. + +\pnum +\returns +Let \tcode{N} be \tcode{\exposid{extents_}.extent(0)}. +Then +\begin{itemize} +\item +\tcode{(*this)(j, i)} if \tcode{i > j} is \tcode{true}; otherwise +\item +\tcode{i + j * (j + 1)/2} if +\begin{codeblock} +is_same_v && is_same_v +\end{codeblock} +is \tcode{true} or +\begin{codeblock} +is_same_v && is_same_v +\end{codeblock} +is \tcode{true}; otherwise +\item +\tcode{j + N * i - i * (i + 1)/2}. +\end{itemize} +\end{itemdescr} + +\begin{itemdecl} +constexpr index_type stride(rank_type r) const noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\expects +\begin{itemize} +\item +\tcode{is_strided()} is \tcode{true}, and +\item +\tcode{r < extents_type::rank()} is \tcode{true}. +\end{itemize} + +\pnum +\returns +\tcode{1}. +\end{itemdescr} + +\begin{itemdecl} +template + friend constexpr bool operator==(const mapping& x, const mapping& y) noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: \tcode{return x.extents() == y.extents();} +\end{itemdescr} + +\rSec2[linalg.helpers]{Exposition-only helpers} + +\rSec3[linalg.helpers.abs]{\exposid{abs-if-needed}} + +\pnum +The name \exposid{abs-if-needed} denotes an exposition-only function object. +The expression \tcode{\exposid{abs-if-needed}(E)} for a subexpression \tcode{E} +whose type is \tcode{T} is expression-equivalent to: +\begin{itemize} +\item +\tcode{E} if \tcode{T} is an unsigned integer; +\item +otherwise, \tcode{std::abs(E)} if \tcode{T} is an arithmetic type, +\item +otherwise, \tcode{abs(E)}, +if that expression is valid, +with overload resolution performed in a context that includes the declaration +\begin{codeblock} +template T abs(T) = delete; +\end{codeblock} +If the function selected by overload resolution +does not return the absolute value of its input, +the program is ill-formed, no diagnostic required. +\end{itemize} + +\rSec3[linalg.helpers.conj]{\exposid{conj-if-needed}} + +\pnum +The name \exposid{conj-if-needed} denotes an exposition-only function object. +The expression \tcode{\exposid{conj-if-needed}(E)} for a subexpression \tcode{E} +whose type is \tcode{T} is expression-equivalent to: +\begin{itemize} +\item +\tcode{conj(E)}, +if \tcode{T} is not an arithmetic type and +the expression \tcode{conj(E)} is valid, +with overload resolution performed in a context that includes the declaration +\begin{codeblock} +template T conj(const T&) = delete; +\end{codeblock} +If the function selected by overload resolution +does not return the complex conjugate of its input, +the program is ill-formed, no diagnostic required; +\item +otherwise, \tcode{E}. +\end{itemize} + +\rSec3[linalg.helpers.real]{\exposid{real-if-needed}} + +\pnum +The name \exposid{real-if-needed} denotes an exposition-only function object. +The expression \tcode{\exposid{real-if-needed}(E)} for a subexpression \tcode{E} +whose type is \tcode{T} is expression-equivalent to: +\begin{itemize} +\item +\tcode{real(E)}, +if \tcode{T} is not an arithmetic type and +the expression \tcode{real(E)} is valid, +with overload resolution performed in a context that includes the declaration +\begin{codeblock} +template T real(const T&) = delete; +\end{codeblock} +If the function selected by overload resolution +does not return the real part of its input, +the program is ill-formed, no diagnostic required; +\item +otherwise, \tcode{E}. +\end{itemize} + +\rSec3[linalg.helpers.imag]{\exposid{imag-if-needed}} + +\pnum +The name \exposid{imag-if-needed} denotes an exposition-only function object. +The expression \tcode{\exposid{imag-if-needed}(E)} for a subexpression \tcode{E} +whose type is \tcode{T} is expression-equivalent to: +\begin{itemize} +\item +\tcode{imag(E)}, +if \tcode{T} is not an arithmetic type and the expression \tcode{imag(E)} +is valid, with overload resolution performed in a context +that includes the declaration +\begin{codeblock} +template T imag(const T&) = delete; +\end{codeblock} +If the function selected by overload resolution +does not return the imaginary part of its input, +the program is ill-formed, no diagnostic required; +\item +otherwise, \tcode{((void)E, T\{\})}. +\end{itemize} + +\rSec3[linalg.helpers.concepts]{Argument concepts} + +\pnum +The exposition-only concepts defined in this section +constrain the algorithms in \ref{linalg}. +\begin{codeblock} +template + constexpr bool @\exposid{is-mdspan}@ = false; + +template + constexpr bool @\exposid{is-mdspan}@> = true; + +template + concept @\defexposconcept{in-vector}@ = + @\exposid{is-mdspan}@ && T::rank() == 1; + +template + concept @\defexposconcept{out-vector}@ = + @\exposid{is-mdspan}@ && T::rank() == 1 && + is_assignable_v && T::is_always_unique(); + +template + concept @\defexposconcept{inout-vector}@ = + @\exposid{is-mdspan}@ && T::rank() == 1 && + is_assignable_v && T::is_always_unique(); + +template + concept @\defexposconcept{in-matrix}@ = + @\exposid{is-mdspan}@ && T::rank() == 2; + +template + concept @\defexposconcept{out-matrix}@ = + @\exposid{is-mdspan}@ && T::rank() == 2 && + is_assignable_v && T::is_always_unique(); + +template + concept @\defexposconcept{inout-matrix}@ = + @\exposid{is-mdspan}@ && T::rank() == 2 && + is_assignable_v && T::is_always_unique(); + +template + constexpr bool @\exposid{is-layout-blas-packed}@ = false; // \expos + +template + constexpr bool @\exposid{is-layout-blas-packed}@> = true; + +template + concept @\defexposconcept{possibly-packed-inout-matrix}@ = + @\exposid{is-mdspan}@ && T::rank() == 2 && + is_assignable_v && + (T::is_always_unique() || @\exposid{is-layout-blas-packed}@); + +template + concept @\defexposconcept{in-object}@ = + @\exposid{is-mdspan}@ && (T::rank() == 1 || T::rank() == 2); + +template + concept @\defexposconcept{out-object}@ = + @\exposid{is-mdspan}@ && (T::rank() == 1 || T::rank() == 2) && + is_assignable_v && T::is_always_unique(); + +template + concept @\defexposconcept{inout-object}@ = + @\exposid{is-mdspan}@ && (T::rank() == 1 || T::rank() == 2) && + is_assignable_v && T::is_always_unique(); +\end{codeblock} + +\pnum +If a function in \ref{linalg} accesses the elements +of a parameter constrained by +\exposconcept{in-vector}, +\exposconcept{in-matrix}, or +\exposconcept{in-object}, +those accesses will not modify the elements. + +\pnum +Unless explicitly permitted, any +\exposconcept{inout-vector}, +\exposconcept{inout-matrix}, +\exposconcept{inout-object}, +\exposconcept{out-vector}, +\exposconcept{out-matrix}, +\exposconcept{out-object}, or +\exposconcept{possibly-packed-inout-matrix} +parameter of a function in \ref{linalg} +shall not overlap any other \tcode{mdspan} parameter of the function. + +\rSec3[linalg.helpers.mandates]{Mandates} + +\pnum +\begin{note} +These exposition-only helper functions use +the less constraining input concepts even for the output arguments, +because the additional constraint for assignability of elements +is not necessary, and +they are sometimes used in a context +where the third argument is an input type too. +\end{note} + +\begin{codeblock} +template + requires(@\exposid{is-mdspan}@ && @\exposid{is-mdspan}@) + constexpr + bool @\exposid{compatible-static-extents}@(size_t r1, size_t r2) { // \expos + return MDS1::static_extent(r1) == dynamic_extent || + MDS2::static_extent(r2) == dynamic_extent || + MDS1::static_extent(r1) == MDS2::static_extent(r2)); + } + +template<@\exposconcept{in-vector}@ In1, @\exposconcept{in-vector}@ In2, @\exposconcept{in-vector}@ Out> + constexpr bool @\exposid{possibly-addable}@() { // \expos + return @\exposid{compatible-static-extents}@(0, 0) && + @\exposid{compatible-static-extents}@(0, 0) && + @\exposid{compatible-static-extents}@(0, 0); + } + +template<@\exposconcept{in-matrix}@ In1, @\exposconcept{in-matrix}@ In2, @\exposconcept{in-matrix}@ Out> + constexpr bool @\exposid{possibly-addable}@() { // \expos + return @\exposid{compatible-static-extents}@(0, 0) && + @\exposid{compatible-static-extents}@(1, 1) && + @\exposid{compatible-static-extents}@(0, 0) && + @\exposid{compatible-static-extents}@(1, 1) && + @\exposid{compatible-static-extents}@(0, 0) && + @\exposid{compatible-static-extents}@(1, 1); + } + +template<@\exposconcept{in-matrix}@ InMat, @\exposconcept{in-vector}@ InVec, @\exposconcept{in-vector}@ OutVec> + constexpr bool @\exposid{possibly-multipliable}@() { // \expos + return @\exposid{compatible-static-extents}@(0, 0) && + @\exposid{compatible-static-extents}@(1, 0); + } + +template<@\exposconcept{in-vector}@ InVec, @\exposconcept{in-matrix}@ InMat, @\exposconcept{in-vector}@ OutVec> + constexpr bool @\exposid{possibly-multipliable}@() { // \expos + return @\exposid{compatible-static-extents}@(0, 1) && + @\exposid{compatible-static-extents}@(0, 0); + } + +template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{in-matrix}@ OutMat> + constexpr bool @\exposid{possibly-multipliable}@() { // \expos + return @\exposid{compatible-static-extents}@(0, 0) && + @\exposid{compatible-static-extents}@(1, 1) && + @\exposid{compatible-static-extents}@(1, 0); + } +\end{codeblock} + +\rSec3[linalg.helpers.precond]{Preconditions} + +\pnum +\begin{note} +These exposition-only helper functions use +the less constraining input concepts even for the output arguments, +because the additional constraint for assignability of elements +is not necessary, and +they are sometimes used in a context +where the third argument is an input type too. +\end{note} + +\begin{codeblock} +constexpr bool @\exposid{addable}@( // \expos + const @\exposconcept{in-vector}@ auto& in1, const @\exposconcept{in-vector}@ auto& in2, const @\exposconcept{in-vector}@ auto& out) { + return out.extent(0) == in1.extent(0) && out.extent(0) == in2.extent(0); +} + +constexpr bool @\exposid{addable}@( // \expos + const @\exposconcept{in-matrix}@ auto& in1, const @\exposconcept{in-matrix}@ auto& in2, const @\exposconcept{in-matrix}@ auto& out) { + return out.extent(0) == in1.extent(0) && out.extent(1) == in1.extent(1) && + out.extent(0) == in2.extent(0) && out.extent(1) == in2.extent(1); +} + +constexpr bool @\exposid{multipliable}@( // \expos + const @\exposconcept{in-matrix}@ auto& in_mat, const @\exposconcept{in-vector}@ auto& in_vec, const @\exposconcept{in-vector}@ auto& out_vec) { + return out_vec.extent(0) == in_mat.extent(0) && in_mat.extent(1) == in_vec.extent(0); +} + +constexpr bool @\exposid{multipliable}@( // \expos + const @\exposconcept{in-vector}@ auto& in_vec, const @\exposconcept{in-matrix}@ auto& in_mat, const @\exposconcept{in-vector}@ auto& out_vec) { + return out_vec.extent(0) == in_mat.extent(1) && in_mat.extent(0) == in_vec.extent(0); +} + +constexpr bool @\exposid{multipliable}@( // \expos + const @\exposconcept{in-matrix}@ auto& in_mat1, const @\exposconcept{in-matrix}@ auto& in_mat2, const @\exposconcept{in-matrix}@ auto& out_mat) { + return out_mat.extent(0) == in_mat1.extent(0) && out_mat.extent(1) == in_mat2.extent(1) && + in1_mat.extent(1) == in_mat2.extent(0); +} +\end{codeblock} + +\rSec2[linalg.scaled]{Scaled in-place transformation} + +\rSec3[linalg.scaled.intro]{Introduction} + +\pnum +The \tcode{scaled} function +takes a value \tcode{alpha} and an \tcode{mdspan} \tcode{x}, and +returns a new read-only \tcode{mdspan} +that represents +the elementwise product of \tcode{alpha} with each element of \tcode{x}. +\begin{example} +\begin{codeblock} +using Vec = mdspan>; + +// z = alpha * x + y +void z_equals_alpha_times_x_plus_y(double alpha, Vec x, Vec y, Vec z) { + add(scaled(alpha, x), y, z); +} + +// z = alpha * x + beta * y +void z_equals_alpha_times_x_plus_beta_times_y(double alpha, Vec x, double beta, Vec y, Vec z) { + add(scaled(alpha, x), scaled(beta, y), z); +} +\end{codeblock} +\end{example} + +\rSec3[linalg.scaled.scaledaccessor]{Class template \tcode{scaled_accessor}} + + +\pnum +The class template \tcode{scaled_accessor} is an \tcode{mdspan} accessor policy +which upon access produces scaled elements. +It is part of the implementation of \tcode{scaled}\iref{linalg.scaled.scaled}. +\begin{codeblock} +namespace std::linalg { + template + class scaled_accessor { + public: + using element_type = + add_const_t() * declval())>; + using reference = remove_const_t; + using data_handle_type = NestedAccessor::data_handle_type; + using offset_policy = scaled_accessor; + + constexpr scaled_accessor() = default; + template + explicit(!is_convertible_v) + constexpr scaled_accessor(const scaled_accessor& other); + constexpr scaled_accessor(const ScalingFactor& s, const NestedAccessor& a); + + constexpr reference access(data_handle_type p, size_t i) const; + constexpr offset_policy::data_handle_type offset(data_handle_type p, size_t i) const; + + constexpr const ScalingFactor& scaling_factor() const noexcept { return @\exposid{scaling-factor}@; } + constexpr const NestedAccessor& nested_accessor() const noexcept { return @\exposid{nested-accessor}@; } + + private: + ScalingFactor @\exposid{scaling-factor}@{}; // \expos + NestedAccessor @\exposid{nested-accessor}@{}; // \expos + }; +} +\end{codeblock} + +\pnum +\mandates +\begin{itemize} +\item +\tcode{element_type} is valid and denotes a type, +\item +\tcode{is_copy_constructible_v} is \tcode{true}, +\item +\tcode{is_reference_v} is \tcode{false}, +\item +\tcode{ScalingFactor} models \libconcept{semiregular}, and +\item +\tcode{NestedAccessor} meets the accessor policy requirements\iref{mdspan.accessor.reqmts}. +\end{itemize} + +\begin{itemdecl} +template + explicit(!is_convertible_v) + constexpr scaled_accessor(const scaled_accessor& other); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\constraints +\tcode{is_constructible_v} is \tcode{true}. + +\pnum +\effects +\begin{itemize} +\item +Direct-non-list-initializes \exposid{scaling-factor} +with \tcode{other.scaling_factor()}, and +\item +direct-non-list-initializes \exposid{nested-accessor} +with \tcode{other.nested_accessor()}. +\end{itemize} +\end{itemdescr} + +\begin{itemdecl} +constexpr scaled_accessor(const ScalingFactor& s, const NestedAccessor& a); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +\begin{itemize} +\item +Direct-non-list-initializes \exposid{scaling-factor} with \tcode{s}, and +\item +direct-non-list-initializes \exposid{nested-accessor} with \tcode{a}. +\end{itemize} +\end{itemdescr} + +\begin{itemdecl} +constexpr reference access(data_handle_type p, size_t i) const; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\returns +\begin{codeblock} +scaling_factor() * NestedAccessor::element_type(@\exposid{nested-accessor}@.access(p, i)) +\end{codeblock} +\end{itemdescr} + +\begin{itemdecl} +constexpr offset_policy::data_handle_type offset(data_handle_type p, size_t i) const; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\returns +\tcode{\exposid{nested-accessor}.offset(p, i)} +\end{itemdescr} + +\rSec3[linalg.scaled.scaled]{Function template \tcode{scaled}} + +\pnum +The \tcode{scaled} function template takes +a scaling factor \tcode{alpha} and +an \tcode{mdspan} \tcode{x}, and +returns a new read-only \tcode{mdspan} with the same domain as \tcode{x}, +that represents the elementwise product of \tcode{alpha} +with each element of \tcode{x}. + +\begin{itemdecl} + template + constexpr auto scaled(ScalingFactor alpha, mdspan x); +\end{itemdecl} + +\begin{itemdescr} +\pnum +Let \tcode{SA} be \tcode{scaled_accessor}. + +\pnum +\returns +\begin{codeblock} +mdspan(x.data_handle(), x.mapping(), + SA(alpha, x.accessor())) +\end{codeblock} +\end{itemdescr} + +\pnum +\begin{example} +\begin{codeblock} +void test_scaled(mdspan> x) +{ + auto x_scaled = scaled(5.0, x); + for(int i = 0; i < x.extent(0); ++i) { + assert(x_scaled[i] == 5.0 * x[i]); + } +} +\end{codeblock} +\end{example} + +\rSec2[linalg.conj]{Conjugated in-place transformation} + +\rSec3[linalg.conj.intro]{Introduction} + +\pnum +The \tcode{conjugated} function takes an \tcode{mdspan} \tcode{x}, +and returns a new read-only \tcode{mdspan} \tcode{y} +with the same domain as \tcode{x}, +whose elements are the complex conjugates +of the corresponding elements of \tcode{x}. + +\rSec3[linalg.conj.conjugatedaccessor]{Class template \tcode{conjugated_accessor}} + +\pnum +The class template \tcode{conjugated_accessor} +is an \tcode{mdspan} accessor policy +which upon access produces conjugate elements. +It is part of the implementation of +\tcode{conjugated}\iref{linalg.conj.conjugated}. + +\begin{codeblock} +namespace std::linalg { + template + class conjugated_accessor { + public: + using element_type = + add_const_t()))>; + using reference = remove_const_t; + using data_handle_type = typename NestedAccessor::data_handle_type; + using offset_policy = conjugated_accessor; + + constexpr conjugated_accessor() = default; + template + explicit(!is_convertible_v>) + constexpr conjugated_accessor(const conjugated_accessor& other); + + constexpr reference access(data_handle_type p, size_t i) const; + + constexpr typename offset_policy::data_handle_type + offset(data_handle_type p, size_t i) const; + + constexpr const Accessor& nested_accessor() const noexcept { return @\exposid{nested-accessor_}@; } + + private: + NestedAccessor @\exposid{nested-accessor_}@{}; // \expos + }; +} +\end{codeblock} + +\pnum +\mandates +\begin{itemize} +\item +\tcode{element_type} is valid and denotes a type, +\item +\tcode{is_copy_constructible_v} is \tcode{true}, +\item +\tcode{is_reference_v} is \tcode{false}, and +\item +\tcode{NestedAccessor} meets the accessor policy requirements\iref{mdspan.accessor.reqmts}. +\end{itemize} + +\begin{itemdecl} +constexpr conjugated_accessor(const NestedAccessor& acc); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Direct-non-list-initializes +\exposid{nested-accessor_} with \tcode{acc}. +\end{itemdescr} + +\begin{itemdecl} +template + explicit(!is_convertible_v>) + constexpr conjugated_accessor(const conjugated_accessor& other); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\constraints +\tcode{is_constructible_v} +is \tcode{true}. + +\pnum +\effects +Direct-non-list-initializes \exposid{nested-accessor_} +with \tcode{other.nested_accessor()}. +\end{itemdescr} + +\begin{itemdecl} +constexpr reference access(data_handle_type p, size_t i) const; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\returns +\tcode{\exposid{conj-if-needed}(NestedAccessor::element_type(\exposid{nested-accessor_}.access(p, i)))} +\end{itemdescr} + +\begin{itemdecl} +constexpr typename offset_policy::data_handle_type offset(data_handle_type p, size_t i) const; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\returns +\tcode{\exposid{nested-accessor_}.offset(p, i)} +\end{itemdescr} + +\rSec3[linalg.conj.conjugated]{Function template \tcode{conjugated}} + +\begin{itemdecl} + template + constexpr auto conjugated(mdspan a); +\end{itemdecl} + +\begin{itemdescr} +\pnum +Let \tcode{A} be +\tcode{remove_cvref_t} +if \tcode{Accessor} is a specialization of \tcode{conjugated_accessor}, and +otherwise \tcode{conjugated_accessor}. + +\pnum +\returns +\begin{itemize} +\item +If \tcode{Accessor} is a specialization of \tcode{conjugated_accessor}, +\begin{codeblock} +mdspan(a.data_handle(), a.mapping(), + a.accessor().nested_accessor()) +\end{codeblock} +\item +otherwise, +\begin{codeblock} +mdspan(a.data_handle(), a.mapping(), + conjugated_accessor(a.accessor())) +\end{codeblock} +\end{itemize} +\end{itemdescr} + +\pnum +\begin{example} +\begin{codeblock} +void test_conjugated_complex(mdspan, extents> a) { + auto a_conj = conjugated(a); + for(int i = 0; i < a.extent(0); ++i) { + assert(a_conj[i] == conj(a[i]); + } + auto a_conj_conj = conjugated(a_conj); + for(int i = 0; i < a.extent(0); ++i) { + assert(a_conj_conj[i] == a[i]); + } +} + +void test_conjugated_real(mdspan> a) { + auto a_conj = conjugated(a); + for(int i = 0; i < a.extent(0); ++i) { + assert(a_conj[i] == a[i]); + } + auto a_conj_conj = conjugated(a_conj); + for(int i = 0; i < a.extent(0); ++i) { + assert(a_conj_conj[i] == a[i]); + } +} +\end{codeblock} +\end{example} + +\rSec2[linalg.transp]{Transpose in-place transformation} + +\rSec3[linalg.transp.intro]{Introduction} + +\pnum +\tcode{layout_transpose} is an \tcode{mdspan} layout mapping policy +that swaps the two indices, extents, and strides +of any unique \tcode{mdspan} layout mapping policy. + +\pnum +The \tcode{transposed} function takes an \tcode{mdspan} +representing a matrix, and returns a new \tcode{mdspan} +representing the transpose of the input matrix. + +\rSec3[linalg.transp.helpers]{Exposition-only helpers for \tcode{layout_transpose} and \tcode{transposed}} + +\pnum +The exposition-only \exposid{transpose-extents} function +takes an \tcode{extents} object representing the extents of a matrix, +and returns a new \tcode{extents} object +representing the extents of the transpose of the matrix. + +\pnum +The exposition-only alias template +\tcode{\exposid{transpose-extents-t}} +gives the type of \tcode{\exposid{transpose-ex\-tents}(e)} +for a given \tcode{extents} object \tcode{e} of type \tcode{InputExtents}. +\begin{itemdecl} +template + constexpr extents + @\exposid{transpose-extents}@(const extents& in); // \expos +\end{itemdecl} + +\begin{itemdescr} +\pnum +\returns +\tcode{extents(in.extent(1), in.extent(0))} +\end{itemdescr} + +\begin{codeblock} +template + using @\exposid{transpose-extents-t}@ = + decltype(@\exposid{transpose-extents}@(declval())); // \expos +\end{codeblock} + +\rSec3[linalg.transp.layout.transpose]{Class template \tcode{layout_transpose}} + +\pnum +\tcode{layout_transpose} is an \tcode{mdspan} layout mapping policy +that swaps the two indices, extents, and strides +of any \tcode{mdspan} layout mapping policy. + +\begin{codeblock} +namespace std::linalg { + template + class layout_transpose { + public: + using nested_layout_type = Layout; + + template + struct mapping { + private: + using @\exposid{nested-mapping-type}@ = + typename Layout::template mapping<@\exposid{transpose-extents-t}@>; // \expos + + public: + using extents_type = Extents; + using index_type = typename extents_type::index_type; + using size_type = typename extents_type::size_type; + using rank_type = typename extents_type::rank_type; + using layout_type = layout_transpose; + + constexpr explicit mapping(const @\exposid{nested-mapping-type}@&); + + constexpr const extents_type& extents() const noexcept { return @\exposid{extents_}@; } + + constexpr index_type required_span_size() const + { return @\exposid{nested-mapping_}@.required_span_size(); + + template + constexpr index_type operator()(Index0 ind0, Index1 ind1) const + { return @\exposid{nested-mapping_}@(ind1, ind0); } + + constexpr const @\exposid{nested-mapping-type}@& nested_mapping() const noexcept + { return @\exposid{nested-mapping_}@; } + + static constexpr bool is_always_unique() noexcept + { return @\exposid{nested-mapping-type}@::is_always_unique(); } + static constexpr bool is_always_exhaustive() noexcept + { return @\exposid{nested-mapping-type}@::is_always_exhaustive(); } + static constexpr bool is_always_strided() noexcept + { return @\exposid{nested-mapping-type}@::is_always_strided(); } + + constexpr bool is_unique() const { return @\exposid{nested-mapping_}@.is_unique(); } + constexpr bool is_exhaustive() const { return @\exposid{nested-mapping_}@.is_exhaustive(); } + constexpr bool is_strided() const { return @\exposid{nested-mapping_}@.is_strided(); } + + constexpr index_type stride(size_t r) const; + + template + friend constexpr bool operator==(const mapping& x, const mapping& y); + }; + + private: + @\exposid{nested-mapping-type}@ @\exposid{nested-mapping_}@; // \expos + extents_type @\exposid{extents_}@; // \expos + }; +} +\end{codeblock} + +\pnum +\tcode{Layout} shall meet +the layout mapping policy requirements\iref{mdspan.layout.policy.reqmts}. + +\pnum +\mandates +\begin{itemize} +\item +\tcode{Extents} is a specialization of \tcode{std::extents}, and +\item +\tcode{Extents::rank()} equals 2. +\end{itemize} + +\begin{itemdecl} +constexpr explicit mapping(const @\exposid{nested-mapping-type}@& map); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +\begin{itemize} +\item +Initializes \exposid{nested-mapping_} with \tcode{map}, and +\item +initializes \exposid{extents_} +with \tcode{\exposid{transpose-extents}(map.extents())}. +\end{itemize} +\end{itemdescr} + +\begin{itemdecl} +constexpr index_type stride(size_t r) const; +\end{itemdecl} + +\begin{itemdescr} + +\pnum +\expects +\begin{itemize} +\item +\tcode{is_strided()} is \tcode{true}, and +\item +\tcode{r < 2} is \tcode{true}. +\end{itemize} + +\pnum +\returns +\tcode{\exposid{nested-mapping_}.stride(r == 0 ? 1 : 0)} +\end{itemdescr} + +\begin{itemdecl} +template + friend constexpr bool operator==(const mapping& x, const mapping& y); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\constraints +The expression +\tcode{x.\exposid{nested-mapping_} == y.\exposid{nested-mapping_}} +is well-formed and its result is convertible to \tcode{bool}. + +\pnum +\returns +\tcode{x.\exposid{nested-mapping_} == y.\exposid{nested-mapping_}}. +\end{itemdescr} + +\rSec3[linalg.transp.transposed]{Function template \tcode{transposed}} + +\pnum +The \tcode{transposed} function +takes a rank-2 \tcode{mdspan} representing a matrix, and +returns a new \tcode{mdspan} representing the input matrix's transpose. +The input matrix's data are not modified, and +the returned \tcode{mdspan} accesses the input matrix's data in place. +\begin{itemdecl} + template + constexpr auto transposed(mdspan a); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\mandates +\tcode{Extents::rank() == 2} is \tcode{true}. + +\pnum +Let \tcode{ReturnExtents} be +\tcode{\exposid{transpose-extents-t}}. +Let \tcode{R} be +\tcode{mdspan}, +where \tcode{ReturnLayout} is: +\begin{itemize} +\item +\tcode{layout_right} if \tcode{Layout} is \tcode{layout_left}; +\item +otherwise, \tcode{layout_left} if \tcode{Layout} is \tcode{layout_right}; +\item +otherwise, \tcode{layout_stride} if \tcode{Layout} is \tcode{layout_stride}; +\item +otherwise, +\tcode{layout_blas_packed}, +if \tcode{Layout} is\newline +\tcode{layout_blas_packed} +for some \tcode{Triangle} and \tcode{StorageOrder}, where +\begin{itemize} +\item +\tcode{OppositeTriangle} is +\begin{codeblock} +conditional_t, + lower_triangle_t, upper_triangle_t> +\end{codeblock} +and +\item +\tcode{OppositeStorageOrder} is +\begin{codeblock} +conditional_t, row_major_t, column_major_t> +\end{codeblock} +\end{itemize} +\item +otherwise, \tcode{NestedLayout} +if \tcode{Layout} is \tcode{layout_transpose} +for some \tcode{NestedLay\-out}; +\item +otherwise, \tcode{layout_transpose}. +\end{itemize} + +\pnum +\returns +With \tcode{ReturnMapping} being +the type \tcode{typename ReturnLayout::template mapping}: +\begin{itemize} +\item +if \tcode{Layout} is \tcode{layout_left}, \tcode{layout_right}, or +a specialization of \tcode{layout_blas_packed}, +\begin{codeblock} +R(a.data_handle(), ReturnMapping(@\exposid{transpose-extents}@(a.mapping().extents())), + a.accessor()) +\end{codeblock} +\item +otherwise, if \tcode{Layout} is \tcode{layout_stride}, +\begin{codeblock} +R(a.data_handle(), ReturnMapping(@\exposid{transpose-extents}@(a.mapping().extents()), + array{a.mapping().stride(1), a.mapping().stride(0)}), + a.accessor()) +\end{codeblock} +\item +otherwise, if \tcode{Layout} is a specialization of \tcode{layout_transpose}, +\begin{codeblock} +R(a.data_handle(), a.mapping().nested_mapping(), a.accessor()) +\end{codeblock} +\item +otherwise, +\begin{codeblock} +R(a.data_handle(), ReturnMapping(a.mapping()), a.accessor()) +\end{codeblock} +\end{itemize} +\end{itemdescr} + +\pnum +\begin{example} +\begin{codeblock} +void test_transposed(mdspan> a) { + const auto num_rows = a.extent(0); + const auto num_cols = a.extent(1); + + auto a_t = transposed(a); + assert(num_rows == a_t.extent(1)); + assert(num_cols == a_t.extent(0)); + assert(a.stride(0) == a_t.stride(1)); + assert(a.stride(1) == a_t.stride(0)); + + for(size_t row = 0; row < num_rows; ++row) { + for(size_t col = 0; col < num_rows; ++col) { + assert(a[row, col] == a_t[col, row]); + } + } + + auto a_t_t = transposed(a_t); + assert(num_rows == a_t_t.extent(0)); + assert(num_cols == a_t_t.extent(1)); + assert(a.stride(0) == a_t_t.stride(0)); + assert(a.stride(1) == a_t_t.stride(1)); + + for(size_t row = 0; row < num_rows; ++row) { + for(size_t col = 0; col < num_rows; ++col) { + assert(a[row, col] == a_t_t[row, col]); + } + } +} +\end{codeblock} +\end{example} + +\rSec2[linalg.conjtransposed]{Conjugate transpose in-place transform} + +\pnum +The \tcode{conjugate_transposed} function +returns a conjugate transpose view of an object. +This combines the effects of \tcode{transposed} and \tcode{conjugated}. +\begin{itemdecl} + template + constexpr auto conjugate_transposed(mdspan a); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: \tcode{return conjugated(transposed(a));} +\end{itemdescr} + +\pnum +\begin{example} +\begin{codeblock} +void test_conjugate_transposed(mdspan, extents> a) { + const auto num_rows = a.extent(0); + const auto num_cols = a.extent(1); + + auto a_ct = conjugate_transposed(a); + assert(num_rows == a_ct.extent(1)); + assert(num_cols == a_ct.extent(0)); + assert(a.stride(0) == a_ct.stride(1)); + assert(a.stride(1) == a_ct.stride(0)); + + for(size_t row = 0; row < num_rows; ++row) { + for(size_t col = 0; col < num_rows; ++col) { + assert(a[row, col] == conj(a_ct[col, row])); + } + } + + auto a_ct_ct = conjugate_transposed(a_ct); + assert(num_rows == a_ct_ct.extent(0)); + assert(num_cols == a_ct_ct.extent(1)); + assert(a.stride(0) == a_ct_ct.stride(0)); + assert(a.stride(1) == a_ct_ct.stride(1)); + + for(size_t row = 0; row < num_rows; ++row) { + for(size_t col = 0; col < num_rows; ++col) { + assert(a[row, col] == a_ct_ct[row, col]); + assert(conj(a_ct[col, row]) == a_ct_ct[row, col]); + } + } +} +\end{codeblock} +\end{example} + +\rSec2[linalg.algs.reqs]{Algorithm requirements based on template parameter name} + +\pnum +Throughout +\ref{linalg.algs.blas1}, \ref{linalg.algs.blas2}, and \ref{linalg.algs.blas3}, +where the template parameters are not constrained, +the names of template parameters are used to express the following constraints. +\begin{itemize} +\item +\tcode{is_execution_policy::value} +is \tcode{true}\iref{execpol.type}. +\item +\tcode{Real} is any type such that \tcode{complex} is specified +\iref{complex.numbers.general}. +\item +\tcode{Triangle} is either \tcode{upper_triangle_t} or \tcode{lower_triangle_t}. +\item +\tcode{DiagonalStorage} is +either \tcode{implicit_unit_diagonal_t} or \tcode{explicit_diagonal_t}. +\end{itemize} +\begin{note} +Function templates that have a template parameter named \tcode{ExecutionPolicy} +are parallel algorithms\iref{algorithms.parallel.defns}. +\end{note} + +\rSec2[linalg.algs.blas1]{BLAS 1 algorithms} + +\rSec3[linalg.algs.blas1.complexity]{Complexity} + +\pnum +\complexity +All algorithms in \ref{linalg.algs.blas1} with \tcode{mdspan} parameters +perform a count of \tcode{mdspan} array accesses +and arithmetic operations that is linear in +the maximum product of extents of any \tcode{mdspan} parameter. + +\rSec3[linalg.algs.blas1.givens]{Givens rotations} + +\rSec4[linalg.algs.blas1.givens.lartg]{Compute Givens rotation} + +\begin{itemdecl} +template + setup_givens_rotation_result setup_givens_rotation(Real a, Real b) noexcept; + +template + setup_givens_rotation_result> + setup_givens_rotation(complex a, complex b) noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions compute the Givens plane rotation +represented by the two values $c$ and $s$ +such that the 2 x 2 system of equations +\begin{equation*} +\left[ \begin{matrix} +c & s \\ +-\overline{s} & c \\ +\end{matrix} \right] +\cdot +\left[ \begin{matrix} +a \\ +b \\ +\end{matrix} \right] += +\left[ \begin{matrix} +r \\ +0 \\ +\end{matrix} \right] +\end{equation*} + +holds, where $c$ is always a real scalar, and $c^2 + |s|^2 = 1$. +That is, $c$ and $s$ represent a 2 x 2 matrix, +that when multiplied by the right by the input vector +whose components are $a$ and $b$, +produces a result vector +whose first component $r$ is the Euclidean norm of the input vector, and +whose second component is zero. +\begin{note} +These functions correspond to the LAPACK function \tcode{xLARTG}\supercite{lapack}. +\end{note} + +\pnum +\returns +\tcode{{c, s, r}}, +where \tcode{c} and \tcode{s} form the Givens plane rotation +corresponding to the input \tcode{a} and \tcode{b}, +and \tcode{r} is the Euclidean norm of the two-component vector +formed by \tcode{a} and \tcode{b}. +\end{itemdescr} + +\rSec4[linalg.algs.blas1.givens.rot]{Apply a computed Givens rotation to vectors} + +\begin{itemdecl} +template<@\exposconcept{inout-vector}@ InOutVec1, @\exposconcept{inout-vector}@ InOutVec2, class Real> + void apply_givens_rotation(InOutVec1 x, InOutVec2 y, Real c, Real s); +template + void apply_givens_rotation(ExecutionPolicy&& exec, + InOutVec1 x, InOutVec2 y, Real c, Real s); +template<@\exposconcept{inout-vector}@ InOutVec1, @\exposconcept{inout-vector}@ InOutVec2, class Real> + void apply_givens_rotation(InOutVec1 x, InOutVec2 y, Real c, complex s); +template + void apply_givens_rotation(ExecutionPolicy&& exec, + InOutVec1 x, InOutVec2 y, Real c, complex s); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\begin{note} +These functions correspond to the BLAS function \tcode{xROT}\supercite{blas1}. +\end{note} + +\pnum +\mandates +\tcode{\exposid{compatible-static-extents}(0, 0)} is \tcode{true}. + +\pnum +\expects +\tcode{x.extent(0)} equals \tcode{y.extent(0)}. + +\pnum +\effects +Applies the plane rotation +specified by \tcode{c} and \tcode{s} to +the input vectors \tcode{x} and \tcode{y}, +as if the rotation were a 2 x 2 matrix and +the input vectors were successive rows of a matrix with two rows. +\end{itemdescr} + +\rSec3[linalg.algs.blas1.swap]{Swap matrix or vector elements} + +\begin{itemdecl} +template<@\exposconcept{inout-object}@ InOutObj1, @\exposconcept{inout-object}@ InOutObj2> + void swap_elements(InOutObj1 x, InOutObj2 y); +template + void swap_elements(ExecutionPolicy&& exec, InOutObj1 x, InOutObj2 y); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\begin{note} +These functions correspond to the BLAS function \tcode{xSWAP}\supercite{blas1}. +\end{note} + +\pnum +\constraints +\tcode{x.rank()} equals \tcode{y.rank()}. + +\pnum +\mandates +For all \tcode{r} in the range $[0, \tcode{x.rank()})$, +\begin{codeblock} +@\exposid{compatible-static-extents}@(r, r) +\end{codeblock} +is \tcode{true}. + +\pnum +\expects +\tcode{x.extents()} equals \tcode{y.extents()}. + +\pnum +\effects +Swaps all corresponding elements of \tcode{x} and \tcode{y}. +\end{itemdescr} + +\rSec3[linalg.algs.blas1.scal]{Multiply the elements of an object in place by a scalar} + +\begin{itemdecl} +template + void scale(Scalar alpha, InOutObj x); +template + void scale(ExecutionPolicy&& exec, Scalar alpha, InOutObj x); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\begin{note} +These functions correspond to the BLAS function \tcode{xSCAL}\supercite{blas1}. +\end{note} + +\pnum +\effects +Overwrites $x$ with the result of +computing the elementwise multiplication $\alpha x$, +where the scalar $\alpha$ is \tcode{alpha}. +\end{itemdescr} + +\rSec3[linalg.algs.blas1.copy]{Copy elements of one matrix or vector into another} + +\begin{itemdecl} +template<@\exposconcept{in-object}@ InObj, @\exposconcept{out-object}@ OutObj> + void copy(InObj x, OutObj y); +template + void copy(ExecutionPolicy&& exec, InObj x, OutObj y); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\begin{note} +These functions correspond to the BLAS function \tcode{xCOPY\supercite{blas1}}. +\end{note} + +\pnum +\constraints +\tcode{x.rank()} equals \tcode{y.rank()}. + +\pnum +\mandates +For all \tcode{r} in the range $[ 0, \tcode{x.rank()})$, +\begin{codeblock} +@\exposid{compatible-static-extents}@(r, r) +\end{codeblock} +is \tcode{true}. + +\pnum +\expects +\tcode{x.extents()} equals \tcode{y.extents()}. + +\pnum +\effects +Assigns each element of $x$ to the corresponding element of $y$. +\end{itemdescr} + +\rSec3[linalg.algs.blas1.add]{Add vectors or matrices elementwise} + +\begin{itemdecl} +template<@\exposconcept{in-object}@ InObj1, @\exposconcept{in-object}@ InObj2, @\exposconcept{out-object}@ OutObj> + void add(InObj1 x, InObj2 y, OutObj z); +template + void add(ExecutionPolicy&& exec, + InObj1 x, InObj2 y, OutObj z); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\begin{note} +These functions correspond to the BLAS function \tcode{xAXPY}\supercite{blas1}. +\end{note} + +\pnum +\constraints +\tcode{x.rank()}, \tcode{y.rank()}, and \tcode{z.rank()} are all equal. + +\pnum +\mandates +\tcode{\exposid{possibly-addable}()} is \tcode{true}. + +\pnum +\expects +\tcode{\exposid{addable}(x,y,z)} is \tcode{true}. + +\pnum +\effects +Computes $z = x + y$. + +\pnum +\remarks +\tcode{z} may alias \tcode{x} or \tcode{y}. +\end{itemdescr} + +\rSec3[linalg.algs.blas1.dot]{Dot product of two vectors} + +\pnum +\begin{note} +The functions in this section correspond to the BLAS +functions \tcode{xDOT}, \tcode{xDOTU}, and \tcode{xDOTC}\supercite{blas1}. +\end{note} + +\pnum +The following elements apply to all functions in \ref{linalg.algs.blas1.dot}. + +\pnum +\mandates +\tcode{\exposid{compatible-static-extents}(0, 0)} is \tcode{true}. + +\pnum +\expects +\tcode{v1.extent(0)} equals \tcode{v2.extent(0)}. +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, class Scalar> + Scalar dot(InVec1 v1, InVec2 v2, Scalar init); +template + Scalar dot(ExecutionPolicy&& exec, + InVec1 v1, InVec2 v2, Scalar init); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions compute a non-conjugated dot product +with an explicitly specified result type. + +\pnum +\returns +Let \tcode{N} be \tcode{v1.extent(0)}. +\begin{itemize} +\item +\tcode{init} if \tcode{N} is zero; +\item +otherwise, +\tcode{\placeholdernc{GENERALIZED_SUM}(plus<>(), init, v1[0]*v2[0], \ldots, v1[N-1]*v2[N-1])}. +\end{itemize} + +\pnum +\remarks +If \tcode{InVec1::value_type}, \tcode{InVec2::value_type}, and \tcode{Scalar} +are all floating-point types or specializations of \tcode{complex}, +and if \tcode{Scalar} has higher precision +than \tcode{InVec1::value_type} or \tcode{InVec2::value_type}, +then intermediate terms in the sum use \tcode{Scalar}'s precision or greater. +\end{itemdescr} + +\begin{itemdecl} + template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2> + auto dot(InVec1 v1, InVec2 v2); + template + auto dot(ExecutionPolicy&& exec, + InVec1 v1, InVec2 v2); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions compute a non-conjugated dot product with a default result type. + +\pnum +\effects +Let \tcode{T} be +\tcode{decltype(declval() * declval())}. +Then, +\begin{itemize} +\item +the two-parameter overload is equivalent to: +\begin{codeblock} +return dot(v1, v2, T{}); +\end{codeblock} +and +\item +the three-parameter overload is equivalent to: +\begin{codeblock} +return dot(std::forward(exec), v1, v2, T{}); +\end{codeblock} +\end{itemize} +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, class Scalar> + Scalar dotc(InVec1 v1, InVec2 v2, Scalar init); +template + Scalar dotc(ExecutionPolicy&& exec, + InVec1 v1, InVec2 v2, Scalar init); +\end{itemdecl} + +\begin{itemdescr} + +\pnum +These functions compute a conjugated dot product +with an explicitly specified result type. + +\pnum +\effects +\begin{itemize} +\item +The three-parameter overload is equivalent to: +\begin{codeblock} +return dot(conjugated(v1), v2, init); +\end{codeblock} +and +\item +the four-parameter overload is equivalent to: +\begin{codeblock} +return dot(std::forward(exec), conjugated(v1), v2, init); +\end{codeblock} +\end{itemize} +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2> + auto dotc(InVec1 v1, InVec2 v2); +template + auto dotc(ExecutionPolicy&& exec, + InVec1 v1, InVec2 v2); +\end{itemdecl} + +\begin{itemdescr} + +\pnum +These functions compute a conjugated dot product with a default result type. + +\pnum +\effects +Let \tcode{T} be \tcode{decltype(\exposid{conj-if-needed}(declval()) * decl\-val())}. +Then, +\begin{itemize} +\item +the two-parameter overload is equivalent to: +\begin{codeblock} +return dotc(v1, v2, T{}); +\end{codeblock} +and +\item +the three-parameter overload is equivalent to +\begin{codeblock} +return dotc(std::forward(exec), v1, v2, T{}); +\end{codeblock} +\end{itemize} +\end{itemdescr} + +\rSec3[linalg.algs.blas1.ssq]{Scaled sum of squares of a vector's elements} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec, class Scalar> + sum_of_squares_result vector_sum_of_squares(InVec v, sum_of_squares_result init); +template + sum_of_squares_result vector_sum_of_squares(ExecutionPolicy&& exec, + InVec v, sum_of_squares_result init); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\begin{note} +These functions correspond to the LAPACK function \tcode{xLASSQ}\supercite{lapack}. +\end{note} + +\pnum +\mandates +\tcode{decltype(\exposid{abs-if-needed}(declval()))} is convertible to \tcode{Scalar}. + +\pnum +\effects +Returns a value \tcode{result} such that +\begin{itemize} +\item +\tcode{result.scaling_factor} is the maximum of \tcode{init.scaling_factor} and +\tcode{\exposid{abs-if-needed}(x[i])} +for all \tcode{i} in the domain of \tcode{v}; and +\item +let \tcode{s2init} be +\begin{codeblock} +init.scaling_factor * init.scaling_factor * init.scaled_sum_of_squares +\end{codeblock} +then \tcode{result.scaling_factor * result.scaling_factor * result.scaled_sum_of_squares} +equals the sum of \tcode{s2init} and +the squares of \tcode{\exposid{abs-if-needed}(x[i])} +for all \tcode{i} in the domain of \tcode{v}. +\end{itemize} + +\pnum +\remarks +If \tcode{InVec::value_type}, and \tcode{Scalar} +are all floating-point types or specializations of \tcode{complex}, +and if \tcode{Scalar} has higher precision +than \tcode{InVec::value_type}, +then intermediate terms in the sum use \tcode{Scalar}'s precision or greater. +\end{itemdescr} + +\rSec3[linalg.algs.blas1.nrm2]{Euclidean norm of a vector} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec, class Scalar> + Scalar vector_two_norm(InVec v, Scalar init); +template + Scalar vector_two_norm(ExecutionPolicy&& exec, InVec v, Scalar init); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\begin{note} +These functions correspond to the BLAS function \tcode{xNRM2}\supercite{blas1}. +\end{note} + +\pnum +\mandates +Let \tcode{a} be +\tcode{\exposid{abs-if-needed}(declval())}. +Then, \tcode{decltype(\linebreak init + a * a} is convertible to \tcode{Scalar}. + +\pnum +\returns +The square root of the sum of the square of \tcode{init} and the squares of the absolute values of the elements of \tcode{v}. +\begin{note} +For \tcode{init} equal to zero, this is the Euclidean norm +(also called 2-norm) of the vector \tcode{v}. +\end{note} + +\pnum +\remarks +If \tcode{InVec::value_type}, and \tcode{Scalar} +are all floating-point types or specializations of \tcode{complex}, +and if \tcode{Scalar} has higher precision +than \tcode{InVec::value_type}, +then intermediate terms in the sum use \tcode{Scalar}'s precision or greater. +\begin{note} +An implementation of this function for floating-point types \tcode{T} +can use the \tcode{scaled_sum_of_squares} result from +\tcode{vector_sum_of_squares(x, \{.scaling_factor=1.0, .scaled_sum_of_squares=init\})}. +\end{note} +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec> + auto vector_two_norm(InVec v); +template + auto vector_two_norm(ExecutionPolicy&& exec, InVec v); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Let \tcode{a} be +\tcode{\exposid{abs-if-needed}(declval())}. +Let \tcode{T} be \tcode{decltype(a * a)}. +Then, +\begin{itemize} +\item +the one-parameter overload is equivalent to: +\begin{codeblock} +return vector_two_norm(v, T{}); +\end{codeblock} +and +\item +the two-parameter overload is equivalent to: +\begin{codeblock} +return vector_two_norm(std::forward(exec), v, T{}); +\end{codeblock} +\end{itemize} +\end{itemdescr} + +\rSec3[linalg.algs.blas1.asum]{Sum of absolute values of vector elements} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec, class Scalar> + Scalar vector_abs_sum(InVec v, Scalar init); +template + Scalar vector_abs_sum(ExecutionPolicy&& exec, InVec v, Scalar init); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\begin{note} +These functions correspond to the BLAS functions +\tcode{SASUM}, \tcode{DASUM}, \tcode{SCASUM}, and \tcode{DZASUM}\supercite{blas1}. +\end{note} + +\pnum +\mandates +\begin{codeblock} +decltype(init + @\exposid{abs-if-needed}@(@\exposid{real-if-needed}@(declval())) + + @\exposid{abs-if-needed}@(@\exposid{imag-if-needed}@(declval()))) +\end{codeblock} +is convertible to \tcode{Scalar}. + +\pnum +\returns +Let \tcode{N} be \tcode{v.extent(0)}. +\begin{itemize} +\item +\tcode{init} if \tcode{N} is zero; +\item +otherwise, if \tcode{InVec::value_type} is an arithmetic type, +\begin{codeblock} +@\placeholdernc{GENERALIZED_SUM}@(plus<>(), init, @\exposid{abs-if-needed}@(v[0]), @\ldots@, @\exposid{abs-if-needed}@(v[N-1])) +\end{codeblock} +\item +otherwise, +\begin{codeblock} +@\placeholdernc{GENERALIZED_SUM}@(plus<>(), init, + @\exposid{abs-if-needed}@(@\exposid{real-if-needed}@(v[0])) + @\exposid{abs-if-needed}@(@\exposid{imag-if-needed}@(v[0])), + @\ldots@, + @\exposid{abs-if-needed}@(@\exposid{real-if-needed}@(v[N-1])) + @\exposid{abs-if-needed}@(@\exposid{imag-if-needed}@(v[N-1]))) +\end{codeblock} +\end{itemize} + +\pnum +\remarks +If \tcode{InVec::value_type} and \tcode{Scalar} +are all floating-point types or specializations of \tcode{complex}, +and if \tcode{Scalar} has higher precision +than \tcode{InVec::value_type}, +then intermediate terms in the sum use \tcode{Scalar}'s precision or greater. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec> + auto vector_abs_sum(InVec v); +template + auto vector_abs_sum(ExecutionPolicy&& exec, InVec v); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Let \tcode{T} be \tcode{typename InVec::value_type}. +Then, +\begin{itemize} +\item +the one-parameter overload is equivalent to: +\begin{codeblock} +return vector_abs_sum(v, T{}); +\end{codeblock} +and +\item +the two-parameter overload is equivalent to: +\begin{codeblock} +return vector_abs_sum(std::forward(exec), v, T{}); +\end{codeblock} +\end{itemize} +\end{itemdescr} + +\rSec3[linalg.algs.blas1.iamax]{Index of maximum absolute value of vector elements} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec> + typename InVec::extents_type vector_idx_abs_max(InVec v); +template + typename InVec::extents_type vector_idx_abs_max(ExecutionPolicy&& exec, InVec v); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\begin{note} +These functions correspond to the BLAS function \tcode{IxAMAX}\supercite{blas1}. +\end{note} + +\pnum +Let \tcode{T} be +\begin{codeblock} +decltype(@\exposid{abs-if-needed}@(@\exposid{real-if-needed}@(declval())) + + @\exposid{abs-if-needed}@(@\exposid{imag-if-needed}@(declval()))) +\end{codeblock} + +\pnum +\mandates +\tcode{declval() < declval()} is a valid expression. + +\pnum +\returns +\begin{itemize} +\item +\tcode{numeric_limits::max()} + if \tcode{v} has zero elements; +\item +otherwise, the index of the first element of \tcode{v} +having largest absolute value, +if \tcode{InVec::value_type} is an arithmetic type; +\item +otherwise, the index of the first element $\tcode{v}_e$ of \tcode{v} +for which +\begin{codeblock} +@\exposid{abs-if-needed}@(@\exposid{real-if-needed}@(@$\tcode{v}_e$@)) + @\exposid{abs-if-needed}@(@\exposid{imag-if-needed}@(@$\tcode{v}_e$@)) +\end{codeblock} +has the largest value. +\end{itemize} +\end{itemdescr} + +\rSec3[linalg.algs.blas1.matfrobnorm]{Frobenius norm of a matrix} + +\pnum +\begin{note} +These functions exist in the BLAS standard\supercite{blas-std} +but are not part of the reference implementation. +\end{note} +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Scalar> + Scalar matrix_frob_norm(InMat A, Scalar init); +template + Scalar matrix_frob_norm(ExecutionPolicy&& exec, InMat A, Scalar init); +\end{itemdecl} + +\begin{itemdescr} + +\pnum +\mandates +Let \tcode{a} be +\tcode{\exposid{abs-if-needed}(declval())}. +Then, \tcode{decltype(\linebreak init + a * a)} +is convertible to \tcode{Scalar}. + +\pnum +\returns +The square root of the sum of squares +of \tcode{init} and the absolute values of the elements of \tcode{A}. +\begin{note} +For \tcode{init} equal to zero, +this is the Frobenius norm of the matrix \tcode{A}. +\end{note} + +\pnum +\remarks +If \tcode{InMat::value_type} and \tcode{Scalar} +are all floating-point types or specializations of \tcode{complex}, +and if \tcode{Scalar} has higher precision +than \tcode{InMat::value_type}, +then intermediate terms in the sum use \tcode{Scalar}'s precision or greater. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat> + auto matrix_frob_norm(InMat A); +template + auto matrix_frob_norm(ExecutionPolicy&& exec, InMat A); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Let \tcode{a} be +\tcode{\exposid{abs-if-needed}(declval())}. +Let \tcode{T} be +\tcode{decltype(a * a)}. +Then, +\begin{itemize} +\item +the one-parameter overload is equivalent to: +\begin{codeblock} +return matrix_frob_norm(A, T{}); +\end{codeblock} +and +\item +the two-parameter overload is equivalent to: +\begin{codeblock} +return matrix_frob_norm(std::forward(exec), A, T{}); +\end{codeblock} +\end{itemize} +\end{itemdescr} + +\rSec3[linalg.algs.blas1.matonenorm]{One norm of a matrix} + +\pnum +\begin{note} +These functions exist in the BLAS standard\supercite{blas-std} +but are not part of the reference implementation. +\end{note} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Scalar> + Scalar matrix_one_norm(InMat A, Scalar init); +template + Scalar matrix_one_norm(ExecutionPolicy&& exec, InMat A, Scalar init); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\mandates +\tcode{decltype(\exposid{abs-if-needed}(declval()))} +is convertible to \tcode{Scalar}. + +\pnum +\returns +\begin{itemize} +\item +\tcode{init} if \tcode{A.extent(1)} is zero; +\item +otherwise, the sum of \tcode{init} and the one norm of the matrix $A$. +\end{itemize} +\begin{note} +The one norm of the matrix \tcode{A} +is the maximum over all columns of \tcode{A}, +of the sum of the absolute values of the elements of the column. +\end{note} + +\pnum +\remarks +If \tcode{InMat::value_type} and \tcode{Scalar} +are all floating-point types or specializations of \tcode{complex}, +and if \tcode{Scalar} has higher precision +than \tcode{InMat::value_type}, +then intermediate terms in the sum use \tcode{Scalar}'s precision or greater. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat> + auto matrix_one_norm(InMat A); +template + auto matrix_one_norm(ExecutionPolicy&& exec, InMat A); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Let \tcode{T} be +\tcode{decltype(\exposid{abs-if-needed}(declval())}. +Then, +\begin{itemize} +\item +the one-parameter overload is equivalent to: +\begin{codeblock} +return matrix_one_norm(A, T{}); +\end{codeblock} +and +\item +the two-parameter overload is equivalent to: +\begin{codeblock} +return matrix_one_norm(std::forward(exec), A, T{}); +\end{codeblock} +\end{itemize} +\end{itemdescr} + +\rSec3[linalg.algs.blas1.matinfnorm]{Infinity norm of a matrix} + +\pnum +\begin{note} +These functions exist in the BLAS standard\supercite{blas-std} +but are not part of the reference implementation. +\end{note} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Scalar> + Scalar matrix_inf_norm(InMat A, Scalar init); +template + Scalar matrix_inf_norm(ExecutionPolicy&& exec, InMat A, Scalar init); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\mandates +\tcode{decltype(\exposid{abs-if-needed}(declval()))} +is convertible to \tcode{Scalar}. + +\pnum +\returns +\begin{itemize} +\item +\tcode{init} if \tcode{A.extent(0)} is zero; +\item +otherwise, +the sum of \tcode{init} and the infinity norm of the matrix \tcode{A}. +\end{itemize} +\begin{note} +The infinity norm of the matrix \tcode{A} +is the maximum over all rows of \tcode{A}, +of the sum of the absolute values +of the elements of the row. +\end{note} + +\pnum +\remarks +If \tcode{InMat::value_type} and \tcode{Scalar} +are all floating-point types or specializations of \tcode{complex}, +and if \tcode{Scalar} has higher precision +than \tcode{InMat::value_type}, +then intermediate terms in the sum use \tcode{Scalar}'s precision or greater. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat> + auto matrix_inf_norm(InMat A); +template + auto matrix_inf_norm(ExecutionPolicy&& exec, InMat A); +\end{itemdecl} + +\begin{itemdescr} + +\pnum +\effects +Let \tcode{T} be +\tcode{decltype(\exposid{abs-if-needed}(declval())}. +Then, +\begin{itemize} +\item +the one-parameter overload is equivalent to: +\begin{codeblock} +return matrix_inf_norm(A, T{}); +\end{codeblock} +and +\item +the two-parameter overload is equivalent to: +\begin{codeblock} +return matrix_inf_norm(std::forward(exec), A, T{}); +\end{codeblock} +\end{itemize} +\end{itemdescr} + +\rSec2[linalg.algs.blas2]{BLAS 2 algorithms} + +\rSec3[linalg.algs.blas2.gemv]{General matrix-vector product} + +\pnum +\begin{note} +These functions correspond to the BLAS function \tcode{xGEMV}. +\end{note} + +\pnum +The following elements apply to all functions in \ref{linalg.algs.blas2.gemv}. + +\pnum +\mandates +\begin{itemize} +\item +\tcode{\exposid{possibly-multipliable}()} +is \tcode{true}, and +\item +\tcode{\exposid{possibly-addable}()} +is \tcode{true} for those overloads that take a \tcode{z} parameter. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{\exposid{multipliable}(A,x,y)} is \tcode{true}, and +\item +\tcode{\exposid{addable}(x,y,z)} is \tcode{true} for those overloads that take a \tcode{z} parameter. +\end{itemize} + +\pnum +\complexity +\bigoh{\tcode{x.extent(0)} \times \tcode{A.extent(1)}}. + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, @\exposconcept{in-vector}@ InVec, @\exposconcept{out-vector}@ OutVec> + void matrix_vector_product(InMat A, InVec x, OutVec y); +template + void matrix_vector_product(ExecutionPolicy&& exec, InMat A, InVec x, OutVec y); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform an overwriting matrix-vector product. + +\pnum +\effects +Computes $y = A x$. +\end{itemdescr} + +\begin{example} +\begin{codeblock} +constexpr size_t num_rows = 5; +constexpr size_t num_cols = 6; + +// y = 3.0 * A * x +void scaled_matvec_1(mdspan> A, + mdspan> x, mdspan> y) { + matrix_vector_product(scaled(3.0, A), x, y); +} + +// z = 7.0 times the transpose of A, times y +void scaled_transposed_matvec(mdspan> A, + mdspan> y, mdspan> z) { + matrix_vector_product(scaled(7.0, transposed(A)), y, z); +} +\end{codeblock} +\end{example} + +\begin{itemdecl} + template<@\exposconcept{in-matrix}@ InMat, @\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, @\exposconcept{out-vector}@ OutVec> + void matrix_vector_product(InMat A, InVec1 x, InVec2 y, OutVec z); + template + void matrix_vector_product(ExecutionPolicy&& exec, + InMat A, InVec1 x, InVec2 y, OutVec z); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions performs an updating matrix-vector product. + +\pnum +\effects +Computes $z = y + A x$. + +\pnum +\remarks +\tcode{z} may alias \tcode{y}. +\end{itemdescr} + +\begin{example} +\begin{codeblock} +// y = 3.0 * A * x + 2.0 * y +void scaled_matvec_2(mdspan> A, + mdspan> x, mdspan> y) { + matrix_vector_product(scaled(3.0, A), x, scaled(2.0, y), y); +} +\end{codeblock} +\end{example} + +\rSec3[linalg.algs.blas2.symv]{Symmetric matrix-vector product} + +\pnum +\begin{note} +These functions correspond to the BLAS functions +\tcode{xSYMV} and \tcode{xSPMV}\supercite{blas2}. +\end{note} + +\pnum +The following elements apply to all functions in \ref{linalg.algs.blas2.symv}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} is \tcode{true}; +\item +\tcode{\exposid{possibly-multipliable}()} +is \tcode{true}; and +\item +\tcode{\exposid{possibly-addable}()} +is \tcode{true} for those overloads that take a \tcode{z} parameter. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{A.extent(0)} equals \tcode{A.extent(1)}, +\item +\tcode{\exposid{multipliable}(A,x,y)} is \tcode{true}, and +\item +\tcode{\exposid{addable}(x,y,z)} is \tcode{true} +for those overloads that take a \tcode{z} parameter. +\end{itemize} + +\pnum +\complexity +\bigoh{\tcode{x.extent(0)} \times \tcode{A.extent(1)}}. + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, @\exposconcept{in-vector}@ InVec, @\exposconcept{out-vector}@ OutVec> + void symmetric_matrix_vector_product(InMat A, Triangle t, InVec x, OutVec y); +template + void symmetric_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, InVec x, OutVec y); +\end{itemdecl} + +\begin{itemdescr} + +\pnum +These functions perform an overwriting symmetric matrix-vector product, +taking into account the \tcode{Triangle} parameter +that applies to the symmetric matrix \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes $y = A x$. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, @\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, @\exposconcept{out-vector}@ OutVec> + void symmetric_matrix_vector_product(InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z); +template + void symmetric_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform an updating symmetric matrix-vector product, +taking into account the \tcode{Triangle} parameter +that applies to the symmetric matrix \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes $z = y + A x$. + +\pnum +\remarks +\tcode{z} may alias \tcode{y}. +\end{itemdescr} + +\rSec3[linalg.algs.blas2.hemv]{Hermitian matrix-vector product} + +\pnum +\begin{note} +These functions correspond to the BLAS functions +\tcode{xHEMV} and \tcode{xHPMV}\supercite{blas2}. +\end{note} + +\pnum +The following elements apply to all functions in \ref{linalg.algs.blas2.hemv}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument +has the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} +is \tcode{true}; +\item +\tcode{\exposid{possibly-multipliable}()} +is \tcode{true}; and +\item +\tcode{\exposid{possibly-addable}()} +is \tcode{true} for those overloads that take a \tcode{z} parameter. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{A.extent(0)} equals \tcode{A.extent(1)}, +\item +\tcode{\exposid{multipliable}(A, x, y)} is \tcode{true}, and +\item +\tcode{\exposid{addable}(x, y, z)} is \tcode{true} for those overloads that take a \tcode{z} parameter. +\end{itemize} + +\pnum +\complexity +\bigoh{\tcode{x.extent(0)} \times \tcode{A.extent(1)}}. + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, @\exposconcept{in-vector}@ InVec, @\exposconcept{out-vector}@ OutVec> + void hermitian_matrix_vector_product(InMat A, Triangle t, InVec x, OutVec y); +template + void hermitian_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, InVec x, OutVec y); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform an overwriting Hermitian matrix-vector product, +taking into account the \tcode{Triangle} parameter +that applies to the Hermitian matrix \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes $y = A x$. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, @\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, @\exposconcept{out-vector}@ OutVec> + void hermitian_matrix_vector_product(InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z); +template + void hermitian_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform an updating Hermitian matrix-vector product, +taking into account the \tcode{Triangle} parameter +that applies to the Hermitian matrix \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes $z = y + A x$. + +\pnum +\remarks +\tcode{z} may alias \tcode{y}. +\end{itemdescr} + +\rSec3[linalg.algs.blas2.trmv]{Triangular matrix-vector product} + +\pnum +\begin{note} +These functions correspond to the BLAS functions +\tcode{xTRMV} and \tcode{xTPMV}\supercite{blas2}. +\end{note} + +\pnum +The following elements apply to all functions in \ref{linalg.algs.blas2.trmv}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} +is \tcode{true}; +\item +\tcode{\exposid{compatible-static-extents}(0, 0)} +is \tcode{true}; +\item +\tcode{\exposid{compatible-static-extents}(0, 0)} +is \tcode{true} for those overloads that take an \tcode{x} parameter; and +\item +\tcode{\exposid{compatible-static-extents}(0, 0)} +is \tcode{true} for those overloads that take a \tcode{z} parameter. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{A.extent(0)} equals \tcode{A.extent(1)}, +\item +\tcode{A.extent(0)} equals \tcode{y.extent(0)}, +\item +\tcode{A.extent(0)} equals \tcode{x.extent(0)} for those overloads that take an \tcode{x} parameter, and +\item +\tcode{A.extent(0)} equals \tcode{z.extent(0)} for those overloads that take a \tcode{z} parameter. +\end{itemize} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{in-vector}@ InVec, + @\exposconcept{out-vector}@ OutVec> + void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d, InVec x, OutVec y); +template + void triangular_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, InVec x, OutVec y); +\end{itemdecl} + +\begin{itemdescr} + +\pnum +These functions perform +an overwriting triangular matrix-vector product, +taking into account the \tcode{Triangle} and \tcode{DiagonalStorage} parameters +that apply to the triangular matrix \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes $y = A x$. + +\pnum +\complexity +\bigoh{\tcode{x.extent(0)} \times \tcode{A.extent(1)}}. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{inout-vector}@ InOutVec> + void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d, InOutVec y); +template + void triangular_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, InOutVec y); +\end{itemdecl} + +\begin{itemdescr} + +\pnum +These functions perform an in-place triangular matrix-vector product, +taking into account the \tcode{Triangle} and \tcode{DiagonalStorage} parameters +that apply to the triangular matrix \tcode{A}\iref{linalg.general}. +\begin{note} +Performing this operation in place hinders parallelization. +However, other \tcode{ExecutionPolicy} specific optimizations, +such as vectorization, are still possible. +\end{note} + +\pnum +\effects +Computes a vector $y'$ such that $y' = A y$, +and assigns each element of $y'$ to the corresponding element of $y$. + +\pnum +\complexity +\bigoh{\tcode{y.extent(0)} \times \tcode{A.extent(1)}}. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, + @\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, @\exposconcept{out-vector}@ OutVec> + void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d, + InVec1 x, InVec2 y, OutVec z); +template + void triangular_matrix_vector_product(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InVec1 x, InVec2 y, OutVec z); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform an updating triangular matrix-vector product, +taking into account the \tcode{Triangle} and \tcode{DiagonalStorage} parameters +that apply to the triangular matrix \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes $z = y + A x$. + +\pnum +\complexity +\bigoh{\tcode{x.extent(0)} \times \tcode{A.extent(1)}}. + +\pnum +\remarks +\tcode{z} may alias \tcode{y}. +\end{itemdescr} + +\rSec3[linalg.algs.blas2.trsv]{Solve a triangular linear system} + +\pnum +\begin{note} +These functions correspond to the BLAS functions +\tcode{xTRSV} and \tcode{xTPSV}\supercite{blas2}. +\end{note} + +\pnum +The following elements apply to all functions in \ref{linalg.algs.blas2.trsv}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} +is \tcode{true}; +\item +\tcode{\exposid{compatible-static-extents}(0, 0)} +is \tcode{true}; and +\item +\tcode{\exposid{compatible-static-extents}(0, 0)} +is \tcode{true} for those overloads that take an \tcode{x} parameter. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{A.extent(0)} equals \tcode{A.extent(1)}, +\item +\tcode{A.extent(0)} equals \tcode{b.extent(0)}, and +\item +\tcode{A.extent(0)} equals \tcode{x.extent(0)} +for those overloads that take an \tcode{x} parameter. +\end{itemize} + +\begin{itemdecl} + template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, + @\exposconcept{in-vector}@ InVec, @\exposconcept{out-vector}@ OutVec, class BinaryDivideOp> + void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d, + InVec b, OutVec x, BinaryDivideOp divide); + template + void triangular_matrix_vector_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InVec b, OutVec x, BinaryDivideOp divide); +\end{itemdecl} + +\begin{itemdescr} + +\pnum +These functions perform +a triangular solve, +taking into account the \tcode{Triangle} and \tcode{DiagonalStorage} parameters +that apply to the triangular matrix \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes a vector $x'$ such that $b = A x'$, +and assigns each element of $x'$ to the corresponding element of $x$. +If no such $x'$ exists, +then the elements of \tcode{x} are valid but unspecified. + +\pnum +\complexity +\bigoh{\tcode{A.extent(1)} \times \tcode{b.extent(0)}}. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, + @\exposconcept{in-vector}@ InVec, @\exposconcept{out-vector}@ OutVec> + void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d, InVec b, OutVec x); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: +\begin{codeblock} +triangular_matrix_vector_solve(A, t, d, b, x, divides{}); +\end{codeblock} +\end{itemdescr} + +\begin{itemdecl} +template + void triangular_matrix_vector_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, InVec b, OutVec x); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: +\begin{codeblock} +triangular_matrix_vector_solve(std::forward(exec), + A, t, d, b, x, divides{}); +\end{codeblock} +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, + @\exposconcept{inout-vector}@ InOutVec, class BinaryDivideOp> + void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d, + InOutVec b, BinaryDivideOp divide); +template + void triangular_matrix_vector_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InOutVec b, BinaryDivideOp divide); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform an in-place triangular solve, +taking into account the \tcode{Triangle} and \tcode{Diagonal\-Storage} parameters +that apply to the triangular matrix \tcode{A}\iref{linalg.general}. +\begin{note} +Performing triangular solve in place hinders parallelization. +However, other \tcode{ExecutionPolicy} specific optimizations, +such as vectorization, are still possible. +\end{note} + +\pnum +\effects +Computes a vector $x'$ such that $b = A x'$, +and assigns each element of $x'$ to the corresponding element of $b$. +If no such $x'$ exists, +then the elements of \tcode{b} are valid but unspecified. + +\pnum +\complexity +\bigoh{\tcode{A.extent(1)} \times \tcode{b.extent(0)}}. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{inout-vector}@ InOutVec> + void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d, InOutVec b); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: +\begin{codeblock} +triangular_matrix_vector_solve(A, t, d, b, divides{}); +\end{codeblock} +\end{itemdescr} + +\begin{itemdecl} +template + void triangular_matrix_vector_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, InOutVec b); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: +\begin{codeblock} +triangular_matrix_vector_solve(std::forward(exec), + A, t, d, b, divides{}); +\end{codeblock} +\end{itemdescr} + +\rSec3[linalg.algs.blas2.rank1]{Rank-1 (outer product) update of a matrix} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, @\exposconcept{inout-matrix}@ InOutMat> + void matrix_rank_1_update(InVec1 x, InVec2 y, InOutMat A); +template + void matrix_rank_1_update(ExecutionPolicy&& exec, InVec1 x, InVec2 y, InOutMat A); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform a nonsymmetric nonconjugated rank-1 update. +\begin{note} +These functions correspond to the BLAS functions +\tcode{xGER} (for real element types) and +\tcode{xGERU} (for complex element types)\supercite{blas2}. +\end{note} + +\pnum +\mandates +\tcode{\exposid{possibly-multipliable}()} +is \tcode{true}. + +\pnum +\expects +\tcode{\exposid{multipliable}(A, y, x)} is \tcode{true}. + +\pnum +\effects +Computes a matrix $A'$ such that $A' = A + x y^T$, +and assigns each element of $A'$ to the corresponding element of $A$. + +\pnum +\complexity +\bigoh{\tcode{x.extent(0)} \times \tcode{y.extent(0)}}. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, @\exposconcept{inout-matrix}@ InOutMat> + void matrix_rank_1_update_c(InVec1 x, InVec2 y, InOutMat A); +template + void matrix_rank_1_update_c(ExecutionPolicy&& exec, InVec1 x, InVec2 y, InOutMat A); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform a nonsymmetric conjugated rank-1 update. +\begin{note} +These functions correspond to the BLAS functions +\tcode{xGER} (for real element types) and +\tcode{xGERC} (for complex element types)\supercite{blas2}. +\end{note} + +\pnum +\effects +\begin{itemize} +\item +For the overloads without an \tcode{ExecutionPolicy} argument, +equivalent to: +\begin{codeblock} +matrix_rank_1_update(x, conjugated(y), A); +\end{codeblock} +\item +otherwise, equivalent to: +\begin{codeblock} +matrix_rank_1_update(std::forward(exec), x, conjugated(y), A); +\end{codeblock} +\end{itemize} +\end{itemdescr} + +\rSec3[linalg.algs.blas2.symherrank1]{Symmetric or Hermitian Rank-1 (outer product) update of a matrix} + +\pnum +\begin{note} +These functions correspond to the BLAS functions +\tcode{xSYR}, \tcode{xSPR}, \tcode{xHER}, and \tcode{xHPR}\supercite{blas2}. +They have overloads taking a scaling factor \tcode{alpha}, +because it would be impossible to express the update +$A = A - x x^T$ otherwise. +\end{note} + +\pnum +The following elements apply to all functions in \ref{linalg.algs.blas2.symherrank1}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InOutMat} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} +is \tcode{true}; and +\item +\tcode{\exposid{compatible-static-extents}(0, 0)} +is \tcode{true}. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{A.extent(0)} equals \tcode{A.extent(1)}, and +\item +\tcode{A.extent(0)} equals \tcode{x.extent(0)}. +\end{itemize} + +\pnum +\complexity +\bigoh{\tcode{x.extent(0)} \times \tcode{x.extent(0)}}. + +\begin{itemdecl} +template + void symmetric_matrix_rank_1_update(Scalar alpha, InVec x, InOutMat A, Triangle t); +template + void symmetric_matrix_rank_1_update(ExecutionPolicy&& exec, + Scalar alpha, InVec x, InOutMat A, Triangle t); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform +a symmetric rank-1 update of the symmetric matrix \tcode{A}, +taking into account the \tcode{Triangle} parameter +that applies to \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes a matrix $A'$ such that +$A' = A + \alpha x x^T$, where the scalar $\alpha$ is \tcode{alpha}, +and assigns each element of $A'$ to the corresponding element of $A$. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec, @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void symmetric_matrix_rank_1_update(InVec x, InOutMat A, Triangle t); +template + void symmetric_matrix_rank_1_update(ExecutionPolicy&& exec, InVec x, InOutMat A, Triangle t); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform +a symmetric rank-1 update of the symmetric matrix \tcode{A}, +taking into account the \tcode{Triangle} parameter +that applies to \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes a matrix $A'$ such that $A' = A + x x^T$ +and assigns each element of $A'$ to the corresponding element of $A$. +\end{itemdescr} + +\begin{itemdecl} +template + void hermitian_matrix_rank_1_update(Scalar alpha, InVec x, InOutMat A, Triangle t); +template + void hermitian_matrix_rank_1_update(ExecutionPolicy&& exec, + Scalar alpha, InVec x, InOutMat A, Triangle t); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform +a Hermitian rank-1 update of the Hermitian matrix \tcode{A}, +taking into account the \tcode{Triangle} parameter +that applies to \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes $A'$ such that +$A' = A + \alpha x x^H$, where the scalar $\alpha$ is \tcode{alpha}, +and assigns each element of $A'$ to the corresponding element of $A$. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec, @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void hermitian_matrix_rank_1_update(InVec x, InOutMat A, Triangle t); +template + void hermitian_matrix_rank_1_update(ExecutionPolicy&& exec, InVec x, InOutMat A, Triangle t); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform +a Hermitian rank-1 update of the Hermitian matrix \tcode{A}, +taking into account the \tcode{Triangle} parameter +that applies to \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes a matrix $A'$ such that $A' = A + x x^H$ and +assigns each element of $A'$ to the corresponding element of $A$. +\end{itemdescr} + +\rSec3[linalg.algs.blas2.rank2]{Symmetric and Hermitian rank-2 matrix updates} + +\pnum +\begin{note} +These functions correspond to the BLAS functions +\tcode{xSYR2},\tcode{xSPR2}, \tcode{xHER2} and \tcode{xHPR2}\supercite{blas2}. +\end{note} + +\pnum +The following elements apply to all functions in \ref{linalg.algs.blas2.rank2}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InOutMat} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} +is \tcode{true}; and +\item +\tcode{\exposid{possibly-multipliable}()} +is \tcode{true}. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{A.extent(0)} equals \tcode{A.extent(1)}, and +\item +\tcode{\exposid{multipliable}(A, x, y)} is \tcode{true}. +\end{itemize} + +\pnum +\complexity +\bigoh{\tcode{x.extent(0)} \times \tcode{y.extent(0)}}. + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, + @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void symmetric_matrix_rank_2_update(InVec1 x, InVec2 y, InOutMat A, Triangle t); +template + void symmetric_matrix_rank_2_update(ExecutionPolicy&& exec, + InVec1 x, InVec2 y, InOutMat A, Triangle t); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform +a symmetric rank-2 update of the symmetric matrix \tcode{A}, +taking into account the \tcode{Triangle} parameter +that applies to \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes $A'$ such that $A' = A + x y^T + y x^T$ and +assigns each element of $A'$ to the corresponding element of $A$. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-vector}@ InVec1, @\exposconcept{in-vector}@ InVec2, + @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void hermitian_matrix_rank_2_update(InVec1 x, InVec2 y, InOutMat A, Triangle t); +template + void hermitian_matrix_rank_2_update(ExecutionPolicy&& exec, + InVec1 x, InVec2 y, InOutMat A, Triangle t); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform +a Hermitian rank-2 update of the Hermitian matrix \tcode{A}, +taking into account the \tcode{Triangle} parameter +that applies to \tcode{A}\iref{linalg.general}. + +\pnum +\effects +Computes $A'$ such that $A' = A + x y^H + y x^H$ and +assigns each element of $A'$ to the corresponding element of $A$. +\end{itemdescr} + +\rSec2[linalg.algs.blas3]{BLAS 3 algorithms} + +\rSec3[linalg.algs.blas3.gemm]{General matrix-matrix product} + +\pnum +\begin{note} +These functions correspond to the BLAS function \tcode{xGEMM}\supercite{blas3}. +\end{note} + +\pnum +The following elements apply +to all functions in \ref{linalg.algs.blas3.gemm} +in addition to function-specific elements. + +\pnum +\mandates +\tcode{\exposid{possibly-multipliable}()} +is \tcode{true}. + +\pnum +\expects +\tcode{\exposid{multipliable}(A, B, C)} is \tcode{true}. + +\pnum +\complexity +\bigoh{\tcode{A.extent(0)} \times \tcode{A.extent(1)} \times \tcode{B.extent(1)}}. + +\begin{itemdecl} + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat> + void matrix_product(InMat1 A, InMat2 B, OutMat C); + template + void matrix_product(ExecutionPolicy&& exec, InMat1 A, InMat2 B, OutMat C); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Computes $C = A B$. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{in-matrix}@ InMat3, @\exposconcept{out-matrix}@ OutMat> + void matrix_product(InMat1 A, InMat2 B, InMat3 E, OutMat C); +template + void matrix_product(ExecutionPolicy&& exec, InMat1 A, InMat2 B, InMat3 E, OutMat C); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\mandates +\tcode{\exposid{possibly-addable}()} is \tcode{true}. + +\pnum +\expects +\tcode{\exposid{addable}(E, E, C)} is \tcode{true}. + +\pnum +\effects +Computes $C = E + A B$. + +\pnum +\remarks +\tcode{C} may alias \tcode{E}. +\end{itemdescr} + +\rSec3[linalg.algs.blas3.xxmm]{Symmetric, Hermitian, and triangular matrix-matrix product} + +\pnum +\begin{note} +These functions correspond to the BLAS functions +\tcode{xSYMM}, \tcode{xHEMM}, and \tcode{xTRMM}\supercite{blas3}. +\end{note} + +\pnum +The following elements apply to all functions in \ref{linalg.algs.blas3.xxmm} +in addition to function-specific elements. + +\pnum +\mandates +\begin{itemize} +\item +\tcode{\exposid{possibly-multipliable}()} +is \tcode{true}, and +\item +\tcode{\exposid{possibly-addable}()} +is \tcode{true} for those overloads that take an \tcode{E} parameter. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{\exposid{multipliable}(A, B, C)} is \tcode{true}, and +\item +\tcode{\exposid{addable}(E, E, C)} +is \tcode{true} for those overloads that take an \tcode{E} parameter. +\end{itemize} + +\pnum +\complexity +\bigoh{\tcode{A.extent(0)} \times \tcode{A.extent(1)} \times \tcode{B.extent(1)}}. + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat1, class Triangle, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat> + void symmetric_matrix_product(InMat1 A, Triangle t, InMat2 B, OutMat C); +template + void symmetric_matrix_product(ExecutionPolicy&& exec, InMat1 A, Triangle t, InMat2 B, OutMat C); + +template<@\exposconcept{in-matrix}@ InMat1, class Triangle, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat> + void hermitian_matrix_product(InMat1 A, Triangle t, InMat2 B, OutMat C); +template + void hermitian_matrix_product(ExecutionPolicy&& exec, InMat1 A, Triangle t, InMat2 B, OutMat C); + +template<@\exposconcept{in-matrix}@ InMat1, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat> + void triangular_matrix_product(InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, OutMat C); +template + void triangular_matrix_product(ExecutionPolicy&& exec, + InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, OutMat C); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform a matrix-matrix multiply, +taking into account +the \tcode{Triangle} and \tcode{Diagonal\-Storage} (if applicable) parameters +that apply to the symmetric, Hermitian, or triangular (respectively) matrix \tcode{A}\iref{linalg.general}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat1} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; and +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} is \tcode{true}. +\end{itemize} + +\pnum +\expects +\tcode{A.extent(0) == A.extent(1)} is \tcode{true}. + +\pnum +\effects +Computes $C = A B$. +\end{itemdescr} + +\begin{itemdecl} + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, class Triangle, @\exposconcept{out-matrix}@ OutMat> + void symmetric_matrix_product(InMat1 A, InMat2 B, Triangle t, OutMat C); + template + void symmetric_matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, Triangle t, OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, class Triangle, @\exposconcept{out-matrix}@ OutMat> + void hermitian_matrix_product(InMat1 A, InMat2 B, Triangle t, OutMat C); + template + void hermitian_matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, Triangle t, OutMat C); + + template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, class Triangle, class DiagonalStorage, + @\exposconcept{out-matrix}@ OutMat> + void triangular_matrix_product(InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, OutMat C); + template + void triangular_matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, OutMat C); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform a matrix-matrix multiply, +taking into account +the \tcode{Triangle} and \tcode{Diagonal\-Storage} (if applicable) parameters +that apply to the symmetric, Hermitian, or triangular (respectively) matrix \tcode{B}\iref{linalg.general}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat2} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; and +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} is \tcode{true}. +\end{itemize} + +\pnum +\expects +\tcode{B.extent(0) == B.extent(1)} is \tcode{true}. + +\pnum +\effects +Computes $C = A B$. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat1, class Triangle, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{in-matrix}@ InMat3, + @\exposconcept{out-matrix}@ OutMat> + void symmetric_matrix_product(InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C); +template + void symmetric_matrix_product(ExecutionPolicy&& exec, + InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C); + +template<@\exposconcept{in-matrix}@ InMat1, class Triangle, @\exposconcept{in-matrix}@ InMat2, @\exposconcept{in-matrix}@ InMat3, + @\exposconcept{out-matrix}@ OutMat> + void hermitian_matrix_product(InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C); +template + void hermitian_matrix_product(ExecutionPolicy&& exec, + InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C); + +template<@\exposconcept{in-matrix}@ InMat1, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat2, @\exposconcept{in-matrix}@ InMat3, @\exposconcept{out-matrix}@ OutMat> + void triangular_matrix_product(InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, InMat3 E, + OutMat C); +template + void triangular_matrix_product(ExecutionPolicy&& exec, + InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, InMat3 E, + OutMat C); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform +a potentially overwriting matrix-matrix multiply-add, +taking into account the \tcode{Triangle} and \tcode{DiagonalStorage} (if applicable) parameters +that apply to the symmetric, Hermitian, or triangular (respectively) matrix \tcode{A}\iref{linalg.general}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat1} has \tcode{layout_blas_packed} layout, then the + layout's \tcode{Triangle} template argument has the same type as + the function's \tcode{Triangle} template argument; and +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} is \tcode{true}. +\end{itemize} + +\pnum +\expects +\tcode{A.extent(0) == A.extent(1)} is \tcode{true}. + +\pnum +\effects +Computes $C = E + A B$. + +\pnum +\remarks +\tcode{C} may alias \tcode{E}. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, class Triangle, @\exposconcept{in-matrix}@ InMat3, + @\exposconcept{out-matrix}@ OutMat> + void symmetric_matrix_product(InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C); +template + void symmetric_matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C); + +template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, class Triangle, @\exposconcept{in-matrix}@ InMat3, + @\exposconcept{out-matrix}@ OutMat> + void hermitian_matrix_product(InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C); +template + void hermitian_matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C); + +template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat3, @\exposconcept{out-matrix}@ OutMat> + void triangular_matrix_product(InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, InMat3 E, + OutMat C); +template + void triangular_matrix_product(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, InMat3 E, + OutMat C); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform +a potentially overwriting matrix-matrix multiply-add, +taking into account +the \tcode{Triangle} and \tcode{Diagonal\-Storage} (if applicable) parameters +that apply to the symmetric, Hermitian, or triangular (respectively) matrix \tcode{B}\iref{linalg.general}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat2} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; and +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} is \tcode{true}. +\end{itemize} + +\pnum +\expects +\tcode{B.extent(0) == B.extent(1)} is \tcode{true}. + +\pnum +\effects +Computes $C = E + A B$. + +\pnum +\remarks +\tcode{C} may alias \tcode{E}. +\end{itemdescr} + +\rSec3[linalg.algs.blas3.trmm]{In-place triangular matrix-matrix product} + +\pnum +These functions perform +an in-place matrix-matrix multiply, +taking into account +the \tcode{Triangle} and \tcode{Diagonal\-Storage} parameters +that apply to the triangular matrix \tcode{A}\iref{linalg.general}. +\begin{note} +These functions correspond to the BLAS function \tcode{xTRMM}\supercite{blas3}. +\end{note} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{inout-matrix}@ InOutMat> + void triangular_matrix_left_product(InMat A, Triangle t, DiagonalStorage d, InOutMat C); +template + void triangular_matrix_left_product(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, InOutMat C); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{possibly-multipliable}()} +is \tcode{true}; and +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} is \tcode{true}. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{\exposid{multipliable}(A, C, C)} is \tcode{true}, and +\item +\tcode{A.extent(0) == A.extent(1)} is \tcode{true}. +\end{itemize} + +\pnum +\effects +Computes a matrix $C'$ such that $C' = A C$ and +assigns each element of $C'$ to the corresponding element of $C$. + +\pnum +\complexity +\bigoh{\tcode{A.extent(0)} \times \tcode{A.extent(1)} \times \tcode{C.extent(0)}}. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{inout-matrix}@ InOutMat> + void triangular_matrix_right_product(InMat A, Triangle t, DiagonalStorage d, InOutMat C); +template + void triangular_matrix_right_product(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, InOutMat C); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{possibly-multipliable}()} +is \tcode{true}; and +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} is \tcode{true}. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{\exposid{multipliable}(C, A, C)} is \tcode{true}, and +\item +\tcode{A.extent(0) == A.extent(1)} is \tcode{true}. +\end{itemize} + +\pnum +\effects +Computes a matrix $C'$ such that $C' = C A$ and +assigns each element of $C'$ to the corresponding element of $C$. + +\pnum +\complexity +\bigoh{\tcode{A.extent(0)} \times \tcode{A.extent(1)} \times \tcode{C.extent(0)}}. +\end{itemdescr} + +\rSec3[linalg.algs.blas3.rankk]{Rank-k update of a symmetric or Hermitian matrix} + +\pnum +\begin{note} +These functions correspond to the BLAS functions +\tcode{xSYRK} and \tcode{xHERK}\supercite{blas3}. +\end{note} + +\pnum +The following elements apply to all functions in \ref{linalg.algs.blas3.rankk}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InOutMat} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} +is \tcode{true}; +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} +is \tcode{true}; and +\item +\tcode{\exposid{compatible-static-extents}(0, 0)} +is \tcode{true}. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{A.extent(0)} equals \tcode{A.extent(1)}, +\item +\tcode{C.extent(0)} equals \tcode{C.extent(1)}, and +\item +\tcode{A.extent(0)} equals \tcode{C.extent(0)}. +\end{itemize} + +\pnum +\complexity +\bigoh{\tcode{A.extent(0)} \times \tcode{A.extent(1)} \times \tcode{C.extent(0)}}. + +\begin{itemdecl} + template + void symmetric_matrix_rank_k_update(Scalar alpha, InMat A, InOutMat C, Triangle t); + template + void symmetric_matrix_rank_k_update(ExecutionPolicy&& exec, + Scalar alpha, InMat A, InOutMat C, Triangle t); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Computes a matrix $C'$ such that $C' = C + \alpha A A^T$, +where the scalar $\alpha$ is \tcode{alpha}, +and assigns each element of $C'$ to the corresponding element of $C$. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void symmetric_matrix_rank_k_update(InMat A, InOutMat C, Triangle t); +template + void symmetric_matrix_rank_k_update(ExecutionPolicy&& exec, + InMat A, InOutMat C, Triangle t); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Computes a matrix $C'$ such that $C' = C + A A^T$, and +assigns each element of $C'$ to the corresponding element of $C$. +\end{itemdescr} + +\begin{itemdecl} +template + void hermitian_matrix_rank_k_update(Scalar alpha, InMat A, InOutMat C, Triangle t); +template + void hermitian_matrix_rank_k_update(ExecutionPolicy&& exec, + Scalar alpha, InMat A, InOutMat C, Triangle t); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Computes a matrix $C'$ such that $C' = C + \alpha A A^H$, +where the scalar $\alpha$ is \tcode{alpha}, +and assigns each element of $C'$ to the corresponding element of $C$. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void hermitian_matrix_rank_k_update(InMat A, InOutMat C, Triangle t); +template + void hermitian_matrix_rank_k_update(ExecutionPolicy&& exec, + InMat A, InOutMat C, Triangle t); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Computes a matrix $C'$ such that $C' = C + A A^H$, and +assigns each element of $C'$ to the corresponding element of $C$. +\end{itemdescr} + +\rSec3[linalg.algs.blas3.rank2k]{Rank-2k update of a symmetric or Hermitian matrix} + +\pnum +\begin{note} +These functions correspond to the BLAS functions +\tcode{xSYR2K} and \tcode{xHER2K}\supercite{blas3}. +\end{note} + +\pnum +The following elements apply to all functions in \ref{linalg.algs.blas3.rank2k}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InOutMat} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{possibly-addable}()} +is \tcode{true}; and +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} +is \tcode{true}. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{\exposid{addable}(A, B, C)} is \tcode{true}, and +\item +\tcode{A.extent(0)} equals \tcode{A.extent(1)}. +\end{itemize} + +\pnum +\complexity +\bigoh{\tcode{A.extent(0)} \times \tcode{A.extent(1)} \times \tcode{C.extent(0)}}. + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, + @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void symmetric_matrix_rank_2k_update(InMat1 A, InMat2 B, InOutMat C, Triangle t); +template + void symmetric_matrix_rank_2k_update(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, InOutMat C, Triangle t); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Computes a matrix $C'$ such that $C' = C + A B^T + B A^T$, +and assigns each element of $C'$ to the corresponding element of $C$. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat1, @\exposconcept{in-matrix}@ InMat2, + @\exposconcept{possibly-packed-inout-matrix}@ InOutMat, class Triangle> + void hermitian_matrix_rank_2k_update(InMat1 A, InMat2 B, InOutMat C, Triangle t); +template + void hermitian_matrix_rank_2k_update(ExecutionPolicy&& exec, + InMat1 A, InMat2 B, InOutMat C, Triangle t); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Computes a matrix $C'$ such that $C' = C + A B^H + B A^H$, +and assigns each element of $C'$ to the corresponding element of $C$. +\end{itemdescr} + +\rSec3[linalg.algs.blas3.trsm]{Solve multiple triangular linear systems} + +\pnum +\begin{note} +These functions correspond to the BLAS function \tcode{xTRSM}\supercite{blas3}. +\end{note} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat1, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat, class BinaryDivideOp> + void triangular_matrix_matrix_left_solve(InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X, BinaryDivideOp divide); +template + void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec, + InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X, BinaryDivideOp divide); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform multiple matrix solves, +taking into account the \tcode{Triangle} and \tcode{DiagonalStorage} parameters +that apply to the triangular matrix \tcode{A}\iref{linalg.general}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat1} has \tcode{layout_blas_packed} layout, then the + layout's \tcode{Triangle} template argument has the same type as + the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{possibly-multipliable}()} +is \tcode{true}; and +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} +is \tcode{true}. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{\exposid{multipliable}(A, X, B)} is \tcode{true}, and +\item +\tcode{A.extent(0) == A.extent(1)} is \tcode{true}. +\end{itemize} + +\pnum +\effects +Computes $X'$ such that $AX' = B$, +and assigns each element of $X'$ to the corresponding element of $X$. +If no such $X'$ exists, +then the elements of \tcode{X} are valid but unspecified. + +\pnum +\complexity +\bigoh{\tcode{A.extent(0)} \times \tcode{X.extent(1)} \times \tcode{X.extent(1)}}. +\end{itemdescr} + +\pnum +\begin{note} +Since the triangular matrix is on the left, +the desired \tcode{divide} implementation +in the case of noncommutative multiplication +is mathematically equivalent to $y^{-1} x$, +where $x$ is the first argument and $y$ is the second argument, +and $y^{-1}$ denotes the multiplicative inverse of $y$. +\end{note} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat1, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat> + void triangular_matrix_matrix_left_solve(InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: +\begin{codeblock} +triangular_matrix_matrix_left_solve(A, t, d, B, X, divides{}); +\end{codeblock} +\end{itemdescr} + +\begin{itemdecl} +template + void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec, + InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: +\begin{codeblock} +triangular_matrix_matrix_left_solve(std::forward(exec), + A, t, d, B, X, divides{}); +\end{codeblock} +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat1, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat, class BinaryDivideOp> + void triangular_matrix_matrix_right_solve(InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X, BinaryDivideOp divide); +template + void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec, + InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X, BinaryDivideOp divide); +\end{itemdecl} + +\begin{itemdescr} + +\pnum +These functions perform multiple matrix solves, +taking into account the \tcode{Triangle} and \tcode{DiagonalStorage} parameters +that apply to the triangular matrix \tcode{A}\iref{linalg.general}. + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat1} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{possibly-multipliable}()} is \tcode{true}; and +\item +\tcode{\exposid{compatible-static-extents}(0,1)} is \tcode{true}. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{\exposid{multipliable}(X, A, B)} is \tcode{true}, and +\item +\tcode{A.extent(0) == A.extent(1)} is \tcode{true}. +\end{itemize} + +\pnum +\effects +Computes $X'$ such that $X'A = B$, +and assigns each element of $X'$ to the corresponding element of $X$. +If no such $X'$ exists, +then the elements of \tcode{X} are valid but unspecified. + +\pnum +\complexity +$O($ \tcode{B.extent(0)} $\cdot$ \tcode{B.extent(1)} $\cdot$ \tcode{A.extent(1)} $)$ +\begin{note} +Since the triangular matrix is on the right, +the desired \tcode{divide} implementation +in the case of noncommutative multiplication +is mathematically equivalent to $x y^{-1}$, +where $x$ is the first argument and $y$ is the second argument, +and $y^{-1}$ denotes the multiplicative inverse of $y$. +\end{note} +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat1, class Triangle, class DiagonalStorage, + @\exposconcept{in-matrix}@ InMat2, @\exposconcept{out-matrix}@ OutMat> + void triangular_matrix_matrix_right_solve(InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: +\begin{codeblock} +triangular_matrix_matrix_right_solve(A, t, d, B, X, divides{}); +\end{codeblock} +\end{itemdescr} + +\begin{itemdecl} +template + void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec, + InMat1 A, Triangle t, DiagonalStorage d, + InMat2 B, OutMat X); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: +\begin{codeblock} +triangular_matrix_matrix_right_solve(std::forward(exec), + A, t, d, B, X, divides{}); +\end{codeblock} +\end{itemdescr} + +\rSec3[linalg.algs.blas3.inplacetrsm]{Solve multiple triangular linear systems in-place} + +\pnum +\begin{note} +These functions correspond to the BLAS function \tcode{xTRSM}\supercite{blas3}. +\end{note} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, + @\exposconcept{inout-matrix}@ InOutMat, class BinaryDivideOp> + void triangular_matrix_matrix_left_solve(InMat A, Triangle t, DiagonalStorage d, + InOutMat B, BinaryDivideOp divide); +template + void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InOutMat B, BinaryDivideOp divide); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform multiple in-place matrix solves, +taking into account the \tcode{Triangle} and \tcode{DiagonalStorage} parameters +that apply to the triangular matrix \tcode{A}\iref{linalg.general}. +\begin{note} +This algorithm makes it possible +to compute factorizations like Cholesky and LU in place. +Performing triangular solve in place hinders parallelization. +However, other \tcode{ExecutionPolicy} specific optimizations, +such as vectorization, are still possible. +\end{note} + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{possibly-multipliable}()} +is \tcode{true}; and +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} +is \tcode{true}. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{\exposid{multipliable}(A, B, B)} is \tcode{true}, and +\item +\tcode{A.extent(0) == A.extent(1)} is \tcode{true}. +\end{itemize} + +\pnum +\effects +Computes $X'$ such that $AX' = B$, +and assigns each element of $X'$ to the corresponding element of $B$. +If so such $X'$ exists, +then the elements of \tcode{B} are valid but unspecified. + +\pnum +\complexity +\bigoh{\tcode{A.extent(0)} \times \tcode{A.extent(1)} \times \tcode{B.extent(1)}}. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{inout-matrix}@ InOutMat> + void triangular_matrix_matrix_left_solve(InMat A, Triangle t, DiagonalStorage d, + InOutMat B); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: +\begin{codeblock} +triangular_matrix_matrix_left_solve(A, t, d, B, divides{}); +\end{codeblock} +\end{itemdescr} + +\begin{itemdecl} + template + void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InOutMat B); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: +\begin{codeblock} +triangular_matrix_matrix_left_solve(std::forward(exec), + A, t, d, B, divides{}); +\end{codeblock} +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, + @\exposconcept{inout-matrix}@ InOutMat, class BinaryDivideOp> + void triangular_matrix_matrix_right_solve(InMat A, Triangle t, DiagonalStorage d, + InOutMat B, BinaryDivideOp divide); +template + void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InOutMat B, BinaryDivideOp divide); +\end{itemdecl} + +\begin{itemdescr} +\pnum +These functions perform multiple in-place matrix solves, +taking into account the \tcode{Triangle} and \tcode{DiagonalStorage} parameters +that apply to the triangular matrix \tcode{A}\iref{linalg.general}. +\begin{note} +This algorithm makes it possible +to compute factorizations like Cholesky and LU in place. +Performing triangular solve in place hinders parallelization. +However, other \tcode{ExecutionPolicy} specific optimizations, +such as vectorization, are still possible. +\end{note} + +\pnum +\mandates +\begin{itemize} +\item +If \tcode{InMat} has \tcode{layout_blas_packed} layout, +then the layout's \tcode{Triangle} template argument has +the same type as the function's \tcode{Triangle} template argument; +\item +\tcode{\exposid{possibly-multipliable}()} +is \tcode{true}; and +\item +\tcode{\exposid{compatible-static-extents}(0, 1)} is \tcode{true}. +\end{itemize} + +\pnum +\expects +\begin{itemize} +\item +\tcode{\exposid{multipliable}(B, A, B)} is \tcode{true}, and +\item +\tcode{A.extent(0) == A.extent(1)} is \tcode{true}. +\end{itemize} + +\pnum +\effects +Computes $X'$ such that $X'A = B$, +and assigns each element of $X'$ to the corresponding element of $B$. +If so such $X'$ exists, +then the elements of \tcode{B} are valid but unspecified. + +\pnum +\complexity +\bigoh{\tcode{A.extent(0)} \times \tcode{A.extent(1)} \times \tcode{B.extent(1)}}. +\end{itemdescr} + +\begin{itemdecl} +template<@\exposconcept{in-matrix}@ InMat, class Triangle, class DiagonalStorage, @\exposconcept{inout-matrix}@ InOutMat> + void triangular_matrix_matrix_right_solve(InMat A, Triangle t, DiagonalStorage d, InOutMat B); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: +\begin{codeblock} +triangular_matrix_matrix_right_solve(A, t, d, B, divides{}); +\end{codeblock} +\end{itemdescr} + +\begin{itemdecl} +template + void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec, + InMat A, Triangle t, DiagonalStorage d, + InOutMat B); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Equivalent to: +\begin{codeblock} +triangular_matrix_matrix_right_solve(std::forward(exec), + A, t, d, B, divides{}); +\end{codeblock} +\end{itemdescr} diff --git a/source/support.tex b/source/support.tex index 5d301b21a9..c9f553f3f1 100644 --- a/source/support.tex +++ b/source/support.tex @@ -697,6 +697,7 @@ #define @\defnlibxname{cpp_lib_jthread}@ 201911L // also in \libheader{stop_token}, \libheader{thread} #define @\defnlibxname{cpp_lib_latch}@ 201907L // also in \libheader{latch} #define @\defnlibxname{cpp_lib_launder}@ 201606L // freestanding, also in \libheader{new} +#define @\defnlibxname{cpp_lib_linalg}@ 202311L // also in \libheader{linalg} #define @\defnlibxname{cpp_lib_list_remove_return_type}@ 201806L // also in \libheader{forward_list}, \libheader{list} #define @\defnlibxname{cpp_lib_logical_traits}@ 201510L // freestanding, also in \libheader{type_traits} #define @\defnlibxname{cpp_lib_make_from_tuple}@ 201606L // freestanding, also in \libheader{tuple} diff --git a/source/time.tex b/source/time.tex index 8f4b13de72..7fa74b7d30 100644 --- a/source/time.tex +++ b/source/time.tex @@ -8736,7 +8736,7 @@ \pnum \ref{time.zone} describes an interface for accessing -the IANA Time Zone Database +the IANA Time Zone Database\supercite{iana-tz} that interoperates with \tcode{sys_time} and \tcode{local_time}. This interface provides time zone support to both the civil calendar types\iref{time.cal}