@@ -74,7 +74,7 @@ inline std::vector<int> generateIntRandomVector(size_t n, int low, int hi) {
7474 * @param threads_per_block threads per block (defaults to THREADS_PER_BLOCK)
7575 * @return number of blocks
7676 */
77- constexpr size_t numBlocks (size_t n, size_t threads_per_block= THREADS_PER_BLOCK) {
77+ constexpr size_t numBlocks (size_t n, size_t threads_per_block = THREADS_PER_BLOCK) {
7878 return (n / threads_per_block + (n % threads_per_block != 0 ));
7979}
8080
@@ -390,6 +390,51 @@ public:
390390 */
391391 T minAbs () const ;
392392
393+ /* *
394+ * Applied the Givens rotation G(i, j, c, s) on row i and column j,
395+ * with cos θ = c, and sin θ = s. The rotation is applied in-place
396+ * to all slices of the tensor. Recall that the Givens rotation is
397+ *
398+ * i j
399+ * |1 0 ... 0 |
400+ * |0 1 ... 0 |
401+ * | |
402+ * | 1 |
403+ * i | c s |
404+ * G' = ' '
405+ * j | -s c |
406+ * | 1 |
407+ * | |
408+ * | 0 |
409+ *
410+ * The right Givens rotation consists in multiplying from the right
411+ * by G.
412+ *
413+ * Equivalently, the right Givens transformation performs the following
414+ * operation on the i and j columns of this matrix:
415+ *
416+ * A[:, i] <-- c * A[:, i] - s * A[:, j]
417+ * A[:, j] <-- s * A[:, i] + c * A[:, j]
418+ *
419+ * @param i first column index
420+ * @param j second column index
421+ * @param c cos θ
422+ * @param minus_s minus sin θ
423+ * @throws std::invalid_argument if either i or j is greater or equal ncols
424+ */
425+ void applyRightGivensRotation (size_t i, size_t j, const T *c, const T *minus_s);
426+
427+ /* *
428+ * Performs a Givens rotation (left multiplication by G')
429+ *
430+ * @param i first row index
431+ * @param j second row index
432+ * @param c cos θ
433+ * @param minus_s minus sin θ
434+ * @throws std::invalid_argument if either i or j is greater or equal nrows
435+ */
436+ void applyLeftGivensRotation (size_t i, size_t j, const T *c, const T *minus_s);
437+
393438 /* *
394439 * Batch solves `A \ b`.
395440 * Solves `bi <- Ai \ bi` for each k-index `i`.
@@ -449,7 +494,6 @@ public:
449494 }
450495
451496 friend DTensor<T> operator *(T a, DTensor &B) {
452- size_t nrA = B.m_numRows , ncB = B.m_numCols , nmB = B.m_numMats ;
453497 DTensor<T> result (B);
454498 result *= a;
455499 return result;
@@ -471,6 +515,8 @@ DTensor<T> DTensor<T>::createRandomTensor(size_t numRows, size_t numCols, size_t
471515 auto randVec = generateIntRandomVector (numRows * numCols * numMats, low, hi);
472516 DTensor<T> a (randVec, numRows, numCols, numMats);
473517 return a;
518+ } else {
519+ throw std::invalid_argument (" [createRandomTensor] unsupported type T" );
474520 }
475521}
476522
@@ -675,6 +721,38 @@ inline double DTensor<double>::minAbs() const {
675721 return std::signbit (hostDst) ? -hostDst : hostDst;
676722}
677723
724+ template <typename T>
725+ void DTensor<T>::applyRightGivensRotation(size_t i, size_t j, const T *c, const T *minus_s) {
726+ if (m_numMats > 1 ) throw std::invalid_argument (" [applyRightGivensRotation] tensors (nMat>1) not supported" );
727+ T *col_i = m_d_data + i * m_numRows;
728+ T *col_j = m_d_data + j * m_numRows;
729+ if constexpr (std::is_same_v<T, double >) {
730+ gpuErrChk (cublasDrot (Session::getInstance ().cuBlasHandle (), m_numRows,
731+ col_i, 1 , col_j, 1 , c, minus_s));
732+ } else if constexpr (std::is_same_v<T, float >) {
733+ gpuErrChk (cublasSrot (Session::getInstance ().cuBlasHandle (), m_numRows,
734+ col_i, 1 , col_j, 1 , c, minus_s));
735+ }
736+ }
737+
738+ template <typename T>
739+ void DTensor<T>::applyLeftGivensRotation(size_t i, size_t j, const T *c, const T *minus_s) {
740+ if (m_numMats > 1 ) throw std::invalid_argument (" [applyLeftGivensRotation] tensors (nMat>1) not supported" );
741+ if constexpr (std::is_same_v<T, double >) {
742+ gpuErrChk (cublasDrot (Session::getInstance ().cuBlasHandle (), m_numCols,
743+ m_d_data + i, m_numRows,
744+ m_d_data + j, m_numRows,
745+ c, minus_s));
746+ } else if constexpr (std::is_same_v<T, float >) {
747+ gpuErrChk (cublasSrot (Session::getInstance ().cuBlasHandle (), m_numCols,
748+ m_d_data + i, m_numRows,
749+ m_d_data + j, m_numRows,
750+ c, minus_s));
751+ } else {
752+ throw std::invalid_argument (" [applyLeftGivensRotation] Unsupported type T" );
753+ }
754+ }
755+
678756template <typename T>
679757inline bool DTensor<T>::allocateOnDevice(size_t size, bool zero) {
680758 if (size <= 0 ) return false ;
@@ -1132,7 +1210,7 @@ public:
11321210 DTensor<T> Si (*m_S, 2 , i, i);
11331211 DTensor<unsigned int > rankI (*m_rank, 2 , i, i);
11341212 k_countNonzeroSingularValues<T><<<numBlocks(numElS), THREADS_PER_BLOCK>>> (Si.raw (), numElS,
1135- rankI.raw (), epsilon);
1213+ rankI.raw (), epsilon);
11361214 }
11371215 return *m_rank;
11381216 }
@@ -1380,14 +1458,19 @@ public:
13801458 * @param b provided matrix
13811459 * @return status code of computation
13821460 */
1383- int leastSquares (DTensor<T> &);
1461+ int leastSquares (DTensor<T> &b );
13841462
13851463 /* *
13861464 * Populate the given tensors with Q and R.
13871465 * Caution! This is an inefficient method: only to be used for debugging.
1388- * @return resulting Q and R from factorisation
1466+ *
1467+ * @param Q matrix Q (preallocated)
1468+ * @param R matrix R (preallocated)
1469+ * @return status code
1470+ *
1471+ * @throws std::invalid_argument if Q or R have invalid dimensions
13891472 */
1390- int getQR (DTensor<T> &, DTensor<T> &);
1473+ int getQR (DTensor<T> &Q , DTensor<T> &R );
13911474
13921475};
13931476
@@ -1759,4 +1842,118 @@ inline void CholeskyBatchFactoriser<float>::solve(DTensor<float> &b) {
17591842 m_numMats));
17601843}
17611844
1845+
1846+
1847+ /* ================================================================================================
1848+ * GIVENS ANNIHILATOR
1849+ * ================================================================================================ */
1850+
1851+ /* *
1852+ * GivensAnnihilator is used to apply a left Givens rotation that
1853+ * makes a particular element (k, j) of a matrix zero by applying
1854+ * an appropriate Givens rotation G(i, k, c, s).
1855+ *
1856+ * @tparam T data type of tensor (must be float or double)
1857+ */
1858+ TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX
1859+ class GivensAnnihilator {
1860+
1861+ private:
1862+ DTensor<T> *m_matrix;
1863+ /* *
1864+ * Auxiliary memory on the device of length 3 used to store
1865+ * rhypot(xij, xkj), cos θ, and sin θ.
1866+ */
1867+ std::unique_ptr<DTensor<T>> m_d_rhyp_cos_sin;
1868+
1869+ void init () {
1870+ m_d_rhyp_cos_sin = std::make_unique<DTensor<T>>(3 );
1871+ }
1872+
1873+ public:
1874+
1875+ GivensAnnihilator () {
1876+ init ();
1877+ }
1878+
1879+ /* *
1880+ * Constructor of GivensAnnihilator
1881+ * @param a matrix
1882+ * @throws std::invalid_argument if a.numMats() > 1
1883+ */
1884+ GivensAnnihilator (DTensor<T> &a) {
1885+ if (a.numMats () > 1 ) {
1886+ throw std::invalid_argument (" [GivensAnnihilator] tensors (numMats > 1) not supported" );
1887+ }
1888+ m_matrix = &a;
1889+ init ();
1890+ }
1891+
1892+ /* *
1893+ * Set the reference to a matrix; this way the current
1894+ * object can be reused
1895+ *
1896+ * @param a
1897+ */
1898+ void setMatrix (DTensor<T> &a) {
1899+ if (a.numMats () > 1 ) {
1900+ throw std::invalid_argument (" [GivensAnnihilator] tensors (numMats > 1) not supported" );
1901+ }
1902+ m_matrix = &a;
1903+ }
1904+
1905+ /* *
1906+ * Applies a left Givens rotation G(i, k, c, s) that eliminates
1907+ * the (k, j) element of the given matrix.
1908+ *
1909+ * @param i row index i
1910+ * @param k row index k
1911+ * @param j column index j
1912+ *
1913+ * @throws std::invalid_argument if i, k, or j are out of bounds
1914+ */
1915+ void annihilate (size_t i, size_t k, size_t j);
1916+
1917+ };
1918+
1919+ TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX
1920+ __global__ void k_givensAnnihilateRHypot (const T *data,
1921+ T *res,
1922+ size_t i, size_t k, size_t j,
1923+ size_t nRows) {
1924+ T xij = data[i + j * nRows];
1925+ T xkj = data[k + j * nRows];
1926+ res[0 ] = rhypot (xij, xkj);
1927+ res[1 ] = xij * (*res); // cos
1928+ res[2 ] = xkj * (*res); // -sin
1929+ }
1930+
1931+ template <typename T>
1932+ inline void GivensAnnihilator<T>::annihilate(size_t i, size_t k, size_t j) {
1933+ /* A few checks */
1934+ size_t nR = m_matrix->numRows (), nC = m_matrix->numCols ();
1935+ if (i >= nR or k >= nR) throw std::invalid_argument (" [GivensAnnihilator::annihilate] invalid row index" );
1936+ if (j >= nC) std::invalid_argument (" [GivensAnnihilator::annihilate] invalid column index j" );
1937+
1938+ /*
1939+ * Pass cosine and sine as device pointers
1940+ * (Avoid having to download first)
1941+ */
1942+ gpuErrChk (cublasSetPointerMode (Session::getInstance ().cuBlasHandle (), CUBLAS_POINTER_MODE_DEVICE));
1943+
1944+ /* Useful definitions */
1945+ T *aux = m_d_rhyp_cos_sin->raw ();
1946+ T *matData = m_matrix->raw ();
1947+
1948+ /* Call kernel to determine 1/sqrt(Ai^2 + Ak^2) */
1949+ k_givensAnnihilateRHypot<<<1 , 1 >>> (m_matrix->raw (), aux, i, k, j, nR);
1950+
1951+ /* Apply Givens rotation */
1952+ m_matrix->applyLeftGivensRotation (i, k, aux + 1 , aux + 2 );
1953+
1954+ /* Change back to default behaviour */
1955+ gpuErrChk (cublasSetPointerMode (Session::getInstance ().cuBlasHandle (), CUBLAS_POINTER_MODE_HOST));
1956+ }
1957+
1958+
17621959#endif
0 commit comments