Merge pull request #45 from GPUEngineering/b/qr-malloc-error

ruairimoran · web-flow · commit 40b83a19b7b4 · 2024-10-07T17:23:30.000+01:00
fix initialisation of Q
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
+<!-- ---------------------
+      v1.2.1
+     --------------------- -->
+## v1.2.1 - 07-10-2024
+
+### Added
+
+- Patch initialisation of Q in QR decomposition (out-of-bounds write when the matrix has more rows than columns).
+- Add test for tall skinny matrices.
+
 <!-- ---------------------
       v1.2.0
      --------------------- -->
diff --git a/include/tensor.cuh b/include/tensor.cuh
@@ -1488,8 +1488,10 @@ inline int QRFactoriser<double>::getQR(DTensor<double> &Q, DTensor<double> &R) {
         throw std::invalid_argument("[QR] invalid shape of R.");
     // Initialize Q to 1's on diagonal
     std::vector<double> vecQ(m * n, 0.);
-    for (size_t i = 0; i < m; i++) {
-        vecQ[i * n + i] = 1.;
+    for (size_t r = 0; r < m; r++) {
+        for (size_t c = 0; c < n; c++) {
+            if (r == c) { vecQ[r * n + c] = 1.; }
+        }
     }
     Q.upload(vecQ, rowMajor);
     // Apply Householder reflectors to compute Q
@@ -1521,8 +1523,10 @@ inline int QRFactoriser<float>::getQR(DTensor<float> &Q, DTensor<float> &R) {
         throw std::invalid_argument("[QR] invalid shape of R.");
     // Initialize Q to 1's on diagonal
     std::vector<float> vecQ(m * n, 0.);
-    for (size_t i = 0; i < m; i++) {
-        vecQ[i * n + i] = 1.;
+    for (size_t r = 0; r < m; r++) {
+        for (size_t c = 0; c < n; c++) {
+            if (r == c) { vecQ[r * n + c] = 1.; }
+        }
     }
     Q.upload(vecQ, rowMajor);
     // Apply Householder reflectors to compute Q
diff --git a/test/testTensor.cu b/test/testTensor.cu
@@ -1079,6 +1079,37 @@ TEST_F(QRTest, qrFactorisation) {
     qrFactorisation<double>(PRECISION_HIGH);
 }
 
+/* ---------------------------------------
+ * QR factorisation
+ * - tall and skinny matrix
+ * --------------------------------------- */
+
+TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX
+void qrFactorisationTall(T epsilon) {  // factorises a random 20x3 matrix and expects the norm of QR - A to be within epsilon of 0
+    size_t nR = 20;  // number of rows (more rows than columns, i.e. tall)
+    size_t nC = 3;  // number of columns
+    DTensor<T> temp(nR, nC);  // tensor the factoriser is constructed with; A is copied into it below
+    DTensor<T> A = DTensor<T>::createRandomTensor(nR, nC, 1, -100, 100);  // reference matrix; presumably entries drawn from [-100, 100] - TODO confirm
+    QRFactoriser<T> qr(temp);
+    A.deviceCopyTo(temp);  // A itself is kept for the comparison at the end
+    int status = qr.factorise();
+    EXPECT_EQ(status, 0);  // factorise() is expected to return 0
+    DTensor<T> Q(nR, nC);  // same dimensions as A
+    DTensor<T> R(nC, nC, 1, true);  // nC x nC; NOTE(review): 4th argument presumably zero-initialises - confirm
+    DTensor<T> QR(nR, nC);  // receives the product checked against A
+    status = qr.getQR(Q, R);
+    EXPECT_EQ(status, 0);  // getQR() is expected to return 0
+    QR.addAB(Q, R);  // presumably QR = Q * R - TODO confirm addAB semantics
+    QR -= A;  // subtract the original A, leaving the residual
+    T nrm = QR.normF();  // norm of the residual
+    EXPECT_NEAR(nrm, 0., epsilon);  // residual norm must be within epsilon of zero
+}
+
+TEST_F(QRTest, qrFactorisationTall) {
+    qrFactorisationTall<float>(PRECISION_LOW);  // float instantiation, checked against PRECISION_LOW
+    qrFactorisationTall<double>(PRECISION_HIGH);  // double instantiation, checked against PRECISION_HIGH
+}
+
 /* ---------------------------------------
  * QR factorisation: solve least squares
  * --------------------------------------- */