Skip to content

Commit c9cae14

Browse files
zhaojuanmao authored and facebook-github-bot committed
fix unflatten_dense_tensor when there is empty tensor inside (pytorch#50321)
Summary: Pull Request resolved: pytorch#50321 The quantization team reported that when two empty tensors are replicated among ranks, the two empty tensors start to share storage after resizing. The root cause is that unflatten_dense_tensor unflattened each empty tensor as a view of the flat tensor, so it shared storage with the other tensors. This PR avoids unflattening an empty tensor as a view of the flat tensor, so that empty tensors no longer share storage with other tensors. Test Plan: unit test Reviewed By: pritamdamania87 Differential Revision: D25859503 fbshipit-source-id: 5b760b31af6ed2b66bb22954cba8d1514f389cca
1 parent e544d74 commit c9cae14

File tree

2 files changed

+49
-2
lines changed

2 files changed

+49
-2
lines changed

test/cpp/api/tensor_flatten.cpp

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#include <gtest/gtest.h>
2+
#include <test/cpp/api/support.h>
3+
4+
#include <torch/torch.h>
5+
#include <torch/csrc/utils/tensor_flatten.h>
6+
#include <torch/csrc/autograd/variable.h>
7+
8+
using namespace torch::test;
9+
10+
// Regression test: unflattening must not hand empty tensors a view into the
// flat buffer. If it did, an empty tensor would alias the storage of its
// non-empty neighbours, and resizing it afterwards would corrupt them.
TEST(UnflattenDenseTensorTest, TestEmptyTensor) {
  auto empty1 = at::tensor(std::vector<int>());
  auto empty2 = at::tensor(std::vector<int>());
  auto dense1 = at::tensor({1, 2, 3});
  auto dense2 = at::tensor({4, 5});
  std::vector<at::Tensor> inputs{dense1, empty1, empty2, dense2};
  auto flat = at::tensor({1, 2, 3, 4, 5});

  auto results = torch::utils::unflatten_dense_tensors(flat, inputs);

  // Shapes round-trip: one output per input, with the original element counts.
  ASSERT_EQ(results.size(), 4);
  ASSERT_EQ(results.at(0).numel(), 3);
  ASSERT_EQ(results.at(1).numel(), 0);
  ASSERT_EQ(results.at(2).numel(), 0);
  ASSERT_EQ(results.at(3).numel(), 2);

  // An empty tensor has no allocated memory yet, so its data pointer is null.
  ASSERT_EQ(results.at(1).data_ptr(), nullptr);
  ASSERT_EQ(results.at(2).data_ptr(), nullptr);
  // Without the empty-tensor fix in unflatten_dense_tensors(), the
  // unflattened empty tensor results.at(1) would share storage with a
  // non-empty tensor such as results.at(3). With the fix, they do not.
  ASSERT_NE(results.at(1).data_ptr(), results.at(3).data_ptr());

  results.at(1).resize_(1);
  results.at(2).resize_(1);
  // After resizing, the two formerly-empty tensors must still have distinct
  // storage. Without the fix they would end up sharing the same storage.
  ASSERT_NE(results.at(1).data_ptr(), results.at(2).data_ptr());
}

torch/csrc/utils/tensor_flatten.h

+10-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,16 @@ inline std::vector<at::Tensor> unflatten_dense_tensors(const at::Tensor& flat, a
2020
size_t offset = 0;
2121
for (const auto & tensor : tensors) {
2222
auto numel = tensor.numel();
23-
outputs.push_back(flat.narrow(0, offset, numel).view(tensor.sizes()));
24-
offset += numel;
23+
// If unflatten an empty tensor, create a new empty tensor using
24+
// flat tensor Options.
25+
// This can avoid the unflattened empty tensor to share the same storage
26+
// with other unflatten tensors.
27+
if (numel == 0) {
28+
outputs.push_back(at::empty({0}, flat.options()));
29+
} else {
30+
outputs.push_back(flat.narrow(0, offset, numel).view(tensor.sizes()));
31+
offset += numel;
32+
}
2533
}
2634
return outputs;
2735
}

0 commit comments

Comments
 (0)