Tensor construction codemod - 2/3 (pytorch#14836)

jerryzh168 · facebook-github-bot · commit 83f32eebd9c9 · 2018-12-10T19:30:56.000-08:00
Summary: Pull Request resolved: pytorch#14836. Codemod generated with clangr in shard mode (25 files per diff). Motivation: pytorch#12407. Reviewed By: bddppq. Differential Revision: D13335176. fbshipit-source-id: 8d89510670e2cf70559d2f75e68f7181feb0b6d9
diff --git a/caffe2/core/operator.h b/caffe2/core/operator.h
@@ -207,7 +207,7 @@ class CAFFE2_API OperatorBase : public Observable<OperatorBase> {
       int idx,
       at::TensorOptions options,
       const Tensor& src,
-      BaseContext* context = nullptr) {
+      bool async = false) {
     Tensor* t = Output<Tensor>(idx, options.device().type());
     // TODO:
     // We plan to use the following:
@@ -216,7 +216,7 @@ class CAFFE2_API OperatorBase : public Observable<OperatorBase> {
     CAFFE_ENFORCE(
         !t->dtype_initialized() || t->dtype() == src.dtype(),
         "We don't allow a change of data type in OutputTensor");
-    t->CopyFrom(src, context);
+    t->CopyFrom(src, async);
     return t;
   }
 
diff --git a/caffe2/ideep/operators/concat_split_op.cc b/caffe2/ideep/operators/concat_split_op.cc
@@ -26,7 +26,6 @@ class IDEEPConcatOp final : public IDEEPOperator {
 
   bool RunOnDevice() override {
     auto* output = Output(OUTPUT);
-    TensorCPU* axis_info = OperatorBase::Output<TensorCPU>(AXIS_INFO, CPU);
 
     vector<itensor> inputs;
     for (int i = 0; i < InputSize(); ++i) {
@@ -44,7 +43,10 @@ class IDEEPConcatOp final : public IDEEPOperator {
     }
 
     auto axis_vdata = ideep::concat::compute(inputs, axis_, add_axis_, *output);
-    axis_info->Resize(vector<int64_t>(1, InputSize()));
+    Tensor* axis_info = OutputTensor(
+        AXIS_INFO,
+        vector<int64_t>(1, InputSize()),
+        at::dtype<int>().device(CPU));
     int* axis_data = axis_info->template mutable_data<int>();
     for (int i = 0; i < axis_vdata.size(); i++) {
       axis_data[i] = axis_vdata[i];
diff --git a/caffe2/ideep/operators/utility_ops.cc b/caffe2/ideep/operators/utility_ops.cc
@@ -49,13 +49,16 @@ class CopyIDEEPToCPUOp final : public IDEEPOperator {
     if (BlobIsTensorType(input_blob, CPU)) {
       VLOG(2) << "Directing sharing of TensorCPU";
       const auto& X = OperatorBase::Input<Tensor>(0, CPU);
-      auto* Y = OperatorBase::Output<Tensor>(0, CPU);
-      Y->CopyFrom(X);
+      OutputTensorCopyFrom(0, at::device(CPU), X);
     } else {
       const auto& X = OperatorBase::Input<itensor>(0);
-      auto* Y = OperatorBase::Output<Tensor>(0, CPU);
-      Y->Resize(X.get_dims());
       if (X.get_data_type() == itensor::data_type::f32) {
+        std::vector<int64_t> dims;
+        for (int i = 0; i < X.get_dims().size(); ++i) {
+          dims.push_back(X.get_dims()[i]);
+        }
+        auto* Y =
+            OperatorBase::OutputTensor(0, dims, at::dtype<float>().device(CPU));
         X.reorder_to(Y->template mutable_data<float>());
       } else {
         CAFFE_THROW("Unsupported ideep type: ", X.get_data_type());
diff --git a/caffe2/operators/conv_op_cudnn.cc b/caffe2/operators/conv_op_cudnn.cc
@@ -1306,8 +1306,7 @@ bool CudnnConvGradientOp::DoRunWithType() {
 
   // Now, actually run the computation.
   if (!no_bias_) {
-    auto* dbias = Output(BIAS_OR_INPUT_GRAD);
-    dbias->Resize(M);
+    auto* dbias = Output(BIAS_OR_INPUT_GRAD, {M}, at::dtype<T_DB>());
     CUDNN_ENFORCE(cudnnConvolutionBackwardBias(
         cudnn_wrapper_.inline_cudnn_handle(),
         cudnnTypeWrapper<T_DY>::kOne(),
diff --git a/caffe2/operators/conv_transpose_op_cudnn.cc b/caffe2/operators/conv_transpose_op_cudnn.cc
@@ -615,8 +615,7 @@ bool CudnnConvTransposeGradientOp<T>::RunOnDevice() {
 
   // Now, actually run the computation.
   if (!no_bias_) {
-    auto* dbias = Output(BIAS_OR_INPUT_GRAD);
-    dbias->Resize(C);
+    auto* dbias = Output(BIAS_OR_INPUT_GRAD, {C}, at::dtype<T>());
     CUDNN_ENFORCE(cudnnConvolutionBackwardBias(
         cudnn_wrapper_.inline_cudnn_handle(),
         cudnnTypeWrapper<T>::kOne(),
diff --git a/caffe2/operators/dropout_op_cudnn.cc b/caffe2/operators/dropout_op_cudnn.cc
@@ -146,12 +146,12 @@ bool CuDNNDropoutOp::DoRunWithType() {
     }
     return true;
   } else {
-    auto* mask = Output(1);
     // Reshape tensor descriptors if necessary
-    if (X.sizes() != cudnn_input_dims_ && !is_test_) {
+    if (X.sizes() != cudnn_input_dims_) {
       CAFFE_ENFORCE(scratch_blob_);
       Tensor* states = BlobGetMutableTensor(scratch_blob_, CUDA);
+      // Cache an owning copy; X.sizes() is only a view into X's dims.
       cudnn_input_dims_ = X.sizes().vec();
       CUDNN_ENFORCE(cudnnSetTensor4dDescriptor(
           data_desc_,
           GetCudnnTensorFormat(StorageOrder::NCHW),
@@ -165,7 +165,6 @@ bool CuDNNDropoutOp::DoRunWithType() {
       CUDNN_ENFORCE(cudnnDropoutGetReserveSpaceSize(
           data_desc_, &reserve_space_size_in_bytes_));
 
-      mask->Resize(reserve_space_size_in_bytes_);
       states->Resize(states_size_in_bytes_);
 
       if (!states_initialized_) {
@@ -187,6 +186,10 @@ bool CuDNNDropoutOp::DoRunWithType() {
         states_initialized_ = true;
       }
     }
+    auto* mask = Output(
+        1,
+        {static_cast<int64_t>(reserve_space_size_in_bytes_)},
+        at::dtype<uint8_t>());
     CUDNN_ENFORCE(cudnnDropoutForward(
         cudnn_wrapper_.inline_cudnn_handle(),
         dropout_desc_,
diff --git a/caffe2/operators/fully_connected_op.h b/caffe2/operators/fully_connected_op.h
@@ -208,9 +208,9 @@ class FullyConnectedGradientOp : public Operator<Context> {
     CAFFE_ENFORCE(K * N == W.numel(), dimErrorString());
 
     auto* dW = Output(0);
-    auto* db = Output(1);
+
     dW->ResizeLike(W);
-    db->Resize(N);
+    auto* db = Output(1, {N}, at::dtype<T_DB>());
 
     if (X.numel() == 0) {
       // generate a zero blob for db and dW when X is empty
diff --git a/caffe2/operators/fused_rowwise_8bit_conversion_ops.h b/caffe2/operators/fused_rowwise_8bit_conversion_ops.h
@@ -106,7 +106,6 @@ class Fused8BitRowwiseQuantizedToFloatOp : public Operator<Context> {
     CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");
 
     const auto& input = Input(DATA_FUSED_SCALE_BIAS_INT8);
-    auto* output = Output(DATA_FLOAT);
 
     const auto input_rows = input.size(0);
     const auto input_columns = input.size(1);
@@ -116,7 +115,7 @@ class Fused8BitRowwiseQuantizedToFloatOp : public Operator<Context> {
     // input_columns is the number of values in the original row.
     const std::vector<int64_t> output_dimensions = {input_rows,
                                                     input_columns - 8};
-    output->Resize(output_dimensions);
+    auto* output = Output(DATA_FLOAT, output_dimensions, at::dtype<T>());
     const auto output_columns = output->size(1);
 
     const auto* input_data = input.template data<uint8_t>();
diff --git a/caffe2/operators/order_switch_ops_cudnn.cc b/caffe2/operators/order_switch_ops_cudnn.cc
@@ -28,6 +28,7 @@ class CuDNNOrderSwithOpBase : public Operator<CUDAContext> {
   }
 
  protected:
+  // TODO: std::vector<int> -> std::vector<int64_t>
   void SetTensorDescriptor(
       const cudnnDataType_t data_type,
       const StorageOrder order,
@@ -82,7 +83,7 @@ class CuDNNNHWC2NCHWOp final : public CuDNNOrderSwithOpBase {
   template <typename T>
   bool DoRunWithType() {
     const auto& X = Input(0);
-    auto* Y = Output(0);
+
     const int ndim = X.dim();
     const int N = X.dim32(0);
     const int C = X.dim32(ndim - 1);
@@ -91,7 +92,9 @@ class CuDNNNHWC2NCHWOp final : public CuDNNOrderSwithOpBase {
     Y_dims[0] = N;
     Y_dims[1] = C;
     std::copy(X_dims.cbegin() + 1, X_dims.cend() - 1, Y_dims.begin() + 2);
-    Y->Resize(Y_dims);
+    std::vector<int64_t> Y_dims_64;
+    std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64));
+    auto* Y = Output(0, Y_dims_64, at::dtype<T>());
     if (cached_X_dims_ != X_dims) {
       cached_X_dims_ = X_dims;
       SetTensorDescriptor(
@@ -123,7 +126,7 @@ class CuDNNNCHW2NHWCOp final : public CuDNNOrderSwithOpBase {
   template <typename T>
   bool DoRunWithType() {
     const auto& X = Input(0);
-    auto* Y = Output(0);
+
     const int ndim = X.dim();
     const int N = X.dim32(0);
     const int C = X.dim32(1);
@@ -132,7 +135,9 @@ class CuDNNNCHW2NHWCOp final : public CuDNNOrderSwithOpBase {
     Y_dims[0] = N;
     Y_dims[ndim - 1] = C;
     std::copy(X_dims.cbegin() + 2, X_dims.cend(), Y_dims.begin() + 1);
-    Y->Resize(Y_dims);
+    std::vector<int64_t> Y_dims_64;
+    std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64));
+    auto* Y = Output(0, Y_dims_64, at::dtype<T>());
     if (cached_X_dims_ != X_dims) {
       cached_X_dims_ = X_dims;
       SetTensorDescriptor(
diff --git a/caffe2/quantization/server/fully_connected_fake_lowp_op.cc b/caffe2/quantization/server/fully_connected_fake_lowp_op.cc
@@ -33,7 +33,7 @@ bool FullyConnectedFakeLowpFPOp<Q, Context, Engine, TransposeWeight>::
   const auto& X = Input(0);
   const auto& W = Input(1);
   const auto& b = Input(2);
-  auto* Y = Output(0);
+
   CAFFE_ENFORCE(b.dim() == 1, b.dim());
   // batch size
   const auto canonical_axis = X.canonical_axis_index(axis_);
@@ -79,7 +79,7 @@ bool FullyConnectedFakeLowpFPOp<Q, Context, Engine, TransposeWeight>::
   DCHECK_LE(canonical_axis + 1, Y_shape_cache_.size());
   Y_shape_cache_.resize(canonical_axis + 1);
   Y_shape_cache_[canonical_axis] = N;
-  Y->Resize(Y_shape_cache_);
+  auto* Y = Output(0, Y_shape_cache_, at::dtype<T_Y>());
   CAFFE_ENFORCE(M * N == Y->size(), dimErrorString());
 
   if (X.size() == 0) {
@@ -180,9 +180,9 @@ bool FullyConnectedGradientFakeLowpFPOp<Q, Context, Engine, TransposeWeight>::
   CAFFE_ENFORCE(K * N == W.size());
 
   auto* dW = Output(0);
-  auto* db = Output(1);
+
   dW->ResizeLike(W);
-  db->Resize(N);
+  auto* db = Output(1, {N}, at::dtype<T_DB>());
 
   if (X.size() == 0) {
     // generate a zero blob for db and dW when X is empty
diff --git a/caffe2/queue/queue_ops.h b/caffe2/queue/queue_ops.h
@@ -244,8 +244,8 @@ class WeightedSampleDequeueBlobsOp final : public Operator<Context> {
     CAFFE_ENFORCE_EQ(OutputSize(), size + 1);
     bool status = queue->blockingRead(this->Outputs());
     if (table_idx_blob_ >= 0) {
-      auto* table_idx_blob_out = Output(table_idx_blob_);
-      table_idx_blob_out->Resize(1);
+      auto* table_idx_blob_out =
+          Output(table_idx_blob_, {1}, at::dtype<int32_t>());
       int32_t* data = table_idx_blob_out->template mutable_data<int32_t>();
       data[0] = idx;
     }
diff --git a/caffe2/sgd/iter_op.h b/caffe2/sgd/iter_op.h
@@ -45,9 +45,9 @@ class IterOp final : public Operator<Context> {
                       "be deprecated soon. More specifically, IterOp now "
                       "requires an explicit in-place input and output.";
 
-        auto* output = OperatorBase::Output<Tensor>(0, CPU);
         VLOG(1) << "Initializing iter counter.";
-        output->Resize(1);
+        auto* output = OperatorBase::OutputTensor(
+            0, {1}, at::dtype<int64_t>().device(CPU));
         output->template mutable_data<int64_t>()[0] = 0;
       }
     }
diff --git a/caffe2/sgd/lars_op.h b/caffe2/sgd/lars_op.h
@@ -28,8 +28,8 @@ class LarsOp final : public Operator<Context> {
     auto& wd = Input(2);
     auto& trust = Input(3);
     auto& lr_max = Input(4);
-    auto* lr_rescaled = Output(0);
-    lr_rescaled->Resize(vector<int64_t>{1});
+
+    auto* lr_rescaled = Output(0, vector<int64_t>{1}, at::dtype<T>());
 
     X_norm_tensor_.Resize(1);
     T* X_norm_ = X_norm_tensor_.template mutable_data<T>();