@@ -299,10 +299,11 @@ Tensor & detach_(Tensor & self) {
}

// Some ops in the following registration list are registered as catch-all kernels,
- // some as backend kernels for VariableTensorId. The reason for this is that some
- // ops also use dispatch (i.e. register CPU/CUDA/QuantizedCPU kernels) and those
- // need to get a separate VariableTensorId kernel instead of a catch-all kernel,
- // otherwise we won't ever call it for CPU/CUDA/QuantizedCPU tensors.
+ // some as catch-all kernels and additionally as backend kernels for VariableTensorId.
+ // The reason for this is that ops that also use dispatch (e.g. register CPU/CUDA/QuantizedCPU
+ // kernels) need to get a separate VariableTensorId kernel instead of a catch-all kernel,
+ // otherwise we won't ever call it for CPU/CUDA/QuantizedCPU tensors, because the backend
+ // kernel has a higher priority than catch-all kernels.
// Unfortunately, this setup doesn't work in NonVariableTypeMode because that will
// skip past variable kernels. So for ops that we want to use in NonVariableTypeMode
// (and that don't use dispatch), we register them as catch-all kernels instead.
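To make the two registration styles described in that comment concrete, here is a minimal sketch under stated assumptions: the op names (`my::example_dispatched`, `my::example_catchall`) and kernel functions are hypothetical and not part of this file, and the sketch assumes the same headers and using-declarations that the registration list below already has in scope (torch::RegisterOperators, TensorTypeId, AliasAnalysisKind).

namespace {

// Hypothetical kernels, for illustration only.
at::Tensor example_dispatched_kernel(const at::Tensor& self) {
  // Would do autograd bookkeeping, then re-dispatch to the CPU/CUDA/QuantizedCPU kernel.
  return self;
}

at::Tensor example_catchall_kernel(const at::Tensor& self) {
  // Runs for any tensor type, including inside NonVariableTypeMode.
  return self;
}

static auto example_registry = torch::RegisterOperators()
  // Style 1: the op also has CPU/CUDA/QuantizedCPU backend kernels registered elsewhere,
  // so it gets a dedicated VariableTensorId backend kernel; a catch-all kernel would be
  // shadowed by those higher-priority backend kernels.
  .op(torch::RegisterOperators::options()
    .schema("my::example_dispatched(Tensor self) -> Tensor")
    .impl_unboxedOnlyKernel<decltype(example_dispatched_kernel), &example_dispatched_kernel>(TensorTypeId::VariableTensorId)
    .aliasAnalysis(AliasAnalysisKind::FROM_SCHEMA))
  // Style 2: the op does not use dispatch and must still work in NonVariableTypeMode
  // (which skips variable kernels), so it is registered as a catch-all kernel instead.
  .op(torch::RegisterOperators::options()
    .schema("my::example_catchall(Tensor self) -> Tensor")
    .impl_unboxedOnlyCatchAllKernel<decltype(example_catchall_kernel), &example_catchall_kernel>()
    .aliasAnalysis(AliasAnalysisKind::FROM_SCHEMA));

} // namespace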
@@ -329,6 +330,13 @@ static auto registry = torch::RegisterOperators()
    .aliasAnalysis(AliasAnalysisKind::FROM_SCHEMA))
  .op(torch::RegisterOperators::options()
    .schema("aten::backward(Tensor self, Tensor? gradient=None, bool keep_graph=False, bool create_graph=False) -> ()")
+   // For backward(), we need the catch-all kernel (see comment above), but we also need the VariableTensorId backend
+   // kernel, because when called with a VariableTensorId tensor, it goes through the variable fallback kernel,
+   // which calls callBoxed(), which doesn't support optional tensor arguments yet, and backward() has an optional
+   // tensor argument.
+   // TODO Once callBoxed() supports optional tensor arguments, we can enable `use_c10_dispatcher: full` for backward()
+   // and remove the backend VariableTensorId kernel here, only leaving the catch-all kernel.
+   .impl_unboxedOnlyKernel<decltype(VariableType::backward), &VariableType::backward>(TensorTypeId::VariableTensorId)
    .impl_unboxedOnlyCatchAllKernel<decltype(VariableType::backward), &VariableType::backward>()
    .aliasAnalysis(AliasAnalysisKind::FROM_SCHEMA))
  .op(torch::RegisterOperators::options()
@@ -353,6 +361,13 @@ static auto registry = torch::RegisterOperators()
    .aliasAnalysis(AliasAnalysisKind::FROM_SCHEMA))
  .op(torch::RegisterOperators::options()
    .schema("aten::requires_grad_(Tensor(a!) self, bool _requires_grad=True) -> Tensor(a!)")
+   // For requires_grad_(), we need the catch-all kernel (see comment above), but we also need the VariableTensorId backend
+   // kernel, because when called with a VariableTensorId tensor, it goes through the variable fallback kernel,
+   // which calls callBoxed(), which doesn't support mutable tensor arguments yet, and requires_grad_() has a mutable
+   // tensor argument.
+   // TODO Once callBoxed() supports mutable tensor arguments, we can enable `use_c10_dispatcher: full` for requires_grad_()
+   // and remove the backend VariableTensorId kernel here, only leaving the catch-all kernel.
+   .impl_unboxedOnlyKernel<decltype(VariableType::requires_grad_), &VariableType::requires_grad_>(TensorTypeId::VariableTensorId)
    .impl_unboxedOnlyCatchAllKernel<decltype(VariableType::requires_grad_), &VariableType::requires_grad_>()
    .aliasAnalysis(AliasAnalysisKind::FROM_SCHEMA))
  ;
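For context, here is a hypothetical standalone usage sketch (not part of this commit) that exercises the two ops whose registrations change here, using only documented libtorch C++ frontend calls; the build setup and linking against libtorch are assumed.

#include <torch/torch.h>

int main() {
  // x is a variable (VariableTensorId), so its ops dispatch to the VariableTensorId
  // backend kernels registered above rather than to the catch-all kernels.
  auto x = torch::ones({2, 2}, torch::requires_grad());
  auto y = (x * x).sum();

  // aten::backward takes an optional `gradient` tensor, which is why the diff keeps an
  // unboxed VariableTensorId kernel for it in addition to the catch-all kernel.
  y.backward();

  // aten::requires_grad_ mutates `self` in place, the analogous reason for its extra kernel.
  x.requires_grad_(false);
  return 0;
}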