Allow a non-OpenMP based build (pytorch#19749)

Ilia Cherniavskii · facebook-github-bot · commit 481b6d026848 · 2019-05-06T19:34:48.000-07:00
Summary: Pull Request resolved: pytorch#19749 ghimport-source-id: a6636c0 Differential Revision: D15141993 Pulled By: ilia-cher fbshipit-source-id: 96085608398b2a4c97c68b2948f5184d07f9ad3d
diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh
@@ -203,6 +203,7 @@ test_xla() {
 }
 
 (cd test && python -c "import torch; print(torch.__config__.show())")
+(cd test && python -c "import torch; print(torch.__config__.parallel_info())")
 
 if [[ "${BUILD_ENVIRONMENT}" == *xla* ]]; then
   test_torchvision
diff --git a/aten/src/ATen/Parallel.cpp b/aten/src/ATen/Parallel.cpp
@@ -1,6 +1,10 @@
 #include <ATen/Parallel.h>
 
+#include <ATen/Config.h>
+#include <ATen/Version.h>
+
 #include <atomic>
+#include <sstream>
 
 #ifdef TH_BLAS_MKL
 #include <mkl.h>
@@ -60,6 +64,41 @@ size_t get_num_threads() {
 #endif
 }
 
+namespace {
+// Looks up an environment variable; yields "[not set]" when it is absent.
+const char* get_env_var(const char* var_name) {
+  if (const char* raw = std::getenv(var_name)) {
+    return raw;
+  }
+  return "[not set]";
+}
+} // namespace
+
+// Assembles a multi-line, human-readable report of the threading setup.
+std::string get_parallel_info() {
+  std::ostringstream out;
+
+  out << "ATen/Parallel:\n\tat::get_num_threads() : ";
+  out << at::get_num_threads() << '\n';
+  out << at::get_openmp_version() << '\n';
+#ifdef _OPENMP
+  out << "\tomp_get_max_threads() : " << omp_get_max_threads() << '\n';
+#endif
+  out << at::get_mkl_version() << '\n';
+#ifdef TH_BLAS_MKL
+  out << "\tmkl_get_max_threads() : " << mkl_get_max_threads() << '\n';
+#endif
+  out << at::get_mkldnn_version() << '\n';
+  out << "std::thread::hardware_concurrency() : ";
+  out << std::thread::hardware_concurrency() << '\n';
+
+  // Raw values of the environment variables, as seen by this process.
+  out << "Environment variables:" << '\n';
+  out << "\tOMP_NUM_THREADS : " << get_env_var("OMP_NUM_THREADS") << '\n';
+  out << "\tMKL_NUM_THREADS : " << get_env_var("MKL_NUM_THREADS") << '\n';
+  return out.str();
+}
+
 PTThreadPool::PTThreadPool(
     std::size_t pool_size,
     int numa_node_id)
diff --git a/aten/src/ATen/Parallel.h b/aten/src/ATen/Parallel.h
@@ -143,6 +143,9 @@ inline scalar_t parallel_reduce(
   }
 }
 
+// Returns a detailed string describing parallelization settings
+CAFFE2_API std::string get_parallel_info();
+
 class CAFFE2_API PTThreadPool : public c10::ThreadPool {
  public:
   explicit PTThreadPool(
diff --git a/aten/src/ATen/Version.cpp b/aten/src/ATen/Version.cpp
@@ -16,6 +16,78 @@
 
 namespace at {
 
+// Reports the version string obtained from mkl_get_version_string(); when
+// AT_MKL_ENABLED() is false the placeholder "MKL not found" is returned.
+std::string get_mkl_version() {
+#if AT_MKL_ENABLED()
+  // The buffer length of 198 is taken from the MKL documentation:
+  // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-get-version-string
+  constexpr int kVersionBufLen = 198;
+  char raw_version[kVersionBufLen];
+  mkl_get_version_string(raw_version, kVersionBufLen);
+  std::string result(raw_version);
+#else
+  std::string result("MKL not found");
+#endif
+  return result;
+}
+
+// Formats the MKL-DNN version (major.minor.patch plus git hash); yields
+// "MKLDNN not found" when AT_MKLDNN_ENABLED() is false.
+std::string get_mkldnn_version() {
+  std::ostringstream out;
+#if AT_MKLDNN_ENABLED()
+  // Adapted from mkl-dnn/src/common/verbose.cpp. ISA info is not conveniently
+  // available, and there is apparently no way to query the ideep version:
+  // https://github.com/intel/ideep/issues/29
+  const mkldnn_version_t* info = mkldnn_version();
+  out << "Intel(R) MKL-DNN v" << info->major;
+  out << "." << info->minor << "." << info->patch;
+  out << " (Git Hash " << info->hash << ")";
+#else
+  out << "MKLDNN not found";
+#endif
+  return out.str();
+}
+
+// Describes the OpenMP version reported by the compiler's _OPENMP macro.
+//
+// With OpenMP enabled the result is "OpenMP <date code>", followed by
+// " (a.k.a. OpenMP <x.y>)" when the date code matches a known specification
+// release; without OpenMP the result is "OpenMP not found".
+std::string get_openmp_version() {
+  std::ostringstream ss;
+
+#ifdef _OPENMP
+  ss << "OpenMP " << _OPENMP;
+
+  // _OPENMP expands to the yyyymm date of the supported specification.
+  // Reference:
+  // https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
+  const char* ver_str = nullptr;
+  switch (_OPENMP) {
+    case 200505: ver_str = "2.5"; break;
+    case 200805: ver_str = "3.0"; break;
+    case 201107: ver_str = "3.1"; break;
+    case 201307: ver_str = "4.0"; break;
+    case 201511: ver_str = "4.5"; break;
+    // Date codes of the releases after 4.5, per the OpenMP specifications.
+    case 201811: ver_str = "5.0"; break;
+    case 202011: ver_str = "5.1"; break;
+    case 202111: ver_str = "5.2"; break;
+    default: break; // unknown date code: report the raw number only
+  }
+
+  if (ver_str) {
+    ss << " (a.k.a. OpenMP " << ver_str << ")";
+  }
+#else
+  ss << "OpenMP not found";
+#endif
+
+  return ss.str();
+}
+
 std::string show_config() {
   std::ostringstream ss;
   ss << "PyTorch built with:\n"; // TODO add the version of PyTorch
@@ -42,58 +114,15 @@ std::string show_config() {
 #endif
 
 #if AT_MKL_ENABLED()
-  {
-    // Magic buffer number is from MKL documentation
-    // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-get-version-string
-    char buf[198];
-    mkl_get_version_string(buf, 198);
-    ss << "  - " << buf << "\n";
-  }
+  ss << "  - " << get_mkl_version() << "\n";
 #endif
 
 #if AT_MKLDNN_ENABLED()
-  // Cribbed from mkl-dnn/src/common/verbose.cpp
-  // Too bad: can't get ISA info conveniently :(
-  // Apparently no way to get ideep version?
-  // https://github.com/intel/ideep/issues/29
-  {
-    const mkldnn_version_t* ver = mkldnn_version();
-    ss << "  - Intel(R) MKL-DNN v" << ver->major << "." << ver->minor << "." << ver->patch
-       << " (Git Hash " << ver->hash << ")\n";
-  }
+  ss << "  - " << get_mkldnn_version() << "\n";
 #endif
 
 #ifdef _OPENMP
-  {
-    ss << "  - OpenMP " << _OPENMP;
-    // Reference:
-    // https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
-    const char* ver_str = nullptr;
-    switch (_OPENMP) {
-      case 200505:
-        ver_str = "2.5";
-        break;
-      case 200805:
-        ver_str = "3.0";
-        break;
-      case 201107:
-        ver_str = "3.1";
-        break;
-      case 201307:
-        ver_str = "4.0";
-        break;
-      case 201511:
-        ver_str = "4.5";
-        break;
-      default:
-        ver_str = nullptr;
-        break;
-    }
-    if (ver_str) {
-      ss << " (a.k.a. OpenMP " << ver_str << ")";
-    }
-    ss << "\n";
-  }
+  ss << "  - " << get_openmp_version() << "\n";
 #endif
 
 #ifdef USE_LAPACK
diff --git a/aten/src/ATen/Version.h b/aten/src/ATen/Version.h
@@ -5,4 +5,10 @@ namespace at {
 /// Returns a detailed string describing the configuration PyTorch.
 CAFFE2_API std::string show_config();
 
+CAFFE2_API std::string get_mkl_version();
+
+CAFFE2_API std::string get_mkldnn_version();
+
+CAFFE2_API std::string get_openmp_version();
+
 }  // namespace at
diff --git a/aten/src/ATen/test/thread_init_test.cpp b/aten/src/ATen/test/thread_init_test.cpp
@@ -28,15 +28,15 @@ int main() {
   t1.join();
 
   at::set_num_threads(4);
-  std::thread t2(test, 4);
-  std::thread t3(test, 4);
-  std::thread t4(test, 4);
+  std::thread t2(test, at::get_num_threads());
+  std::thread t3(test, at::get_num_threads());
+  std::thread t4(test, at::get_num_threads());
   t4.join();
   t3.join();
   t2.join();
 
   at::set_num_threads(5);
-  test(5);
+  test(at::get_num_threads());
 
   return 0;
 }
diff --git a/binaries/CMakeLists.txt b/binaries/CMakeLists.txt
@@ -2,6 +2,11 @@ caffe2_binary_target("convert_caffe_image_db.cc")
 caffe2_binary_target("convert_db.cc")
 caffe2_binary_target("make_cifar_db.cc")
 caffe2_binary_target("make_mnist_db.cc")
+if (NOT ANDROID)
+  caffe2_binary_target("parallel_info.cc")
+  target_include_directories(parallel_info PUBLIC
+    ${CMAKE_BINARY_DIR}/aten/src) # provides "ATen/TypeExtendedInterface.h" to ATen.h
+endif()
 caffe2_binary_target("predictor_verifier.cc")
 caffe2_binary_target("print_registered_core_operators.cc")
 caffe2_binary_target("run_plan.cc")
diff --git a/binaries/parallel_info.cc b/binaries/parallel_info.cc
@@ -0,0 +1,41 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ATen/Parallel.h"
+
+#include <iostream>
+#include <sstream>
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+
+// Prints ATen's parallelization report; on Linux also lists this process's lsof rows that contain ".so".
+int main(int argc, char** argv) {
+  at::init_num_threads();
+  std::cout << at::get_parallel_info() << std::endl;
+# ifdef __linux__
+  // -F matches ".so" literally; as a regex the dot would match any character.
+  std::ostringstream cmd;
+  cmd << "lsof -p " << getpid() << " | grep -F .so";
+  // std::endl flushes our output before the child process writes to stdout.
+  std::cout << "Loaded .so:" << std::endl;
+  std::cout << cmd.str() << std::endl;
+  std::system(cmd.str().c_str());
+# endif
+  return 0;
+}
diff --git a/cmake/Modules/FindMKLDNN.cmake b/cmake/Modules/FindMKLDNN.cmake
@@ -89,7 +89,7 @@ ENDIF(MKL_FOUND)
 
 IF(MKL_FOUND)
   SET(MKL_cmake_included TRUE)
-  SET(MKLDNN_THREADING "OMP:COMP" CACHE STRING "" FORCE)
+  SET(MKLDNN_THREADING "OMP:COMP" CACHE STRING "")
 ENDIF(MKL_FOUND)
 SET(WITH_TEST FALSE CACHE BOOL "" FORCE)
 SET(WITH_EXAMPLE FALSE CACHE BOOL "" FORCE)
diff --git a/docs/source/__config__.rst b/docs/source/__config__.rst
@@ -4,3 +4,4 @@ torch.__config__
 .. automodule:: torch.__config__
 
 .. autofunction:: show
+.. autofunction:: parallel_info
diff --git a/setup.py b/setup.py
@@ -42,6 +42,9 @@
 #   USE_MKLDNN=0
 #     disables use of MKLDNN
 #
+#   MKLDNN_THREADING
+#     MKL-DNN threading mode (https://github.com/intel/mkl-dnn/)
+#
 #   USE_NNPACK=0
 #     disables NNPACK build
 #
@@ -64,6 +67,9 @@
 #   USE_OPENCV
 #     enables use of OpenCV for additional operators
 #
+#   USE_OPENMP=0
+#     disables use of OpenMP for parallelization
+#
 #   USE_FFMPEG
 #     enables use of ffmpeg for additional operators
 #
@@ -96,6 +102,9 @@
 #     then the build will fail if the requested BLAS is not found, otherwise
 #     the BLAS will be chosen based on what is found on your system.
 #
+#   MKL_SEQ=1
+#     chooses a sequential version of MKL library (in case of BLAS=MKL)
+#
 #   USE_FBGEMM
 #     Enables use of FBGEMM
 #
diff --git a/test/test_torch.py b/test/test_torch.py
@@ -10996,6 +10996,9 @@ def test_show_config(self):
         # We can't usefully test the output; just make sure this doesn't crash
         torch.__config__.show()
 
+    def test_parallel_info(self):
+        torch.__config__.parallel_info()  # smoke test: passes as long as the call does not raise
+
     @staticmethod
     def _test_bincount(self, device):
         # negative input throws
diff --git a/tools/build_pytorch_libs.py b/tools/build_pytorch_libs.py
@@ -208,6 +208,16 @@ def run_cmake(version,
         USE_GFLAGS=os.getenv('USE_GFLAGS'),
         WERROR=os.getenv('WERROR'))
 
+    if os.getenv('USE_OPENMP'):
+        cmake_defines(cmake_args, USE_OPENMP=check_env_flag('USE_OPENMP'))
+
+    if os.getenv('MKL_SEQ'):
+        cmake_defines(cmake_args, INTEL_MKL_SEQUENTIAL=check_env_flag('MKL_SEQ'))
+
+    mkldnn_threading = os.getenv('MKLDNN_THREADING')
+    if mkldnn_threading:
+        cmake_defines(cmake_args, MKLDNN_THREADING=mkldnn_threading)
+
     if USE_GLOO_IBVERBS:
         cmake_defines(cmake_args, USE_IBVERBS="1", USE_GLOO_IBVERBS="1")
 
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
@@ -337,7 +337,7 @@ endif()
 target_link_libraries(torch caffe2_library)
 
 find_package(OpenMP QUIET)
-if(OPENMP_FOUND)
+if(USE_OPENMP AND OPENMP_FOUND)
   message(STATUS "pytorch is compiling with OpenMP. \n"
     "OpenMP CXX_FLAGS: ${OpenMP_CXX_FLAGS}. \n"
     "OpenMP libraries: ${OpenMP_CXX_LIBRARIES}.")
diff --git a/torch/__config__.py b/torch/__config__.py
@@ -11,3 +11,7 @@ def show():
 # TODO: In principle, we could provide more structured version/config
 # information here.  We're not for now; considering doing so if someone
 # asks for it.
+
+def parallel_info():
+    r"""Return a detailed string describing the parallelization settings."""
+    return torch._C._parallel_info()
diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp

Original file line number	Diff line number	Diff line change
`@@ -203,6 +203,7 @@ test_xla() {`
`203`	`203`	`}`
`204`	`204`
`205`	`205`	`(cd test && python -c "import torch; print(torch.__config__.show())")`
	`206`	`+(cd test && python -c "import torch; print(torch.__config__.parallel_info())")`
`206`	`207`
`207`	`208`	`if [[ "${BUILD_ENVIRONMENT}" == *xla* ]]; then`
`208`	`209`	`test_torchvision`
Original file line number	Diff line number	Diff line change
`@@ -143,6 +143,9 @@ inline scalar_t parallel_reduce(`
`143`	`143`	`}`
`144`	`144`	`}`
`145`	`145`
	`146`	`+// Returns a detailed string describing parallelization settings`
	`147`	`+CAFFE2_API std::string get_parallel_info();`
	`148`	`+`
`146`	`149`	`class CAFFE2_API PTThreadPool : public c10::ThreadPool {`
`147`	`150`	`public:`
`148`	`151`	`explicit PTThreadPool(`
Original file line number	Diff line number	Diff line change
`@@ -4,3 +4,4 @@ torch.__config__`
`4`	`4`	`.. automodule:: torch.__config__`
`5`	`5`
`6`	`6`	`.. autofunction:: show`
	`7`	`+.. autofunction:: parallel_info`