Fix broken headings in Markdown files #64

Open · wants to merge 18 commits into base: master

2 changes: 1 addition & 1 deletion README.md
@@ -57,7 +57,7 @@ Hello, TensorFlow!
>>>
```

##For more information
## For more information

* [TensorFlow website](http://tensorflow.org)
* [TensorFlow whitepaper](http://download.tensorflow.org/paper/whitepaper2015.pdf)
4 changes: 2 additions & 2 deletions configure
@@ -504,7 +504,7 @@ if [ "$TF_NEED_OPENCL" == "1" ]; then
while true; do
fromuser=""
if [ -z "$HOST_CXX_COMPILER" ]; then
default_cxx_host_compiler=$(which clang++-3.6 || true)
default_cxx_host_compiler=$(which g++-4.8 || true)
read -p "Please specify which C++ compiler should be used as the host C++ compiler. [Default is $default_cxx_host_compiler]: " HOST_CXX_COMPILER
fromuser="1"
if [ -z "$HOST_CXX_COMPILER" ]; then
@@ -527,7 +527,7 @@ done
while true; do
fromuser=""
if [ -z "$HOST_C_COMPILER" ]; then
default_c_host_compiler=$(which clang-3.6 || true)
default_c_host_compiler=$(which gcc-4.8 || true)
read -p "Please specify which C compiler should be used as the host C compiler. [Default is $default_c_host_compiler]: " HOST_C_COMPILER
fromuser="1"
if [ -z "$HOST_C_COMPILER" ]; then
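
For context, the two configure hunks above only change the default compilers offered by the interactive prompts; because of the surrounding `if [ -z "$HOST_CXX_COMPILER" ]` / `if [ -z "$HOST_C_COMPILER" ]` checks, the questions are skipped entirely when those variables are already set. A minimal non-interactive sketch (not part of this diff, and assuming gcc/g++ 4.8 are the host compilers you want):

```sh
# Hypothetical non-interactive run: pre-seed the variables that the
# prompts shown in the hunks above would otherwise ask for.
export TF_NEED_OPENCL=1
export HOST_CXX_COMPILER="$(which g++-4.8)"
export HOST_C_COMPILER="$(which gcc-4.8)"
./configure
```
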
2 changes: 1 addition & 1 deletion tensorflow/contrib/tfprof/README.md
@@ -6,7 +6,7 @@ Author: Xin Pan ([email protected], github: panyx0718)

Consultants: Jon Shlens, Pete Warden

###Major Features
### Major Features

1. Measure model parameters, float operations, tensor shapes.
2. Measure op execution times, requested memory size and device placement.
1 change: 1 addition & 0 deletions tensorflow/core/BUILD
@@ -1597,6 +1597,7 @@ cc_library(
hdrs = if_not_windows([
"common_runtime/sycl/sycl_allocator.h",
"common_runtime/sycl/sycl_device.h",
"common_runtime/sycl/sycl_util.h",
"common_runtime/sycl/sycl_device_context.h",
]),
copts = tf_copts(),
3 changes: 1 addition & 2 deletions tensorflow/core/common_runtime/direct_session_test.cc
@@ -871,8 +871,6 @@ class BlockingOp : public OpKernel {
REGISTER_KERNEL_BUILDER(Name("BlockingOp").Device(DEVICE_CPU), BlockingOp);
REGISTER_OP("BlockingOp").Input("x: float").Output("y: float").Doc("");

REGISTER_KERNEL_BUILDER(Name("BlockingOp").Device(DEVICE_SYCL), BlockingOp);

static void TestSessionInterOpThreadsImpl(bool use_function_lib) {
FunctionDefLibrary library_graph_def;
if (use_function_lib) {
@@ -910,6 +908,7 @@ static void TestSessionInterOpThreadsImpl(bool use_function_lib) {
->set_opt_level(OptimizerOptions_Level_L0);
(*options.config.mutable_device_count())["CPU"] = 2;
(*options.config.mutable_device_count())["GPU"] = 0;
(*options.config.mutable_device_count())["SYCL"] = 0;

options.config.add_session_inter_op_thread_pool();
auto* p = options.config.add_session_inter_op_thread_pool();
@@ -138,7 +138,8 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelWarmup) {
DirectSession* ds = static_cast<DirectSession*>(session.get());
CostModelManager::CostModelMap cost_models;
ds->ExportCostModels(&cost_models);
CHECK_EQ(cost_models.size(), 1);
ASSERT_GE(2, cost_models.size());
ASSERT_LE(1, cost_models.size());
const CostModel* cm = (*cost_models.begin()).second;
EXPECT_EQ(measure_steps, cm->GetUpdateTimes());
}
@@ -155,10 +156,16 @@ static void TestHWAccelerator(bool enableHWTrace) {
test::FillValues<float>(&x_tensor, {1, 1});
Node* x = test::graph::Constant(&graph, x_tensor);
x->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
#ifdef TENSORFLOW_USE_SYCL
x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
#endif // TENSORFLOW_USE_SYCL

// y = A * x
Node* y = test::graph::Matmul(&graph, a, x, false, false);
y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
#ifdef TENSORFLOW_USE_SYCL
y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
#endif // TENSORFLOW_USE_SYCL

Node* y_neg = test::graph::Unary(&graph, "Neg", y);
y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:0");
@@ -169,6 +176,9 @@ static void TestHWAccelerator(bool enableHWTrace) {
SessionOptions options;
(*options.config.mutable_device_count())["CPU"] = 1;
(*options.config.mutable_device_count())["GPU"] = 1;
#ifdef TENSORFLOW_USE_SYCL
(*options.config.mutable_device_count())["SYCL"] = 1;
#endif // TENSORFLOW_USE_SYCL
options.config.set_allow_soft_placement(true);
options.config.mutable_graph_options()->set_build_cost_model(1);
std::unique_ptr<Session> session(NewSession(options));
6 changes: 3 additions & 3 deletions tensorflow/core/common_runtime/memory_types.cc
@@ -45,12 +45,12 @@ struct EndpointEq {
static Status ProcessMemoryTypes(
DeviceType device_type, const Graph* g,
std::function<Status(const Edge*, MemoryType, MemoryType)> fn) {
if (device_type != DEVICE_GPU) {
// On non-GPU devices, HOST_MEMORY and DEVICE_MEMORY are always
if (device_type != DEVICE_GPU && device_type != DEVICE_SYCL) {
// On non-GPU and non-SYCL devices, HOST_MEMORY and DEVICE_MEMORY are always
// compatible.
return Status::OK();
}
// For GPU device, HOST_MEMORY and DEVICE_MEMORY is not
// For GPU and SYCL devices, HOST_MEMORY and DEVICE_MEMORY are not
// compatible. I.e., a conversion/transfer must be done.
//
// {node id, slot id} -> memory type.
18 changes: 18 additions & 0 deletions tensorflow/core/common_runtime/memory_types_test.cc
@@ -34,6 +34,9 @@ TEST(MemoryTypeChecker, Int32OK) {
// There is a kernel for adding two int32s on host memory.
TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_GPU, g));
#endif // GOOGLE_CUDA
#ifdef TENSORFLOW_USE_SYCL
TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_SYCL, g));
#endif // TENSORFLOW_USE_SYCL
delete g;
}

@@ -53,6 +56,15 @@ TEST(MemoryTypeChecker, Int32NotOk) {
TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_GPU, "/gpu:0", g));
TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_GPU, g));
#endif // GOOGLE_CUDA
#ifdef TENSORFLOW_USE_SYCL
// There is no kernel for casting int32/host memory to float/device
// memory.
EXPECT_TRUE(errors::IsInternal(ValidateMemoryTypes(DEVICE_SYCL, g)));

// But we can insert _HostSend/_HostRecv to ensure the invariant.
TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_SYCL, "/device:SYCL:0", g));
TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_SYCL, g));
#endif // TENSORFLOW_USE_SYCL
delete g;
}

@@ -74,6 +86,12 @@ TEST(MemoryTypeChecker, MemoryTypeForOutput) {
// int Switch's output on GPU has HOST_MEMORY constraint.
EXPECT_EQ(memory_type, HOST_MEMORY);
#endif // GOOGLE_CUDA
#ifdef TENSORFLOW_USE_SYCL
auto si = test::graph::Switch(g, test::graph::Constant(g, vi), pred);
TF_EXPECT_OK(MemoryTypeForOutput(DEVICE_SYCL, g, si, 0, &memory_type));
// int Switch's output on SYCL has HOST_MEMORY constraint.
EXPECT_EQ(memory_type, HOST_MEMORY);
#endif // TENSORFLOW_USE_SYCL
delete g;
}

4 changes: 3 additions & 1 deletion tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
@@ -18,6 +18,8 @@ limitations under the License.
#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/common_runtime/sycl/sycl_device.h"

#include "tensorflow/core/common_runtime/sycl/sycl_util.h"

namespace tensorflow {

class SYCLDeviceFactory : public DeviceFactory {
@@ -34,7 +36,7 @@ class SYCLDeviceFactory : public DeviceFactory {
devices->push_back(
new SYCLDevice(options, name, Bytes(256 << 20), DeviceLocality(),
SYCLDevice::GetShortDeviceDescription(),
cl::sycl::gpu_selector(), cpu_allocator()));
GetSYCLDevice(), cpu_allocator()));
}
return Status::OK();
}
57 changes: 57 additions & 0 deletions tensorflow/core/common_runtime/sycl/sycl_util.h
@@ -0,0 +1,57 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if !TENSORFLOW_USE_SYCL
#error This file must only be included when building TensorFlow with SYCL support
#endif

#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_UTIL_H_
#define TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_UTIL_H_

// For DMA helper
#include "tensorflow/core/common_runtime/dma_helper.h"
#include "tensorflow/core/framework/tensor.h"

namespace tensorflow {
inline void* GetBase(const Tensor* src) {
return const_cast<void*>(DMAHelper::base(src));
}

inline void* GetBase(Tensor* dst) { return DMAHelper::base(dst); }

inline cl::sycl::device GetSYCLDevice() {
// Obtain list of supported devices from Eigen
for (const auto& device : Eigen::get_sycl_supported_devices()) {
if (device.is_gpu()) {
// returns first found GPU
return device;
}
}

// Currently Intel GPU is not supported
LOG(WARNING) << "No OpenCL GPU found that is supported by ComputeCpp, trying OpenCL CPU";

for (const auto& device : Eigen::get_sycl_supported_devices()) {
if (device.is_cpu()) {
// returns first found CPU
return device;
}
}
// No supported OpenCL device was found.
LOG(FATAL) << "No OpenCL GPU or CPU found that is supported by ComputeCpp";
}
}  // namespace tensorflow

#endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_UTIL_H_
2 changes: 1 addition & 1 deletion tensorflow/core/debug/debug_gateway.cc
@@ -84,7 +84,7 @@ void DebugGateway::CopyTensor(const string& node_name, const int output_slot,
// Determine if the tensor is on device (GPU) or host (CPU).
// The second part of the check is necessary because even an OpKernel on GPU
// may have output tensors allocated on CPU.
if (device->name().find("gpu:") != string::npos &&
if ((device->name().find("gpu:") != string::npos || device->name().find("SYCL:") != string::npos) &&
!ctx->output_alloc_attr(output_slot).on_host()) {
// GPU tensors: Copy it to host (CPU).
DeviceContext* device_ctxt = ctx->op_device_context();
18 changes: 16 additions & 2 deletions tensorflow/core/debug/debug_gateway_test.cc
@@ -45,6 +45,8 @@ class SessionDebugMinusAXTest : public ::testing::Test {

#if GOOGLE_CUDA
const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
#elif defined(TENSORFLOW_USE_SYCL)
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
#else
const string kDeviceName = "/job:localhost/replica:0/task:0/cpu:0";
#endif
@@ -302,6 +304,8 @@ TEST_F(SessionDebugMinusAXTest, RunSimpleNetworkWithTwoDebugNodesInserted) {
// through RunMetadata, given whether GPU is involved.
#if GOOGLE_CUDA
ASSERT_EQ(2, run_metadata.partition_graphs().size());
#elif defined(TENSORFLOW_USE_SYCL)
ASSERT_EQ(2, run_metadata.partition_graphs().size());
#else
ASSERT_EQ(1, run_metadata.partition_graphs().size());
#endif
@@ -336,7 +340,7 @@ TEST_F(SessionDebugMinusAXTest, RunSimpleNetworkWithTwoDebugNodesInserted) {
ASSERT_EQ(1, debug_nan_count_tensor_vals[0].scalar<int64>()());
}

#ifndef GOOGLE_CUDA
#if !defined(GOOGLE_CUDA) && !defined(TENSORFLOW_USE_SYCL)
// TODO(cais): Reinstate the following test for concurrent debugged runs on
// a GPU once the root cause of the ~0.5% flakiness has been addressed.
// (b/34081273)
@@ -499,6 +503,8 @@ class SessionDebugOutputSlotWithoutOngoingEdgeTest : public ::testing::Test {

#if GOOGLE_CUDA
const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
#elif defined(TENSORFLOW_USE_SYCL)
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
#else
const string kDeviceName = "/job:localhost/replica:0/task:0/cpu:0";
#endif
@@ -599,6 +605,8 @@ class SessionDebugVariableTest : public ::testing::Test {

#if GOOGLE_CUDA
const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
#elif defined(TENSORFLOW_USE_SYCL)
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
#else
const string kDeviceName = "/job:localhost/replica:0/task:0/cpu:0";
#endif
@@ -818,6 +826,8 @@ TEST_F(SessionDebugVariableTest, VariableAssignWithDebugOps) {

#if GOOGLE_CUDA
ASSERT_EQ(2, run_metadata.partition_graphs().size());
#elif defined(TENSORFLOW_USE_SYCL)
ASSERT_EQ(2, run_metadata.partition_graphs().size());
#else
ASSERT_EQ(1, run_metadata.partition_graphs().size());
#endif
@@ -855,13 +865,17 @@ TEST_F(SessionDebugVariableTest, VariableAssignWithDebugOps) {
ASSERT_EQ(2, debug_nan_count_tensor_vals[0].scalar<int64>()());
}

#if GOOGLE_CUDA
#if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_SYCL)
class SessionDebugGPUSwitchTest : public ::testing::Test {
public:
void Initialize() {
Graph graph(OpRegistry::Global());

#ifdef GOOGLE_CUDA
const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
#elif defined(TENSORFLOW_USE_SYCL)
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
#endif

Tensor vb(DT_BOOL, TensorShape({}));
vb.scalar<bool>()() = true;
4 changes: 2 additions & 2 deletions tensorflow/core/framework/op_kernel.cc
@@ -94,9 +94,9 @@ OpKernel::OpKernel(OpKernelConstruction* context)
OP_REQUIRES_OK(context, CheckOpDeprecation(context->op_def(),
context->graph_def_version()));

// Kernels executing on GPU tie up very few resources on the CPU where the
// Kernels executing on GPU/SYCL tie up very few resources on the CPU where the
// scheduler runs: we consider them as inexpensive.
expensive_ = context->device_type() != DeviceType(DEVICE_GPU);
expensive_ = context->device_type() != DeviceType(DEVICE_GPU) && context->device_type() != DeviceType(DEVICE_SYCL);
}

OpKernel::~OpKernel() {}
4 changes: 4 additions & 0 deletions tensorflow/core/graph/testlib.cc
@@ -36,6 +36,10 @@ namespace tensorflow {
REGISTER_KERNEL_BUILDER(Name("HostConst").Device(DEVICE_CPU), HostConstantOp);
REGISTER_KERNEL_BUILDER(
Name("HostConst").Device(DEVICE_GPU).HostMemory("output"), HostConstantOp);
#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(
Name("HostConst").Device(DEVICE_SYCL).HostMemory("output"), HostConstantOp);
#endif // TENSORFLOW_USE_SYCL

// Register the HostConst Op
// Returns a constant tensor on the host. Useful for writing C++ tests
3 changes: 2 additions & 1 deletion tensorflow/core/kernels/BUILD
@@ -32,6 +32,7 @@ load(
"tf_kernel_library",
"cc_header_only_library",
)
load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl")
load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test")
load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_tests")
load(
@@ -433,7 +434,7 @@ ARRAY_DEPS = [
"//tensorflow/core:proto_text",
"//tensorflow/core:protos_all_cc",
"//third_party/eigen3",
]
] + if_sycl(["//tensorflow/core:sycl_runtime"])

cc_library(
name = "array_not_windows",
3 changes: 0 additions & 3 deletions tensorflow/core/kernels/aggregate_ops.cc
@@ -165,9 +165,6 @@ REGISTER_KERNEL_BUILDER(Name("AddN")
REGISTER_ADDN(float, SYCL);
REGISTER_ADDN(double, SYCL);

// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("AddN")
.Device(DEVICE_SYCL)
.TypeConstraint<int32>("T")