Skip to content

[UR][Offload] Add initial Offload adapter #18271

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: sycl
Choose a base branch
from
1 change: 1 addition & 0 deletions unified-runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ option(UR_BUILD_ADAPTER_HIP "Build the HIP adapter" OFF)
option(UR_BUILD_ADAPTER_NATIVE_CPU "Build the Native-CPU adapter" OFF)
option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF)
option(UR_BUILD_ADAPTER_L0_V2 "Build the (experimental) Level-Zero v2 adapter" OFF)
option(UR_BUILD_ADAPTER_OFFLOAD "Build the experimental Offload adapter" OFF)
option(UR_STATIC_ADAPTER_L0 "Build the Level-Zero adapter as static and embed in the loader" OFF)
option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
Expand Down
4 changes: 4 additions & 0 deletions unified-runtime/include/ur_api.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions unified-runtime/include/ur_print.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions unified-runtime/scripts/core/adapter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ etors:
- name: NATIVE_CPU
value: "5"
desc: "The backend is Native CPU"
- name: OFFLOAD
value: "0x100"
desc: "The backend is liboffload"
--- #--------------------------------------------------------------------------
type: enum
desc: "Minimum level of messages to be processed by the logger."
Expand Down
7 changes: 7 additions & 0 deletions unified-runtime/scripts/core/manifests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,10 @@ name: native_cpu
backend: $X_ADAPTER_BACKEND_NATIVE_CPU
device_types:
- $X_DEVICE_TYPE_CPU
--- #--------------------------------------------------------------------------
type: manifest
name: offload
backend: $X_ADAPTER_BACKEND_OFFLOAD
device_types:
- $X_DEVICE_TYPE_CPU
- $X_DEVICE_TYPE_GPU
3 changes: 3 additions & 0 deletions unified-runtime/scripts/core/platform.yml
Original file line number Diff line number Diff line change
Expand Up @@ -279,3 +279,6 @@ etors:
- name: NATIVE_CPU
value: "5"
desc: "The backend is Native CPU"
- name: OFFLOAD
value: "0x100"
desc: "The backend is liboffload"
6 changes: 6 additions & 0 deletions unified-runtime/source/adapters/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,15 @@ if(UR_BUILD_ADAPTER_OPENCL OR UR_BUILD_ADAPTER_ALL)
add_ur_adapter_subdirectory(opencl)
list(APPEND TEMP_LIST "opencl")
endif()

if(UR_BUILD_ADAPTER_NATIVE_CPU OR UR_BUILD_ADAPTER_ALL)
add_ur_adapter_subdirectory(native_cpu)
list(APPEND TEMP_LIST "native_cpu")
endif()

# Offload adapter: only added when UR_BUILD_ADAPTER_OFFLOAD is set explicitly.
# NOTE(review): unlike the opencl/native_cpu conditions, this one does not
# include UR_BUILD_ADAPTER_ALL. Presumably that is deliberate, because the
# offload subdirectory aborts configuration unless UR_OFFLOAD_INSTALL_DIR and
# UR_OFFLOAD_INCLUDE_DIR are provided -- confirm before adding it to ALL.
if(UR_BUILD_ADAPTER_OFFLOAD)
add_ur_adapter_subdirectory(offload)
# Record the adapter name so it ends up in UR_ADAPTERS_LIST.
list(APPEND TEMP_LIST "offload")
endif()

set(UR_ADAPTERS_LIST "${TEMP_LIST}" CACHE STRING "" FORCE)
72 changes: 72 additions & 0 deletions unified-runtime/source/adapters/offload/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Copyright (C) 2025 Intel Corporation
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

set(TARGET_NAME ur_adapter_offload)

# Install prefix of the LLVM Offload build. The adapter links against
# <prefix>/lib/libLLVMOffload.so, so this must be the prefix itself rather than
# its lib/ subdirectory. Configuration cannot continue without it.
set(UR_OFFLOAD_INSTALL_DIR "" CACHE PATH
    "Path to the LLVM Offload install prefix (the directory containing lib/libLLVMOffload.so)")
if("${UR_OFFLOAD_INSTALL_DIR}" STREQUAL "")
    message(FATAL_ERROR "UR_OFFLOAD_INSTALL_DIR must be defined for the Offload adapter")
endif()

# Header root of the LLVM build. Its "offload" subdirectory is added to the
# adapter's include path, so it is mandatory as well.
set(UR_OFFLOAD_INCLUDE_DIR "" CACHE PATH "Path to the directory containing LLVM headers")
if("${UR_OFFLOAD_INCLUDE_DIR}" STREQUAL "")
    message(FATAL_ERROR "UR_OFFLOAD_INCLUDE_DIR must be defined for the Offload adapter")
endif()

# When targeting CUDA devices, we need a workaround to avoid sending PTX to
# liboffload as the CUDA plugin doesn't support it yet. The workaround is to
# simply always link the incoming program so it ends up as CUBIN. Try to find
# the cuda driver so we can enable this where possible.
if(NOT TARGET cudadrv)
    # CUDA_cuda_driver_LIBRARY and CUDAToolkit_INCLUDE_DIRS are provided by
    # the FindCUDAToolkit module (not by the legacy FindCUDA module). CUDA is
    # optional here, so the package is deliberately not REQUIRED.
    find_package(CUDAToolkit 10.1)

    # Only publish the GLOBAL imported target when the driver library was
    # actually located; a target without a valid IMPORTED_LOCATION would
    # otherwise satisfy every later `if(TARGET cudadrv)` check in the build.
    if(CUDA_cuda_driver_LIBRARY)
        add_library(cudadrv SHARED IMPORTED GLOBAL)
        # Values are quoted so an empty variable cannot shift the
        # property/value pairing.
        set_target_properties(
            cudadrv PROPERTIES
                IMPORTED_LOCATION "${CUDA_cuda_driver_LIBRARY}"
                INTERFACE_INCLUDE_DIRECTORIES "${CUDAToolkit_INCLUDE_DIRS}"
        )
    endif()
endif()

# Declare the adapter as a shared library built from the sources in this
# directory. The list is kept explicit so additions show up in review.
add_ur_adapter(${TARGET_NAME}
    SHARED
    ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/context.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/device.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/enqueue.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/ur2offload.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp
)

# Stamp the library with the project version: the full
# major.minor.patch as VERSION and the major number as SOVERSION.
set_property(TARGET ${TARGET_NAME} PROPERTY
    VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
)
set_property(TARGET ${TARGET_NAME} PROPERTY
    SOVERSION "${PROJECT_VERSION_MAJOR}"
)

# Optional CUDA support: when the CUDA driver library is known, link the
# cudadrv target and define UR_CUDA_ENABLED for the adapter sources.
if(CUDA_cuda_driver_LIBRARY)
    set(ADDITIONAL_LINK_LIBS cudadrv)
    target_compile_definitions(${TARGET_NAME} PRIVATE UR_CUDA_ENABLED)
else()
    set(ADDITIONAL_LINK_LIBS "")
endif()

# Project-internal libraries, liboffload from the configured install prefix,
# and whatever optional libraries were collected above.
target_link_libraries(${TARGET_NAME}
    PRIVATE
        ${PROJECT_NAME}::headers
        ${PROJECT_NAME}::common
        ${PROJECT_NAME}::umf
        ${UR_OFFLOAD_INSTALL_DIR}/lib/libLLVMOffload.so
        ${ADDITIONAL_LINK_LIBS}
)

# Offload headers come from the LLVM include tree; the second entry exposes
# the directory two levels above this adapter.
target_include_directories(${TARGET_NAME}
    PRIVATE
        "${UR_OFFLOAD_INCLUDE_DIR}/offload"
        "${CMAKE_CURRENT_SOURCE_DIR}/../../"
)
108 changes: 108 additions & 0 deletions unified-runtime/source/adapters/offload/adapter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
//===----------- adapter.cpp - LLVM Offload Adapter ----------------------===//
//
// Copyright (C) 2024 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <OffloadAPI.h>
#include <atomic>
#include <cstdint>
#include <unordered_set>

#include "adapter.hpp"
#include "device.hpp"
#include "platform.hpp"
#include "ur/ur.hpp"
#include "ur_api.h"

ur_adapter_handle_t_ Adapter{};

// Initialize liboffload and perform the initial platform and device discovery.
//
// Discovery is split into two phases so that no pointer into `Platforms` is
// handed out while the vector can still grow:
//   1. the liboffload callback only records (platform, device) pairs;
//   2. all distinct platforms are appended to `Platforms`, and only then are
//      the devices created, each receiving a pointer to its platform element.
//
// Returns UR_RESULT_ERROR_UNINITIALIZED if olInit or olIterateDevices reports
// an error, UR_RESULT_SUCCESS otherwise.
ur_result_t ur_adapter_handle_t_::init() {
  auto Res = olInit();
  if (Res != OL_SUCCESS) {
    return UR_RESULT_ERROR_UNINITIALIZED;
  }

  // Raw results reported by liboffload, collected before any UR handle exists.
  struct DiscoveredDevice {
    ol_platform_handle_t Platform;
    ol_device_handle_t Device;
  };
  struct DiscoveryState {
    ol_device_handle_t Host = nullptr;
    std::vector<DiscoveredDevice> Devices;
  } Found;

  // Discover every platform and device
  Res = olIterateDevices(
      [](ol_device_handle_t D, void *UserData) {
        auto *State = static_cast<DiscoveryState *>(UserData);

        // A device whose platform or backend cannot be queried is skipped
        // rather than classified from uninitialized data.
        ol_platform_handle_t Platform;
        if (olGetDeviceInfo(D, OL_DEVICE_INFO_PLATFORM, sizeof(Platform),
                            &Platform) != OL_SUCCESS) {
          return false;
        }
        ol_platform_backend_t Backend;
        if (olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND,
                              sizeof(Backend), &Backend) != OL_SUCCESS) {
          return false;
        }

        if (Backend == OL_PLATFORM_BACKEND_HOST) {
          State->Host = D;
        } else if (Backend != OL_PLATFORM_BACKEND_UNKNOWN) {
          State->Devices.push_back(DiscoveredDevice{Platform, D});
        }
        // NOTE(review): the callback has always returned false here; confirm
        // against OffloadAPI.h that this does not end the iteration early.
        return false;
      },
      &Found);
  if (Res != OL_SUCCESS) {
    return UR_RESULT_ERROR_UNINITIALIZED;
  }

  if (Found.Host) {
    HostDevice = Found.Host;
  }

  auto FindPlatform = [this](ol_platform_handle_t Wanted) {
    return std::find_if(
        Platforms.begin(), Platforms.end(),
        [Wanted](auto &P) { return P.OffloadPlatform == Wanted; });
  };

  // Phase 1: register every distinct platform. All growth of `Platforms`
  // happens here, before any device captures a pointer to an element.
  for (const auto &Entry : Found.Devices) {
    if (FindPlatform(Entry.Platform) == Platforms.end()) {
      Platforms.push_back(ur_platform_handle_t_(Entry.Platform));
    }
  }

  // Phase 2: attach devices. `Platforms` no longer reallocates, so the
  // platform pointer passed to each device stays valid.
  for (const auto &Entry : Found.Devices) {
    auto URPlatform = FindPlatform(Entry.Platform);
    URPlatform->Devices.push_back(
        ur_device_handle_t_{&*URPlatform, Entry.Device});
  }

  return UR_RESULT_SUCCESS;
}

// Returns the single Offload adapter instance.
//
// phAdapters   - optional output; when non-null it receives the adapter handle
//                and one reference is taken on the adapter. The first
//                reference triggers initialization.
// pNumAdapters - optional output; when non-null it is set to 1.
//
// Returns the error from ur_adapter_handle_t_::init() if initialization
// fails (the reference taken for this call is dropped again in that case),
// UR_RESULT_SUCCESS otherwise.
UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(
    uint32_t, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) {
  if (phAdapters) {
    if (++Adapter.RefCount == 1) {
      ur_result_t InitResult = Adapter.init();
      if (InitResult != UR_RESULT_SUCCESS) {
        // Do not leave a reference behind for an adapter that never came up,
        // so a later call can retry initialization.
        --Adapter.RefCount;
        return InitResult;
      }
    }
    *phAdapters = &Adapter;
  }
  if (pNumAdapters) {
    *pNumAdapters = 1;
  }
  return UR_RESULT_SUCCESS;
}

// Drops one reference on the adapter. Always reports success.
//
// liboffload is intentionally not shut down when the count reaches zero:
// calling olShutDown() here can crash when tracing is enabled.
UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) {
  Adapter.RefCount.fetch_sub(1);
  return UR_RESULT_SUCCESS;
}

// Takes an additional reference on the adapter. Always reports success.
UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) {
  Adapter.RefCount.fetch_add(1);
  return UR_RESULT_SUCCESS;
}

// Answers adapter info queries.
//
// Supported properties:
//   UR_ADAPTER_INFO_BACKEND         -> UR_ADAPTER_BACKEND_OFFLOAD
//   UR_ADAPTER_INFO_REFERENCE_COUNT -> current adapter reference count
// Any other property yields UR_RESULT_ERROR_INVALID_ENUMERATION. The value
// and its size are written through UrReturnHelper.
UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t,
                                                     ur_adapter_info_t propName,
                                                     size_t propSize,
                                                     void *pPropValue,
                                                     size_t *pPropSizeRet) {
  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);

  if (propName == UR_ADAPTER_INFO_BACKEND) {
    return ReturnValue(UR_ADAPTER_BACKEND_OFFLOAD);
  }

  if (propName == UR_ADAPTER_INFO_REFERENCE_COUNT) {
    return ReturnValue(Adapter.RefCount.load());
  }

  return UR_RESULT_ERROR_INVALID_ENUMERATION;
}
32 changes: 32 additions & 0 deletions unified-runtime/source/adapters/offload/adapter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
//===----------- adapter.hpp - LLVM Offload Adapter ----------------------===//
//
// Copyright (C) 2025 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <atomic>
#include <cstdint>
#include <unordered_set>

#include <OffloadAPI.h>

#include "common.hpp"
#include "logger/ur_logger.hpp"
#include "platform.hpp"

// State of the Offload adapter. A single instance (`Adapter`) is defined in
// adapter.cpp.
struct ur_adapter_handle_t_ : ur::offload::handle_base {
  // Number of outstanding references; starts at zero and is adjusted by the
  // urAdapterGet/Retain/Release entry points.
  std::atomic_uint32_t RefCount{0};

  // Logger registered under the name "offload".
  logger::Logger &Logger{logger::get_logger("offload")};

  // Device reported by the liboffload host backend, or nullptr if none has
  // been discovered.
  ol_device_handle_t HostDevice{nullptr};

  // Discovered platforms; filled in by init().
  std::vector<ur_platform_handle_t_> Platforms;

  // Initializes liboffload and discovers platforms and devices.
  ur_result_t init();
};

// The adapter singleton.
extern ur_adapter_handle_t_ Adapter;
21 changes: 21 additions & 0 deletions unified-runtime/source/adapters/offload/common.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===----------- common.hpp - LLVM Offload Adapter -----------------------===//
//
// Copyright (C) 2024 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <atomic>

namespace ur::offload {
// Empty common base type for the adapter's handle structs.
struct handle_base {};
} // namespace ur::offload

// Base for handles with a reference count. A new object starts with one
// reference.
struct RefCounted : ur::offload::handle_base {
  std::atomic_uint32_t RefCount{1};
};
38 changes: 38 additions & 0 deletions unified-runtime/source/adapters/offload/context.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//===----------- context.cpp - LLVM Offload Adapter ----------------------===//
//
// Copyright (C) 2025 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "context.hpp"
#include <ur_api.h>

// Creates a context for exactly one device.
//
// DeviceCount - number of entries in phDevices; only 1 is supported.
// phDevices   - array of device handles; the first entry is used.
// phContext   - receives the new context, which starts with one reference.
//
// Returns UR_RESULT_ERROR_INVALID_NULL_POINTER if phDevices or phContext is
// null, UR_RESULT_ERROR_INVALID_VALUE if DeviceCount is zero,
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE if more than one device is requested,
// and UR_RESULT_SUCCESS otherwise.
UR_APIEXPORT ur_result_t UR_APICALL urContextCreate(
    uint32_t DeviceCount, const ur_device_handle_t *phDevices,
    const ur_context_properties_t *, ur_context_handle_t *phContext) {
  // Guard the dereferences below; phDevices[0] is only readable when at least
  // one device was supplied.
  if (!phDevices || !phContext) {
    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
  }
  if (DeviceCount == 0) {
    return UR_RESULT_ERROR_INVALID_VALUE;
  }
  if (DeviceCount > 1) {
    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  }

  auto Ctx = new ur_context_handle_t_(*phDevices);
  *phContext = Ctx;
  return UR_RESULT_SUCCESS;
}

// Takes an additional reference on hContext. Always reports success.
UR_APIEXPORT ur_result_t UR_APICALL
urContextRetain(ur_context_handle_t hContext) {
  hContext->RefCount.fetch_add(1);
  return UR_RESULT_SUCCESS;
}

// Drops one reference on hContext and destroys the context when that was the
// last one. Always reports success.
UR_APIEXPORT ur_result_t UR_APICALL
urContextRelease(ur_context_handle_t hContext) {
  // fetch_sub returns the value before the decrement, so 1 means the count
  // has just reached zero.
  const bool WasLastReference = hContext->RefCount.fetch_sub(1) == 1;
  if (WasLastReference) {
    delete hContext;
  }
  return UR_RESULT_SUCCESS;
}
26 changes: 26 additions & 0 deletions unified-runtime/source/adapters/offload/context.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//===----------- context.hpp - LLVM Offload Adapter ----------------------===//
//
// Copyright (C) 2025 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include "common.hpp"
#include <OffloadAPI.h>
#include <unordered_map>
#include <ur_api.h>

// Context object of the Offload adapter. It is bound to a single device and
// holds a reference on that device for its whole lifetime.
struct ur_context_handle_t_ : RefCounted {
  // Stores hDevice and retains it.
  ur_context_handle_t_(ur_device_handle_t hDevice) : Device(hDevice) {
    urDeviceRetain(Device);
  }

  // Releases the device reference taken by the constructor.
  ~ur_context_handle_t_() {
    urDeviceRelease(Device);
  }

  // The device this context was created for.
  ur_device_handle_t Device;

  // Allocation type recorded per pointer.
  // NOTE(review): presumably keyed by the base pointer of allocations made
  // through this context -- confirm against usm.cpp.
  std::unordered_map<void *, ol_alloc_type_t> AllocTypeMap;
};
Loading