Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 59 additions & 4 deletions libkineto/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,50 @@ else()
set(LIBKINETO_NOXPUPTI ON)
endif()

# Detect the installed ROCm version and choose the AMD profiling backend.
#
# Skipped entirely when LIBKINETO_NOROCTRACER is true.
#
# Reads:
#   ROCM_INCLUDE_DIRS    header search path; defaults to
#                        "${ROCM_SOURCE_DIR}/include" when not already set.
#   ROCM_HEADER_FILE     may be preset by the caller; it is overridden only
#                        when rocm-core/rocm_version.h is actually found.
#   USE_ROCPROFILER_SDK  may be preset by the caller; this section only ever
#                        turns it ON, never OFF.
# Sets:
#   ROCM_VERSION_DEV_MAJOR / ROCM_VERSION_DEV_MINOR / ROCM_VERSION_DEV_PATCH
#   USE_ROCPROFILER_SDK  set to ON when the detected version is >= 6.4.
#
# If the version header is missing or cannot be parsed, a warning is printed
# and the backend selection is left as it was instead of aborting configure
# inside file(READ) / string().
if(NOT LIBKINETO_NOROCTRACER)
  if(NOT ROCM_INCLUDE_DIRS)
    set(ROCM_INCLUDE_DIRS "${ROCM_SOURCE_DIR}/include")
  endif()

  # ROCM_INCLUDE_DIRS is deliberately unquoted: it may be a list of paths.
  find_file(ROCM_VERSION_HEADER_PATH
    NAMES rocm-core/rocm_version.h
    NO_DEFAULT_PATH
    PATHS ${ROCM_INCLUDE_DIRS}
  )

  if(EXISTS "${ROCM_VERSION_HEADER_PATH}")
    set(ROCM_HEADER_FILE "${ROCM_VERSION_HEADER_PATH}")
  endif()

  # Start from empty components so a failed lookup can be detected below.
  foreach(_rocm_part MAJOR MINOR PATCH)
    set(ROCM_VERSION_DEV_${_rocm_part} "")
  endforeach()

  if(ROCM_HEADER_FILE AND EXISTS "${ROCM_HEADER_FILE}")
    # Read the ROCM header file into a variable
    message(STATUS "Reading ROCM version from: ${ROCM_HEADER_FILE}")
    file(READ "${ROCM_HEADER_FILE}" ROCM_HEADER_CONTENT)

    # Pull the number that follows each ROCM_VERSION_<PART> macro name.
    # Spaces or tabs may separate the macro name from its value.
    foreach(_rocm_part MAJOR MINOR PATCH)
      if("${ROCM_HEADER_CONTENT}" MATCHES "ROCM_VERSION_${_rocm_part}[ \t]+([0-9]+)")
        set(ROCM_VERSION_DEV_${_rocm_part} "${CMAKE_MATCH_1}")
      endif()
    endforeach()
  endif()

  if("${ROCM_VERSION_DEV_MAJOR}" STREQUAL "" OR "${ROCM_VERSION_DEV_MINOR}" STREQUAL "")
    message(WARNING
      "Could not determine the ROCM version (searched for "
      "rocm-core/rocm_version.h in: ${ROCM_INCLUDE_DIRS}); "
      "profiler backend selection is left unchanged")
  else()
    message(STATUS "ROCM major: ${ROCM_VERSION_DEV_MAJOR}")
    message(STATUS "ROCM minor: ${ROCM_VERSION_DEV_MINOR}")
    message(STATUS "ROCM patch: ${ROCM_VERSION_DEV_PATCH}")

    # Use rocprofiler-sdk for rocm version 6.4 forward
    if("${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}" VERSION_GREATER_EQUAL "6.4")
      set(USE_ROCPROFILER_SDK ON)
    endif()
  endif()

  if(USE_ROCPROFILER_SDK)
    message(STATUS "Building with: rocprofiler-sdk")
  else()
    message(STATUS "Building with: libroctracer")
  endif()
endif()
if(NOT DEFINED LIBKINETO_NOAIUPTI)
message(INFO " LIBKINETO_NOAIUPTI NOT DEFINED adding subdirectory(src/plugin/aiupti)")
add_subdirectory(src/plugin/aiupti)
Expand All @@ -90,8 +134,14 @@ if(LIBKINETO_NOCUPTI AND LIBKINETO_NOROCTRACER AND LIBKINETO_NOXPUPTI AND LIBKIN
message(STATUS " CUPTI unavailable or disabled - not building GPU profilers")
else()
if(NOT LIBKINETO_NOROCTRACER)
get_filelist("get_libkineto_roctracer_srcs(with_api=False)" LIBKINETO_roc_SRCS)
message(STATUS " Building with roctracer")
if (${USE_ROCPROFILER_SDK})
get_filelist("get_libkineto_rocprofiler_srcs(with_api=False)" LIBKINETO_roc_SRCS)
message(STATUS " Building with rocprofiler-sdk")
else()
get_filelist("get_libkineto_roctracer_srcs(with_api=False)" LIBKINETO_roc_SRCS)
add_compile_options(-DROCTRACER_FALLBACK)
message(STATUS " Building with roctracer")
endif()
elseif(NOT LIBKINETO_NOCUPTI)
get_filelist("get_libkineto_cupti_srcs(with_api=False)" LIBKINETO_cuda_SRCS)
endif()
Expand Down Expand Up @@ -228,9 +278,14 @@ target_include_directories(kineto PUBLIC
$<BUILD_INTERFACE:${LIBKINETO_SOURCE_DIR}>)

if(NOT LIBKINETO_NOROCTRACER)
find_library(ROCTRACER_LIBRARY NAMES libroctracer64.so HINTS
if (${USE_ROCPROFILER_SDK})
find_library(ROCPROF_LIBRARY NAMES librocprofiler-sdk.so HINTS
${ROCM_SOURCE_DIR}/lib)
target_link_libraries(kineto "${ROCTRACER_LIBRARY}")
else()
find_library(ROCPROF_LIBRARY NAMES libroctracer64.so HINTS
${ROCM_SOURCE_DIR}/lib)
endif()
target_link_libraries(kineto "${ROCPROF_LIBRARY}")
find_library(KINETO_HIP_LIBRARY NAMES libamdhip64.so HINTS
${ROCM_SOURCE_DIR}/lib)
target_link_libraries(kineto "${KINETO_HIP_LIBRARY}")
Expand Down
8 changes: 8 additions & 0 deletions libkineto/libkineto_defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,18 @@ def get_libkineto_cupti_srcs(with_api = True):
"src/cupti_strings.cpp",
] + (get_libkineto_cpu_only_srcs(with_api))

def get_libkineto_rocprofiler_srcs(with_api = True):
    """Returns the rocprofiler source files followed by the CPU-only sources.

    Args:
      with_api: forwarded unchanged to get_libkineto_cpu_only_srcs().
    """
    rocprofiler_srcs = [
        "src/RocprofActivityApi.cpp",
        "src/RocprofLogger.cpp",
        "src/RocLogger.cpp",
    ]
    cpu_only_srcs = get_libkineto_cpu_only_srcs(with_api)
    return rocprofiler_srcs + cpu_only_srcs

def get_libkineto_roctracer_srcs(with_api = True):
    """Returns the roctracer source files followed by the CPU-only sources.

    Args:
      with_api: forwarded unchanged to get_libkineto_cpu_only_srcs().
    """
    roctracer_srcs = [
        "src/RoctracerActivityApi.cpp",
        "src/RoctracerLogger.cpp",
        "src/RocLogger.cpp",
    ]
    cpu_only_srcs = get_libkineto_cpu_only_srcs(with_api)
    return roctracer_srcs + cpu_only_srcs

def get_libkineto_xpupti_srcs(with_api = True):
Expand Down
9 changes: 9 additions & 0 deletions libkineto/src/ActivityProfilerController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@

#include "CuptiActivityApi.h"
#ifdef HAS_ROCTRACER
#ifndef ROCTRACER_FALLBACK
#include "RocprofActivityApi.h"
#else
#include "RoctracerActivityApi.h"
#endif
#endif

#include "ThreadUtil.h"
#include "output_json.h"
Expand Down Expand Up @@ -67,8 +71,13 @@ ActivityProfilerController::ActivityProfilerController(
#endif // !USE_GOOGLE_LOG

#ifdef HAS_ROCTRACER
#ifndef ROCTRACER_FALLBACK
profiler_ = std::make_unique<CuptiActivityProfiler>(
RocprofActivityApi::singleton(), cpuOnly);
#else
profiler_ = std::make_unique<CuptiActivityProfiler>(
RoctracerActivityApi::singleton(), cpuOnly);
#endif
#else
profiler_ = std::make_unique<CuptiActivityProfiler>(
CuptiActivityApi::singleton(), cpuOnly);
Expand Down
69 changes: 50 additions & 19 deletions libkineto/src/CuptiActivityProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#ifdef HAS_CUPTI
#include <cupti.h>
#elif defined(HAS_ROCTRACER)
#include <roctracer.h>
#include <rocprofiler-sdk/version.h>
#endif

#include "Config.h"
Expand All @@ -39,9 +39,14 @@
#include "KernelRegistry.h"
#endif // HAS_CUPTI
#ifdef HAS_ROCTRACER
#include "RocLogger.h"
#ifndef ROCTRACER_FALLBACK
#include "RocprofActivity.h"
#include "RocprofActivityApi.h"
#else
#include "RoctracerActivity.h"
#include "RoctracerActivityApi.h"
#include "RoctracerLogger.h"
#endif
#endif
#ifdef HAS_XPUPTI
#include "plugin/xpupti/XpuptiActivityProfiler.h"
Expand Down Expand Up @@ -214,9 +219,15 @@ void CuptiActivityProfiler::transferCpuTrace(
}

#ifdef HAS_ROCTRACER
#ifndef ROCTRACER_FALLBACK
CuptiActivityProfiler::CuptiActivityProfiler(
RocprofActivityApi& cupti,
bool cpuOnly)
#else
CuptiActivityProfiler::CuptiActivityProfiler(
RoctracerActivityApi& cupti,
bool cpuOnly)
#endif
#else
CuptiActivityProfiler::CuptiActivityProfiler(
CuptiActivityApi& cupti,
Expand Down Expand Up @@ -256,23 +267,23 @@ void CuptiActivityProfiler::logGpuVersions() {
addVersionMetadata("cuda_driver_version", std::to_string(cudaDriverVersion));

#elif defined(HAS_ROCTRACER)
uint32_t majorVersion = roctracer_version_major();
uint32_t minorVersion = roctracer_version_minor();
uint32_t majorVersion = ROCPROFILER_VERSION_MAJOR;
uint32_t minorVersion = ROCPROFILER_VERSION_MINOR;
std::string roctracerVersion =
std::to_string(majorVersion) + "." + std::to_string(minorVersion);
int hipRuntimeVersion = 0, hipDriverVersion = 0;
CUDA_CALL(hipRuntimeGetVersion(&hipRuntimeVersion));
CUDA_CALL(hipDriverGetVersion(&hipDriverVersion));
LOG(INFO) << "HIP versions. Roctracer: " << roctracerVersion
LOG(INFO) << "HIP versions. Rocprofiler-sdk: " << roctracerVersion
<< "; Runtime: " << hipRuntimeVersion
<< "; Driver: " << hipDriverVersion;

LOGGER_OBSERVER_ADD_METADATA("roctracer_version", roctracerVersion);
LOGGER_OBSERVER_ADD_METADATA("rocprofiler-sdk_version", roctracerVersion);
LOGGER_OBSERVER_ADD_METADATA(
"hip_runtime_version", std::to_string(hipRuntimeVersion));
LOGGER_OBSERVER_ADD_METADATA(
"hip_driver_version", std::to_string(hipDriverVersion));
addVersionMetadata("roctracer_version", roctracerVersion);
addVersionMetadata("rocprofiler-sdk_version", roctracerVersion);
addVersionMetadata("hip_runtime_version", std::to_string(hipRuntimeVersion));
addVersionMetadata("hip_driver_version", std::to_string(hipDriverVersion));

Expand Down Expand Up @@ -372,7 +383,7 @@ void CuptiActivityProfiler::processTraceInternal(ActivityLogger& logger) {
VLOG(0) << "Retrieving GPU activity buffers";
const int count = cupti_.processActivities(
std::bind(
&CuptiActivityProfiler::handleRoctracerActivity,
&CuptiActivityProfiler::handleRocprofActivity,
this,
std::placeholders::_1,
&logger),
Expand Down Expand Up @@ -480,10 +491,10 @@ inline void CuptiActivityProfiler::handleCorrelationActivity(
inline void CuptiActivityProfiler::handleCorrelationActivity(
uint64_t correlationId,
uint64_t externalId,
RoctracerLogger::CorrelationDomain externalKind) {
if (externalKind == RoctracerLogger::CorrelationDomain::Domain0) {
RocLogger::CorrelationDomain externalKind) {
if (externalKind == RocLogger::CorrelationDomain::Domain0) {
cpuCorrelationMap_[correlationId] = externalId;
} else if (externalKind == RoctracerLogger::CorrelationDomain::Domain1) {
} else if (externalKind == RocLogger::CorrelationDomain::Domain1) {
userCorrelationMap_[correlationId] = externalId;
} else {
LOG(WARNING)
Expand Down Expand Up @@ -960,37 +971,37 @@ void CuptiActivityProfiler::handleRuntimeActivity(
}

inline void CuptiActivityProfiler::handleGpuActivity(
const roctracerAsyncRow* act,
const rocprofAsyncRow* act,
ActivityLogger* logger) {
const ITraceActivity* linked = linkedActivity(act->id, cpuCorrelationMap_);
const auto& gpu_activity =
traceBuffers_->addActivityWrapper(GpuActivity(act, linked));
handleGpuActivity(gpu_activity, logger);
}

void CuptiActivityProfiler::handleRoctracerActivity(
const roctracerBase* record,
void CuptiActivityProfiler::handleRocprofActivity(
const rocprofBase* record,
ActivityLogger* logger) {
switch (record->type) {
case ROCTRACER_ACTIVITY_DEFAULT:
handleRuntimeActivity(
reinterpret_cast<const roctracerRow*>(record), logger);
reinterpret_cast<const rocprofRow*>(record), logger);
break;
case ROCTRACER_ACTIVITY_KERNEL:
handleRuntimeActivity(
reinterpret_cast<const roctracerKernelRow*>(record), logger);
reinterpret_cast<const rocprofKernelRow*>(record), logger);
break;
case ROCTRACER_ACTIVITY_COPY:
handleRuntimeActivity(
reinterpret_cast<const roctracerCopyRow*>(record), logger);
reinterpret_cast<const rocprofCopyRow*>(record), logger);
break;
case ROCTRACER_ACTIVITY_MALLOC:
handleRuntimeActivity(
reinterpret_cast<const roctracerMallocRow*>(record), logger);
reinterpret_cast<const rocprofMallocRow*>(record), logger);
break;
case ROCTRACER_ACTIVITY_ASYNC:
handleGpuActivity(
reinterpret_cast<const roctracerAsyncRow*>(record), logger);
reinterpret_cast<const rocprofAsyncRow*>(record), logger);
break;
case ROCTRACER_ACTIVITY_NONE:
default:
Expand Down Expand Up @@ -1571,8 +1582,13 @@ void CuptiActivityProfiler::pushCorrelationId(uint64_t id) {
id, CuptiActivityApi::CorrelationFlowType::Default);
#endif // HAS_CUPTI
#ifdef HAS_ROCTRACER
#ifndef ROCTRACER_FALLBACK
RocprofActivityApi::pushCorrelationID(
id, RocprofActivityApi::CorrelationFlowType::Default);
#else
RoctracerActivityApi::pushCorrelationID(
id, RoctracerActivityApi::CorrelationFlowType::Default);
#endif
#endif
for (auto& session : sessions_) {
session->pushCorrelationId(id);
Expand All @@ -1585,8 +1601,13 @@ void CuptiActivityProfiler::popCorrelationId() {
CuptiActivityApi::CorrelationFlowType::Default);
#endif // HAS_CUPTI
#ifdef HAS_ROCTRACER
#ifndef ROCTRACER_FALLBACK
RocprofActivityApi::popCorrelationID(
RocprofActivityApi::CorrelationFlowType::Default);
#else
RoctracerActivityApi::popCorrelationID(
RoctracerActivityApi::CorrelationFlowType::Default);
#endif
#endif
for (auto& session : sessions_) {
session->popCorrelationId();
Expand All @@ -1599,8 +1620,13 @@ void CuptiActivityProfiler::pushUserCorrelationId(uint64_t id) {
id, CuptiActivityApi::CorrelationFlowType::User);
#endif // HAS_CUPTI
#ifdef HAS_ROCTRACER
#ifndef ROCTRACER_FALLBACK
RocprofActivityApi::pushCorrelationID(
id, RocprofActivityApi::CorrelationFlowType::User);
#else
RoctracerActivityApi::pushCorrelationID(
id, RoctracerActivityApi::CorrelationFlowType::User);
#endif
#endif
for (auto& session : sessions_) {
session->pushUserCorrelationId(id);
Expand All @@ -1613,8 +1639,13 @@ void CuptiActivityProfiler::popUserCorrelationId() {
CuptiActivityApi::CorrelationFlowType::User);
#endif // HAS_CUPTI
#ifdef HAS_ROCTRACER
#ifndef ROCTRACER_FALLBACK
RocprofActivityApi::popCorrelationID(
RocprofActivityApi::CorrelationFlowType::User);
#else
RoctracerActivityApi::popCorrelationID(
RoctracerActivityApi::CorrelationFlowType::User);
#endif
#endif
for (auto& session : sessions_) {
session->popUserCorrelationId();
Expand Down
26 changes: 16 additions & 10 deletions libkineto/src/CuptiActivityProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@
#endif // HAS_CUPTI

#ifdef HAS_ROCTRACER
#ifndef ROCTRACER_FALLBACK
#include "RocprofLogger.h"
#else
#include "RoctracerLogger.h"
#endif
#endif // HAS_ROCTRACER

#include "GenericTraceActivity.h"
Expand All @@ -45,6 +49,7 @@ namespace KINETO_NAMESPACE {

class Config;
class CuptiActivityApi;
class RocprofActivityApi;
class RoctracerActivityApi;

// This struct is a derived snapshot of the Config. And should not
Expand Down Expand Up @@ -121,7 +126,8 @@ inline size_t hash_combine(size_t seed, size_t value) {
class CuptiActivityProfiler {
public:
CuptiActivityProfiler(CuptiActivityApi& cupti, bool cpuOnly);
CuptiActivityProfiler(RoctracerActivityApi& rai, bool cpuOnly);
CuptiActivityProfiler(RocprofActivityApi& rai, bool cpuOnly);
CuptiActivityProfiler(RoctracerActivityApi& rtai, bool cpuOnly);
CuptiActivityProfiler(const CuptiActivityProfiler&) = delete;
CuptiActivityProfiler& operator=(const CuptiActivityProfiler&) = delete;
~CuptiActivityProfiler();
Expand Down Expand Up @@ -411,20 +417,16 @@ class CuptiActivityProfiler {
#endif // HAS_CUPTI

#ifdef HAS_ROCTRACER
// Process generic RocTracer activity
void handleRoctracerActivity(
const roctracerBase* record,
ActivityLogger* logger);
// Process generic RocProf activity
void handleRocprofActivity(const rocprofBase* record, ActivityLogger* logger);
void handleCorrelationActivity(
uint64_t correlationId,
uint64_t externalId,
RoctracerLogger::CorrelationDomain externalKind);
RocLogger::CorrelationDomain externalKind);
// Process specific GPU activity types
template <class T>
void handleRuntimeActivity(const T* activity, ActivityLogger* logger);
void handleGpuActivity(
const roctracerAsyncRow* record,
ActivityLogger* logger);
void handleGpuActivity(const rocprofAsyncRow* record, ActivityLogger* logger);
#endif // HAS_ROCTRACER

void resetTraceData();
Expand Down Expand Up @@ -457,7 +459,11 @@ class CuptiActivityProfiler {

// Calls to CUPTI is encapsulated behind this interface
#ifdef HAS_ROCTRACER
RoctracerActivityApi& cupti_; // Design failure here
#ifndef ROCTRACER_FALLBACK
RocprofActivityApi& cupti_; // Design failure here
#else
RoctracerActivityApi& cupti_;
#endif
#else
CuptiActivityApi& cupti_;
#endif
Expand Down
Loading