diff --git a/src/components/amd_smi/README.md b/src/components/amd_smi/README.md new file mode 100644 index 000000000..a0e035924 --- /dev/null +++ b/src/components/amd_smi/README.md @@ -0,0 +1,94 @@ +# AMD_SMI Component + +The **AMD_SMI** (AMD System Management Interface) component exposes hardware +management counters (and selected controls) for AMD GPUs — e.g., power usage, +temperatures, clocks, PCIe link metrics, VRAM information, and RAS/ECC status — +by querying the AMD SMI library at runtime (ROCm ≥ 6.3.4). + +- [Environment Variables](#environment-variables) +- [Enabling the AMD_SMI Component](#enabling-the-amd_smi-component) + +--- + +## Environment Variables + +For AMD_SMI, PAPI requires the environment variable `PAPI_AMDSMI_ROOT` to be set +so that the AMD SMI shared library and headers can be found. This variable is +required at both **compile** and **run** time. + +There is a single case to consider (AMD SMI is available on ROCm ≥ 6.0): + +1. **For ROCm versions 6.0 and newer:** + Set `PAPI_AMDSMI_ROOT` to the top-level ROCm directory. For example: + + ```bash + export PAPI_AMDSMI_ROOT=/opt/rocm-6.4.0 + # or + export PAPI_AMDSMI_ROOT=/opt/rocm + ``` + +The directory specified by `PAPI_AMDSMI_ROOT` **must contain** the following +subdirectories: + +- `PAPI_AMDSMI_ROOT/lib` (which should include the dynamic library `libamd_smi.so`) +- `PAPI_AMDSMI_ROOT/include/amd_smi` (AMD SMI headers) + +If the library is not found or is not functional at runtime, the component will +appear as "disabled" in `papi_component_avail`, with a message describing the +problem (e.g., library not found). + +--- + +## Enabling the AMD_SMI Component + +To enable reading (and where supported, writing) of AMD_SMI counters, build +PAPI with this component enabled. 
For example: + +```bash +./configure --with-components="amd_smi" +make +``` + +You can verify availability with the utilities in `papi/src/utils/`: + +```bash +papi_component_avail # shows enabled/disabled components +papi_native_avail -i amd_smi # lists native events for this component +``` + +--- + +## File-by-file Summary + +- **`linux-amd-smi.c`** + Declares the `papi_vector_t` for this component; initializes on first use; hands off work to `amds_*` for device/event management; implements PAPI hooks (`init_component`, `update_control_state`, `start`, `read`, `stop`, `reset`, `shutdown`, and native-event queries). + +- **`amds.c`** + Dynamically loads `libamd_smi.so`, resolves AMD SMI symbols, discovers sockets/devices, and **builds the native event table**. Defines helpers to add simple and counter-based events. Manages global teardown (destroy event table, close library). + +- **`amds_accessors.c`** + Implements the **accessors** that read/write individual metrics (e.g., temperatures, fans, PCIe, energy, power caps, RAS/ECC, clocks, VRAM, link topology, XGMI/PCIe metrics, firmware/board info, etc.). Each accessor maps an event’s `(variant, subvariant)` to the right SMI call and returns the value. + +- **`amds_ctx.c`** + Provides the **per-eventset context**: + - `amds_ctx_open/close` — acquire/release devices, run per-event open/close hooks. + - `amds_ctx_start/stop` — start/stop counters where needed. + - `amds_ctx_read/write/reset` — read current values, optionally write supported controls (e.g., power cap), zero software view. + +- **`amds_evtapi.c`** + Implements native-event enumeration for PAPI (`enum`, `code_to_name`, `name_to_code`, `code_to_descr`) using the in-memory event table and a small hash map for fast lookups. + +- **`amds_priv.h`** + Internal definitions: `native_event_t` (name/descr/device/mode/value + open/close/start/stop/access callbacks), global getters, and the AMD SMI function-pointer declarations (via `amds_funcs.h`). 
+ +- **`amds_funcs.h`** + Centralized macro list of **AMD SMI APIs** used by the component; generates function-pointer declarations/definitions so `amds.c` can `dlsym()` them at runtime. Conditional entries handle newer SMI features. + +- **`htable.h`** + Minimal chained hash table for **name→event** mapping; used by `amds_evtapi.c` to resolve native event names quickly. + +- **`amds.h`** + Public, component-internal API across files: init/shutdown, native-event queries, context ops, and error-string retrieval. + +- **`Rules.amd_smi`** + Build integration for PAPI’s make system; compiles this component and sets include/library paths for AMD SMI. diff --git a/src/components/amd_smi/Rules.amd_smi b/src/components/amd_smi/Rules.amd_smi new file mode 100644 index 000000000..2d493b2c0 --- /dev/null +++ b/src/components/amd_smi/Rules.amd_smi @@ -0,0 +1,111 @@ +# Set default if the root environment variable is not already set. +# Note PAPI_AMDSMI_ROOT is an environment variable that must be set. +# It is the only variable the README requires, and it is needed at both +# compile time and run time; see the README file. + +PAPI_AMDSMI_ROOT ?= /opt/rocm + +# There is one library used by the AMD_SMI component: libamd_smi.so +# By default, the software tries to find this in system paths, including +# those listed in the environment variable LD_LIBRARY_PATH. If not found +# there it looks in $(PAPI_AMDSMI_ROOT)/lib/libamd_smi.so + +# However, this can be overridden by exporting PAPI_AMD_SMI_LIB as +# something else. It would still need to be a full path and library name. +# If it is exported, it must work or the component will be disabled. e.g. +# export PAPI_AMD_SMI_LIB=$(PAPI_AMDSMI_ROOT)/lib/libamd_smi.so +# This allows users to overcome non-standard ROCM installs or specify +# a specific version of the libamd_smi.so library. + +# PAPI_AMDSMI_ROOT is used at both compile time and run time. + +# There are many ways to cause this path to be known. 
Spack is a package +# manager used on supercomputers, Linux and MacOS. If Spack is aware of ROCM, +# it encodes the paths to the necessary libraries. + +# The environment variable LD_LIBRARY_PATH encodes a list of paths to +# search for libraries; separated by a colon (:). New paths can be +# added to LD_LIBRARY_PATH. +# +# Warning: LD_LIBRARY_PATH often contains directories that apply to other +# installed packages you may be using. Always extend the existing value of +# LD_LIBRARY_PATH rather than replacing it; for example: + +# >export LD_LIBRARY_PATH=someNewLibraryDirectory:$LD_LIBRARY_PATH which would +# put the new directory in front, so it is searched before the existing paths. +# Alternatively, you can add it at the end: +# >export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:someNewLibraryDirectory which will +# search the existing libraries first, then your new directory. + +# You can check on the value of LD_LIBRARY_PATH with +# >echo $LD_LIBRARY_PATH + +# There may be other package managers or utilities, for example on a system +# with modules; the command 'module load rocm' may modify LD_LIBRARY_PATH. + +# A Linux system will also search for libraries by default in the directories +# listed by /etc/ld.so.conf, and /usr/lib64, /lib64, /usr/lib, /lib. + +# Note: If you change the exports, PAPI should be rebuilt from scratch; see +# note below. + +# Note: AMD_SMI is typically provided with the ROCM libraries, but in PAPI +# ROCM and AMD_SMI are treated as separate components, and must be given +# separately on the configure option --with-components. e.g. + +# From within the papi/src/ directory: +# make clobber +# ./configure --with-components="amd_smi" +# make + +# An alternative, for both rocm and amd_smi components: +# ./configure --with-components="rocm amd_smi" + +# OPERATION, per library: +# 1) If an override is not empty, we will use it explicitly and fail if it +# does not work. 
This means disabling the component; a reason for disabling +# is shown using the papi utility, papi/src/utils/papi_component_avail + +# 2) We will attempt to open the library using the normal system library search +# paths; if Spack is present and configured correctly it should deliver the +# proper library. A failure here will be silent; we will proceed to (3). + +# 3) If that fails, we will try to find the library in the standard installed +# locations listed above. If this fails, we disable the component, the reason +# for disabling is shown using the papi utility, +# papi/src/utils/papi_component_avail. + +COMPSRCS += components/amd_smi/amds.c \ + components/amd_smi/linux-amd-smi.c \ + components/amd_smi/amds_accessors.c \ + components/amd_smi/amds_evtapi.c \ + components/amd_smi/amds_ctx.c +COMPOBJS += amds.o \ + linux-amd-smi.o \ + amds_accessors.o \ + amds_evtapi.o \ + amds_ctx.o + +# CFLAGS specifies compile flags; need include files here, and macro defines. +# Where to find amd_smi.h varied in early ROCM releases. If it changes again, +# for backward compatibility add *more* -I paths, do not just replace this one. 
+ +CFLAGS += -I$(PAPI_AMDSMI_ROOT)/include/amd_smi +CFLAGS += -I$(PAPI_AMDSMI_ROOT)/include +CFLAGS += -g +LDFLAGS += $(LDL) -g + +linux-amd-smi.o: components/amd_smi/linux-amd-smi.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/amd_smi/linux-amd-smi.c -o linux-amd-smi.o + +amds.o: components/amd_smi/amds.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/amd_smi/amds.c -o amds.o + +amds_accessors.o: components/amd_smi/amds_accessors.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/amd_smi/amds_accessors.c -o amds_accessors.o + +amds_evtapi.o: components/amd_smi/amds_evtapi.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/amd_smi/amds_evtapi.c -o amds_evtapi.o + +amds_ctx.o: components/amd_smi/amds_ctx.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/amd_smi/amds_ctx.c -o amds_ctx.o diff --git a/src/components/amd_smi/amds.c b/src/components/amd_smi/amds.c new file mode 100644 index 000000000..de67d69e4 --- /dev/null +++ b/src/components/amd_smi/amds.c @@ -0,0 +1,3515 @@ +/** + * @file amds.c + * @author Dong Jun Woun + * djwoun@gmail.com + * + */ + +#include "amds.h" +#define AMDS_PRIV_IMPL +#include "amds_priv.h" +#include +#include "htable.h" +#include "papi.h" +#include "papi_memory.h" +#include +#include +#include +#include +#include +#include +#include +#define MAX_EVENTS_PER_DEVICE 1024 + +// Pointers to AMD SMI library functions (dynamically loaded) +#include "amds_funcs.h" +#define DEFINE_AMDSMI(name, ret, args) ret(*name) args; +AMD_SMI_GPU_FUNCTIONS(DEFINE_AMDSMI) +#ifndef AMDSMI_DISABLE_ESMI +AMD_SMI_CPU_FUNCTIONS(DEFINE_AMDSMI) +#endif +#undef DEFINE_AMDSMI +// Global device list and count +static int32_t device_count = 0; +static amdsmi_processor_handle *device_handles = NULL; +static int32_t gpu_count = 0; +static int32_t cpu_count = 0; +static amdsmi_processor_handle **cpu_core_handles = NULL; +static uint32_t *cores_per_socket = NULL; +static void *amds_dlp = NULL; +static void *htable = NULL; 
+static char error_string[PAPI_MAX_STR_LEN + 1]; // last error text; written by load_amdsmi_sym() and amds_init()
+static uint32_t amdsmi_lib_major = 0; // filled in amds_init() when amdsmi_get_lib_version succeeds
+static uint32_t amdsmi_lib_minor = 0;
+// Forward declarations for internal helpers
+static int load_amdsmi_sym(void);
+static int init_device_table(void);
+static int shutdown_device_table(void);
+static int init_event_table(void);
+static int shutdown_event_table(void);
+static native_event_table_t ntv_table;
+// Handed out by amds_get_ntv_table(). amds_init() points it at ntv_table only
+// after init_event_table() has succeeded; until then it stays NULL.
+static native_event_table_t *ntv_table_p = NULL;
+
+/* Internal state accessors */
+// Each accessor returns the current value of one file-static variable so the
+// other translation units of the component can read it. The pointer-returning
+// ones expose the internal arrays themselves, not copies.
+int32_t amds_get_device_count(void) { return device_count; }
+amdsmi_processor_handle *amds_get_device_handles(void) { return device_handles; }
+int32_t amds_get_gpu_count(void) { return gpu_count; }
+int32_t amds_get_cpu_count(void) { return cpu_count; }
+amdsmi_processor_handle **amds_get_cpu_core_handles(void) {
+  return cpu_core_handles;
+}
+uint32_t *amds_get_cores_per_socket(void) { return cores_per_socket; }
+native_event_table_t *amds_get_ntv_table(void) { return ntv_table_p; }
+void *amds_get_htable(void) { return htable; }
+uint32_t amds_get_lib_major(void) { return amdsmi_lib_major; }
+
+// Bounds guard for code that fills ntv_table.events: when index i is at or
+// beyond MAX_EVENTS_PER_DEVICE * device_count it frees ntv_table.events and
+// makes the *enclosing function* return PAPI_ENOSUPP.
+// NOTE(review): the freed pointer is not reset to NULL and the per-event
+// name/descr strings are not released here -- confirm nothing touches
+// ntv_table.events after this guard fires.
+#define CHECK_EVENT_IDX(i) \
+  do { \
+    if ((i) >= MAX_EVENTS_PER_DEVICE * device_count) { \
+      papi_free(ntv_table.events); \
+      return PAPI_ENOSUPP; \
+    } \
+  } while (0)
+// Temporarily redirects stderr to /dev/null; returns dup of original fd (or -1 on failure)
+// The returned descriptor is meant to be passed to silence_stderr_end(), which
+// restores stderr and closes it. A failing dup2() is deliberately ignored: in
+// that case stderr simply is not silenced.
+static int silence_stderr_begin(void) {
+  int devnull = open("/dev/null", O_WRONLY);
+  if (devnull < 0)
+    return -1;
+  int saved = dup(STDERR_FILENO);
+  if (saved < 0) {
+    close(devnull);
+    return -1;
+  }
+  (void)dup2(devnull, STDERR_FILENO);
+  close(devnull); // stderr now refers to /dev/null; the extra descriptor is not needed
+  return saved;
+}
+
+// Restores stderr using the fd returned by silence_stderr_begin()
+// A negative saved_fd (silence_stderr_begin() failed) makes this a no-op.
+static void silence_stderr_end(int saved_fd) {
+  if (saved_fd >= 0) {
+    (void)dup2(saved_fd, STDERR_FILENO);
+    close(saved_fd);
+  }
+}
+// Simple open/close/start/stop functions (no special handling needed for most events)
+static int open_simple(native_event_t *event) {
+  (void)event;
+ 
return PAPI_OK;
+}
+// No-op close hook: ignores the event and reports success.
+static int close_simple(native_event_t *event) {
+  (void)event;
+  return PAPI_OK;
+}
+// No-op start hook: ignores the event and reports success.
+static int start_simple(native_event_t *event) {
+  (void)event;
+  return PAPI_OK;
+}
+// No-op stop hook: ignores the event and reports success.
+static int stop_simple(native_event_t *event) {
+  (void)event;
+  return PAPI_OK;
+}
+
+// Per-event private state for counter-based events, stored in event->priv.
+typedef struct {
+  amdsmi_event_handle_t handle; // handle filled in by amdsmi_gpu_create_counter
+  uint64_t accum;               // running total; zeroed in start_counter()
+} counter_priv_t;
+
+// Open hook for counter-based events: allocates a counter_priv_t, creates the
+// AMD SMI counter for device_handles[event->device] using event->variant as
+// the event type, and stores the state in event->priv.
+// Returns PAPI_OK on success, PAPI_ENOMEM if the allocation fails, and
+// PAPI_ENOSUPP if the create symbol was not resolved or the create call fails.
+// NOTE(review): event->device is used as an index without a range check here;
+// presumably it was validated when the event table was built -- confirm.
+static int open_counter(native_event_t *event) {
+  if (!amdsmi_gpu_create_counter_p)
+    return PAPI_ENOSUPP;
+  counter_priv_t *priv = (counter_priv_t *)papi_calloc(1, sizeof(counter_priv_t));
+  if (!priv)
+    return PAPI_ENOMEM;
+  amdsmi_status_t status = amdsmi_gpu_create_counter_p(
+      device_handles[event->device], (amdsmi_event_type_t)event->variant,
+      &priv->handle);
+  if (status != AMDSMI_STATUS_SUCCESS) {
+    papi_free(priv); // do not leak the private state when creation fails
+    return PAPI_ENOSUPP;
+  }
+  event->priv = priv;
+  return PAPI_OK;
+}
+
+// Close hook for counter-based events: destroys the AMD SMI counter (when the
+// destroy symbol is available), frees the private state and clears
+// event->priv. Always returns PAPI_OK; a NULL event->priv is a no-op.
+static int close_counter(native_event_t *event) {
+  counter_priv_t *priv = (counter_priv_t *)event->priv;
+  if (priv) {
+    if (amdsmi_gpu_destroy_counter_p)
+      amdsmi_gpu_destroy_counter_p(priv->handle);
+    papi_free(priv);
+    event->priv = NULL;
+  }
+  return PAPI_OK;
+}
+
+// Start hook for counter-based events: resets the accumulated value and sends
+// AMDSMI_CNTR_CMD_START. Returns PAPI_ENOSUPP when the event was never opened,
+// the control symbol is missing, or the library reports an error.
+static int start_counter(native_event_t *event) {
+  counter_priv_t *priv = (counter_priv_t *)event->priv;
+  if (!priv || !amdsmi_gpu_control_counter_p)
+    return PAPI_ENOSUPP;
+  priv->accum = 0;
+  amdsmi_status_t status = amdsmi_gpu_control_counter_p(
+      priv->handle, AMDSMI_CNTR_CMD_START, NULL);
+  return (status == AMDSMI_STATUS_SUCCESS) ? PAPI_OK : PAPI_ENOSUPP;
+}
+
+// Stop hook for counter-based events: sends AMDSMI_CNTR_CMD_STOP. Same error
+// convention as start_counter(); the accumulated value is left untouched.
+static int stop_counter(native_event_t *event) {
+  counter_priv_t *priv = (counter_priv_t *)event->priv;
+  if (!priv || !amdsmi_gpu_control_counter_p)
+    return PAPI_ENOSUPP;
+  amdsmi_status_t status =
+      amdsmi_gpu_control_counter_p(priv->handle, AMDSMI_CNTR_CMD_STOP, NULL);
+  return (status == AMDSMI_STATUS_SUCCESS) ? 
PAPI_OK : PAPI_ENOSUPP;
+}
+
+// Accessor for counter-based events. Only PAPI_MODE_READ is supported: it
+// reads the hardware counter, adds the sample to the running total kept in the
+// event's counter_priv_t and publishes that total in event->value.
+// arg is the native_event_t the accessor was registered for.
+// Returns PAPI_OK, or PAPI_ENOSUPP for any other mode, an unopened event, a
+// missing read symbol, or a failed library call.
+static int access_amdsmi_gpu_counter(int mode, void *arg) {
+  if (mode != PAPI_MODE_READ)
+    return PAPI_ENOSUPP;
+  native_event_t *event = (native_event_t *)arg;
+  counter_priv_t *priv = (counter_priv_t *)event->priv;
+  if (!priv || !amdsmi_gpu_read_counter_p)
+    return PAPI_ENOSUPP;
+  amdsmi_counter_value_t val;
+  if (amdsmi_gpu_read_counter_p(priv->handle, &val) != AMDSMI_STATUS_SUCCESS)
+    return PAPI_ENOSUPP;
+  priv->accum += val.value;
+  event->value = priv->accum;
+  return PAPI_OK;
+}
+
+// Replace any non-alphanumeric characters with '_' to build safe event names
+// Copies at most len - 1 characters of src into dst and always NUL-terminates
+// dst when there is room for the terminator.
+//   src: NUL-terminated input; NULL is treated as an empty string.
+//   dst: output buffer of len bytes; nothing is written if it is NULL.
+//   len: size of dst in bytes; 0 means "no room", so nothing is written.
+static void sanitize_name(const char *src, char *dst, size_t len) {
+  // size_t is unsigned: with len == 0 the old bound `len - 1` wrapped to
+  // SIZE_MAX, so the copy loop and the terminator wrote outside dst.
+  if (!dst || len == 0)
+    return;
+  size_t j = 0;
+  if (src) {
+    // `j + 1 < len` keeps one byte for the terminator without subtracting.
+    for (size_t i = 0; src[i] && j + 1 < len; ++i) {
+      char c = src[i];
+      if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+          (c >= '0' && c <= '9'))
+        dst[j++] = c;
+      else
+        dst[j++] = '_';
+    }
+  }
+  dst[j] = '\0';
+}
+
+// Dynamic load of AMD SMI library symbols
+// sym() looks up `preferred` in the loaded library and, if it is absent and a
+// `fallback` name is given, tries that one. Returns NULL when neither exists.
+static void *sym(const char *preferred, const char *fallback) {
+  void *p = dlsym(amds_dlp, preferred);
+  return p ? p : (fallback ? 
dlsym(amds_dlp, fallback) : NULL); +} +static int load_amdsmi_sym(void) { + const char *root = getenv("PAPI_AMDSMI_ROOT"); + char so_path[PATH_MAX] = {0}; + if (!root) { + snprintf(error_string, sizeof(error_string), + "PAPI_AMDSMI_ROOT not set; cannot find libamd_smi.so"); + return PAPI_ENOSUPP; + } + snprintf(so_path, sizeof(so_path), "%s/lib/libamd_smi.so", root); + amds_dlp = dlopen(so_path, RTLD_NOW | RTLD_GLOBAL); + if (!amds_dlp) { + snprintf(error_string, sizeof(error_string), "dlopen(\"%s\"): %s", so_path, + dlerror()); + return PAPI_ENOSUPP; + } + // Resolve required function symbols + amdsmi_init_p = sym("amdsmi_init", NULL); + amdsmi_shut_down_p = sym("amdsmi_shut_down", NULL); + amdsmi_get_socket_handles_p = sym("amdsmi_get_socket_handles", NULL); + amdsmi_get_processor_handles_by_type_p = + sym("amdsmi_get_processor_handles_by_type", NULL); + amdsmi_get_processor_handles_p = + sym("amdsmi_get_processor_handles", NULL); + amdsmi_get_processor_info_p = + sym("amdsmi_get_processor_info", NULL); + amdsmi_get_processor_type_p = + sym("amdsmi_get_processor_type", NULL); + amdsmi_get_socket_info_p = sym("amdsmi_get_socket_info", NULL); + // Sensors + amdsmi_get_temp_metric_p = sym("amdsmi_get_temp_metric", NULL); + amdsmi_get_gpu_fan_rpms_p = sym("amdsmi_get_gpu_fan_rpms", NULL); + amdsmi_get_gpu_fan_speed_p = sym("amdsmi_get_gpu_fan_speed", NULL); + amdsmi_get_gpu_fan_speed_max_p = sym("amdsmi_get_gpu_fan_speed_max", NULL); + // Memory + amdsmi_get_total_memory_p = + sym("amdsmi_get_gpu_memory_total", "amdsmi_get_total_memory"); + amdsmi_get_memory_usage_p = + sym("amdsmi_get_gpu_memory_usage", "amdsmi_get_memory_usage"); + // Utilization / activity + amdsmi_get_gpu_activity_p = + sym("amdsmi_get_gpu_activity", "amdsmi_get_engine_usage"); + amdsmi_get_utilization_count_p = + sym("amdsmi_get_utilization_count", NULL); + // Power + amdsmi_get_power_info_p = sym("amdsmi_get_power_info", NULL); + amdsmi_get_power_cap_info_p = sym("amdsmi_get_power_cap_info", 
NULL); + amdsmi_set_power_cap_p = + sym("amdsmi_set_power_cap", "amdsmi_dev_set_power_cap"); + // PCIe + amdsmi_get_gpu_pci_throughput_p = sym("amdsmi_get_gpu_pci_throughput", NULL); + amdsmi_get_gpu_pci_replay_counter_p = + sym("amdsmi_get_gpu_pci_replay_counter", NULL); + // Clocks + amdsmi_get_clk_freq_p = sym("amdsmi_get_clk_freq", NULL); + amdsmi_get_clock_info_p = sym("amdsmi_get_clock_info", NULL); + amdsmi_set_clk_freq_p = sym("amdsmi_set_clk_freq", NULL); + // GPU metrics + amdsmi_get_gpu_metrics_info_p = sym("amdsmi_get_gpu_metrics_info", NULL); + // Identification and other queries + amdsmi_get_gpu_id_p = sym("amdsmi_get_gpu_id", NULL); + amdsmi_get_gpu_revision_p = sym("amdsmi_get_gpu_revision", NULL); + amdsmi_get_gpu_subsystem_id_p = sym("amdsmi_get_gpu_subsystem_id", NULL); +#if AMDSMI_LIB_VERSION_MAJOR >= 25 + amdsmi_get_gpu_virtualization_mode_p = + sym("amdsmi_get_gpu_virtualization_mode", NULL); +#endif + amdsmi_get_gpu_process_isolation_p = + sym("amdsmi_get_gpu_process_isolation", NULL); + amdsmi_get_gpu_xcd_counter_p = sym("amdsmi_get_gpu_xcd_counter", NULL); + amdsmi_get_gpu_pci_bandwidth_p = sym("amdsmi_get_gpu_pci_bandwidth", NULL); + amdsmi_get_gpu_bdf_id_p = sym("amdsmi_get_gpu_bdf_id", NULL); + amdsmi_get_gpu_topo_numa_affinity_p = + sym("amdsmi_get_gpu_topo_numa_affinity", NULL); + amdsmi_get_energy_count_p = sym("amdsmi_get_energy_count", NULL); + amdsmi_get_gpu_power_profile_presets_p = + sym("amdsmi_get_gpu_power_profile_presets", NULL); + amdsmi_get_violation_status_p = + sym("amdsmi_get_violation_status", NULL); + // Additional read-only queries + amdsmi_get_lib_version_p = sym("amdsmi_get_lib_version", NULL); + amdsmi_get_gpu_driver_info_p = sym("amdsmi_get_gpu_driver_info", NULL); + amdsmi_get_gpu_asic_info_p = sym("amdsmi_get_gpu_asic_info", NULL); + amdsmi_get_gpu_board_info_p = sym("amdsmi_get_gpu_board_info", NULL); + amdsmi_get_fw_info_p = sym("amdsmi_get_fw_info", NULL); + amdsmi_get_gpu_vbios_info_p = 
sym("amdsmi_get_gpu_vbios_info", NULL); + amdsmi_get_gpu_device_uuid_p = sym("amdsmi_get_gpu_device_uuid", NULL); +#if AMDSMI_LIB_VERSION_MAJOR >= 25 + amdsmi_get_gpu_enumeration_info_p = + sym("amdsmi_get_gpu_enumeration_info", NULL); +#endif + amdsmi_get_gpu_vendor_name_p = sym("amdsmi_get_gpu_vendor_name", NULL); + amdsmi_get_gpu_vram_vendor_p = sym("amdsmi_get_gpu_vram_vendor", NULL); + amdsmi_get_gpu_subsystem_name_p = sym("amdsmi_get_gpu_subsystem_name", NULL); + amdsmi_get_link_metrics_p = sym("amdsmi_get_link_metrics", NULL); + amdsmi_get_gpu_process_list_p = sym("amdsmi_get_gpu_process_list", NULL); + amdsmi_topo_get_numa_node_number_p = + sym("amdsmi_topo_get_numa_node_number", NULL); + amdsmi_topo_get_link_weight_p = sym("amdsmi_topo_get_link_weight", NULL); + amdsmi_topo_get_link_type_p = sym("amdsmi_topo_get_link_type", NULL); + amdsmi_topo_get_p2p_status_p = sym("amdsmi_topo_get_p2p_status", NULL); + amdsmi_is_P2P_accessible_p = sym("amdsmi_is_P2P_accessible", NULL); + amdsmi_get_link_topology_nearest_p = + sym("amdsmi_get_link_topology_nearest", NULL); + amdsmi_get_gpu_device_bdf_p = sym("amdsmi_get_gpu_device_bdf", NULL); + amdsmi_get_gpu_ecc_enabled_p = sym("amdsmi_get_gpu_ecc_enabled", NULL); + amdsmi_get_gpu_total_ecc_count_p = + sym("amdsmi_get_gpu_total_ecc_count", NULL); + amdsmi_get_gpu_ecc_count_p = sym("amdsmi_get_gpu_ecc_count", NULL); + amdsmi_get_gpu_ecc_status_p = sym("amdsmi_get_gpu_ecc_status", NULL); + amdsmi_get_gpu_compute_partition_p = + sym("amdsmi_get_gpu_compute_partition", NULL); + amdsmi_get_gpu_memory_partition_p = + sym("amdsmi_get_gpu_memory_partition", NULL); +#if AMDSMI_LIB_VERSION_MAJOR >= 25 + amdsmi_get_gpu_memory_partition_config_p = + sym("amdsmi_get_gpu_memory_partition_config", NULL); +#endif + amdsmi_is_gpu_memory_partition_supported_p = + sym("amdsmi_is_gpu_memory_partition_supported", NULL); + amdsmi_get_gpu_memory_reserved_pages_p = + sym("amdsmi_get_gpu_memory_reserved_pages", NULL); + 
amdsmi_get_gpu_kfd_info_p = sym("amdsmi_get_gpu_kfd_info", NULL); + amdsmi_get_gpu_metrics_header_info_p = + sym("amdsmi_get_gpu_metrics_header_info", NULL); +#if AMDSMI_LIB_VERSION_MAJOR >= 25 + amdsmi_get_gpu_xgmi_link_status_p = + sym("amdsmi_get_gpu_xgmi_link_status", NULL); +#endif + amdsmi_get_xgmi_info_p = sym("amdsmi_get_xgmi_info", NULL); + amdsmi_gpu_xgmi_error_status_p = + sym("amdsmi_gpu_xgmi_error_status", NULL); + amdsmi_get_gpu_accelerator_partition_profile_p = + sym("amdsmi_get_gpu_accelerator_partition_profile", NULL); + amdsmi_get_gpu_cache_info_p = sym("amdsmi_get_gpu_cache_info", NULL); + amdsmi_get_gpu_mem_overdrive_level_p = + sym("amdsmi_get_gpu_mem_overdrive_level", NULL); + amdsmi_get_gpu_od_volt_curve_regions_p = + sym("amdsmi_get_gpu_od_volt_curve_regions", NULL); + amdsmi_get_gpu_od_volt_info_p = sym("amdsmi_get_gpu_od_volt_info", NULL); + amdsmi_get_gpu_overdrive_level_p = + sym("amdsmi_get_gpu_overdrive_level", NULL); + amdsmi_get_gpu_perf_level_p = sym("amdsmi_get_gpu_perf_level", NULL); + amdsmi_get_gpu_pm_metrics_info_p = + sym("amdsmi_get_gpu_pm_metrics_info", NULL); + amdsmi_is_gpu_power_management_enabled_p = + sym("amdsmi_is_gpu_power_management_enabled", NULL); + amdsmi_get_gpu_ras_feature_info_p = + sym("amdsmi_get_gpu_ras_feature_info", NULL); + amdsmi_get_gpu_ras_block_features_enabled_p = + sym("amdsmi_get_gpu_ras_block_features_enabled", NULL); + amdsmi_gpu_validate_ras_eeprom_p = + sym("amdsmi_gpu_validate_ras_eeprom", NULL); + amdsmi_get_gpu_reg_table_info_p = sym("amdsmi_get_gpu_reg_table_info", NULL); + amdsmi_get_gpu_volt_metric_p = sym("amdsmi_get_gpu_volt_metric", NULL); + amdsmi_get_gpu_vram_info_p = sym("amdsmi_get_gpu_vram_info", NULL); + amdsmi_get_gpu_vram_usage_p = sym("amdsmi_get_gpu_vram_usage", NULL); + amdsmi_get_pcie_info_p = sym("amdsmi_get_pcie_info", NULL); + amdsmi_get_processor_count_from_handles_p = + sym("amdsmi_get_processor_count_from_handles", NULL); + amdsmi_get_soc_pstate_p = 
sym("amdsmi_get_soc_pstate", NULL); + amdsmi_get_xgmi_plpd_p = sym("amdsmi_get_xgmi_plpd", NULL); + amdsmi_get_gpu_bad_page_info_p = sym("amdsmi_get_gpu_bad_page_info", NULL); + amdsmi_get_gpu_bad_page_threshold_p = + sym("amdsmi_get_gpu_bad_page_threshold", NULL); + amdsmi_get_power_info_v2_p = sym("amdsmi_get_power_info_v2", NULL); + amdsmi_init_gpu_event_notification_p = + sym("amdsmi_init_gpu_event_notification", NULL); + amdsmi_set_gpu_event_notification_mask_p = + sym("amdsmi_set_gpu_event_notification_mask", NULL); + amdsmi_get_gpu_event_notification_p = + sym("amdsmi_get_gpu_event_notification", NULL); + amdsmi_stop_gpu_event_notification_p = + sym("amdsmi_stop_gpu_event_notification", NULL); + amdsmi_gpu_counter_group_supported_p = + sym("amdsmi_gpu_counter_group_supported", NULL); + amdsmi_get_gpu_available_counters_p = + sym("amdsmi_get_gpu_available_counters", NULL); + amdsmi_gpu_create_counter_p = + sym("amdsmi_gpu_create_counter", NULL); + amdsmi_gpu_control_counter_p = + sym("amdsmi_gpu_control_counter", NULL); + amdsmi_gpu_read_counter_p = sym("amdsmi_gpu_read_counter", NULL); + amdsmi_gpu_destroy_counter_p = + sym("amdsmi_gpu_destroy_counter", NULL); + amdsmi_get_minmax_bandwidth_between_processors_p = + sym("amdsmi_get_minmax_bandwidth_between_processors", NULL); +#ifndef AMDSMI_DISABLE_ESMI + /* CPU functions */ + amdsmi_get_cpu_handles_p = sym("amdsmi_get_cpu_handles", NULL); + amdsmi_get_cpucore_handles_p = sym("amdsmi_get_cpucore_handles", NULL); + amdsmi_get_cpu_socket_power_p = sym("amdsmi_get_cpu_socket_power", NULL); + amdsmi_get_cpu_socket_power_cap_p = + sym("amdsmi_get_cpu_socket_power_cap", NULL); + amdsmi_get_cpu_socket_power_cap_max_p = + sym("amdsmi_get_cpu_socket_power_cap_max", NULL); + amdsmi_get_cpu_core_energy_p = sym("amdsmi_get_cpu_core_energy", NULL); + amdsmi_get_cpu_socket_energy_p = sym("amdsmi_get_cpu_socket_energy", NULL); + amdsmi_get_cpu_smu_fw_version_p = sym("amdsmi_get_cpu_smu_fw_version", NULL); + 
amdsmi_get_threads_per_core_p = sym("amdsmi_get_threads_per_core", NULL); + amdsmi_get_cpu_family_p = sym("amdsmi_get_cpu_family", NULL); + amdsmi_get_cpu_model_p = sym("amdsmi_get_cpu_model", NULL); + amdsmi_get_cpu_core_boostlimit_p = + sym("amdsmi_get_cpu_core_boostlimit", NULL); + amdsmi_get_cpu_socket_current_active_freq_limit_p = + sym("amdsmi_get_cpu_socket_current_active_freq_limit", NULL); + amdsmi_get_cpu_socket_freq_range_p = + sym("amdsmi_get_cpu_socket_freq_range", NULL); + amdsmi_get_cpu_core_current_freq_limit_p = + sym("amdsmi_get_cpu_core_current_freq_limit", NULL); + amdsmi_get_cpu_cclk_limit_p = sym("amdsmi_get_cpu_cclk_limit", NULL); + amdsmi_get_cpu_current_io_bandwidth_p = + sym("amdsmi_get_cpu_current_io_bandwidth", NULL); + amdsmi_get_cpu_current_xgmi_bw_p = + sym("amdsmi_get_cpu_current_xgmi_bw", NULL); + amdsmi_get_cpu_ddr_bw_p = sym("amdsmi_get_cpu_ddr_bw", NULL); + amdsmi_get_cpu_fclk_mclk_p = sym("amdsmi_get_cpu_fclk_mclk", NULL); + amdsmi_get_cpu_hsmp_driver_version_p = + sym("amdsmi_get_cpu_hsmp_driver_version", NULL); + amdsmi_get_cpu_hsmp_proto_ver_p = sym("amdsmi_get_cpu_hsmp_proto_ver", NULL); + amdsmi_get_cpu_prochot_status_p = + sym("amdsmi_get_cpu_prochot_status", NULL); + amdsmi_get_cpu_pwr_svi_telemetry_all_rails_p = + sym("amdsmi_get_cpu_pwr_svi_telemetry_all_rails", NULL); + amdsmi_get_cpu_dimm_temp_range_and_refresh_rate_p = + sym("amdsmi_get_cpu_dimm_temp_range_and_refresh_rate", NULL); + amdsmi_get_cpu_dimm_power_consumption_p = + sym("amdsmi_get_cpu_dimm_power_consumption", NULL); + amdsmi_get_cpu_dimm_thermal_sensor_p = + sym("amdsmi_get_cpu_dimm_thermal_sensor", NULL); +#endif + return PAPI_OK; +} + +static int shutdown_event_table(void) { + // Remove all events from hash table and free their names/descr + for (int i = 0; i < ntv_table.count; ++i) { + htable_delete(htable, ntv_table.events[i].name); + papi_free(ntv_table.events[i].name); + papi_free(ntv_table.events[i].descr); + } + papi_free(ntv_table.events); + 
ntv_table.events = NULL; + ntv_table.count = 0; + return PAPI_OK; +} + +static int init_device_table(void) { + // Nothing to do (device_handles and device_count already set in amds_init) + return PAPI_OK; +} + +static int shutdown_device_table(void) { + if (device_handles) { + papi_free(device_handles); + device_handles = NULL; + } + if (cpu_core_handles) { + for (int s = 0; s < cpu_count; ++s) { + if (cpu_core_handles[s]) + papi_free(cpu_core_handles[s]); + } + papi_free(cpu_core_handles); + cpu_core_handles = NULL; + } + if (cores_per_socket) { + papi_free(cores_per_socket); + cores_per_socket = NULL; + } + device_count = 0; + gpu_count = 0; + cpu_count = 0; + return PAPI_OK; +} + +int amds_init(void) { + // Check if already initialized to avoid expensive re-initialization + if (device_handles != NULL && device_count > 0) + return PAPI_OK; // Already initialized + int papi_errno = load_amdsmi_sym(); + if (papi_errno != PAPI_OK) + return papi_errno; + // AMDSMI_INIT_AMD_CPUS + amdsmi_status_t status = amdsmi_init_p(AMDSMI_INIT_AMD_GPUS); + if (status != AMDSMI_STATUS_SUCCESS) { + strcpy(error_string, "amdsmi_init failed"); + return PAPI_ENOSUPP; + } + if (amdsmi_get_lib_version_p) { + amdsmi_version_t vinfo; + if (amdsmi_get_lib_version_p(&vinfo) == AMDSMI_STATUS_SUCCESS) { + amdsmi_lib_major = vinfo.major; + amdsmi_lib_minor = vinfo.minor; + } + } + htable_init(&htable); + // Discover GPU and CPU devices + uint32_t socket_count = 0; + status = amdsmi_get_socket_handles_p(&socket_count, NULL); + if (status != AMDSMI_STATUS_SUCCESS || socket_count == 0) { + snprintf(error_string, sizeof(error_string), + "Error discovering sockets or no AMD socket found."); + papi_errno = PAPI_ENOEVNT; + goto fn_fail; + } + amdsmi_socket_handle *sockets = (amdsmi_socket_handle *)papi_calloc( + socket_count, sizeof(amdsmi_socket_handle)); + if (!sockets) { + papi_errno = PAPI_ENOMEM; + goto fn_fail; + } + status = amdsmi_get_socket_handles_p(&socket_count, sockets); + if (status != 
AMDSMI_STATUS_SUCCESS) { + snprintf(error_string, sizeof(error_string), + "Error getting socket handles."); + papi_free(sockets); + papi_errno = PAPI_ENOSUPP; + goto fn_fail; + } + device_count = 0; + uint32_t total_gpu_count = 0; + for (uint32_t s = 0; s < socket_count; ++s) { + uint32_t gpu_count_local = 0; + processor_type_t proc_type = AMDSMI_PROCESSOR_TYPE_AMD_GPU; + amdsmi_status_t st = amdsmi_get_processor_handles_by_type_p( + sockets[s], proc_type, NULL, &gpu_count_local); + if (st == AMDSMI_STATUS_SUCCESS) + total_gpu_count += gpu_count_local; + } + uint32_t total_cpu_count = 0; +#ifndef AMDSMI_DISABLE_ESMI + status = amdsmi_get_cpu_handles_p(&total_cpu_count, NULL); + if (status != AMDSMI_STATUS_SUCCESS) + total_cpu_count = 0; +#endif + if (total_gpu_count == 0 && total_cpu_count == 0) { + snprintf(error_string, sizeof(error_string), + "No AMD GPU or CPU devices found."); + papi_errno = PAPI_ENOEVNT; + papi_free(sockets); + goto fn_fail; + } + device_handles = (amdsmi_processor_handle *)papi_calloc( + total_gpu_count + total_cpu_count, sizeof(*device_handles)); + if (!device_handles) { + papi_errno = PAPI_ENOMEM; + snprintf(error_string, sizeof(error_string), + "Memory allocation error for device handles."); + papi_free(sockets); + goto fn_fail; + } + // Retrieve GPU processor handles for each socket - optimized to reduce + // allocations + for (uint32_t s = 0; s < socket_count; ++s) { + uint32_t gpu_count_local = 0; + processor_type_t proc_type = AMDSMI_PROCESSOR_TYPE_AMD_GPU; + status = amdsmi_get_processor_handles_by_type_p(sockets[s], proc_type, NULL, + &gpu_count_local); + if (status != AMDSMI_STATUS_SUCCESS || gpu_count_local == 0) + continue; // no GPU on this socket or error + // Use the main device_handles array directly to avoid extra allocation + amdsmi_processor_handle *gpu_handles = &device_handles[device_count]; + status = amdsmi_get_processor_handles_by_type_p( + sockets[s], proc_type, gpu_handles, &gpu_count_local); + if (status == 
AMDSMI_STATUS_SUCCESS) + device_count += gpu_count_local; + } + papi_free(sockets); + // Set gpu_count for use in event table initialization + gpu_count = device_count; // All devices added so far are GPUs +#ifndef AMDSMI_DISABLE_ESMI + // Retrieve CPU socket handles + amdsmi_processor_handle *cpu_handles = NULL; + if (total_cpu_count > 0) { + cpu_handles = (amdsmi_processor_handle *)papi_calloc( + total_cpu_count, sizeof(amdsmi_processor_handle)); + if (!cpu_handles) { + papi_errno = PAPI_ENOMEM; + snprintf(error_string, sizeof(error_string), + "Memory allocation error for CPU handles."); + goto fn_fail; + } + status = amdsmi_get_cpu_handles_p(&total_cpu_count, cpu_handles); + if (status != AMDSMI_STATUS_SUCCESS) { + papi_free(cpu_handles); + cpu_handles = NULL; + total_cpu_count = 0; + } + } + if (cpu_handles) { + for (uint32_t i = 0; i < total_cpu_count; ++i) { + device_handles[device_count++] = cpu_handles[i]; + } + papi_free(cpu_handles); + } +#endif + // Set global GPU/CPU counts + gpu_count = total_gpu_count; + cpu_count = total_cpu_count; + // Retrieve CPU core handles for each CPU socket + if (cpu_count > 0) { + cpu_core_handles = (amdsmi_processor_handle **)papi_calloc( + cpu_count, sizeof(amdsmi_processor_handle *)); + cores_per_socket = (uint32_t *)papi_calloc(cpu_count, sizeof(uint32_t)); + if (!cpu_core_handles || !cores_per_socket) { + papi_errno = PAPI_ENOMEM; + snprintf(error_string, sizeof(error_string), + "Memory allocation error for CPU core handles."); + if (cpu_core_handles) + papi_free(cpu_core_handles); + if (cores_per_socket) + papi_free(cores_per_socket); + goto fn_fail; + } + for (uint32_t s = 0; s < cpu_count; ++s) { + uint32_t core_count = 0; + amdsmi_status_t st = amdsmi_get_processor_handles_by_type_p( + device_handles[gpu_count + s], AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE, + NULL, &core_count); + if (st != AMDSMI_STATUS_SUCCESS || core_count == 0) { + cores_per_socket[s] = 0; + cpu_core_handles[s] = NULL; + continue; + } + 
cpu_core_handles[s] = (amdsmi_processor_handle *)papi_calloc( + core_count, sizeof(amdsmi_processor_handle)); + if (!cpu_core_handles[s]) { + papi_errno = PAPI_ENOMEM; + snprintf(error_string, sizeof(error_string), + "Memory allocation error for CPU core handles on socket %u.", + s); + for (uint32_t t = 0; t < s; ++t) { + if (cpu_core_handles[t]) + papi_free(cpu_core_handles[t]); + } + papi_free(cpu_core_handles); + papi_free(cores_per_socket); + goto fn_fail; + } + st = amdsmi_get_processor_handles_by_type_p( + device_handles[gpu_count + s], AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE, + cpu_core_handles[s], &core_count); + if (st != AMDSMI_STATUS_SUCCESS) { + papi_free(cpu_core_handles[s]); + cpu_core_handles[s] = NULL; + cores_per_socket[s] = 0; + } else { + cores_per_socket[s] = core_count; + } + } + } + // Initialize the native event table for all discovered metrics + papi_errno = init_event_table(); + if (papi_errno != PAPI_OK) { + snprintf(error_string, sizeof(error_string), + "Error while initializing the native event table."); + goto fn_fail; + } + ntv_table_p = &ntv_table; + return PAPI_OK; +fn_fail: + htable_shutdown(htable); + if (device_handles) { + papi_free(device_handles); + device_handles = NULL; + device_count = 0; + } + // sockets already freed if allocated + if (cpu_core_handles) { + for (int s = 0; s < cpu_count; ++s) { + if (cpu_core_handles[s]) + papi_free(cpu_core_handles[s]); + } + papi_free(cpu_core_handles); + cpu_core_handles = NULL; + } + if (cores_per_socket) { + papi_free(cores_per_socket); + cores_per_socket = NULL; + } + amdsmi_shut_down_p(); + return papi_errno; +} + +int amds_shutdown(void) { + // Tear down our tables first + shutdown_event_table(); + shutdown_device_table(); + htable_shutdown(htable); + htable = NULL; + + // Tell AMD SMI to shut down if the symbol exists + amdsmi_status_t st = AMDSMI_STATUS_SUCCESS; + if (amdsmi_shut_down_p) + st = amdsmi_shut_down_p(); + + // Unload the shared library if we loaded it + if (amds_dlp) { + 
dlclose(amds_dlp); + amds_dlp = NULL; + } + + // Clear function pointers so a future init can't call stale symbols + #define NULLIFY(name, ret, args) name = NULL; + AMD_SMI_GPU_FUNCTIONS(NULLIFY) + #ifndef AMDSMI_DISABLE_ESMI + AMD_SMI_CPU_FUNCTIONS(NULLIFY) + #endif + #undef NULLIFY + + // Reset a few globals used by init paths + device_count = 0; + gpu_count = 0; + cpu_count = 0; + ntv_table_p = NULL; + amdsmi_lib_major = 0; + + return (st == AMDSMI_STATUS_SUCCESS) ? PAPI_OK : PAPI_EMISC; +} + + +int amds_err_get_last(const char **err_string) { + if (err_string) + *err_string = error_string; + return PAPI_OK; +} + +// Helper to add a new event entry to ntv_table +static int add_event(int *idx_ptr, const char *name, const char *descr, int device, + uint32_t variant, uint32_t subvariant, int mode, + amds_accessor_t access_func) { + native_event_t *ev = &ntv_table.events[*idx_ptr]; + ev->id = *idx_ptr; + ev->name = strdup(name); + ev->descr = strdup(descr); + if (!ev->name || !ev->descr) + return PAPI_ENOMEM; + ev->device = device; + ev->value = 0; + ev->mode = mode; + ev->variant = variant; + ev->subvariant = subvariant; + ev->priv = NULL; + ev->open_func = open_simple; + ev->close_func = close_simple; + ev->start_func = start_simple; + ev->stop_func = stop_simple; + ev->access_func = access_func; + htable_insert(htable, ev->name, ev); + (*idx_ptr)++; + return PAPI_OK; +} + +static int add_counter_event(int *idx_ptr, const char *name, const char *descr, + int device, uint32_t variant, uint32_t subvariant) { + int papi_errno = add_event(idx_ptr, name, descr, device, variant, subvariant, + PAPI_MODE_READ, access_amdsmi_gpu_counter); + if (papi_errno != PAPI_OK) + return papi_errno; + native_event_t *ev = &ntv_table.events[*idx_ptr - 1]; + ev->open_func = open_counter; + ev->close_func = close_counter; + ev->start_func = start_counter; + ev->stop_func = stop_counter; + return PAPI_OK; +} + +// Initialize native event table: enumerate all supported events +static int 
init_event_table(void) { + // Check if event table is already initialized + if (ntv_table.count > 0 && ntv_table.events != NULL) + return PAPI_OK; // Already initialized, skip expensive rebuild + ntv_table.count = 0; + int idx = 0; + // Safety check - if no devices, return early + if (device_count <= 0) { + ntv_table.events = NULL; + return PAPI_OK; + } + // Keep original allocation approach + ntv_table.events = (native_event_t *)papi_calloc( + MAX_EVENTS_PER_DEVICE * device_count, sizeof(native_event_t)); + if (!ntv_table.events) + return PAPI_ENOMEM; + char name_buf[PAPI_MAX_STR_LEN]; + char descr_buf[PAPI_MAX_STR_LEN]; + // Define sensor arrays first + amdsmi_temperature_type_t temp_sensors[] = { + AMDSMI_TEMPERATURE_TYPE_EDGE, AMDSMI_TEMPERATURE_TYPE_JUNCTION, + AMDSMI_TEMPERATURE_TYPE_VRAM, AMDSMI_TEMPERATURE_TYPE_HBM_0, + AMDSMI_TEMPERATURE_TYPE_HBM_1, AMDSMI_TEMPERATURE_TYPE_HBM_2, + AMDSMI_TEMPERATURE_TYPE_HBM_3, AMDSMI_TEMPERATURE_TYPE_PLX}; + const int num_temp_sensors = + sizeof(temp_sensors) / sizeof(temp_sensors[0]); + const amdsmi_temperature_metric_t temp_metrics[] = { + AMDSMI_TEMP_CURRENT, AMDSMI_TEMP_MAX, AMDSMI_TEMP_MIN, + AMDSMI_TEMP_MAX_HYST, AMDSMI_TEMP_MIN_HYST, AMDSMI_TEMP_CRITICAL, + AMDSMI_TEMP_CRITICAL_HYST, AMDSMI_TEMP_EMERGENCY, AMDSMI_TEMP_EMERGENCY_HYST, + AMDSMI_TEMP_CRIT_MIN, AMDSMI_TEMP_CRIT_MIN_HYST, AMDSMI_TEMP_OFFSET, + AMDSMI_TEMP_LOWEST, AMDSMI_TEMP_HIGHEST}; + const char *temp_metric_names[] = { + "temp_current", "temp_max", "temp_min", + "temp_max_hyst", "temp_min_hyst", "temp_critical", + "temp_critical_hyst", "temp_emergency", "temp_emergency_hyst", + "temp_crit_min", "temp_crit_min_hyst", "temp_offset", + "temp_lowest", "temp_highest"}; + // Temperature sensors - device-level cache + individual testing + for (int d = 0; d < gpu_count; ++d) { + // Safety check for device handle + if (!device_handles || !device_handles[d]) + continue; + + // GPU cache info events + if (amdsmi_get_gpu_cache_info_p) { + 
amdsmi_gpu_cache_info_t cache_info; + if (amdsmi_get_gpu_cache_info_p(device_handles[d], &cache_info) == + AMDSMI_STATUS_SUCCESS) { + for (uint32_t i = 0; i < cache_info.num_cache_types; ++i) { + CHECK_EVENT_IDX(idx); + uint32_t level = cache_info.cache[i].cache_level; + uint32_t prop = cache_info.cache[i].cache_properties; + char type_str[8] = "cache"; + if ((prop & AMDSMI_CACHE_PROPERTY_INST_CACHE) && + !(prop & AMDSMI_CACHE_PROPERTY_DATA_CACHE)) { + strcpy(type_str, "icache"); + } else if ((prop & AMDSMI_CACHE_PROPERTY_DATA_CACHE) && + !(prop & AMDSMI_CACHE_PROPERTY_INST_CACHE)) { + strcpy(type_str, "dcache"); + } else { + strcpy(type_str, "cache"); + } + snprintf(name_buf, sizeof(name_buf), "L%u_%s_size:device=%d", level, + type_str, d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d L%u %s size (bytes)", d, level, + (strcmp(type_str, "cache") == 0 ? "cache" + : (strcmp(type_str, "icache") == 0 ? "instruction cache" + : "data cache"))); + if (add_event(&idx, name_buf, descr_buf, d, 0, i, PAPI_MODE_READ, + access_amdsmi_cache_stat) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "L%u_%s_cu_shared:device=%d", + level, type_str, d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d L%u %s max CUs sharing", d, level, type_str); + if (add_event(&idx, name_buf, descr_buf, d, 1, i, PAPI_MODE_READ, + access_amdsmi_cache_stat) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "L%u_%s_instances:device=%d", + level, type_str, d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d L%u %s instances", d, level, type_str); + if (add_event(&idx, name_buf, descr_buf, d, 2, i, PAPI_MODE_READ, + access_amdsmi_cache_stat) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + // GPU VRAM info events + if (amdsmi_get_gpu_vram_info_p) { + amdsmi_vram_info_t vram_info; + if (amdsmi_get_gpu_vram_info_p(device_handles[d], &vram_info) == + AMDSMI_STATUS_SUCCESS) { + 
CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "vram_bus_width:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d VRAM bus width (bits)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_vram_width) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "vram_size_bytes:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d VRAM size (bytes)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_vram_size) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "vram_type:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d VRAM type id", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_vram_type) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "vram_vendor_id:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d VRAM vendor id", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_vram_vendor) != PAPI_OK) + return PAPI_ENOMEM; + } + } + // PCIe information events + if (amdsmi_get_pcie_info_p) { + amdsmi_pcie_info_t pcie_info; + if (amdsmi_get_pcie_info_p(device_handles[d], &pcie_info) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pcie_max_width:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d maximum PCIe link width (lanes)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pcie_max_speed:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d maximum PCIe link speed (GT/s)", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != 
PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pcie_interface_version:device=%d", + d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe interface version", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pcie_slot_type:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe slot type", d); + if (add_event(&idx, name_buf, descr_buf, d, 3, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return PAPI_ENOMEM; + +#if AMDSMI_LIB_VERSION_MAJOR >= 25 + if (amdsmi_lib_major >= 25) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pcie_max_interface_version:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d maximum PCIe interface version", d); + if (add_event(&idx, name_buf, descr_buf, d, 4, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return PAPI_ENOMEM; + } +#endif + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pcie_width:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current PCIe link width (lanes)", d); + if (add_event(&idx, name_buf, descr_buf, d, 5, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pcie_speed:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current PCIe link speed (MT/s)", d); + if (add_event(&idx, name_buf, descr_buf, d, 6, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pcie_bandwidth:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d instantaneous PCIe bandwidth (Mb/s)", d); + if (add_event(&idx, name_buf, descr_buf, d, 7, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return 
PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pcie_replay_count:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d PCIe replay count", d); + if (add_event(&idx, name_buf, descr_buf, d, 8, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pcie_l0_to_recovery_count:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe L0->recovery count", d); + if (add_event(&idx, name_buf, descr_buf, d, 9, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pcie_replay_rollover_count:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe replay rollover count", d); + if (add_event(&idx, name_buf, descr_buf, d, 10, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pcie_nak_sent_count:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe NAK sent count", d); + if (add_event(&idx, name_buf, descr_buf, d, 11, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pcie_nak_received_count:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe NAK received count", d); + if (add_event(&idx, name_buf, descr_buf, d, 12, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pcie_other_end_recovery_count:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe other-end recovery count", d); + if (add_event(&idx, name_buf, descr_buf, d, 13, 0, PAPI_MODE_READ, + access_amdsmi_pcie_info) != PAPI_OK) + return PAPI_ENOMEM; + } + } + // GPU Overdrive level events + if 
(amdsmi_get_gpu_overdrive_level_p) { + uint32_t od_val; + if (amdsmi_get_gpu_overdrive_level_p(device_handles[d], &od_val) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "gpu_overdrive_percent:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d GPU core clock overdrive (%%)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_overdrive_level) != PAPI_OK) + return PAPI_ENOMEM; + } + } + if (amdsmi_get_gpu_mem_overdrive_level_p) { + uint32_t od_val; + if (amdsmi_get_gpu_mem_overdrive_level_p(device_handles[d], &od_val) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "gpu_mem_overdrive_percent:device=%d", + d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d GPU memory clock overdrive (%%)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_mem_overdrive_level) != PAPI_OK) + return PAPI_ENOMEM; + } + } + // GPU performance level event + if (amdsmi_get_gpu_perf_level_p) { + amdsmi_dev_perf_level_t perf; + if (amdsmi_get_gpu_perf_level_p(device_handles[d], &perf) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "perf_level:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current performance level", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_perf_level) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + // GPU PM metrics count event (available in lib version 25+) + if (amdsmi_lib_major >= 25 && amdsmi_get_gpu_pm_metrics_info_p) { + amdsmi_name_value_t *metrics = NULL; + uint32_t mcount = 0; + + int saved_stderr = silence_stderr_begin(); + amdsmi_status_t st = amdsmi_get_gpu_pm_metrics_info_p(device_handles[d], + &metrics, &mcount); + silence_stderr_end(saved_stderr); + + if (st == AMDSMI_STATUS_SUCCESS && mcount > 0) { + if (idx >= MAX_EVENTS_PER_DEVICE * device_count && metrics) + 
free(metrics); + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pm_metrics_count:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d number of PM metrics available", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_pm_metrics_count) != PAPI_OK) { + if (metrics) free(metrics); + return PAPI_ENOMEM; + } + + for (uint32_t i = 0; i < mcount; ++i) { + if (idx >= MAX_EVENTS_PER_DEVICE * device_count) { + if (metrics) free(metrics); + CHECK_EVENT_IDX(idx); + } + char metric_name[MAX_AMDSMI_NAME_LENGTH]; + sanitize_name(metrics[i].name, metric_name, sizeof(metric_name)); + snprintf(name_buf, sizeof(name_buf), "pm_%s:device=%d", metric_name, d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d PM metric %s", d, + metrics[i].name); + if (add_event(&idx, name_buf, descr_buf, d, i, 0, PAPI_MODE_READ, + access_amdsmi_pm_metric_value) != PAPI_OK) { + if (metrics) free(metrics); + return PAPI_ENOMEM; + } + } + } + if (metrics) + free(metrics); + } + if (amdsmi_is_gpu_power_management_enabled_p) { + bool enabled = false; + if (amdsmi_is_gpu_power_management_enabled_p(device_handles[d], &enabled) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pm_enabled:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d power management enabled", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_pm_enabled) != PAPI_OK) + return PAPI_ENOMEM; + } + } + // GPU RAS feature (ECC schema) event + if (amdsmi_get_gpu_ras_feature_info_p) { + amdsmi_ras_feature_t ras; + if (amdsmi_get_gpu_ras_feature_info_p(device_handles[d], &ras) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "ecc_correction_mask:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d ECC correction features mask", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + 
access_amdsmi_ras_ecc_schema) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "ras_eeprom_version:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d RAS EEPROM version", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_ras_eeprom_version) != PAPI_OK) + return PAPI_ENOMEM; + } + } + if (amdsmi_gpu_validate_ras_eeprom_p) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "ras_eeprom_valid:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d RAS EEPROM validation status", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_ras_eeprom_validate) != PAPI_OK) + return PAPI_ENOMEM; + } + if (amdsmi_get_gpu_ras_block_features_enabled_p) { + amdsmi_gpu_block_t blocks[] = { + AMDSMI_GPU_BLOCK_UMC, AMDSMI_GPU_BLOCK_SDMA, AMDSMI_GPU_BLOCK_GFX, + AMDSMI_GPU_BLOCK_MMHUB, AMDSMI_GPU_BLOCK_ATHUB, AMDSMI_GPU_BLOCK_PCIE_BIF, + AMDSMI_GPU_BLOCK_HDP, AMDSMI_GPU_BLOCK_XGMI_WAFL, AMDSMI_GPU_BLOCK_DF, + AMDSMI_GPU_BLOCK_SMN, AMDSMI_GPU_BLOCK_SEM, AMDSMI_GPU_BLOCK_MP0, + AMDSMI_GPU_BLOCK_MP1, AMDSMI_GPU_BLOCK_FUSE, AMDSMI_GPU_BLOCK_MCA, + AMDSMI_GPU_BLOCK_VCN, AMDSMI_GPU_BLOCK_JPEG, AMDSMI_GPU_BLOCK_IH, + AMDSMI_GPU_BLOCK_MPIO}; + const char *block_names[] = { + "umc", "sdma", "gfx", "mmhub", "athub", "pcie_bif", "hdp", + "xgmi_wafl", "df", "smn", "sem", "mp0", "mp1", "fuse", + "mca", "vcn", "jpeg", "ih", "mpio"}; + size_t nb = sizeof(blocks) / sizeof(blocks[0]); + for (size_t bi = 0; bi < nb; ++bi) { + amdsmi_ras_err_state_t st; + if (amdsmi_get_gpu_ras_block_features_enabled_p( + device_handles[d], blocks[bi], &st) == AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "ras_block_%s_state:device=%d", + block_names[bi], d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d RAS state for %s block", d, block_names[bi]); + if (add_event(&idx, name_buf, descr_buf, d, 
(uint32_t)blocks[bi], 0, + PAPI_MODE_READ, access_amdsmi_ras_block_state) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + + /* ECC related events */ + if (amdsmi_get_gpu_total_ecc_count_p) { + amdsmi_error_count_t ec; + if (amdsmi_get_gpu_total_ecc_count_p(device_handles[d], &ec) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "ecc_total_correctable:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d total correctable ECC errors", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_ecc_total) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "ecc_total_uncorrectable:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d total uncorrectable ECC errors", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_ecc_total) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "ecc_total_deferred:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d total deferred ECC errors", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_ecc_total) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + if (amdsmi_get_gpu_ecc_enabled_p) { + uint64_t mask = 0; + if (amdsmi_get_gpu_ecc_enabled_p(device_handles[d], &mask) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "ecc_enabled_mask:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d ECC enabled block mask", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_ecc_enabled_mask) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + if (amdsmi_get_gpu_ecc_count_p) { + amdsmi_gpu_block_t eblocks[] = { + AMDSMI_GPU_BLOCK_UMC, AMDSMI_GPU_BLOCK_SDMA, AMDSMI_GPU_BLOCK_GFX, + AMDSMI_GPU_BLOCK_MMHUB, AMDSMI_GPU_BLOCK_ATHUB, AMDSMI_GPU_BLOCK_PCIE_BIF, + 
AMDSMI_GPU_BLOCK_HDP, AMDSMI_GPU_BLOCK_XGMI_WAFL, AMDSMI_GPU_BLOCK_DF, + AMDSMI_GPU_BLOCK_SMN, AMDSMI_GPU_BLOCK_SEM, AMDSMI_GPU_BLOCK_MP0, + AMDSMI_GPU_BLOCK_MP1, AMDSMI_GPU_BLOCK_FUSE, AMDSMI_GPU_BLOCK_MCA, + AMDSMI_GPU_BLOCK_VCN, AMDSMI_GPU_BLOCK_JPEG, AMDSMI_GPU_BLOCK_IH, + AMDSMI_GPU_BLOCK_MPIO}; + const char *eblock_names[] = { + "umc", "sdma", "gfx", "mmhub", "athub", "pcie_bif", "hdp", + "xgmi_wafl", "df", "smn", "sem", "mp0", "mp1", "fuse", + "mca", "vcn", "jpeg", "ih", "mpio"}; + size_t nb = sizeof(eblocks) / sizeof(eblocks[0]); + for (size_t bi = 0; bi < nb; ++bi) { + amdsmi_error_count_t ec; + if (amdsmi_get_gpu_ecc_count_p(device_handles[d], eblocks[bi], &ec) == + AMDSMI_STATUS_SUCCESS) { + for (uint32_t v = 0; v < 3; ++v) { + CHECK_EVENT_IDX(idx); + const char *suf = + (v == 0) ? "correctable" : (v == 1) ? "uncorrectable" : "deferred"; + snprintf(name_buf, sizeof(name_buf), + "ecc_%s_%s:device=%d", eblock_names[bi], suf, d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d %s %s ECC errors", d, eblock_names[bi], suf); + if (add_event(&idx, name_buf, descr_buf, d, v, + (uint32_t)eblocks[bi], PAPI_MODE_READ, + access_amdsmi_ecc_block) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + } + + if (amdsmi_get_gpu_ecc_status_p) { + amdsmi_gpu_block_t eblocks[] = { + AMDSMI_GPU_BLOCK_UMC, AMDSMI_GPU_BLOCK_SDMA, AMDSMI_GPU_BLOCK_GFX, + AMDSMI_GPU_BLOCK_MMHUB, AMDSMI_GPU_BLOCK_ATHUB, AMDSMI_GPU_BLOCK_PCIE_BIF, + AMDSMI_GPU_BLOCK_HDP, AMDSMI_GPU_BLOCK_XGMI_WAFL, AMDSMI_GPU_BLOCK_DF, + AMDSMI_GPU_BLOCK_SMN, AMDSMI_GPU_BLOCK_SEM, AMDSMI_GPU_BLOCK_MP0, + AMDSMI_GPU_BLOCK_MP1, AMDSMI_GPU_BLOCK_FUSE, AMDSMI_GPU_BLOCK_MCA, + AMDSMI_GPU_BLOCK_VCN, AMDSMI_GPU_BLOCK_JPEG, AMDSMI_GPU_BLOCK_IH, + AMDSMI_GPU_BLOCK_MPIO}; + const char *eblock_names[] = { + "umc", "sdma", "gfx", "mmhub", "athub", "pcie_bif", "hdp", + "xgmi_wafl", "df", "smn", "sem", "mp0", "mp1", "fuse", + "mca", "vcn", "jpeg", "ih", "mpio"}; + size_t nb = sizeof(eblocks) / sizeof(eblocks[0]); + for (size_t 
bi = 0; bi < nb; ++bi) { + amdsmi_ras_err_state_t st; + if (amdsmi_get_gpu_ecc_status_p(device_handles[d], eblocks[bi], &st) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "ecc_%s_status:device=%d", + eblock_names[bi], d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d ECC status for %s block", d, eblock_names[bi]); + if (add_event(&idx, name_buf, descr_buf, d, 0, + (uint32_t)eblocks[bi], PAPI_MODE_READ, + access_amdsmi_ecc_status) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + + // GPU voltage metrics events + if (amdsmi_get_gpu_volt_metric_p) { + const char *sensor_names[] = {"vddgfx", "vddmem", "vddsoc", "vddio", + "vddmisc", "vdd", "vdd2", "vddboard"}; + const amdsmi_voltage_metric_t metrics[] = { + AMDSMI_VOLT_CURRENT, AMDSMI_VOLT_MAX, AMDSMI_VOLT_MIN_CRIT, + AMDSMI_VOLT_MIN, AMDSMI_VOLT_MAX_CRIT, AMDSMI_VOLT_AVERAGE, + AMDSMI_VOLT_LOWEST, AMDSMI_VOLT_HIGHEST}; + const char *metric_names[] = {"current", "max", "min_crit", + "min", "max_crit", "average", + "lowest", "highest"}; + const uint32_t max_sensors = 8; + for (uint32_t s = 0; s < max_sensors; ++s) { + int64_t dummy = 0; + amdsmi_status_t st = amdsmi_get_gpu_volt_metric_p( + device_handles[d], (amdsmi_voltage_type_t)s, AMDSMI_VOLT_CURRENT, + &dummy); + if (st != AMDSMI_STATUS_SUCCESS) + continue; + for (uint32_t m = 0; m < sizeof(metrics) / sizeof(metrics[0]); ++m) { + st = amdsmi_get_gpu_volt_metric_p( + device_handles[d], (amdsmi_voltage_type_t)s, metrics[m], &dummy); + if (st != AMDSMI_STATUS_SUCCESS) + continue; + CHECK_EVENT_IDX(idx); + const char *sname = + (s < sizeof(sensor_names) / sizeof(sensor_names[0])) + ? 
sensor_names[s] + : "sensor"; + char sensor_buf[32]; + if (strcmp(sname, "sensor") == 0) { + snprintf(sensor_buf, sizeof(sensor_buf), "sensor%u", s); + sname = sensor_buf; + } + snprintf(name_buf, sizeof(name_buf), "voltage_%s_%s:device=%d", sname, + metric_names[m], d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d %s %s voltage (mV)", + d, sname, metric_names[m]); + if (add_event(&idx, name_buf, descr_buf, d, metrics[m], s, PAPI_MODE_READ, + access_amdsmi_voltage) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + // GPU OD voltage curve region events + if (amdsmi_get_gpu_od_volt_curve_regions_p) { + uint32_t num_regions = 0; + amdsmi_status_t st = amdsmi_get_gpu_od_volt_curve_regions_p( + device_handles[d], &num_regions, NULL); + if (st == AMDSMI_STATUS_SUCCESS && num_regions > 0) { + amdsmi_freq_volt_region_t *regs = + (amdsmi_freq_volt_region_t *)papi_calloc( + num_regions, sizeof(amdsmi_freq_volt_region_t)); + if (regs) { + st = amdsmi_get_gpu_od_volt_curve_regions_p(device_handles[d], + &num_regions, regs); + if (st == AMDSMI_STATUS_SUCCESS) { + if (idx >= MAX_EVENTS_PER_DEVICE * device_count) + papi_free(regs); + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "volt_curve_regions:device=%d", + d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d number of voltage curve regions", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_od_volt_regions_count) != PAPI_OK) { + papi_free(regs); + return PAPI_ENOMEM; + } + + for (uint32_t r = 0; r < num_regions; ++r) { + if (idx + 4 > MAX_EVENTS_PER_DEVICE * device_count) + papi_free(regs); + CHECK_EVENT_IDX(idx + 4); + + snprintf(name_buf, sizeof(name_buf), + "volt_curve_freq_min:device=%d:region=%u", d, r); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d voltage curve region %u frequency lower bound", + d, r); + if (add_event(&idx, name_buf, descr_buf, d, 0, r, PAPI_MODE_READ, + access_amdsmi_od_volt_curve_range) != PAPI_OK) { + papi_free(regs); + return 
PAPI_ENOMEM; + } + + snprintf(name_buf, sizeof(name_buf), + "volt_curve_freq_max:device=%d:region=%u", d, r); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d voltage curve region %u frequency upper bound", + d, r); + if (add_event(&idx, name_buf, descr_buf, d, 1, r, PAPI_MODE_READ, + access_amdsmi_od_volt_curve_range) != PAPI_OK) { + papi_free(regs); + return PAPI_ENOMEM; + } + + snprintf(name_buf, sizeof(name_buf), + "volt_curve_volt_min:device=%d:region=%u", d, r); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d voltage curve region %u voltage lower bound", + d, r); + if (add_event(&idx, name_buf, descr_buf, d, 2, r, PAPI_MODE_READ, + access_amdsmi_od_volt_curve_range) != PAPI_OK) { + papi_free(regs); + return PAPI_ENOMEM; + } + + snprintf(name_buf, sizeof(name_buf), + "volt_curve_volt_max:device=%d:region=%u", d, r); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d voltage curve region %u voltage upper bound", + d, r); + if (add_event(&idx, name_buf, descr_buf, d, 3, r, PAPI_MODE_READ, + access_amdsmi_od_volt_curve_range) != PAPI_OK) { + papi_free(regs); + return PAPI_ENOMEM; + } + } + } + papi_free(regs); + } + } + } + if (amdsmi_get_gpu_od_volt_info_p) { + amdsmi_od_volt_freq_data_t info; + if (amdsmi_get_gpu_od_volt_info_p(device_handles[d], &info) == + AMDSMI_STATUS_SUCCESS) { + if (idx + 8 + 2 * AMDSMI_NUM_VOLTAGE_CURVE_POINTS > + MAX_EVENTS_PER_DEVICE * device_count) + CHECK_EVENT_IDX(idx + 8 + 2 * AMDSMI_NUM_VOLTAGE_CURVE_POINTS); + snprintf(name_buf, sizeof(name_buf), "od_curr_sclk_min:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current SCLK frequency lower bound", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_od_volt_info) != PAPI_OK) + return PAPI_ENOMEM; + + snprintf(name_buf, sizeof(name_buf), "od_curr_sclk_max:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current SCLK frequency upper bound", d); + if (add_event(&idx, name_buf, descr_buf, d, 
1, 0, PAPI_MODE_READ, + access_amdsmi_od_volt_info) != PAPI_OK) + return PAPI_ENOMEM; + + snprintf(name_buf, sizeof(name_buf), "od_curr_mclk_min:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current MCLK frequency lower bound", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_od_volt_info) != PAPI_OK) + return PAPI_ENOMEM; + + snprintf(name_buf, sizeof(name_buf), "od_curr_mclk_max:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current MCLK frequency upper bound", d); + if (add_event(&idx, name_buf, descr_buf, d, 3, 0, PAPI_MODE_READ, + access_amdsmi_od_volt_info) != PAPI_OK) + return PAPI_ENOMEM; + + snprintf(name_buf, sizeof(name_buf), "od_sclk_limit_min:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d SCLK frequency limit lower bound", d); + if (add_event(&idx, name_buf, descr_buf, d, 4, 0, PAPI_MODE_READ, + access_amdsmi_od_volt_info) != PAPI_OK) + return PAPI_ENOMEM; + + snprintf(name_buf, sizeof(name_buf), "od_sclk_limit_max:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d SCLK frequency limit upper bound", d); + if (add_event(&idx, name_buf, descr_buf, d, 5, 0, PAPI_MODE_READ, + access_amdsmi_od_volt_info) != PAPI_OK) + return PAPI_ENOMEM; + + snprintf(name_buf, sizeof(name_buf), "od_mclk_limit_min:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d MCLK frequency limit lower bound", d); + if (add_event(&idx, name_buf, descr_buf, d, 6, 0, PAPI_MODE_READ, + access_amdsmi_od_volt_info) != PAPI_OK) + return PAPI_ENOMEM; + + snprintf(name_buf, sizeof(name_buf), "od_mclk_limit_max:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d MCLK frequency limit upper bound", d); + if (add_event(&idx, name_buf, descr_buf, d, 7, 0, PAPI_MODE_READ, + access_amdsmi_od_volt_info) != PAPI_OK) + return PAPI_ENOMEM; + + for (uint32_t p = 0; p < AMDSMI_NUM_VOLTAGE_CURVE_POINTS; ++p) { + CHECK_EVENT_IDX(idx + 2); + 
snprintf(name_buf, sizeof(name_buf), + "volt_curve_point_freq:device=%d:point=%u", d, p); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d voltage curve point %u frequency", d, p); + if (add_event(&idx, name_buf, descr_buf, d, 8, p, PAPI_MODE_READ, + access_amdsmi_od_volt_info) != PAPI_OK) + return PAPI_ENOMEM; + + snprintf(name_buf, sizeof(name_buf), + "volt_curve_point_volt:device=%d:point=%u", d, p); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d voltage curve point %u voltage", d, p); + if (add_event(&idx, name_buf, descr_buf, d, 9, p, PAPI_MODE_READ, + access_amdsmi_od_volt_info) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + // GPU SoC P-state policy events + if (amdsmi_get_soc_pstate_p) { + amdsmi_dpm_policy_t policy; + if (amdsmi_get_soc_pstate_p(device_handles[d], &policy) == + AMDSMI_STATUS_SUCCESS && + policy.num_supported > 0) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "soc_pstate_policy:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current SoC P-state policy id", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_soc_pstate_id) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "soc_pstate_supported:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d supported SoC P-state count", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_soc_pstate_supported) != PAPI_OK) + return PAPI_ENOMEM; + } + } + // GPU XGMI PLPD policy events + if (amdsmi_get_xgmi_plpd_p) { + amdsmi_dpm_policy_t policy; + if (amdsmi_get_xgmi_plpd_p(device_handles[d], &policy) == + AMDSMI_STATUS_SUCCESS && + policy.num_supported > 0) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "xgmi_plpd:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current XGMI PLPD policy id", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + 
access_amdsmi_xgmi_plpd_id) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "xgmi_plpd_supported:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d supported XGMI PLPD policy count", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_xgmi_plpd_supported) != PAPI_OK) + return PAPI_ENOMEM; + } + } + // GPU register table metrics count events (available in lib version 25+) + if (amdsmi_lib_major >= 25 && amdsmi_get_gpu_reg_table_info_p) { + amdsmi_reg_type_t reg_types[] = {AMDSMI_REG_XGMI, AMDSMI_REG_WAFL, + AMDSMI_REG_PCIE, AMDSMI_REG_USR, + AMDSMI_REG_USR1}; + const char *reg_names[] = {"XGMI", "WAFL", "PCIE", "USR", "USR1"}; + for (int rt = 0; rt < 5; ++rt) { + amdsmi_name_value_t *reg_metrics = NULL; + uint32_t num_metrics = 0; + + int saved_stderr = silence_stderr_begin(); + amdsmi_status_t st = amdsmi_get_gpu_reg_table_info_p( + device_handles[d], reg_types[rt], ®_metrics, &num_metrics); + silence_stderr_end(saved_stderr); + + if (st == AMDSMI_STATUS_SUCCESS && num_metrics > 0) { + if (idx >= MAX_EVENTS_PER_DEVICE * device_count) { + if (reg_metrics) + free(reg_metrics); + CHECK_EVENT_IDX(idx); + } + snprintf(name_buf, sizeof(name_buf), "reg_%s_count:device=%d", + reg_names[rt], d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d number of %s register metrics", d, reg_names[rt]); + if (add_event(&idx, name_buf, descr_buf, d, (uint32_t)reg_types[rt], 0, + PAPI_MODE_READ, access_amdsmi_reg_count) != PAPI_OK) { + if (reg_metrics) free(reg_metrics); + return PAPI_ENOMEM; + } + + for (uint32_t i = 0; i < num_metrics; ++i) { + if (idx >= MAX_EVENTS_PER_DEVICE * device_count) { + if (reg_metrics) + free(reg_metrics); + CHECK_EVENT_IDX(idx); + } + char reg_metric_name[MAX_AMDSMI_NAME_LENGTH]; + sanitize_name(reg_metrics[i].name, reg_metric_name, + sizeof(reg_metric_name)); + snprintf(name_buf, sizeof(name_buf), "reg_%s_%s:device=%d", + reg_names[rt], 
reg_metric_name, d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d %s register %s", + d, reg_names[rt], reg_metrics[i].name); + if (add_event(&idx, name_buf, descr_buf, d, (uint32_t)reg_types[rt], + i, PAPI_MODE_READ, access_amdsmi_reg_value) != PAPI_OK) { + if (reg_metrics) free(reg_metrics); + return PAPI_ENOMEM; + } + } + } + if (reg_metrics) + free(reg_metrics); + } + } + + for (int si = 0; si < num_temp_sensors && si < 8; ++si) { + // Test each sensor individually first + int64_t sensor_test_val = 0; // <= init + if (!amdsmi_get_temp_metric_p || + amdsmi_get_temp_metric_p(device_handles[d], temp_sensors[si], + AMDSMI_TEMP_CURRENT, + &sensor_test_val) != AMDSMI_STATUS_SUCCESS) + continue; // Skip this specific sensor if it doesn't work + + // Register metrics for this working sensor, testing each metric individually + for (size_t mi = 0; mi < sizeof(temp_metrics) / sizeof(temp_metrics[0]); ++mi) { + // Bounds check to prevent buffer overflow + if (idx >= MAX_EVENTS_PER_DEVICE * device_count) + return PAPI_ENOSUPP; // Too many events + + int64_t metric_val = 0; // <= init + if (amdsmi_get_temp_metric_p(device_handles[d], temp_sensors[si], + temp_metrics[mi], &metric_val) + != AMDSMI_STATUS_SUCCESS) + continue; /* skip this specific metric if not supported */ + + snprintf(name_buf, sizeof(name_buf), "%s:device=%d:sensor=%d", + temp_metric_names[mi], d, (int)temp_sensors[si]); + snprintf(descr_buf, sizeof(descr_buf), "Device %d %s for sensor %d", d, + temp_metric_names[mi], (int)temp_sensors[si]); + if (add_event(&idx, name_buf, descr_buf, d, temp_metrics[mi], + temp_sensors[si], PAPI_MODE_READ, + access_amdsmi_temp_metric) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + /* Fan metrics - test each device individually */ + for (int d = 0; d < gpu_count; ++d) { + // Safety check for device handle + if (!device_handles || !device_handles[d]) + continue; + /* Register Fan RPM if available */ + int64_t dummy_rpm; + if (amdsmi_get_gpu_fan_rpms_p && + 
amdsmi_get_gpu_fan_rpms_p(device_handles[d], 0, &dummy_rpm) == + AMDSMI_STATUS_SUCCESS) { + if (idx >= MAX_EVENTS_PER_DEVICE * device_count) + return PAPI_ENOSUPP; + snprintf(name_buf, sizeof(name_buf), "fan_rpms:device=%d:sensor=0", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d fan speed in RPM", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_fan_rpms) != PAPI_OK) + return PAPI_ENOMEM; + } + /* Register Fan SPEED if available */ + int64_t dummy_speed; + if (amdsmi_get_gpu_fan_speed_p && + amdsmi_get_gpu_fan_speed_p(device_handles[d], 0, &dummy_speed) == + AMDSMI_STATUS_SUCCESS) { + if (idx >= MAX_EVENTS_PER_DEVICE * device_count) + return PAPI_ENOSUPP; + snprintf(name_buf, sizeof(name_buf), "fan_speed:device=%d:sensor=0", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d fan speed (0-255 relative)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_fan_speed) != PAPI_OK) + return PAPI_ENOMEM; + } + /* Register Fan Max Speed - always probe directly */ + int64_t dummy_max; + if (amdsmi_get_gpu_fan_speed_max_p && + amdsmi_get_gpu_fan_speed_max_p(device_handles[d], 0, &dummy_max) == + AMDSMI_STATUS_SUCCESS) { + if (idx >= MAX_EVENTS_PER_DEVICE * device_count) + return PAPI_ENOSUPP; + snprintf(name_buf, sizeof(name_buf), "fan_rpms_max:device=%d:sensor=0", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d fan maximum speed in RPM", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_fan_speed_max) != PAPI_OK) + return PAPI_ENOMEM; + } + } + /* VRAM memory metrics - test each device individually */ + for (int d = 0; d < gpu_count; ++d) { + // Safety check for device handle + if (!device_handles || !device_handles[d]) + continue; + /* total VRAM bytes - test directly */ + uint64_t dummy_total; + if (amdsmi_get_total_memory_p && + amdsmi_get_total_memory_p(device_handles[d], AMDSMI_MEM_TYPE_VRAM, + &dummy_total) == 
AMDSMI_STATUS_SUCCESS) { + if (idx >= MAX_EVENTS_PER_DEVICE * device_count) + return PAPI_ENOSUPP; + snprintf(name_buf, sizeof(name_buf), "mem_total_VRAM:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d total VRAM memory (bytes)", d); + if (add_event(&idx, name_buf, descr_buf, d, AMDSMI_MEM_TYPE_VRAM, 0, + PAPI_MODE_READ, access_amdsmi_mem_total) != PAPI_OK) + return PAPI_ENOMEM; + } + /* used VRAM bytes - test directly */ + uint64_t dummy_usage; + if (amdsmi_get_memory_usage_p && + amdsmi_get_memory_usage_p(device_handles[d], AMDSMI_MEM_TYPE_VRAM, + &dummy_usage) == AMDSMI_STATUS_SUCCESS) { + if (idx >= MAX_EVENTS_PER_DEVICE * device_count) + return PAPI_ENOSUPP; + snprintf(name_buf, sizeof(name_buf), "mem_usage_VRAM:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d VRAM memory usage (bytes)", d); + if (add_event(&idx, name_buf, descr_buf, d, AMDSMI_MEM_TYPE_VRAM, 0, + PAPI_MODE_READ, access_amdsmi_mem_usage) != PAPI_OK) + return PAPI_ENOMEM; + } + if (amdsmi_get_gpu_vram_usage_p) { + amdsmi_vram_usage_t vu; + if (amdsmi_get_gpu_vram_usage_p(device_handles[d], &vu) == + AMDSMI_STATUS_SUCCESS) { + if (idx >= MAX_EVENTS_PER_DEVICE * device_count) + return PAPI_ENOSUPP; + snprintf(name_buf, sizeof(name_buf), "vram_total_mb:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d total VRAM (MB)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_vram_usage) != PAPI_OK) + return PAPI_ENOMEM; + if (idx >= MAX_EVENTS_PER_DEVICE * device_count) + return PAPI_ENOSUPP; + snprintf(name_buf, sizeof(name_buf), "vram_used_mb:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d used VRAM (MB)", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_vram_usage) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + /* GPU power metrics: average power, power cap, and cap range */ + for (int d = 0; d < gpu_count; ++d) { + // Safety check for device 
handle + if (!device_handles || !device_handles[d]) + continue; + // Register power average event - test directly + amdsmi_power_info_t dummy_power; + if (amdsmi_get_power_info_p && + amdsmi_get_power_info_p(device_handles[d], &dummy_power) == + AMDSMI_STATUS_SUCCESS) { + // Average power consumption (W) + snprintf(name_buf, sizeof(name_buf), "power_average:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d average power consumption (W)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_power_average) != PAPI_OK) + return PAPI_ENOMEM; + } + // Register power cap events (if available) - test directly + amdsmi_power_cap_info_t dummy_cap_info; + if (amdsmi_get_power_cap_info_p && + amdsmi_get_power_cap_info_p(device_handles[d], 0, &dummy_cap_info) == + AMDSMI_STATUS_SUCCESS) { + // Current power cap limit + snprintf(name_buf, sizeof(name_buf), "power_cap:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current power cap (W)", d); + if (add_event(&idx, name_buf, descr_buf, d, + 0, 0, PAPI_MODE_READ | PAPI_MODE_WRITE, + access_amdsmi_power_cap) != PAPI_OK) + return PAPI_ENOMEM; + // Minimum allowed power cap + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "power_cap_range_min:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d minimum allowed power cap (W)", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_power_cap_range) != PAPI_OK) + return PAPI_ENOMEM; + // Maximum allowed power cap + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "power_cap_range_max:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d maximum allowed power cap (W)", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_power_cap_range) != PAPI_OK) + return PAPI_ENOMEM; + // Default power cap + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "power_cap_default:device=%d", 
d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d default power cap (W)", d); + if (add_event(&idx, name_buf, descr_buf, d, 3, 0, PAPI_MODE_READ, + access_amdsmi_power_cap_range) != PAPI_OK) + return PAPI_ENOMEM; + // DPM power cap + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "power_cap_dpm:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d DPM power cap (W)", d); + if (add_event(&idx, name_buf, descr_buf, d, 4, 0, PAPI_MODE_READ, + access_amdsmi_power_cap_range) != PAPI_OK) + return PAPI_ENOMEM; + } + } + /* PCIe throughput and replay counter metrics */ + uint64_t tx = 0, rx = 0, pkt = 0; + amdsmi_status_t st_thr = + amdsmi_get_gpu_pci_throughput_p(device_handles[0], &tx, &rx, &pkt); + + for (int d = 0; d < gpu_count; ++d) { + if (st_thr == AMDSMI_STATUS_SUCCESS) { + /* bytes sent per second */ + snprintf(name_buf, sizeof(name_buf), "pci_throughput_sent:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe bytes sent per second", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_pci_throughput) != PAPI_OK) + return PAPI_ENOMEM; + /* bytes received per second */ + snprintf(name_buf, sizeof(name_buf), "pci_throughput_received:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe bytes received per second", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_pci_throughput) != PAPI_OK) + return PAPI_ENOMEM; + /* max packet size */ + snprintf(name_buf, sizeof(name_buf), + "pci_throughput_max_packet:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe max packet size (bytes)", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_pci_throughput) != PAPI_OK) + return PAPI_ENOMEM; + } + uint64_t replay = 0; + if (amdsmi_get_gpu_pci_replay_counter_p(device_handles[d], &replay) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), 
"pci_replay_counter:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe replay (NAK) counter", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_pci_replay_counter) != PAPI_OK) + return PAPI_ENOMEM; + } + + if (amdsmi_get_gpu_pci_bandwidth_p) { + amdsmi_pcie_bandwidth_t bw; + if (amdsmi_get_gpu_pci_bandwidth_p(device_handles[d], &bw) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pci_bandwidth_supported:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d number of supported PCIe transfer rates", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_pci_bandwidth) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pci_bandwidth_current:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current PCIe transfer rate (MT/s)", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_pci_bandwidth) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pci_bandwidth_lanes:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current PCIe lane count", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_pci_bandwidth) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + /* Additional GPU metrics and system information */ + /* GPU engine utilization metrics - test each device individually */ + for (int d = 0; d < gpu_count; ++d) { + // Safety check for device handle + if (!device_handles || !device_handles[d]) + continue; + // Register GFX activity event - test directly + amdsmi_engine_usage_t dummy_usage; + if (amdsmi_get_gpu_activity_p && + amdsmi_get_gpu_activity_p(device_handles[d], &dummy_usage) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "gfx_activity:device=%d", d); + snprintf(descr_buf, 
sizeof(descr_buf), + "Device %d GFX engine activity (%%)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_gpu_activity) != PAPI_OK) + return PAPI_ENOMEM; + snprintf(name_buf, sizeof(name_buf), "umc_activity:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d UMC engine activity (%%)", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_gpu_activity) != PAPI_OK) + return PAPI_ENOMEM; + snprintf(name_buf, sizeof(name_buf), "mm_activity:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d MM engine activity (%%)", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_gpu_activity) != PAPI_OK) + return PAPI_ENOMEM; + } + } + /* GPU utilization counters */ + if (amdsmi_get_utilization_count_p) { + for (int d = 0; d < gpu_count; ++d) { + amdsmi_utilization_counter_t uc; + uint64_t ts; + uc.type = AMDSMI_COARSE_GRAIN_GFX_ACTIVITY; + if (amdsmi_get_utilization_count_p(device_handles[d], &uc, 1, &ts) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "util_counter_gfx:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d coarse grain GFX activity counter", d); + if (add_event(&idx, name_buf, descr_buf, d, + AMDSMI_COARSE_GRAIN_GFX_ACTIVITY, 0, PAPI_MODE_READ, + access_amdsmi_utilization_count) != PAPI_OK) + return PAPI_ENOMEM; + } + uc.type = AMDSMI_COARSE_GRAIN_MEM_ACTIVITY; + if (amdsmi_get_utilization_count_p(device_handles[d], &uc, 1, &ts) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "util_counter_mem:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d coarse grain memory activity counter", d); + if (add_event(&idx, name_buf, descr_buf, d, + AMDSMI_COARSE_GRAIN_MEM_ACTIVITY, 0, PAPI_MODE_READ, + access_amdsmi_utilization_count) != PAPI_OK) + return PAPI_ENOMEM; + } + uc.type = 
AMDSMI_COARSE_DECODER_ACTIVITY; + if (amdsmi_get_utilization_count_p(device_handles[d], &uc, 1, &ts) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "util_counter_dec:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d coarse grain decoder activity counter", d); + if (add_event(&idx, name_buf, descr_buf, d, + AMDSMI_COARSE_DECODER_ACTIVITY, 0, PAPI_MODE_READ, + access_amdsmi_utilization_count) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + /* GPU clock frequency levels for multiple clock domains */ + for (int d = 0; d < gpu_count; ++d) { + amdsmi_clk_type_t clk_types[] = {AMDSMI_CLK_TYPE_SYS, AMDSMI_CLK_TYPE_DF, + AMDSMI_CLK_TYPE_DCEF}; + const char *clk_names[] = {"sys", "df", "dcef"}; + for (int t = 0; t < 3; ++t) { + amdsmi_frequencies_t f; + if (amdsmi_get_clk_freq_p(device_handles[d], clk_types[t], &f) != + AMDSMI_STATUS_SUCCESS || + f.num_supported == 0) + continue; + // Number of supported frequencies for this clock domain + snprintf(name_buf, sizeof(name_buf), "clk_freq_%s_count:device=%d", + clk_names[t], d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d number of supported %s clock frequencies", d, + clk_names[t]); + if (add_event(&idx, name_buf, descr_buf, d, t, 0, PAPI_MODE_READ, + access_amdsmi_clk_freq) != PAPI_OK) + return PAPI_ENOMEM; + // Current clock frequency for this domain + snprintf(name_buf, sizeof(name_buf), "clk_freq_%s_current:device=%d", + clk_names[t], d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current %s clock frequency (MHz)", d, clk_names[t]); + if (add_event(&idx, name_buf, descr_buf, d, t, 1, PAPI_MODE_READ, + access_amdsmi_clk_freq) != PAPI_OK) + return PAPI_ENOMEM; + // Supported frequency levels for this domain + for (uint32_t fi = 0; fi < f.num_supported; ++fi) { + snprintf(name_buf, sizeof(name_buf), "clk_freq_%s_level_%u:device=%d", + clk_names[t], fi, d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d supported %s clock frequency 
level %u (MHz)", d, + clk_names[t], fi); + if (add_event(&idx, name_buf, descr_buf, d, t, fi + 2, PAPI_MODE_READ, + access_amdsmi_clk_freq) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + if (amdsmi_get_clock_info_p) { + for (int d = 0; d < gpu_count; ++d) { + amdsmi_clk_type_t clk_types[] = {AMDSMI_CLK_TYPE_SYS, AMDSMI_CLK_TYPE_MEM}; + const char *clk_names[] = {"sys", "mem"}; + const char *field_names[] = {"current", "min", "max", "locked", + "deep_sleep"}; + const char *field_descr[] = { + "current frequency (MHz)", "minimum frequency (MHz)", + "maximum frequency (MHz)", "lock state (bool)", + "deep sleep frequency (MHz)"}; + for (int t = 0; t < 2; ++t) { + amdsmi_clk_info_t info; + if (amdsmi_get_clock_info_p(device_handles[d], clk_types[t], &info) != + AMDSMI_STATUS_SUCCESS) + continue; + for (int f = 0; f < 5; ++f) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "clk_%s_%s:device=%d", + clk_names[t], field_names[f], d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d %s %s", d, + clk_names[t], field_descr[f]); + if (add_event(&idx, name_buf, descr_buf, d, t, f, PAPI_MODE_READ, + access_amdsmi_clock_info) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + } + /* GPU identification and topology metrics */ + for (int d = 0; d < gpu_count; ++d) { + uint16_t id16; + uint64_t id64; + int32_t numa; + // GPU ID + if (amdsmi_get_gpu_id_p(device_handles[d], &id16) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "gpu_id:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d GPU identifier (Device ID)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_gpu_info) != PAPI_OK) + return PAPI_ENOMEM; + } + // GPU Revision + if (amdsmi_get_gpu_revision_p(device_handles[d], &id16) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "gpu_revision:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d GPU revision ID", d); + if (add_event(&idx, name_buf, 
descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_gpu_info) != PAPI_OK) + return PAPI_ENOMEM; + } + // GPU Subsystem ID + if (amdsmi_get_gpu_subsystem_id_p(device_handles[d], &id16) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "gpu_subsystem_id:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d GPU subsystem ID", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_gpu_info) != PAPI_OK) + return PAPI_ENOMEM; + } + // GPU BDF ID + if (amdsmi_get_gpu_bdf_id_p(device_handles[d], &id64) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "gpu_bdfid:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d GPU PCI BDF identifier", d); + if (add_event(&idx, name_buf, descr_buf, d, 3, 0, PAPI_MODE_READ, + access_amdsmi_gpu_info) != PAPI_OK) + return PAPI_ENOMEM; + } + // GPU device BDF components + if (amdsmi_get_gpu_device_bdf_p) { + amdsmi_bdf_t bdf; + if (amdsmi_get_gpu_device_bdf_p(device_handles[d], &bdf) == + AMDSMI_STATUS_SUCCESS) { + const char *bdf_names[] = {"gpu_bdf_domain", "gpu_bdf_bus", + "gpu_bdf_device", "gpu_bdf_function"}; + const char *bdf_descr[] = { + "GPU PCI domain number", "GPU PCI bus number", + "GPU PCI device number", "GPU PCI function number"}; + for (uint32_t v = 0; v < 4; ++v) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "%s:device=%d", + bdf_names[v], d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d %s", d, + bdf_descr[v]); + if (add_event(&idx, name_buf, descr_buf, d, v, 0, PAPI_MODE_READ, + access_amdsmi_device_bdf) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + if (amdsmi_get_xgmi_info_p) { + amdsmi_xgmi_info_t xi; + if (amdsmi_get_xgmi_info_p(device_handles[d], &xi) == AMDSMI_STATUS_SUCCESS) { + const char *xinames[] = {"xgmi_lanes", "xgmi_hive_id", "xgmi_node_id", + "xgmi_index"}; + const char *xidescr[] = {"Device %d XGMI lane count", + "Device %d XGMI hive identifier", + "Device %d XGMI node 
identifier", + "Device %d XGMI link index"}; + for (uint32_t v = 0; v < 4; ++v) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "%s:device=%d", xinames[v], d); + snprintf(descr_buf, sizeof(descr_buf), xidescr[v], d); + if (add_event(&idx, name_buf, descr_buf, d, v, 0, PAPI_MODE_READ, + access_amdsmi_xgmi_info) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + if (amdsmi_get_gpu_kfd_info_p) { + amdsmi_kfd_info_t kinfo; + if (amdsmi_get_gpu_kfd_info_p(device_handles[d], &kinfo) == + AMDSMI_STATUS_SUCCESS) { + const char *knames[] = {"kfd_id", "kfd_node_id", + "kfd_current_partition_id"}; + const char *kdescr[] = {"Device %d KFD identifier", + "Device %d KFD node id", + "Device %d KFD current partition id"}; + for (uint32_t v = 0; v < 3; ++v) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "%s:device=%d", knames[v], d); + snprintf(descr_buf, sizeof(descr_buf), kdescr[v], d); + if (add_event(&idx, name_buf, descr_buf, d, v, 0, PAPI_MODE_READ, + access_amdsmi_kfd_info) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + // NUMA node via topology API + if (amdsmi_topo_get_numa_node_number_p) { + uint32_t node; + if (amdsmi_topo_get_numa_node_number_p(device_handles[d], &node) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "topo_numa_node:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d NUMA node number", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_topo_numa) != PAPI_OK) + return PAPI_ENOMEM; + } + } + // GPU Virtualization Mode +#if AMDSMI_LIB_VERSION_MAJOR >= 25 + amdsmi_virtualization_mode_t vmode; + if (amdsmi_lib_major >= 25 && amdsmi_get_gpu_virtualization_mode_p && + amdsmi_get_gpu_virtualization_mode_p(device_handles[d], &vmode) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "gpu_virtualization_mode:device=%d", + d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d GPU virtualization mode", d); + if 
(add_event(&idx, name_buf, descr_buf, d, 4, 0, PAPI_MODE_READ, + access_amdsmi_gpu_info) != PAPI_OK) + return PAPI_ENOMEM; + } +#endif + // GPU NUMA Node + if (amdsmi_get_gpu_topo_numa_affinity_p(device_handles[d], &numa) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "numa_node:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d NUMA node", d); + if (add_event(&idx, name_buf, descr_buf, d, 5, 0, PAPI_MODE_READ, + access_amdsmi_gpu_info) != PAPI_OK) + return PAPI_ENOMEM; + } + + if (amdsmi_get_gpu_process_list_p) { + amdsmi_proc_info_t plist[2]; + uint32_t maxp = 2; + if (amdsmi_get_gpu_process_list_p(device_handles[d], &maxp, plist) == + AMDSMI_STATUS_SUCCESS) { + const char *pmetric_names[] = {"pid", "mem", "eng_gfx", + "eng_enc", "gtt_mem", "cpu_mem", + "vram_mem", "cu_occupancy"}; + const char *pmetric_descr[] = { + "PID", "memory usage (bytes)", + "GFX engine time (ns)", "ENC engine time (ns)", + "GTT memory (bytes)", "CPU memory (bytes)", + "VRAM memory (bytes)", "Compute units utilized"}; + for (uint32_t p = 0; p < 2; ++p) { + for (uint32_t v = 0; v < 8; ++v) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "process_%s:device=%d:proc=%u", pmetric_names[v], d, p); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d process %u %s", d, p, pmetric_descr[v]); + if (add_event(&idx, name_buf, descr_buf, d, v, p, PAPI_MODE_READ, + access_amdsmi_process_info) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + } + + if (amdsmi_get_gpu_process_isolation_p) { + uint32_t pis = 0; + if (amdsmi_get_gpu_process_isolation_p(device_handles[d], &pis) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "process_isolation:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d process isolation status", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_process_isolation) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + if (amdsmi_get_gpu_xcd_counter_p) { + 
uint16_t xcd = 0; + if (amdsmi_get_gpu_xcd_counter_p(device_handles[d], &xcd) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "xcd_counter:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d XCD counter", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_xcd_counter) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + if (amdsmi_get_minmax_bandwidth_between_processors_p) { + for (int r = 0; r < gpu_count; ++r) { + if (r == d) + continue; + uint64_t min_bw = 0, max_bw = 0; + if (amdsmi_get_minmax_bandwidth_between_processors_p( + device_handles[d], device_handles[r], &min_bw, &max_bw) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "xgmi_min_bandwidth:src=%d:dst=%d", d, r); + snprintf(descr_buf, sizeof(descr_buf), + "Min XGMI bandwidth from device %d to %d (MB/s)", d, r); + if (add_event(&idx, name_buf, descr_buf, d, 0, r, PAPI_MODE_READ, + access_amdsmi_xgmi_bandwidth) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "xgmi_max_bandwidth:src=%d:dst=%d", d, r); + snprintf(descr_buf, sizeof(descr_buf), + "Max XGMI bandwidth from device %d to %d (MB/s)", d, r); + if (add_event(&idx, name_buf, descr_buf, d, 1, r, PAPI_MODE_READ, + access_amdsmi_xgmi_bandwidth) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + + if (amdsmi_gpu_counter_group_supported_p && + amdsmi_get_gpu_available_counters_p && amdsmi_gpu_create_counter_p && + amdsmi_gpu_control_counter_p && amdsmi_gpu_read_counter_p && + amdsmi_gpu_destroy_counter_p) { + if (amdsmi_gpu_counter_group_supported_p( + device_handles[d], AMDSMI_EVNT_GRP_XGMI) == + AMDSMI_STATUS_SUCCESS) { + uint32_t avail = 0; + if (amdsmi_get_gpu_available_counters_p( + device_handles[d], AMDSMI_EVNT_GRP_XGMI, &avail) == + AMDSMI_STATUS_SUCCESS && + avail > 0) { + static const struct { + const char *suffix; + amdsmi_event_type_t type[2]; + } xgmi_desc[] = { + {"nop_tx", 
{AMDSMI_EVNT_XGMI_0_NOP_TX, + AMDSMI_EVNT_XGMI_1_NOP_TX}}, + {"request_tx", + {AMDSMI_EVNT_XGMI_0_REQUEST_TX, + AMDSMI_EVNT_XGMI_1_REQUEST_TX}}, + {"response_tx", + {AMDSMI_EVNT_XGMI_0_RESPONSE_TX, + AMDSMI_EVNT_XGMI_1_RESPONSE_TX}}, + {"beats_tx", {AMDSMI_EVNT_XGMI_0_BEATS_TX, + AMDSMI_EVNT_XGMI_1_BEATS_TX}}, + }; + for (int link = 0; link < 2; ++link) { + for (size_t m = 0; m < sizeof(xgmi_desc) / sizeof(xgmi_desc[0]); + ++m) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "xgmi_%s:device=%d:link=%d", xgmi_desc[m].suffix, d, link); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d XGMI %s on link %d", d, xgmi_desc[m].suffix, + link); + if (add_counter_event(&idx, name_buf, descr_buf, d, + xgmi_desc[m].type[link], link) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + } + } + + if (amdsmi_get_fw_info_p) { + amdsmi_fw_info_t finfo; + if (amdsmi_get_fw_info_p(device_handles[d], &finfo) == + AMDSMI_STATUS_SUCCESS) { + uint8_t n = finfo.num_fw_info; + if (n > AMDSMI_FW_ID__MAX) + n = AMDSMI_FW_ID__MAX; + for (uint8_t f = 0; f < n; ++f) { + CHECK_EVENT_IDX(idx); + uint32_t fid = finfo.fw_info_list[f].fw_id; + snprintf(name_buf, sizeof(name_buf), "fw_version_id%u:device=%d", fid, + d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d firmware id %u version", d, fid); + if (add_event(&idx, name_buf, descr_buf, d, fid, 0, PAPI_MODE_READ, + access_amdsmi_fw_version) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + + if (amdsmi_get_gpu_board_info_p) { + amdsmi_board_info_t binfo; + if (amdsmi_get_gpu_board_info_p(device_handles[d], &binfo) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "board_serial_hash:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d board serial number (hash)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_board_serial_hash) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + if (amdsmi_get_gpu_vram_info_p) { +#if 
AMDSMI_LIB_VERSION_MAJOR >= 25 + if (amdsmi_lib_major >= 25) { + amdsmi_vram_info_t vinfo; + if (amdsmi_get_gpu_vram_info_p(device_handles[d], &vinfo) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "vram_max_bandwidth:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d VRAM max bandwidth (GB/s)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_vram_max_bandwidth) != PAPI_OK) + return PAPI_ENOMEM; + } + } +#endif + } + + if (amdsmi_get_gpu_memory_reserved_pages_p) { + uint32_t nump = 0; + if (amdsmi_get_gpu_memory_reserved_pages_p(device_handles[d], &nump, + NULL) == AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "memory_reserved_pages:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d reserved memory pages", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_memory_reserved_pages) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + if (amdsmi_get_gpu_bad_page_info_p) { + uint32_t nump = 0; + if (amdsmi_get_gpu_bad_page_info_p(device_handles[d], &nump, NULL) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "bad_page_count:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d retired page count", + d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_bad_page_count) != PAPI_OK) + return PAPI_ENOMEM; + for (uint32_t p = 0; p < nump; ++p) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "bad_page_address:device=%d:page=%u", d, p); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d retired page %u address", d, p); + if (add_event(&idx, name_buf, descr_buf, d, 0, p, PAPI_MODE_READ, + access_amdsmi_bad_page_record) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "bad_page_size:device=%d:page=%u", d, p); + 
snprintf(descr_buf, sizeof(descr_buf), + "Device %d retired page %u size", d, p); + if (add_event(&idx, name_buf, descr_buf, d, 1, p, PAPI_MODE_READ, + access_amdsmi_bad_page_record) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "bad_page_status:device=%d:page=%u", d, p); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d retired page %u status", d, p); + if (add_event(&idx, name_buf, descr_buf, d, 2, p, PAPI_MODE_READ, + access_amdsmi_bad_page_record) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + + if (amdsmi_get_gpu_bad_page_threshold_p) { + uint32_t thr = 0; + if (amdsmi_get_gpu_bad_page_threshold_p(device_handles[d], &thr) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "bad_page_threshold:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d bad page threshold", + d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_bad_page_threshold) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + if (amdsmi_get_power_info_v2_p) { + /* Probe for available power sensors. 
*/ + for (uint32_t s = 0; s < 2; ++s) { + amdsmi_power_info_t pinfo; + if (amdsmi_get_power_info_v2_p(device_handles[d], s, &pinfo) != + AMDSMI_STATUS_SUCCESS) + break; + + /* Register current socket power in Watts */ + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "power_sensor_current_watts:device=%d:sensor=%u", d, s); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d power sensor %u current socket power (W)", d, s); + if (add_event(&idx, name_buf, descr_buf, d, 0, s, PAPI_MODE_READ, + access_amdsmi_power_sensor) != PAPI_OK) + return PAPI_ENOMEM; + + /* Register average socket power in Watts */ + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "power_sensor_average_watts:device=%d:sensor=%u", d, s); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d power sensor %u average socket power (W)", d, s); + if (add_event(&idx, name_buf, descr_buf, d, 1, s, PAPI_MODE_READ, + access_amdsmi_power_sensor) != PAPI_OK) + return PAPI_ENOMEM; + + /* Register socket power in microwatts */ +#if AMDSMI_LIB_VERSION_MAJOR >= 25 + if (amdsmi_lib_major >= 25) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "power_sensor_socket_microwatts:device=%d:sensor=%u", d, s); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d power sensor %u socket power (uW)", d, s); + if (add_event(&idx, name_buf, descr_buf, d, 2, s, PAPI_MODE_READ, + access_amdsmi_power_sensor) != PAPI_OK) + return PAPI_ENOMEM; + } +#endif + + /* Register GFX voltage */ + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "power_sensor_gfx_voltage_mv:device=%d:sensor=%u", d, s); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d power sensor %u GFX voltage (mV)", d, s); + if (add_event(&idx, name_buf, descr_buf, d, 3, s, PAPI_MODE_READ, + access_amdsmi_power_sensor) != PAPI_OK) + return PAPI_ENOMEM; + + /* Register SOC voltage */ + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "power_sensor_soc_voltage_mv:device=%d:sensor=%u", d, s); 
+ snprintf(descr_buf, sizeof(descr_buf), + "Device %d power sensor %u SOC voltage (mV)", d, s); + if (add_event(&idx, name_buf, descr_buf, d, 4, s, PAPI_MODE_READ, + access_amdsmi_power_sensor) != PAPI_OK) + return PAPI_ENOMEM; + + /* Register MEM voltage */ + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "power_sensor_mem_voltage_mv:device=%d:sensor=%u", d, s); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d power sensor %u MEM voltage (mV)", d, s); + if (add_event(&idx, name_buf, descr_buf, d, 5, s, PAPI_MODE_READ, + access_amdsmi_power_sensor) != PAPI_OK) + return PAPI_ENOMEM; + + /* Register power limit */ + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "power_sensor_limit_watts:device=%d:sensor=%u", d, s); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d power sensor %u power limit (W)", d, s); + if (add_event(&idx, name_buf, descr_buf, d, 6, s, PAPI_MODE_READ, + access_amdsmi_power_sensor) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + if (amdsmi_get_gpu_metrics_header_info_p) { + amd_metrics_table_header_t hdr = {0}; // <= zero-init + + // If the API defines a size/version field, set it before the call: + // hdr.metrics_header_size = sizeof(hdr); // uncomment if such a field exists + + if (amdsmi_get_gpu_metrics_header_info_p(device_handles[d], &hdr) + == AMDSMI_STATUS_SUCCESS) { + const char *hnames[] = {"metrics_header_size", + "metrics_header_format_rev", + "metrics_header_content_rev"}; + const char *hdescr[] = {"Device %d metrics header structure size", + "Device %d metrics header format revision", + "Device %d metrics header content revision"}; + for (uint32_t v = 0; v < 3; ++v) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "%s:device=%d", hnames[v], d); + snprintf(descr_buf, sizeof(descr_buf), hdescr[v], d); + if (add_event(&idx, name_buf, descr_buf, d, v, 0, PAPI_MODE_READ, + access_amdsmi_metrics_header_info) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + + if 
(amdsmi_get_gpu_metrics_info_p) { + amdsmi_gpu_metrics_t metrics; + if (amdsmi_get_gpu_metrics_info_p(device_handles[d], &metrics) == + AMDSMI_STATUS_SUCCESS) { + /* Register throttle status */ + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "gpu_throttle_status:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d throttle status", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_gpu_metrics) != PAPI_OK) + return PAPI_ENOMEM; + + /* Register independent throttle status */ + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "gpu_indep_throttle_status:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d independent throttle status", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_gpu_metrics) != PAPI_OK) + return PAPI_ENOMEM; + + /* Register PCIe link width */ + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pcie_link_width:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe link width (lanes)", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_gpu_metrics) != PAPI_OK) + return PAPI_ENOMEM; + + /* Register PCIe link speed */ + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pcie_link_speed:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe link speed (0.1 GT/s)", d); + if (add_event(&idx, name_buf, descr_buf, d, 3, 0, PAPI_MODE_READ, + access_amdsmi_gpu_metrics) != PAPI_OK) + return PAPI_ENOMEM; + + /* Register PCIe bandwidth and replay counters */ + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "pcie_bandwidth_acc:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe accumulated bandwidth (GB/s)", d); + if (add_event(&idx, name_buf, descr_buf, d, 4, 0, PAPI_MODE_READ, + access_amdsmi_gpu_metrics) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, 
sizeof(name_buf), "pcie_bandwidth_inst:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe instantaneous bandwidth (GB/s)", d); + if (add_event(&idx, name_buf, descr_buf, d, 5, 0, PAPI_MODE_READ, + access_amdsmi_gpu_metrics) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pcie_l0_to_recov_count_acc:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe L0->recovery count", d); + if (add_event(&idx, name_buf, descr_buf, d, 6, 0, PAPI_MODE_READ, + access_amdsmi_gpu_metrics) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pcie_replay_count_acc:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d PCIe replay count", d); + if (add_event(&idx, name_buf, descr_buf, d, 7, 0, PAPI_MODE_READ, + access_amdsmi_gpu_metrics) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pcie_replay_rover_count_acc:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe replay rollover count", d); + if (add_event(&idx, name_buf, descr_buf, d, 8, 0, PAPI_MODE_READ, + access_amdsmi_gpu_metrics) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pcie_nak_sent_count_acc:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d PCIe NAK sent count", + d); + if (add_event(&idx, name_buf, descr_buf, d, 9, 0, PAPI_MODE_READ, + access_amdsmi_gpu_metrics) != PAPI_OK) + return PAPI_ENOMEM; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "pcie_nak_rcvd_count_acc:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d PCIe NAK received count", d); + if (add_event(&idx, name_buf, descr_buf, d, 10, 0, PAPI_MODE_READ, + access_amdsmi_gpu_metrics) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + if (amdsmi_init_gpu_event_notification_p && + amdsmi_set_gpu_event_notification_mask_p && + 
amdsmi_get_gpu_event_notification_p && + amdsmi_stop_gpu_event_notification_p) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "thermal_throttle_events:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d thermal throttle event notifications", d); + if (add_event(&idx, name_buf, descr_buf, d, AMDSMI_EVT_NOTIF_THERMAL_THROTTLE, + 0, PAPI_MODE_READ, access_amdsmi_event_notification) != PAPI_OK) + return PAPI_ENOMEM; + } + } + /* Energy consumption counter */ + for (int d = 0; d < gpu_count; ++d) { + uint64_t energy = 0; + float resolution = 0.0; + uint64_t timestamp = 0; + if (amdsmi_get_energy_count_p(device_handles[d], &energy, &resolution, + &timestamp) != AMDSMI_STATUS_SUCCESS) + continue; + snprintf(name_buf, sizeof(name_buf), "energy_consumed:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d energy consumed (microJoules)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_energy_count) != PAPI_OK) + return PAPI_ENOMEM; + + snprintf(name_buf, sizeof(name_buf), "energy_resolution:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d energy counter resolution (microJoules)", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_energy_count) != PAPI_OK) + return PAPI_ENOMEM; + + snprintf(name_buf, sizeof(name_buf), "energy_timestamp:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d energy counter timestamp (ns)", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_energy_count) != PAPI_OK) + return PAPI_ENOMEM; + } + /* GPU power profile information */ + for (int d = 0; d < gpu_count; ++d) { + amdsmi_power_profile_status_t profile_status; + if (amdsmi_get_gpu_power_profile_presets_p( + device_handles[d], 0, &profile_status) != AMDSMI_STATUS_SUCCESS) + continue; + snprintf(name_buf, sizeof(name_buf), "power_profiles_count:device=%d", d); + snprintf(descr_buf, 
sizeof(descr_buf), + "Device %d number of supported power profiles", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_power_profile_status) != PAPI_OK) + return PAPI_ENOMEM; + snprintf(name_buf, sizeof(name_buf), "power_profile_current:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d current power profile mask", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_power_profile_status) != PAPI_OK) + return PAPI_ENOMEM; + } + /* GPU violation status metrics */ + if (amdsmi_get_violation_status_p) { + for (int d = 0; d < gpu_count; ++d) { + amdsmi_violation_status_t vinfo; + if (amdsmi_get_violation_status_p(device_handles[d], &vinfo) != + AMDSMI_STATUS_SUCCESS) + continue; + const char *names[] = { + "ppt_pwr_violation_acc", "socket_thrm_violation_acc", + "vr_thrm_violation_acc", "ppt_pwr_violation_pct", + "socket_thrm_violation_pct", "vr_thrm_violation_pct", + "ppt_pwr_violation_active", "socket_thrm_violation_active", + "vr_thrm_violation_active"}; + const char *descr[] = { + "Package power tracking violation count", + "Socket thermal violation count", + "Voltage regulator thermal violation count", + "Package power tracking violation percentage", + "Socket thermal violation percentage", + "Voltage regulator thermal violation percentage", + "Package power tracking violation active flag", + "Socket thermal violation active flag", + "Voltage regulator thermal violation active flag"}; + for (int v = 0; v < 9; ++v) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "%s:device=%d", names[v], d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d %s", d, descr[v]); + if (add_event(&idx, name_buf, descr_buf, d, v, 0, PAPI_MODE_READ, + access_amdsmi_violation_status) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } +#ifndef AMDSMI_DISABLE_ESMI + /* CPU metrics events */ + if (cpu_count > 0) { + // CPU socket-level events + for (int s = 0; s < cpu_count; ++s) { + int 
dev = gpu_count + s; + uint32_t pwr; + if (amdsmi_get_cpu_socket_power_p(device_handles[dev], &pwr) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "power:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), "Socket %d power (W)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_socket_power) != PAPI_OK) + return PAPI_ENOMEM; + } + uint64_t sock_energy; + if (amdsmi_get_cpu_socket_energy_p(device_handles[dev], &sock_energy) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "energy:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d energy consumed (uJ)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_socket_energy) != PAPI_OK) + return PAPI_ENOMEM; + } + uint16_t fmax, fmin; + if (amdsmi_get_cpu_socket_freq_range_p(device_handles[dev], &fmax, + &fmin) == AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "freq_max:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d maximum frequency (MHz)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 1, 0, PAPI_MODE_READ, + access_amdsmi_cpu_socket_freq_range) != PAPI_OK) + return PAPI_ENOMEM; + snprintf(name_buf, sizeof(name_buf), "freq_min:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d minimum frequency (MHz)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_socket_freq_range) != PAPI_OK) + return PAPI_ENOMEM; + } + uint32_t cap; + amdsmi_status_t st_cap = + amdsmi_get_cpu_socket_power_cap_p(device_handles[dev], &cap); + uint32_t cap_max; + amdsmi_status_t st_capmax = + amdsmi_get_cpu_socket_power_cap_max_p(device_handles[dev], &cap_max); + if (st_cap == AMDSMI_STATUS_SUCCESS || + st_capmax == AMDSMI_STATUS_SUCCESS) { + if (st_cap == AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "power_cap:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket 
%d current power cap (W)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_power_cap) != PAPI_OK) + return PAPI_ENOMEM; + } + if (st_capmax == AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "power_cap_max:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d max power cap (W)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 1, 0, PAPI_MODE_READ, + access_amdsmi_cpu_power_cap) != PAPI_OK) + return PAPI_ENOMEM; + } + } + uint16_t freq; + char *src_type = NULL; + if (amdsmi_get_cpu_socket_current_active_freq_limit_p( + device_handles[dev], &freq, &src_type) == AMDSMI_STATUS_SUCCESS) { + if (src_type) + free(src_type); + snprintf(name_buf, sizeof(name_buf), "freq_limit:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d current frequency limit (MHz)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_socket_freq_limit) != PAPI_OK) + return PAPI_ENOMEM; + } + uint32_t cclk; + if (amdsmi_get_cpu_cclk_limit_p && + amdsmi_get_cpu_cclk_limit_p(device_handles[dev], &cclk) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "cclk_limit:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d core clock limit (MHz)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_cclk_limit) != PAPI_OK) + return PAPI_ENOMEM; + } + uint32_t fclk, mclk; + if (amdsmi_get_cpu_fclk_mclk_p && + amdsmi_get_cpu_fclk_mclk_p(device_handles[dev], &fclk, &mclk) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "fclk:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d fclk (MHz)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_fclk_mclk) != PAPI_OK) + return PAPI_ENOMEM; + snprintf(name_buf, sizeof(name_buf), "mclk:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d mclk (MHz)", s); + if 
(add_event(&idx, name_buf, descr_buf, dev, 1, 0, PAPI_MODE_READ, + access_amdsmi_cpu_fclk_mclk) != PAPI_OK) + return PAPI_ENOMEM; + } + amdsmi_ddr_bw_metrics_t ddr_bw; + if (amdsmi_get_cpu_ddr_bw_p && + amdsmi_get_cpu_ddr_bw_p(device_handles[dev], &ddr_bw) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "ddr_bw_max:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d DDR max bandwidth (GB/s)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_ddr_bw) != PAPI_OK) + return PAPI_ENOMEM; + snprintf(name_buf, sizeof(name_buf), "ddr_bw_utilized:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d DDR utilized bandwidth (GB/s)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 1, 0, PAPI_MODE_READ, + access_amdsmi_cpu_ddr_bw) != PAPI_OK) + return PAPI_ENOMEM; + snprintf(name_buf, sizeof(name_buf), + "ddr_bw_utilized_pct:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d DDR bandwidth utilization (pct)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 2, 0, PAPI_MODE_READ, + access_amdsmi_cpu_ddr_bw) != PAPI_OK) + return PAPI_ENOMEM; + } + amdsmi_hsmp_driver_version_t dver; + if (amdsmi_get_cpu_hsmp_driver_version_p && + amdsmi_get_cpu_hsmp_driver_version_p(device_handles[dev], &dver) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), + "hsmp_driver_major:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d HSMP driver major version", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_hsmp_driver_version) != PAPI_OK) + return PAPI_ENOMEM; + snprintf(name_buf, sizeof(name_buf), + "hsmp_driver_minor:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d HSMP driver minor version", s); + if (add_event(&idx, name_buf, descr_buf, dev, 1, 0, PAPI_MODE_READ, + access_amdsmi_cpu_hsmp_driver_version) != PAPI_OK) + return PAPI_ENOMEM; + } + uint32_t proto; + if 
(amdsmi_get_cpu_hsmp_proto_ver_p && + amdsmi_get_cpu_hsmp_proto_ver_p(device_handles[dev], &proto) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), + "hsmp_proto_ver:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d HSMP protocol version", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_hsmp_proto_ver) != PAPI_OK) + return PAPI_ENOMEM; + } + uint32_t prochot; + if (amdsmi_get_cpu_prochot_status_p && + amdsmi_get_cpu_prochot_status_p(device_handles[dev], &prochot) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), + "prochot_status:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d PROCHOT status", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_prochot_status) != PAPI_OK) + return PAPI_ENOMEM; + } + uint32_t svi_power; + if (amdsmi_get_cpu_pwr_svi_telemetry_all_rails_p && + amdsmi_get_cpu_pwr_svi_telemetry_all_rails_p(device_handles[dev], + &svi_power) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "svi_power:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d SVI power (all rails, W)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_svi_power) != PAPI_OK) + return PAPI_ENOMEM; + } + amdsmi_smu_fw_version_t fw; + if (amdsmi_get_cpu_smu_fw_version_p(device_handles[dev], &fw) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "smu_fw_version:socket=%d", s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d SMU firmware version (encoded)", s); + if (add_event(&idx, name_buf, descr_buf, dev, 0, 0, PAPI_MODE_READ, + access_amdsmi_smu_fw_version) != PAPI_OK) + return PAPI_ENOMEM; + } + if (amdsmi_get_cpu_current_io_bandwidth_p) { + const char *links[] = {"P0", "P1", "P2", "P3", "P4"}; + const char *bwnames[] = {"agg", "read", "write"}; + amdsmi_io_bw_encoding_t bw_types[] = {AGG_BW0, RD_BW0, 
WR_BW0}; + for (int l = 0; l < 5; ++l) { + for (int t = 0; t < 3; ++t) { + amdsmi_link_id_bw_type_t link = {bw_types[t], (char *)links[l]}; + uint32_t bw = 0; + if (amdsmi_get_cpu_current_io_bandwidth_p(device_handles[dev], link, + &bw) != + AMDSMI_STATUS_SUCCESS) + continue; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "io_bw_%s_%s:socket=%d", links[l], bwnames[t], s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d IO link %s %s bandwidth (MB/s)", s, + links[l], bwnames[t]); + if (add_event(&idx, name_buf, descr_buf, dev, l, t, PAPI_MODE_READ, + access_amdsmi_cpu_io_bw) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + if (amdsmi_get_cpu_current_xgmi_bw_p) { + const char *links[] = {"G0", "G1", "G2", "G3", + "G4", "G5", "G6", "G7"}; + const char *bwnames[] = {"agg", "read", "write"}; + amdsmi_io_bw_encoding_t bw_types[] = {AGG_BW0, RD_BW0, WR_BW0}; + for (int l = 0; l < 8; ++l) { + for (int t = 0; t < 3; ++t) { + amdsmi_link_id_bw_type_t link = {bw_types[t], (char *)links[l]}; + uint32_t bw = 0; + if (amdsmi_get_cpu_current_xgmi_bw_p(device_handles[dev], link, + &bw) != + AMDSMI_STATUS_SUCCESS) + continue; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "xgmi_bw_%s_%s:socket=%d", links[l], bwnames[t], s); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d XGMI link %s %s bandwidth (MB/s)", s, + links[l], bwnames[t]); + if (add_event(&idx, name_buf, descr_buf, dev, l, t, PAPI_MODE_READ, + access_amdsmi_cpu_xgmi_bw) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + } + // CPU core-level events + for (int s = 0; s < cpu_count; ++s) { + int dev = gpu_count + s; + for (uint32_t c = 0; c < cores_per_socket[s]; ++c) { + uint64_t energy; + if (amdsmi_get_cpu_core_energy_p(cpu_core_handles[s][c], &energy) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "energy:socket=%d:core=%d", s, c); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d Core %d energy (uJ)", s, c); + if (add_event(&idx, name_buf, descr_buf, 
dev, 0, c, PAPI_MODE_READ, + access_amdsmi_cpu_core_energy) != PAPI_OK) + return PAPI_ENOMEM; + } + uint32_t freq; + if (amdsmi_get_cpu_core_current_freq_limit_p( + cpu_core_handles[s][c], &freq) == AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "freq_limit:socket=%d:core=%d", + s, c); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d Core %d frequency limit (MHz)", s, c); + if (add_event(&idx, name_buf, descr_buf, dev, 0, c, PAPI_MODE_READ, + access_amdsmi_cpu_core_freq_limit) != PAPI_OK) + return PAPI_ENOMEM; + } + uint32_t boost; + if (amdsmi_get_cpu_core_boostlimit_p(cpu_core_handles[s][c], &boost) == + AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "boostlimit:socket=%d:core=%d", + s, c); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d Core %d boost limit (MHz)", s, c); + if (add_event(&idx, name_buf, descr_buf, dev, 0, c, PAPI_MODE_READ, + access_amdsmi_cpu_core_boostlimit) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + // CPU DIMM events + for (int s = 0; s < cpu_count; ++s) { + int dev = gpu_count + s; + for (uint8_t dimm = 0; dimm < 16; ++dimm) { + amdsmi_dimm_thermal_t dimm_temp; + amdsmi_dimm_power_t dimm_pow; + amdsmi_temp_range_refresh_rate_t range_info; + amdsmi_status_t st_temp = amdsmi_get_cpu_dimm_thermal_sensor_p( + device_handles[dev], dimm, &dimm_temp); + amdsmi_status_t st_power = amdsmi_get_cpu_dimm_power_consumption_p( + device_handles[dev], dimm, &dimm_pow); + amdsmi_status_t st_range = + amdsmi_get_cpu_dimm_temp_range_and_refresh_rate_p( + device_handles[dev], dimm, &range_info); + if (st_temp != AMDSMI_STATUS_SUCCESS && + st_power != AMDSMI_STATUS_SUCCESS && + st_range != AMDSMI_STATUS_SUCCESS) + continue; + if (st_temp == AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "dimm_temp:socket=%d:dimm=%d", s, + dimm); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d DIMM %d temperature (C)", s, dimm); + if (add_event(&idx, name_buf, descr_buf, dev, 0, dimm, PAPI_MODE_READ, + 
access_amdsmi_dimm_temp) != PAPI_OK) + return PAPI_ENOMEM; + } + if (st_power == AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "dimm_power:socket=%d:dimm=%d", + s, dimm); + snprintf(descr_buf, sizeof(descr_buf), "Socket %d DIMM %d power (mW)", + s, dimm); + if (add_event(&idx, name_buf, descr_buf, dev, 0, dimm, PAPI_MODE_READ, + access_amdsmi_dimm_power) != PAPI_OK) + return PAPI_ENOMEM; + } + if (st_range == AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), + "dimm_temp_range:socket=%d:dimm=%d", s, dimm); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d DIMM %d temperature range", s, dimm); + if (add_event(&idx, name_buf, descr_buf, dev, 0, dimm, PAPI_MODE_READ, + access_amdsmi_dimm_range_refresh) != PAPI_OK) + return PAPI_ENOMEM; + snprintf(name_buf, sizeof(name_buf), + "dimm_refresh_rate:socket=%d:dimm=%d", s, dimm); + snprintf(descr_buf, sizeof(descr_buf), + "Socket %d DIMM %d refresh rate mode", s, dimm); + if (add_event(&idx, name_buf, descr_buf, dev, 1, dimm, PAPI_MODE_READ, + access_amdsmi_dimm_range_refresh) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + // System-wide CPU events + uint32_t threads; + if (amdsmi_get_threads_per_core_p(&threads) == AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "threads_per_core"); + snprintf(descr_buf, sizeof(descr_buf), "SMT threads per core"); + if (add_event(&idx, name_buf, descr_buf, -1, 0, 0, PAPI_MODE_READ, + access_amdsmi_threads_per_core) != PAPI_OK) + return PAPI_ENOMEM; + } + uint32_t family; + if (amdsmi_get_cpu_family_p(&family) == AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "cpu_family"); + snprintf(descr_buf, sizeof(descr_buf), "CPU family ID"); + if (add_event(&idx, name_buf, descr_buf, -1, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_family) != PAPI_OK) + return PAPI_ENOMEM; + } + uint32_t model; + if (amdsmi_get_cpu_model_p(&model) == AMDSMI_STATUS_SUCCESS) { + snprintf(name_buf, sizeof(name_buf), "cpu_model"); + snprintf(descr_buf, 
sizeof(descr_buf), "CPU model ID"); + if (add_event(&idx, name_buf, descr_buf, -1, 0, 0, PAPI_MODE_READ, + access_amdsmi_cpu_model) != PAPI_OK) + return PAPI_ENOMEM; + } + } +#endif + + /* -------- Additional GPU discovery & version info (read-only) -------- */ + /* Library version (global) */ + if (amdsmi_get_lib_version_p) { + amdsmi_version_t vinfo; + if (amdsmi_get_lib_version_p(&vinfo) == AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "lib_version_major"); + snprintf(descr_buf, sizeof(descr_buf), "AMD SMI library major version"); + if (add_event(&idx, name_buf, descr_buf, -1, 0, 0, PAPI_MODE_READ, + access_amdsmi_lib_version) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "lib_version_minor"); + snprintf(descr_buf, sizeof(descr_buf), "AMD SMI library minor version"); + if (add_event(&idx, name_buf, descr_buf, -1, 1, 0, PAPI_MODE_READ, + access_amdsmi_lib_version) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "lib_version_release"); + snprintf(descr_buf, sizeof(descr_buf), + "AMD SMI library release/patch version"); + if (add_event(&idx, name_buf, descr_buf, -1, 2, 0, PAPI_MODE_READ, + access_amdsmi_lib_version) != PAPI_OK) + return PAPI_ENOMEM; + } + } + for (int d = 0; d < gpu_count; ++d) { + if (!device_handles || !device_handles[d]) + continue; + /* Device UUID (hash) */ + if (amdsmi_get_gpu_device_uuid_p) { + unsigned int uuid_len = 0; + amdsmi_status_t st = + amdsmi_get_gpu_device_uuid_p(device_handles[d], &uuid_len, NULL); + /* Some builds require preflight to get length; we just attempt a fixed buffer */ + char uuid_buf[128]; + uuid_len = sizeof(uuid_buf); + st = amdsmi_get_gpu_device_uuid_p(device_handles[d], &uuid_len, uuid_buf); + if (st == AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "uuid_hash:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d UUID 
(djb2 64-bit hash)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_uuid_hash) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "uuid_length:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d UUID length", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_uuid_hash) != PAPI_OK) + return PAPI_ENOMEM; + } + } + /* Vendor / VRAM vendor / Subsystem name (hash) */ + if (amdsmi_get_gpu_vendor_name_p) { + char tmp[256] = {0}; + if (amdsmi_get_gpu_vendor_name_p(device_handles[d], tmp, sizeof(tmp)) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "vendor_name_hash:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d vendor name (hash)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_gpu_string_hash) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + if (amdsmi_get_gpu_vram_vendor_p) { + char tmp[256] = {0}; + if (amdsmi_get_gpu_vram_vendor_p(device_handles[d], tmp, + (uint32_t)sizeof(tmp)) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "vram_vendor_hash:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d VRAM vendor (hash)", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_gpu_string_hash) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + if (amdsmi_get_gpu_subsystem_name_p) { + char tmp[256] = {0}; + if (amdsmi_get_gpu_subsystem_name_p(device_handles[d], tmp, sizeof(tmp)) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "subsystem_name_hash:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d subsystem name (hash)", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_gpu_string_hash) != PAPI_OK) + return PAPI_ENOMEM; + } + } + + /* Enumeration info 
(drm render/card, hsa/hip ids) */ +#if AMDSMI_LIB_VERSION_MAJOR >= 25 + if (amdsmi_lib_major >= 25 && amdsmi_get_gpu_enumeration_info_p) { + amdsmi_enumeration_info_t einfo; + if (amdsmi_get_gpu_enumeration_info_p(device_handles[d], &einfo) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "enum_drm_render:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d DRM render node", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_enumeration_info) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "enum_drm_card:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d DRM card index", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_enumeration_info) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "enum_hsa_id:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d HSA ID", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_enumeration_info) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "enum_hip_id:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d HIP ID", d); + if (add_event(&idx, name_buf, descr_buf, d, 3, 0, PAPI_MODE_READ, + access_amdsmi_enumeration_info) != PAPI_OK) + return PAPI_ENOMEM; + } + } +#endif + /* ASIC info (numeric IDs & CU count) */ + if (amdsmi_get_gpu_asic_info_p) { + amdsmi_asic_info_t ainfo; + if (amdsmi_get_gpu_asic_info_p(device_handles[d], &ainfo) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "asic_vendor_id:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d ASIC vendor id", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_asic_info) != PAPI_OK) + return PAPI_ENOSUPP; + + 
CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "asic_device_id:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d ASIC device id", d); + if (add_event(&idx, name_buf, descr_buf, d, 1, 0, PAPI_MODE_READ, + access_amdsmi_asic_info) != PAPI_OK) + return PAPI_ENOSUPP; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "asic_subsystem_vendor_id:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d ASIC subsystem vendor id", d); + if (add_event(&idx, name_buf, descr_buf, d, 2, 0, PAPI_MODE_READ, + access_amdsmi_asic_info) != PAPI_OK) + return PAPI_ENOSUPP; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "asic_subsystem_id:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d ASIC subsystem id", d); + if (add_event(&idx, name_buf, descr_buf, d, 3, 0, PAPI_MODE_READ, + access_amdsmi_asic_info) != PAPI_OK) + return PAPI_ENOSUPP; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "asic_revision:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d ASIC revision id", d); + if (add_event(&idx, name_buf, descr_buf, d, 4, 0, PAPI_MODE_READ, + access_amdsmi_asic_info) != PAPI_OK) + return PAPI_ENOSUPP; + + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "compute_units:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d number of compute units", d); + if (add_event(&idx, name_buf, descr_buf, d, 5, 0, PAPI_MODE_READ, + access_amdsmi_asic_info) != PAPI_OK) + return PAPI_ENOSUPP; + } + } + if (amdsmi_get_gpu_compute_partition_p) { + char part[128] = {0}; + if (amdsmi_get_gpu_compute_partition_p(device_handles[d], part, + sizeof(part)) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "compute_partition_hash:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d compute partition (hash)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + 
access_amdsmi_compute_partition_hash) != PAPI_OK) + return PAPI_ENOMEM; + } + } + if (amdsmi_get_gpu_memory_partition_p) { + char part[128] = {0}; + uint32_t len = (uint32_t)sizeof(part); + amdsmi_status_t status = + amdsmi_get_gpu_memory_partition_p(device_handles[d], part, len); + part[sizeof(part) - 1] = '\0'; // belt-and-suspenders NUL + if (status == AMDSMI_STATUS_SUCCESS && part[0] != '\0') { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "memory_partition_hash:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d memory partition (hash)", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_memory_partition_hash) != PAPI_OK) + return PAPI_ENOMEM; + } + } + /* +#if AMDSMI_LIB_VERSION_MAJOR >= 25 + if (amdsmi_get_gpu_memory_partition_config_p) { + amdsmi_memory_partition_config_t cfg = {0}; + // Probe memory partition configuration + if (amdsmi_get_gpu_memory_partition_config_p(device_handles[d], &cfg) == + AMDSMI_STATUS_SUCCESS) { + const char *mpc_names[] = {"memory_partition_caps", + "memory_partition_mode", + "memory_partition_numa_count"}; + const char *mpc_descr[] = {"Device %d memory partition capabilities", + "Device %d memory partition mode", + "Device %d NUMA range count"}; + for (uint32_t v = 0; v < 3; ++v) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "%s:device=%d", mpc_names[v], d); + snprintf(descr_buf, sizeof(descr_buf), mpc_descr[v], d); + if (add_event(&idx, name_buf, descr_buf, d, v, 0, PAPI_MODE_READ, + access_amdsmi_memory_partition_config) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } +#endif + if (amdsmi_get_gpu_accelerator_partition_profile_p) { + amdsmi_accelerator_partition_profile_t prof = {0}; + uint32_t ids[AMDSMI_MAX_ACCELERATOR_PARTITIONS] = {0}; + amdsmi_status_t status = + amdsmi_get_gpu_accelerator_partition_profile_p(device_handles[d], &prof, ids); + if (status == AMDSMI_STATUS_SUCCESS && + prof.num_partitions > 0 && + prof.num_partitions <= 
AMDSMI_MAX_ACCELERATOR_PARTITIONS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "accelerator_num_partitions:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d accelerator partition count", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_accelerator_num_partitions) != PAPI_OK) + return PAPI_ENOMEM; + } + } + */ + /* Driver info (strings hashed) */ + if (amdsmi_get_gpu_driver_info_p) { + amdsmi_driver_info_t dinfo = {0}; + if (amdsmi_get_gpu_driver_info_p(device_handles[d], &dinfo) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "driver_name_hash:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d driver name (hash)", d); + if (add_event(&idx, name_buf, descr_buf, d, 3, 0, PAPI_MODE_READ, + access_amdsmi_gpu_string_hash) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "driver_date_hash:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d driver date (hash)", d); + if (add_event(&idx, name_buf, descr_buf, d, 4, 0, PAPI_MODE_READ, + access_amdsmi_gpu_string_hash) != PAPI_OK) + return PAPI_ENOMEM; + } + } + /* VBIOS info (strings hashed) */ + // (vBIOS events omitted) + if (amdsmi_get_link_metrics_p) { + amdsmi_link_metrics_t lm; + if (amdsmi_get_link_metrics_p(device_handles[d], &lm) == + AMDSMI_STATUS_SUCCESS) { + int types[] = {AMDSMI_LINK_TYPE_XGMI, AMDSMI_LINK_TYPE_PCIE}; + const char *type_names[] = {"xgmi", "pcie"}; + for (int ti = 0; ti < 2; ++ti) { + uint32_t link_type = (uint32_t)types[ti]; + uint32_t sv = (link_type << 16) | 0xFFFF; + int present = 0; + uint32_t n = lm.num_links; + if (n > AMDSMI_MAX_NUM_XGMI_PHYSICAL_LINK) + n = AMDSMI_MAX_NUM_XGMI_PHYSICAL_LINK; + for (uint32_t li = 0; li < n; ++li) { + if (lm.links[li].link_type == link_type) { + present = 1; + break; + } + } + if (!present) + continue; + const char *mnames[] = {"read_kb", "write_kb", 
"bit_rate", + "max_bandwidth"}; + const char *mdescr[] = {"read throughput (KB)", + "write throughput (KB)", + "link bit rate (Gb/s)", + "max bandwidth (Gb/s)"}; + for (uint32_t v = 0; v < 4; ++v) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "%s_%s:device=%d", + type_names[ti], mnames[v], d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d %s %s", d, + type_names[ti], mdescr[v]); + if (add_event(&idx, name_buf, descr_buf, d, v, sv, PAPI_MODE_READ, + access_amdsmi_link_metrics) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } + } +#if AMDSMI_LIB_VERSION_MAJOR >= 25 + if (amdsmi_get_gpu_xgmi_link_status_p) { + amdsmi_xgmi_link_status_t st; + if (amdsmi_get_gpu_xgmi_link_status_p(device_handles[d], &st) == + AMDSMI_STATUS_SUCCESS) { + uint32_t n = st.total_links; + if (n > AMDSMI_MAX_NUM_XGMI_LINKS) + n = AMDSMI_MAX_NUM_XGMI_LINKS; + for (uint32_t li = 0; li < n; ++li) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "xgmi_link_status:device=%d:link=%u", d, li); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d XGMI link %u status", d, li); + if (add_event(&idx, name_buf, descr_buf, d, 0, li, PAPI_MODE_READ, + access_amdsmi_xgmi_link_status) != PAPI_OK) + return PAPI_ENOMEM; + } + } + } +#endif + if (amdsmi_gpu_xgmi_error_status_p) { + amdsmi_xgmi_status_t st; + if (amdsmi_gpu_xgmi_error_status_p(device_handles[d], &st) == + AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "xgmi_error_status:device=%d", d); + snprintf(descr_buf, sizeof(descr_buf), "Device %d XGMI error status", d); + if (add_event(&idx, name_buf, descr_buf, d, 0, 0, PAPI_MODE_READ, + access_amdsmi_xgmi_error_status) != PAPI_OK) + return PAPI_ENOMEM; + } + } + if (amdsmi_get_link_topology_nearest_p) { + amdsmi_link_type_t lt_types[] = {AMDSMI_LINK_TYPE_XGMI, + AMDSMI_LINK_TYPE_PCIE}; + const char *lt_names[] = {"xgmi", "pcie"}; + for (int ti = 0; ti < 2; ++ti) { + amdsmi_topology_nearest_t info; + memset(&info, 0, 
sizeof(info)); + if (amdsmi_get_link_topology_nearest_p(device_handles[d], lt_types[ti], + &info) == AMDSMI_STATUS_SUCCESS) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "%s_nearest_count:device=%d", + lt_names[ti], d); + snprintf(descr_buf, sizeof(descr_buf), + "Device %d %s nearest GPU count", d, lt_names[ti]); + if (add_event(&idx, name_buf, descr_buf, d, (uint32_t)lt_types[ti], 0, + PAPI_MODE_READ, access_amdsmi_link_topology_nearest) != + PAPI_OK) + return PAPI_ENOMEM; + } + } + } + for (int p = 0; p < gpu_count; ++p) { + if (p == d) + continue; + if (amdsmi_topo_get_link_weight_p) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "link_weight:device=%d,peer=%d", d, p); + snprintf(descr_buf, sizeof(descr_buf), + "Link weight between device %d and %d", d, p); + if (add_event(&idx, name_buf, descr_buf, d, 0, p, PAPI_MODE_READ, + access_amdsmi_link_weight) != PAPI_OK) + return PAPI_ENOMEM; + } + if (amdsmi_topo_get_link_type_p) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "link_hops:device=%d,peer=%d", d, p); + snprintf(descr_buf, sizeof(descr_buf), + "Hops between device %d and %d", d, p); + if (add_event(&idx, name_buf, descr_buf, d, 0, p, PAPI_MODE_READ, + access_amdsmi_link_type) != PAPI_OK) + return PAPI_ENOMEM; + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "link_type:device=%d,peer=%d", d, p); + snprintf(descr_buf, sizeof(descr_buf), + "IO link type between device %d and %d", d, p); + if (add_event(&idx, name_buf, descr_buf, d, 1, p, PAPI_MODE_READ, + access_amdsmi_link_type) != PAPI_OK) + return PAPI_ENOMEM; + } + /* + if (amdsmi_topo_get_p2p_status_p) { + const char *p2p_names[] = {"p2p_type", "p2p_coherent", + "p2p_atomics32", "p2p_atomics64", + "p2p_dma", "p2p_bidir"}; + const char *p2p_desc[] = { + "P2P IO link type", "P2P coherent support", + "P2P 32-bit atomics", "P2P 64-bit atomics", + "P2P DMA support", "P2P bidirectional support"}; + for (int v = 0; v < 6; ++v) { + 
CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), "%s:device=%d,peer=%d", + p2p_names[v], d, p); + snprintf(descr_buf, sizeof(descr_buf), "Device %d vs %d %s", d, p, + p2p_desc[v]); + if (add_event(&idx, name_buf, descr_buf, d, v, p, PAPI_MODE_READ, + access_amdsmi_p2p_status) != PAPI_OK) + return PAPI_ENOMEM; + } + } + if (amdsmi_is_P2P_accessible_p) { + CHECK_EVENT_IDX(idx); + snprintf(name_buf, sizeof(name_buf), + "p2p_accessible:device=%d,peer=%d", d, p); + snprintf(descr_buf, sizeof(descr_buf), + "P2P accessibility between device %d and %d", d, p); + if (add_event(&idx, name_buf, descr_buf, d, 0, p, PAPI_MODE_READ, + access_amdsmi_p2p_accessible) != PAPI_OK) + return PAPI_ENOMEM; + } + */ + } + } + ntv_table.count = idx; + return PAPI_OK; +} + diff --git a/src/components/amd_smi/amds.h b/src/components/amd_smi/amds.h new file mode 100644 index 000000000..5f5bef584 --- /dev/null +++ b/src/components/amd_smi/amds.h @@ -0,0 +1,38 @@ +/** + * @file amds.h + * @author Dong Jun Woun + * djwoun@gmail.com + * + */ + +#ifndef __AMDS_H__ +#define __AMDS_H__ + +#define AMDS_EVENTS_OPENED (0x1) +#define AMDS_EVENTS_RUNNING (0x2) + +typedef struct amds_ctx *amds_ctx_t; + +/* initialization and shutdown */ +int amds_init(void); +int amds_shutdown(void); + +/* native event queries */ +int amds_evt_enum(unsigned int *EventCode, int modifier); +int amds_evt_code_to_descr(unsigned int EventCode, char *descr, int len); +int amds_evt_name_to_code(const char *name, unsigned int *EventCode); +int amds_evt_code_to_name(unsigned int EventCode, char *name, int len); + +/* error handling */ +int amds_err_get_last(const char **err_string); + +/* profiling context operations */ +int amds_ctx_open(unsigned int *event_ids, int num_events, amds_ctx_t *ctx); +int amds_ctx_close(amds_ctx_t ctx); +int amds_ctx_start(amds_ctx_t ctx); +int amds_ctx_stop(amds_ctx_t ctx); +int amds_ctx_read(amds_ctx_t ctx, long long **counts); +int amds_ctx_write(amds_ctx_t ctx, long long *counts); +int 
amds_ctx_reset(amds_ctx_t ctx);
+
+#endif /* __AMDS_H__ */
diff --git a/src/components/amd_smi/amds_accessors.c b/src/components/amd_smi/amds_accessors.c
new file mode 100644
index 000000000..cbd86e848
--- /dev/null
+++ b/src/components/amd_smi/amds_accessors.c
@@ -0,0 +1,2843 @@
+/**
+ * @file amds_accessors.c
+ * @author Dong Jun Woun
+ * djwoun@gmail.com
+ *
+ */
+
+#include "amds_priv.h"
+#include "papi.h"
+#include "papi_memory.h"
+#include
+#include
+#include
+#include
+/* -------- Helpers and new accessors (GPU read-only additions) -------- */
+/*
+ * Accessor convention used by every access_amdsmi_* function below:
+ *   mode - only PAPI_MODE_READ is accepted; anything else is PAPI_ENOSUPP.
+ *   arg  - the native_event_t being read; `device` selects the entry of
+ *          device_handles, `variant`/`subvariant` select the metric, and the
+ *          result is stored in `value`.
+ * A missing AMD SMI symbol (NULL function pointer) yields PAPI_ENOSUPP; an
+ * out-of-range device index, a NULL device handle or a failed library call
+ * yields PAPI_EMISC.
+ */
+
+/* Hashes a NUL-terminated string with the 64-bit djb2 recurrence
+ * (hash = hash * 33 + byte, seeded with 5381). A NULL pointer hashes to 0. */
+static uint64_t _str_to_u64_hash(const char *s) {
+    /* djb2 64-bit */
+    uint64_t hash = 5381;
+    if (!s)
+        return 0;
+    int c;
+    while ((c = *s++)) {
+        /* The uint8_t cast keeps bytes >= 0x80 from being added as negatives. */
+        hash = ((hash << 5) + hash) + (uint8_t)c;
+    }
+    return hash;
+}
+/* Reads the AMD SMI library version: variant 0 = major, 1 = minor,
+ * 2 = release. Unknown variants return PAPI_EMISC. */
+int access_amdsmi_lib_version(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ)
+        return PAPI_ENOSUPP;
+    if (!amdsmi_get_lib_version_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    amdsmi_version_t vinfo;
+    memset(&vinfo, 0, sizeof(vinfo));
+    amdsmi_status_t st = amdsmi_get_lib_version_p(&vinfo);
+    if (st != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    switch (event->variant) {
+    case 0:
+        event->value = (int64_t)vinfo.major;
+        break;
+    case 1:
+        event->value = (int64_t)vinfo.minor;
+        break;
+    case 2:
+        event->value = (int64_t)vinfo.release;
+        break;
+    default:
+        return PAPI_EMISC;
+    }
+    return PAPI_OK;
+}
+/* Reads the device UUID: variant 0 = djb2 hash of the UUID string,
+ * variant 1 = the length value the library writes back. Unknown variants
+ * return PAPI_EMISC. */
+int access_amdsmi_uuid_hash(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ)
+        return PAPI_ENOSUPP;
+    if (!amdsmi_get_gpu_device_uuid_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count || !device_handles[event->device])
+        return PAPI_EMISC;
+    char buf[128] = {0};
+    unsigned int len = sizeof(buf);
+    amdsmi_status_t st = amdsmi_get_gpu_device_uuid_p(device_handles[event->device], &len, buf);
+    if (st != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    switch (event->variant) {
+    case 0: /* hash */
+        event->value = (int64_t)_str_to_u64_hash(buf);
+        break;
+    case 1: /* length */
+        event->value = (int64_t)len;
+        break;
+    default:
+        return PAPI_EMISC;
+    }
+    return PAPI_OK;
+}
+/* Reads a device string and reports its djb2 hash. The variant selects the
+ * string: 0 vendor name, 1 VRAM vendor, 2 subsystem name, 3 driver name,
+ * 4 driver date, 5 VBIOS version, 6 VBIOS part number, 7 VBIOS build date.
+ * Unknown variants return PAPI_ENOSUPP. */
+int access_amdsmi_gpu_string_hash(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count || !device_handles[event->device])
+        return PAPI_EMISC;
+    /* Zero-filled so the strncpy calls below (size - 1) always leave a NUL. */
+    char buf[256] = {0};
+    amdsmi_status_t st = AMDSMI_STATUS_NOT_SUPPORTED;
+    switch (event->variant) {
+    case 0: /* vendor name */
+        if (!amdsmi_get_gpu_vendor_name_p)
+            return PAPI_ENOSUPP;
+        st = amdsmi_get_gpu_vendor_name_p(device_handles[event->device], buf, sizeof(buf));
+        break;
+    case 1: /* vram vendor */
+        if (!amdsmi_get_gpu_vram_vendor_p)
+            return PAPI_ENOSUPP;
+        st = amdsmi_get_gpu_vram_vendor_p(device_handles[event->device], buf, sizeof(buf));
+        break;
+    case 2: /* subsystem name */
+        if (!amdsmi_get_gpu_subsystem_name_p)
+            return PAPI_ENOSUPP;
+        st = amdsmi_get_gpu_subsystem_name_p(device_handles[event->device], buf, sizeof(buf));
+        break;
+    case 3: /* driver name */
+    case 4: /* driver date */
+        if (!amdsmi_get_gpu_driver_info_p)
+            return PAPI_ENOSUPP;
+        {
+            amdsmi_driver_info_t dinfo;
+            memset(&dinfo, 0, sizeof(dinfo));
+            st = amdsmi_get_gpu_driver_info_p(device_handles[event->device], &dinfo);
+            if (st == AMDSMI_STATUS_SUCCESS) {
+                if (event->variant == 3)
+                    strncpy(buf, dinfo.driver_name, sizeof(buf) - 1);
+                else
+                    strncpy(buf, dinfo.driver_date, sizeof(buf) - 1);
+            }
+        }
+        break;
+    case 5: /* vbios version */
+    case 6: /* vbios part number */
+    case 7: /* vbios build date */
+        if (!amdsmi_get_gpu_vbios_info_p)
+            return PAPI_ENOSUPP;
+        {
+            amdsmi_vbios_info_t vb;
+            memset(&vb, 0, sizeof(vb));
+            st = amdsmi_get_gpu_vbios_info_p(device_handles[event->device], &vb);
+            if (st == AMDSMI_STATUS_SUCCESS) {
+                if (event->variant == 5)
+                    strncpy(buf, vb.version, sizeof(buf) - 1);
+                else if (event->variant == 6)
+                    strncpy(buf, vb.part_number, sizeof(buf) - 1);
+                else
+                    strncpy(buf, vb.build_date, sizeof(buf) - 1);
+            }
+        }
+        break;
+    default:
+        return PAPI_ENOSUPP;
+    }
+    if (st != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    event->value = (int64_t)_str_to_u64_hash(buf);
+    return PAPI_OK;
+}
+#if AMDSMI_LIB_VERSION_MAJOR >= 25
+/* Reads enumeration info: variant 0 = drm_render, 1 = drm_card, 2 = hsa_id,
+ * 3 = hip_id. Besides the compile-time guard, the call is refused at run
+ * time when amdsmi_lib_major is below 25. Unknown variants return
+ * PAPI_EMISC. */
+int access_amdsmi_enumeration_info(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ)
+        return PAPI_ENOSUPP;
+    if (amdsmi_lib_major < 25 || !amdsmi_get_gpu_enumeration_info_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count || !device_handles[event->device])
+        return PAPI_EMISC;
+    amdsmi_enumeration_info_t info;
+    memset(&info, 0, sizeof(info));
+    amdsmi_status_t st = amdsmi_get_gpu_enumeration_info_p(device_handles[event->device], &info);
+    if (st != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    switch (event->variant) {
+    case 0:
+        event->value = (int64_t)info.drm_render;
+        break;
+    case 1:
+        event->value = (int64_t)info.drm_card;
+        break;
+    case 2:
+        event->value = (int64_t)info.hsa_id;
+        break;
+    case 3:
+        event->value = (int64_t)info.hip_id;
+        break;
+    default:
+        return PAPI_EMISC;
+    }
+    return PAPI_OK;
+}
+#endif
+/* Reads ASIC info: variant 0 = vendor_id, 1 = device_id, 2 = subvendor_id,
+ * 3 = subsystem id (always reported as 0, see below), 4 = rev_id,
+ * 5 = num_of_compute_units. Unknown variants return PAPI_EMISC. */
+int access_amdsmi_asic_info(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ)
+        return PAPI_ENOSUPP;
+    if (!amdsmi_get_gpu_asic_info_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count || !device_handles[event->device])
+        return PAPI_EMISC;
+    amdsmi_asic_info_t info;
+    memset(&info, 0, sizeof(info));
+    amdsmi_status_t st = amdsmi_get_gpu_asic_info_p(device_handles[event->device], &info);
+    if (st != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    switch (event->variant) {
+    case 0:
+        event->value = (int64_t)info.vendor_id;
+        break;
+    case 1:
+        event->value = (int64_t)info.device_id;
+        break;
+    case 2:
+        event->value = (int64_t)info.subvendor_id;
+        break;
+    case 3:
+        event->value = (int64_t)0 /* not provided in amdsmi_asic_info_t */;
+        break;
+    case 4:
+        event->value = (int64_t)info.rev_id;
+        break;
+    case 5:
+        event->value = (int64_t)info.num_of_compute_units;
+        break;
+    default:
+        return PAPI_EMISC;
+    }
+    return PAPI_OK;
+}
+/* Picks the metric selected by event->variant from link `idx` of `lm`:
+ * 0 = read (KB), 1 = write (KB), 2 = bit_rate (Gb/s), 3 = max_bandwidth
+ * (Gb/s). On success the value is stored in *out and PAPI_OK is returned;
+ * any other variant returns PAPI_ENOSUPP and leaves *out untouched.
+ * The caller must pass an idx that lies inside lm->links. */
+static int _link_metric_field(const amdsmi_link_metrics_t *lm, uint32_t idx,
+                              const native_event_t *event, uint64_t *out) {
+    switch (event->variant) {
+    case 0:
+        *out = lm->links[idx].read; /* KB */
+        break;
+    case 1:
+        *out = lm->links[idx].write; /* KB */
+        break;
+    case 2:
+        *out = lm->links[idx].bit_rate; /* Gb/s */
+        break;
+    case 3:
+        *out = lm->links[idx].max_bandwidth; /* Gb/s */
+        break;
+    default:
+        return PAPI_ENOSUPP;
+    }
+    return PAPI_OK;
+}
+/* Reads a link metric (see _link_metric_field for the variants).
+ * event->subvariant packs the selection: the high 16 bits hold the link type
+ * to match (0 matches every type) and the low 16 bits hold the link index,
+ * where 0xFFFF means "sum over all matching links". The result saturates at
+ * INT64_MAX.
+ * Returns PAPI_ENOSUPP for an unknown variant (now also when no link
+ * matches, which previously reported PAPI_OK with value 0) and PAPI_EMISC
+ * when a specific link index is out of range or of a different type. */
+int access_amdsmi_link_metrics(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_link_metrics_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+
+    amdsmi_link_metrics_t lm;
+    memset(&lm, 0, sizeof(lm));
+    if (amdsmi_get_link_metrics_p(device_handles[event->device], &lm) !=
+        AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+
+    /* Never trust num_links beyond the size of the links array. */
+    uint32_t count = lm.num_links;
+    if (count > AMDSMI_MAX_NUM_XGMI_PHYSICAL_LINK)
+        count = AMDSMI_MAX_NUM_XGMI_PHYSICAL_LINK;
+
+    uint32_t enc = event->subvariant;
+    uint32_t link_type = enc >> 16;
+    uint32_t link_index = enc & 0xFFFF; /* 0xFFFF aggregates all links */
+
+    /* Validate the variant once, independent of how many links match, by
+     * probing slot 0 (it exists even when num_links is 0 because lm is a
+     * fixed-size, zero-initialised structure). */
+    uint64_t field = 0;
+    if (_link_metric_field(&lm, 0, event, &field) != PAPI_OK)
+        return PAPI_ENOSUPP;
+
+    uint64_t total = 0;
+    if (link_index == 0xFFFF) {
+        for (uint32_t i = 0; i < count; ++i) {
+            if (link_type && lm.links[i].link_type != link_type)
+                continue;
+            (void)_link_metric_field(&lm, i, event, &field);
+            total += field;
+        }
+    } else {
+        if (link_index >= count)
+            return PAPI_EMISC;
+        if (link_type && lm.links[link_index].link_type != link_type)
+            return PAPI_EMISC;
+        (void)_link_metric_field(&lm, link_index, event, &total);
+    }
+
+    if (total > (uint64_t)INT64_MAX)
+        total = (uint64_t)INT64_MAX;
+    event->value = (int64_t)total;
+    return PAPI_OK;
+}
+
+#if AMDSMI_LIB_VERSION_MAJOR >= 25
+/* Reads the status of one XGMI link; event->subvariant is the link index.
+ * Returns PAPI_EMISC when the index is not below both total_links and
+ * AMDSMI_MAX_NUM_XGMI_LINKS. */
+int access_amdsmi_xgmi_link_status(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_xgmi_link_status_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    amdsmi_xgmi_link_status_t st;
+    memset(&st, 0, sizeof(st));
+    if (amdsmi_get_gpu_xgmi_link_status_p(device_handles[event->device], &st) !=
+        AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    uint32_t li = (uint32_t)event->subvariant;
+    if (li >= st.total_links || li >= AMDSMI_MAX_NUM_XGMI_LINKS)
+        return PAPI_EMISC;
+    event->value = (int64_t)st.status[li];
+    return PAPI_OK;
+}
+#endif
+
+/* Reads the XGMI error status of the device as an integer. */
+int access_amdsmi_xgmi_error_status(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_gpu_xgmi_error_status_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    amdsmi_xgmi_status_t st;
+    if (amdsmi_gpu_xgmi_error_status_p(device_handles[event->device], &st) !=
+        AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    event->value = (int64_t)st;
+    return PAPI_OK;
+}
+
+/* Reads the link weight between event->device (source) and the peer device
+ * index stored in event->subvariant. Both indices must be valid and
+ * different, otherwise PAPI_EMISC. The value saturates at INT64_MAX. */
+int access_amdsmi_link_weight(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_topo_get_link_weight_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    int src = event->device;
+    int dst = (int)event->subvariant;
+    if (src < 0 || src >= device_count || dst < 0 || dst >= device_count ||
+        !device_handles[src] || !device_handles[dst] || src == dst)
+        return PAPI_EMISC;
+    uint64_t weight = 0;
+    if (amdsmi_topo_get_link_weight_p(device_handles[src],
+                                      device_handles[dst], &weight) !=
+        AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    if (weight > (uint64_t)INT64_MAX)
+        weight = (uint64_t)INT64_MAX;
+    event->value = (int64_t)weight;
+    return PAPI_OK;
+}
+
+/* Reads the topology link between event->device and the peer in
+ * event->subvariant: variant 0 = hop count (saturated at INT64_MAX),
+ * variant 1 = IO link type. Unknown variants return PAPI_ENOSUPP. */
+int access_amdsmi_link_type(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_topo_get_link_type_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    int src = event->device;
+    int dst = (int)event->subvariant;
+    if (src < 0 || src >= device_count || dst < 0 || dst >= device_count ||
+        !device_handles[src] || !device_handles[dst] || src == dst)
+        return PAPI_EMISC;
+    uint64_t hops = 0;
+    amdsmi_io_link_type_t type;
+    if (amdsmi_topo_get_link_type_p(device_handles[src], device_handles[dst],
+                                    &hops, &type) != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    if (event->variant == 0) {
+        if (hops > (uint64_t)INT64_MAX)
+            hops = (uint64_t)INT64_MAX;
+        event->value = (int64_t)hops;
+    } else if (event->variant == 1) {
+        event->value = (int64_t)type;
+    } else {
+        return PAPI_ENOSUPP;
+    }
+    return PAPI_OK;
+}
+
+/* Reads P2P capabilities between event->device and the peer in
+ * event->subvariant: variant 0 = IO link type, 1 = coherent,
+ * 2 = 32-bit atomics, 3 = 64-bit atomics, 4 = DMA, 5 = bidirectional.
+ * The detailed status call is only issued for pairs the accessibility
+ * predicate reports as accessible; all other pairs report 0 (or, for
+ * variant 0, the type returned by the link-type query when that works). */
+int access_amdsmi_p2p_status(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_topo_get_p2p_status_p)
+        return PAPI_ENOSUPP;
+
+    native_event_t *event = (native_event_t *)arg;
+    const int src = event->device;
+    const int dst = (int)event->subvariant;
+
+    if (src < 0 || src >= device_count || dst < 0 || dst >= device_count ||
+        !device_handles[src] || !device_handles[dst] || src == dst)
+        return PAPI_EMISC;
+
+    // 1) Prefer the cheap predicate to avoid the buggy slow path:
+    bool accessible = false;
+    if (amdsmi_is_P2P_accessible_p &&
+        amdsmi_is_P2P_accessible_p(device_handles[src], device_handles[dst],
+                                   &accessible) == AMDSMI_STATUS_SUCCESS &&
+        accessible) {
+        // 2) Only for accessible pairs, ask for detailed capabilities:
+        amdsmi_io_link_type_t type = 0;
+        amdsmi_p2p_capability_t cap = {0};
+        if (amdsmi_topo_get_p2p_status_p(device_handles[src], device_handles[dst],
+                                         &type, &cap) != AMDSMI_STATUS_SUCCESS)
+            return PAPI_EMISC; // unexpected for accessible pairs
+
+        switch (event->variant) {
+        case 0: event->value = (int64_t)type; break;
+        case 1: event->value = cap.is_iolink_coherent; break;
+        case 2: event->value = cap.is_iolink_atomics_32bit; break;
+        case 3: event->value = cap.is_iolink_atomics_64bit; break;
+        case 4: event->value = cap.is_iolink_dma; break;
+        case 5: event->value = cap.is_iolink_bi_directional; break;
+        default: return PAPI_ENOSUPP;
+        }
+        return PAPI_OK;
+    }
+
+    // 3) Non-accessible or predicate missing: report a sensible value without
+    // touching the buggy call. Type (variant 0) can still be queried safely via
+    // amdsmi_topo_get_link_type; the rest are false by definition.
+    if (event->variant == 0 && amdsmi_topo_get_link_type_p) {
+        uint64_t hops = 0;
+        amdsmi_io_link_type_t type = 0; // UNKNOWN/PCIE/XGMI per platform
+        if (amdsmi_topo_get_link_type_p(device_handles[src], device_handles[dst],
+                                        &hops, &type) == AMDSMI_STATUS_SUCCESS) {
+            event->value = (int64_t)type;
+            return PAPI_OK;
+        }
+        // If link_type also fails, fall through to no data.
+    }
+
+    // For non-accessible pairs, the capability booleans are zero.
+    event->value = 0;
+    return PAPI_OK;
+}
+
+
+/* Reports 1 when the peer in event->subvariant is P2P-accessible from
+ * event->device, 0 otherwise. */
+int access_amdsmi_p2p_accessible(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_is_P2P_accessible_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    int src = event->device;
+    int dst = (int)event->subvariant;
+    if (src < 0 || src >= device_count || dst < 0 || dst >= device_count ||
+        !device_handles[src] || !device_handles[dst] || src == dst)
+        return PAPI_EMISC;
+    bool accessible = false;
+    if (amdsmi_is_P2P_accessible_p(device_handles[src], device_handles[dst],
+                                   &accessible) != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    event->value = accessible ? 1 : 0;
+    return PAPI_OK;
+}
+
+/* Reads the `count` field of the nearest-topology query for the link type
+ * carried in event->variant (the variant is cast to amdsmi_link_type_t). */
+int access_amdsmi_link_topology_nearest(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_link_topology_nearest_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    amdsmi_topology_nearest_t info;
+    memset(&info, 0, sizeof(info));
+    if (amdsmi_get_link_topology_nearest_p(
+            device_handles[event->device], (amdsmi_link_type_t)event->variant,
+            &info) != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    event->value = (int64_t)info.count;
+    return PAPI_OK;
+}
+
+/* Reads the NUMA node number reported for the device. */
+int access_amdsmi_topo_numa(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_topo_get_numa_node_number_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    uint32_t node = 0;
+    if (amdsmi_topo_get_numa_node_number_p(device_handles[event->device], &node) !=
+        AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    event->value = (int64_t)node;
+    return PAPI_OK;
+}
+
+/* Reads one component of the device BDF: variant 0 = domain_number,
+ * 1 = bus_number, 2 = device_number, 3 = function_number. Unknown variants
+ * return PAPI_ENOSUPP. */
+int access_amdsmi_device_bdf(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_device_bdf_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    amdsmi_bdf_t bdf;
+    memset(&bdf, 0, sizeof(bdf));
+    if (amdsmi_get_gpu_device_bdf_p(device_handles[event->device], &bdf) !=
+        AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    switch (event->variant) {
+    case 0:
+        event->value = (int64_t)bdf.domain_number;
+        break;
+    case 1:
+        event->value = (int64_t)bdf.bus_number;
+        break;
+    case 2:
+        event->value = (int64_t)bdf.device_number;
+        break;
+    case 3:
+        event->value = (int64_t)bdf.function_number;
+        break;
+    default:
+        return PAPI_ENOSUPP;
+    }
+    return PAPI_OK;
+}
+
+int
access_amdsmi_kfd_info(int mode, void *arg) {
+    /* Reads KFD info for the device: variant 0 = kfd_id, 1 = node_id,
+     * 2 = current_partition_id. Only PAPI_MODE_READ is supported; unknown
+     * variants return PAPI_ENOSUPP, a bad device or failed call PAPI_EMISC. */
+    if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_kfd_info_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    amdsmi_kfd_info_t info;
+    memset(&info, 0, sizeof(info));
+    if (amdsmi_get_gpu_kfd_info_p(device_handles[event->device], &info) !=
+        AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    switch (event->variant) {
+    case 0:
+        event->value = (int64_t)info.kfd_id;
+        break;
+    case 1:
+        event->value = (int64_t)info.node_id;
+        break;
+    case 2:
+        event->value = (int64_t)info.current_partition_id;
+        break;
+    default:
+        return PAPI_ENOSUPP;
+    }
+    return PAPI_OK;
+}
+
+/* Reads XGMI info for the device: variant 0 = xgmi_lanes, 1 = xgmi_hive_id,
+ * 2 = xgmi_node_id, 3 = index. Unknown variants return PAPI_ENOSUPP. */
+int access_amdsmi_xgmi_info(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_xgmi_info_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    amdsmi_xgmi_info_t info;
+    memset(&info, 0, sizeof(info));
+    if (amdsmi_get_xgmi_info_p(device_handles[event->device], &info) !=
+        AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    switch (event->variant) {
+    case 0:
+        event->value = (int64_t)info.xgmi_lanes;
+        break;
+    case 1:
+        event->value = (int64_t)info.xgmi_hive_id;
+        break;
+    case 2:
+        event->value = (int64_t)info.xgmi_node_id;
+        break;
+    case 3:
+        event->value = (int64_t)info.index;
+        break;
+    default:
+        return PAPI_ENOSUPP;
+    }
+    return PAPI_OK;
+}
+
+/* Reads one field of one entry of the device's process list.
+ * event->subvariant is the slot in the list and event->variant the field:
+ * 0 pid, 1 mem, 2 engine_usage.gfx, 3 engine_usage.enc,
+ * 4 memory_usage.gtt_mem, 5 memory_usage.cpu_mem, 6 memory_usage.vram_mem,
+ * 7 cu_occupancy (or reserved[0], see the guard below).
+ * Only the first 16 processes are visible; a slot beyond the reported list
+ * reads as 0 with PAPI_OK. Unknown variants return PAPI_ENOSUPP. */
+int access_amdsmi_process_info(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_process_list_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+
+    enum { PROC_LIST_CAPACITY = 16 };
+    amdsmi_proc_info_t list[PROC_LIST_CAPACITY];
+    /* Zeroed so a slot the library did not fill reads as 0, not as garbage. */
+    memset(list, 0, sizeof(list));
+    uint32_t count = PROC_LIST_CAPACITY;
+    amdsmi_status_t st =
+        amdsmi_get_gpu_process_list_p(device_handles[event->device], &count, list);
+    /* AMDSMI_STATUS_OUT_OF_RESOURCES means more processes exist than the
+     * buffer can hold; per the AMD SMI documentation the buffer is still
+     * filled up to its capacity, so treat it as a truncated result instead
+     * of failing every process event on a busy GPU. */
+    if (st != AMDSMI_STATUS_SUCCESS && st != AMDSMI_STATUS_OUT_OF_RESOURCES)
+        return PAPI_EMISC;
+    /* In the truncated case count holds the total number of processes;
+     * clamp it so the index below can never leave the array. */
+    if (count > PROC_LIST_CAPACITY)
+        count = PROC_LIST_CAPACITY;
+
+    uint32_t proc = event->subvariant;
+    if (proc >= count) {
+        event->value = 0;
+        return PAPI_OK;
+    }
+
+    amdsmi_proc_info_t *p = &list[proc];
+    switch (event->variant) {
+    case 0:
+        event->value = (int64_t)p->pid;
+        break;
+    case 1:
+        event->value = (int64_t)p->mem;
+        break;
+    case 2:
+        event->value = (int64_t)p->engine_usage.gfx;
+        break;
+    case 3:
+        event->value = (int64_t)p->engine_usage.enc;
+        break;
+    case 4:
+        event->value = (int64_t)p->memory_usage.gtt_mem;
+        break;
+    case 5:
+        event->value = (int64_t)p->memory_usage.cpu_mem;
+        break;
+    case 6:
+        event->value = (int64_t)p->memory_usage.vram_mem;
+        break;
+    case 7:
+        /* cu_occupancy added in AMD SMI 6.4.3; earlier versions store it in
+           the first reserved slot which remains zero. */
+        /* NOTE(review): this guard tests only the minor version, while the
+           rest of the file keys on AMDSMI_LIB_VERSION_MAJOR - confirm it
+           selects the right field for every supported header version. */
+#if defined(AMDSMI_LIB_VERSION_MINOR) && AMDSMI_LIB_VERSION_MINOR >= 4
+        event->value = (int64_t)p->cu_occupancy;
+#else
+        event->value = (int64_t)p->reserved[0];
+#endif
+        break;
+    default:
+        return PAPI_ENOSUPP;
+    }
+    return PAPI_OK;
+}
+/* Stores in event->value the ECC counter selected by event->variant
+ * (0 = correctable, 1 = uncorrectable, 2 = deferred), saturated at
+ * INT64_MAX. Returns PAPI_OK, or PAPI_ENOSUPP for any other variant, in
+ * which case event->value is left untouched. */
+static int _ecc_count_to_event(const amdsmi_error_count_t *ec,
+                               native_event_t *event) {
+    uint64_t val;
+    switch (event->variant) {
+    case 0:
+        val = ec->correctable_count;
+        break;
+    case 1:
+        val = ec->uncorrectable_count;
+        break;
+    case 2:
+        val = ec->deferred_count;
+        break;
+    default:
+        return PAPI_ENOSUPP;
+    }
+
+    if (val > (uint64_t)INT64_MAX)
+        val = (uint64_t)INT64_MAX;
+    event->value = (int64_t)val;
+    return PAPI_OK;
+}
+/* Reads the device-wide ECC error totals; see _ecc_count_to_event for the
+ * variants. */
+int access_amdsmi_ecc_total(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_total_ecc_count_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+
+    amdsmi_error_count_t ec;
+    memset(&ec, 0, sizeof(ec));
+    if (amdsmi_get_gpu_total_ecc_count_p(device_handles[event->device], &ec) !=
+        AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+
+    return _ecc_count_to_event(&ec, event);
+}
+
+/* Reads the ECC error counts of one GPU block; event->subvariant is cast to
+ * amdsmi_gpu_block_t to select the block, and the variants are those of
+ * _ecc_count_to_event. */
+int access_amdsmi_ecc_block(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_ecc_count_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+
+    amdsmi_error_count_t ec;
+    memset(&ec, 0, sizeof(ec));
+    if (amdsmi_get_gpu_ecc_count_p(device_handles[event->device],
+                                   (amdsmi_gpu_block_t)event->subvariant, &ec) !=
+        AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+
+    return _ecc_count_to_event(&ec, event);
+}
+
+/* Reads the RAS/ECC error state of the GPU block given by
+ * event->subvariant. Only variant 0 exists; others return PAPI_ENOSUPP. */
+int access_amdsmi_ecc_status(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_ecc_status_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    if (event->variant != 0)
+        return PAPI_ENOSUPP;
+
+    amdsmi_ras_err_state_t st;
+    if (amdsmi_get_gpu_ecc_status_p(device_handles[event->device],
+                                    (amdsmi_gpu_block_t)event->subvariant, &st) !=
+        AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    event->value = (int64_t)st;
+    return PAPI_OK;
+}
+
+/* Reads the 64-bit ECC-enabled mask of the device and stores it unchanged
+ * (reinterpreted as int64_t) in event->value. */
+int access_amdsmi_ecc_enabled_mask(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_ecc_enabled_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    uint64_t mask = 0;
+    if (amdsmi_get_gpu_ecc_enabled_p(device_handles[event->device], &mask) !=
+        AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    event->value = (int64_t)mask;
+    return PAPI_OK;
+}
+/* Reads the compute partition string of the device and reports its hash
+ * (computed by _str_to_u64_hash). */
+int access_amdsmi_compute_partition_hash(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_compute_partition_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    char buf[128] = {0};
+    if (amdsmi_get_gpu_compute_partition_p(device_handles[event->device], buf,
+                                           sizeof(buf)) != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    event->value = (int64_t)_str_to_u64_hash(buf);
+    return PAPI_OK;
+}
+/* Reads the memory partition string of the device and reports its hash
+ * (computed by _str_to_u64_hash). When the support predicate is available
+ * it is consulted first; a failed or negative answer returns PAPI_ENOSUPP
+ * without querying the partition. */
+int access_amdsmi_memory_partition_hash(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_memory_partition_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    if (amdsmi_is_gpu_memory_partition_supported_p) {
+        bool supported = false;
+        if (amdsmi_is_gpu_memory_partition_supported_p(device_handles[event->device],
+                                                       &supported) !=
+                AMDSMI_STATUS_SUCCESS ||
+            !supported)
+            return PAPI_ENOSUPP;
+    }
+    char buf[128] = {0};
+    if (amdsmi_get_gpu_memory_partition_p(device_handles[event->device], buf,
+                                          sizeof(buf)) != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    /* Force termination in case the library filled the whole buffer. */
+    buf[sizeof(buf) - 1] = '\0';
+    event->value = (int64_t)_str_to_u64_hash(buf);
+    return PAPI_OK;
+}
+
+#if AMDSMI_LIB_VERSION_MAJOR >= 25
+int access_amdsmi_memory_partition_config(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_memory_partition_config_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    if (amdsmi_is_gpu_memory_partition_supported_p) {
+        bool supported = false;
+        if (amdsmi_is_gpu_memory_partition_supported_p(device_handles[event->device],
+                                                       &supported) !=
+                AMDSMI_STATUS_SUCCESS ||
+            !supported)
+            return PAPI_ENOSUPP;
+    }
+    amdsmi_memory_partition_config_t cfg = {0};
+    if
(amdsmi_get_gpu_memory_partition_config_p(device_handles[event->device], + &cfg) != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + switch (event->variant) { + case 0: + /* Union holds bit flags; expose the mask value */ + event->value = (int64_t)cfg.partition_caps.nps_cap_mask; + break; + case 1: + event->value = (int64_t)cfg.mp_mode; + break; + case 2: + event->value = (int64_t)cfg.num_numa_ranges; + break; + default: + return PAPI_ENOSUPP; + } + return PAPI_OK; +} +#endif +int access_amdsmi_accelerator_num_partitions(int mode, void *arg) { + if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_accelerator_partition_profile_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || + !device_handles[event->device]) + return PAPI_EMISC; + amdsmi_accelerator_partition_profile_t prof = {0}; + uint32_t ids[AMDSMI_MAX_ACCELERATOR_PARTITIONS] = {0}; + if (amdsmi_get_gpu_accelerator_partition_profile_p(device_handles[event->device], + &prof, ids) != + AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (int64_t)prof.num_partitions; + return PAPI_OK; +} +/* Access function implementations (read/write operations for each event) */ +int access_amdsmi_temp_metric(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; /* ensure device handle is valid */ + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + int64_t tmp = 0; + amdsmi_status_t status = + amdsmi_get_temp_metric_p(device_handles[event->device], + (amdsmi_temperature_type_t)event->subvariant, + (amdsmi_temperature_metric_t)event->variant, + &tmp); + if (status == AMDSMI_STATUS_SUCCESS) { + event->value = (uint64_t)tmp; + return PAPI_OK; + } + return PAPI_EMISC; +} +int access_amdsmi_fan_rpms(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if 
(event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + int64_t speed = 0; + amdsmi_status_t status = amdsmi_get_gpu_fan_rpms_p(device_handles[event->device], event->subvariant, &speed); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = speed; + return PAPI_OK; +} +int access_amdsmi_fan_speed(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; // writing fan speed not supported + } + int64_t val = 0; + amdsmi_status_t status = amdsmi_get_gpu_fan_speed_p(device_handles[event->device], event->subvariant, &val); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = val; + return PAPI_OK; +} +int access_amdsmi_mem_total(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + uint64_t data = 0; + amdsmi_status_t status = amdsmi_get_total_memory_p(device_handles[event->device], (amdsmi_memory_type_t)event->variant, &data); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = (int64_t)data; + return PAPI_OK; +} +int access_amdsmi_mem_usage(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + uint64_t data = 0; + amdsmi_status_t status = amdsmi_get_memory_usage_p(device_handles[event->device], 
(amdsmi_memory_type_t)event->variant, &data); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = (int64_t)data; + return PAPI_OK; +} +int access_amdsmi_power_cap(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode == PAPI_MODE_READ) { + // Read current power cap + amdsmi_power_cap_info_t info; + memset(&info, 0, sizeof(info)); + amdsmi_status_t status = amdsmi_get_power_cap_info_p(device_handles[event->device], 0, &info); // sensor index 0 + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = (int64_t)info.power_cap; + return PAPI_OK; + } else if (mode == PAPI_MODE_WRITE) { + // Set new power cap (value expected in microWatts if API uses uW) + uint64_t new_cap = (uint64_t)event->value; + amdsmi_status_t status = amdsmi_set_power_cap_p(device_handles[event->device], 0, new_cap); + return (status == AMDSMI_STATUS_SUCCESS ? 
PAPI_OK : PAPI_EMISC); + } + return PAPI_ENOSUPP; +} +int access_amdsmi_power_cap_range(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + amdsmi_power_cap_info_t info; + memset(&info, 0, sizeof(info)); + amdsmi_status_t status = amdsmi_get_power_cap_info_p(device_handles[event->device], 0, &info); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + if (event->variant == 1) { + event->value = (int64_t)info.min_power_cap; + } else if (event->variant == 2) { + event->value = (int64_t)info.max_power_cap; + } else if (event->variant == 3) { + event->value = (int64_t)info.default_power_cap; + } else if (event->variant == 4) { + event->value = (int64_t)info.dpm_cap; + } else { + return PAPI_EMISC; + } + return PAPI_OK; +} +int access_amdsmi_power_average(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + amdsmi_power_info_t power; + memset(&power, 0, sizeof(power)); + amdsmi_status_t status = amdsmi_get_power_info_p(device_handles[event->device], &power); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = (int64_t)power.average_socket_power; + return PAPI_OK; +} +int access_amdsmi_pci_throughput(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + uint64_t sent = 0, received = 0, max_pkt = 0; + amdsmi_status_t status = amdsmi_get_gpu_pci_throughput_p(device_handles[event->device], &sent, 
&received, &max_pkt); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + switch (event->variant) { + case 0: + event->value = (int64_t)sent; + break; + case 1: + event->value = (int64_t)received; + break; + case 2: + event->value = (int64_t)max_pkt; + break; + default: + return PAPI_EMISC; + } + return PAPI_OK; +} +int access_amdsmi_pci_replay_counter(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + uint64_t counter = 0; + amdsmi_status_t status = amdsmi_get_gpu_pci_replay_counter_p(device_handles[event->device], &counter); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = (int64_t)counter; + return PAPI_OK; +} +int access_amdsmi_clk_freq(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || + !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + + amdsmi_frequencies_t freq_info; + memset(&freq_info, 0, sizeof(freq_info)); /* critical */ + + amdsmi_clk_type_t clk_type = AMDSMI_CLK_TYPE_SYS; + if (event->variant == 1) clk_type = AMDSMI_CLK_TYPE_DF; + else if (event->variant == 2) clk_type = AMDSMI_CLK_TYPE_DCEF; + + amdsmi_status_t status = + amdsmi_get_clk_freq_p(device_handles[event->device], clk_type, &freq_info); + if (status != AMDSMI_STATUS_SUCCESS) { + event->value = 0; + return PAPI_OK; + } + + if (event->subvariant == 0) { + event->value = freq_info.num_supported; + } else if (event->subvariant == 1) { + event->value = (freq_info.num_supported > 0) ? 
freq_info.frequency[0] : 0; + } else { + int idx = event->subvariant - 2; + if (idx >= 0 && (uint32_t)idx < freq_info.num_supported) { + event->value = freq_info.frequency[idx]; + } else { + event->value = 0; + } + } + return PAPI_OK; +} + + +int access_amdsmi_clock_info(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + + amdsmi_clk_type_t clk_types[] = {AMDSMI_CLK_TYPE_SYS, AMDSMI_CLK_TYPE_MEM}; + if (event->variant < 0 || event->variant >= 2) + return PAPI_EMISC; + + amdsmi_clk_info_t info; + memset(&info, 0, sizeof(info)); /* critical */ + + amdsmi_status_t status = + amdsmi_get_clock_info_p(device_handles[event->device], + clk_types[event->variant], &info); + if (status != AMDSMI_STATUS_SUCCESS) { + event->value = 0; + return PAPI_OK; + } + + switch (event->subvariant) { + case 0: event->value = info.clk; break; + case 1: event->value = info.min_clk; break; + case 2: event->value = info.max_clk; break; + case 3: event->value = info.clk_locked; break; + case 4: event->value = info.clk_deep_sleep;break; + default: return PAPI_EMISC; + } + return PAPI_OK; +} + + +int access_amdsmi_metrics_header_info(int mode, void *arg) { + if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_metrics_header_info_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || + !device_handles[event->device]) + return PAPI_EMISC; + amd_metrics_table_header_t hdr; + memset(&hdr, 0, sizeof(hdr)); + if (amdsmi_get_gpu_metrics_header_info_p(device_handles[event->device], &hdr) != + AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + switch (event->variant) { + case 0: + event->value = hdr.structure_size; + break; + case 1: + event->value = hdr.format_revision; + break; + case 2: + event->value = hdr.content_revision; + break; + 
default: + return PAPI_ENOSUPP; + } + return PAPI_OK; +} +int access_amdsmi_gpu_metrics(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + amdsmi_gpu_metrics_t metrics; + memset(&metrics, 0, sizeof(metrics)); + amdsmi_status_t status = amdsmi_get_gpu_metrics_info_p(device_handles[event->device], &metrics); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + switch (event->variant) { + case 0: + event->value = metrics.throttle_status; + break; + case 1: + event->value = (int64_t)metrics.indep_throttle_status; + break; + case 2: + event->value = metrics.pcie_link_width; + break; + case 3: + event->value = metrics.pcie_link_speed; + break; + case 4: + event->value = (int64_t)metrics.pcie_bandwidth_acc; + break; + case 5: + event->value = (int64_t)metrics.pcie_bandwidth_inst; + break; + case 6: + event->value = (int64_t)metrics.pcie_l0_to_recov_count_acc; + break; + case 7: + event->value = (int64_t)metrics.pcie_replay_count_acc; + break; + case 8: + event->value = (int64_t)metrics.pcie_replay_rover_count_acc; + break; + case 9: + event->value = metrics.pcie_nak_sent_count_acc; + break; + case 10: + event->value = metrics.pcie_nak_rcvd_count_acc; + break; + default: + return PAPI_ENOSUPP; + } + return PAPI_OK; +} +int access_amdsmi_gpu_info(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + amdsmi_status_t status; + switch (event->variant) { + case 0: { + uint16_t id = 0; + status = amdsmi_get_gpu_id_p(device_handles[event->device], &id); + if (status == AMDSMI_STATUS_SUCCESS) { + event->value = id; + } + break; + } + case 1: { 
+ uint16_t rev = 0; + status = amdsmi_get_gpu_revision_p(device_handles[event->device], &rev); + if (status == AMDSMI_STATUS_SUCCESS) { + event->value = rev; + } + break; + } + case 2: { + uint16_t subid = 0; + status = amdsmi_get_gpu_subsystem_id_p(device_handles[event->device], &subid); + if (status == AMDSMI_STATUS_SUCCESS) { + event->value = subid; + } + break; + } + case 3: { + uint64_t bdfid = 0; + status = amdsmi_get_gpu_bdf_id_p(device_handles[event->device], &bdfid); + if (status == AMDSMI_STATUS_SUCCESS) { + event->value = (int64_t)bdfid; + } + break; + } +#if AMDSMI_LIB_VERSION_MAJOR >= 25 + case 4: { + if (amdsmi_lib_major < 25 || !amdsmi_get_gpu_virtualization_mode_p) + return PAPI_ENOSUPP; + amdsmi_virtualization_mode_t mode_val; + status = amdsmi_get_gpu_virtualization_mode_p(device_handles[event->device], &mode_val); + if (status == AMDSMI_STATUS_SUCCESS) { + event->value = mode_val; + } + break; + } +#endif + case 5: { + int32_t numa_node = -1; + status = amdsmi_get_gpu_topo_numa_affinity_p(device_handles[event->device], &numa_node); + if (status == AMDSMI_STATUS_SUCCESS) { + event->value = numa_node; + } + break; + } + default: + return PAPI_EMISC; + } + return (status == AMDSMI_STATUS_SUCCESS ? 
PAPI_OK : PAPI_EMISC); +} +int access_amdsmi_gpu_activity(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + amdsmi_engine_usage_t usage; + memset(&usage, 0, sizeof(usage)); + amdsmi_status_t status = amdsmi_get_gpu_activity_p(device_handles[event->device], &usage); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + switch (event->variant) { + case 0: + event->value = usage.gfx_activity; + break; + case 1: + event->value = usage.umc_activity; + break; + case 2: + event->value = usage.mm_activity; + break; + default: + return PAPI_EMISC; + } + return PAPI_OK; +} +int access_amdsmi_fan_speed_max(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + int64_t max_speed = 0; + amdsmi_status_t status = amdsmi_get_gpu_fan_speed_max_p(device_handles[event->device], event->subvariant, &max_speed); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = max_speed; + return PAPI_OK; +} +int access_amdsmi_pci_bandwidth(int mode, void *arg) { + if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_pci_bandwidth_p) + return PAPI_ENOSUPP; + + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || + !device_handles || !device_handles[event->device]) + return PAPI_EMISC; + + amdsmi_pcie_bandwidth_t bw; + memset(&bw, 0, sizeof(bw)); /* critical */ + + if (amdsmi_get_gpu_pci_bandwidth_p(device_handles[event->device], &bw) != + AMDSMI_STATUS_SUCCESS) { + event->value = 0; + return PAPI_OK; + } + + uint32_t cur = bw.transfer_rate.current; + if (cur >= 
bw.transfer_rate.num_supported) { + event->value = 0; + return PAPI_OK; + } + + switch (event->variant) { + case 0: event->value = bw.transfer_rate.num_supported; break; + case 1: event->value = (int64_t)bw.transfer_rate.frequency[cur]; break; + case 2: event->value = bw.lanes[cur]; break; + default: return PAPI_ENOSUPP; + } + return PAPI_OK; +} + + +int access_amdsmi_energy_count(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + uint64_t energy = 0; + float resolution = 0.0; + uint64_t timestamp = 0; + amdsmi_status_t status = amdsmi_get_energy_count_p(device_handles[event->device], &energy, &resolution, &timestamp); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + switch (event->variant) { + case 0: + // Convert accumulated energy count to microJoules + event->value = (int64_t)(energy * resolution); + break; + case 1: + // Resolution reported in Joules; convert to microJoules per count + event->value = (int64_t)(resolution * 1.0e6); + break; + case 2: + // Raw timestamp returned by the SMI library (nanoseconds) + event->value = (int64_t)timestamp; + break; + default: + return PAPI_EMISC; + } + return PAPI_OK; +} + +int access_amdsmi_xgmi_bandwidth(int mode, void *arg) { + if (mode != PAPI_MODE_READ || !amdsmi_get_minmax_bandwidth_between_processors_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= gpu_count || !device_handles || + !device_handles[event->device]) + return PAPI_EMISC; + if (event->subvariant < 0 || event->subvariant >= gpu_count || + !device_handles[event->subvariant]) + return PAPI_EMISC; + + amdsmi_processor_handle src = device_handles[event->device]; + amdsmi_processor_handle dst = device_handles[event->subvariant]; + uint64_t min_bw = 0, max_bw =
0; + if (amdsmi_get_minmax_bandwidth_between_processors_p(src, dst, &min_bw, + &max_bw) != + AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + + event->value = (event->variant == 0) ? (int64_t)min_bw : (int64_t)max_bw; + return PAPI_OK; +} +int access_amdsmi_power_profile_status(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + amdsmi_power_profile_status_t status_info; + amdsmi_status_t status = amdsmi_get_gpu_power_profile_presets_p(device_handles[event->device], 0, &status_info); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + if (event->variant == 0) { + event->value = status_info.num_profiles; + } else if (event->variant == 1) { + event->value = (int64_t)status_info.current; + } else { + return PAPI_EMISC; + } + return PAPI_OK; +} +#ifndef AMDSMI_DISABLE_ESMI +/* The functions below implement CPU metrics access */ +int access_amdsmi_cpu_socket_power(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + uint32_t power = 0; + amdsmi_status_t status = amdsmi_get_cpu_socket_power_p(device_handles[event->device], &power); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = (int64_t)power; + return PAPI_OK; +} +int access_amdsmi_cpu_socket_energy(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + uint64_t energy = 0; + amdsmi_status_t status = 
amdsmi_get_cpu_socket_energy_p(device_handles[event->device], &energy); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = (int64_t)energy; + return PAPI_OK; +} +int access_amdsmi_cpu_socket_freq_limit(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + uint16_t freq = 0; + char *src = NULL; + amdsmi_status_t status = amdsmi_get_cpu_socket_current_active_freq_limit_p(device_handles[event->device], &freq, &src); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + if (src) + free(src); + event->value = freq; + return PAPI_OK; +} +int access_amdsmi_cpu_socket_freq_range(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + uint16_t fmax = 0, fmin = 0; + amdsmi_status_t status = amdsmi_get_cpu_socket_freq_range_p(device_handles[event->device], &fmax, &fmin); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + if (event->variant == 0) { + event->value = fmin; + } else { + event->value = fmax; + } + return PAPI_OK; +} +int access_amdsmi_cpu_power_cap(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + uint32_t cap_value = 0; + amdsmi_status_t status; + if (event->variant == 0) { + status = amdsmi_get_cpu_socket_power_cap_p(device_handles[event->device], &cap_value); + } else { + status = amdsmi_get_cpu_socket_power_cap_max_p(device_handles[event->device], 
&cap_value); + } + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = (int64_t)cap_value; + return PAPI_OK; +} +int access_amdsmi_cpu_core_energy(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + int s_index = event->device - gpu_count; + if (s_index < 0 || s_index >= cpu_count) { + return PAPI_EMISC; + } + uint64_t energy = 0; + amdsmi_status_t status = amdsmi_get_cpu_core_energy_p(cpu_core_handles[s_index][event->subvariant], &energy); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = (int64_t)energy; + return PAPI_OK; +} +int access_amdsmi_cpu_core_freq_limit(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + int s_index = event->device - gpu_count; + if (s_index < 0 || s_index >= cpu_count) { + return PAPI_EMISC; + } + uint32_t freq = 0; + amdsmi_status_t status = amdsmi_get_cpu_core_current_freq_limit_p(cpu_core_handles[s_index][event->subvariant], &freq); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = freq; + return PAPI_OK; +} +int access_amdsmi_cpu_core_boostlimit(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + int s_index = event->device - gpu_count; + if (s_index < 0 || s_index >= cpu_count) { + return PAPI_EMISC; + } + uint32_t boost = 0; + amdsmi_status_t status = 
amdsmi_get_cpu_core_boostlimit_p(cpu_core_handles[s_index][event->subvariant], &boost); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = boost; + return PAPI_OK; +} +int access_amdsmi_cpu_cclk_limit(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + uint32_t cclk = 0; + amdsmi_status_t status = + amdsmi_get_cpu_cclk_limit_p(device_handles[event->device], &cclk); + if (status != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = cclk; + return PAPI_OK; +} +int access_amdsmi_cpu_io_bw(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + const char *links[] = {"P0", "P1", "P2", "P3", "P4"}; + amdsmi_io_bw_encoding_t bw_types[] = {AGG_BW0, RD_BW0, WR_BW0}; + if (event->variant < 0 || event->variant >= 5 || event->subvariant < 0 || + event->subvariant >= 3) + return PAPI_EMISC; + amdsmi_link_id_bw_type_t link = {bw_types[event->subvariant], + (char *)links[event->variant]}; + uint32_t bw = 0; + amdsmi_status_t status = amdsmi_get_cpu_current_io_bandwidth_p( + device_handles[event->device], link, &bw); + if (status != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = bw; + return PAPI_OK; +} +int access_amdsmi_cpu_xgmi_bw(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + const char *links[] = {"G0", "G1", "G2", "G3", + "G4", "G5", "G6", "G7"}; + amdsmi_io_bw_encoding_t bw_types[] = 
{AGG_BW0, RD_BW0, WR_BW0}; + if (event->variant < 0 || event->variant >= 8 || event->subvariant < 0 || + event->subvariant >= 3) + return PAPI_EMISC; + amdsmi_link_id_bw_type_t link = {bw_types[event->subvariant], + (char *)links[event->variant]}; + uint32_t bw = 0; + amdsmi_status_t status = amdsmi_get_cpu_current_xgmi_bw_p( + device_handles[event->device], link, &bw); + if (status != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = bw; + return PAPI_OK; +} +int access_amdsmi_cpu_ddr_bw(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + amdsmi_ddr_bw_metrics_t bw; + memset(&bw, 0, sizeof(bw)); + amdsmi_status_t status = + amdsmi_get_cpu_ddr_bw_p(device_handles[event->device], &bw); + if (status != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + switch (event->variant) { + case 0: + event->value = bw.max_bw; + break; + case 1: + event->value = bw.utilized_bw; + break; + case 2: + event->value = bw.utilized_pct; + break; + default: + return PAPI_EMISC; + } + return PAPI_OK; +} +int access_amdsmi_cpu_fclk_mclk(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + uint32_t fclk = 0, mclk = 0; + amdsmi_status_t status = amdsmi_get_cpu_fclk_mclk_p( + device_handles[event->device], &fclk, &mclk); + if (status != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + if (event->variant == 0) + event->value = fclk; + else if (event->variant == 1) + event->value = mclk; + else + return PAPI_EMISC; + return PAPI_OK; +} +int access_amdsmi_cpu_hsmp_driver_version(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 
0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + amdsmi_hsmp_driver_version_t ver; + memset(&ver, 0, sizeof(ver)); + amdsmi_status_t status = amdsmi_get_cpu_hsmp_driver_version_p( + device_handles[event->device], &ver); + if (status != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + if (event->variant == 0) + event->value = ver.major; + else if (event->variant == 1) + event->value = ver.minor; + else + return PAPI_EMISC; + return PAPI_OK; +} +int access_amdsmi_cpu_hsmp_proto_ver(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + uint32_t ver = 0; + amdsmi_status_t status = + amdsmi_get_cpu_hsmp_proto_ver_p(device_handles[event->device], &ver); + if (status != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = ver; + return PAPI_OK; +} +int access_amdsmi_cpu_prochot_status(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + uint32_t status = 0; + amdsmi_status_t smi_status = amdsmi_get_cpu_prochot_status_p( + device_handles[event->device], &status); + if (smi_status != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = status; + return PAPI_OK; +} +int access_amdsmi_cpu_svi_power(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + uint32_t power = 0; + amdsmi_status_t status = 
amdsmi_get_cpu_pwr_svi_telemetry_all_rails_p( + device_handles[event->device], &power); + if (status != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = power; + return PAPI_OK; +} +int access_amdsmi_dimm_temp(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + amdsmi_dimm_thermal_t dimm_temp; + memset(&dimm_temp, 0, sizeof(dimm_temp)); + amdsmi_status_t status = amdsmi_get_cpu_dimm_thermal_sensor_p(device_handles[event->device], (uint8_t)event->subvariant, &dimm_temp); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = (int64_t)dimm_temp.temp; + return PAPI_OK; +} +int access_amdsmi_dimm_power(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + amdsmi_dimm_power_t dimm_pow; + memset(&dimm_pow, 0, sizeof(dimm_pow)); + amdsmi_status_t status = amdsmi_get_cpu_dimm_power_consumption_p(device_handles[event->device], (uint8_t)event->subvariant, &dimm_pow); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + event->value = dimm_pow.power; + return PAPI_OK; +} +int access_amdsmi_dimm_range_refresh(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + amdsmi_temp_range_refresh_rate_t rate; + memset(&rate, 0, sizeof(rate)); + amdsmi_status_t status = + amdsmi_get_cpu_dimm_temp_range_and_refresh_rate_p(device_handles[event->device], (uint8_t)event->subvariant, &rate); 
+ if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + if (event->variant == 0) { + event->value = rate.range; + } else { + event->value = rate.ref_rate; + } + return PAPI_OK; +} +int access_amdsmi_threads_per_core(int mode, void *arg) { + (void)arg; + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + uint32_t threads = 0; + amdsmi_status_t status = amdsmi_get_threads_per_core_p(&threads); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + ((native_event_t *)arg)->value = threads; + return PAPI_OK; +} +int access_amdsmi_cpu_family(int mode, void *arg) { + (void)arg; + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + uint32_t family = 0; + amdsmi_status_t status = amdsmi_get_cpu_family_p(&family); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + ((native_event_t *)arg)->value = family; + return PAPI_OK; +} +int access_amdsmi_cpu_model(int mode, void *arg) { + (void)arg; + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + uint32_t model = 0; + amdsmi_status_t status = amdsmi_get_cpu_model_p(&model); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + ((native_event_t *)arg)->value = model; + return PAPI_OK; +} +int access_amdsmi_smu_fw_version(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || + !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + amdsmi_smu_fw_version_t fw; + memset(&fw, 0, sizeof(fw)); + amdsmi_status_t status = amdsmi_get_cpu_smu_fw_version_p(device_handles[event->device], &fw); + if (status != AMDSMI_STATUS_SUCCESS) { + return PAPI_EMISC; + } + int encoded = ((int)fw.major << 16) | ((int)fw.minor << 8) | fw.debug; + event->value = encoded; + return PAPI_OK; +} +#endif + +int access_amdsmi_cache_stat(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 
0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) { + return PAPI_ENOSUPP; + } + if (!amdsmi_get_gpu_cache_info_p) + return PAPI_ENOSUPP; + + amdsmi_gpu_cache_info_t info; + amdsmi_status_t st = amdsmi_get_gpu_cache_info_p(device_handles[event->device], &info); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + + /* subvariant = cache index chosen during registration */ + if (event->subvariant >= info.num_cache_types) + return PAPI_EMISC; + + uint64_t val = 0; + switch (event->variant) { + case 0: /* size in bytes (reported in KB) */ + val = (uint64_t)info.cache[event->subvariant].cache_size * 1024ULL; + break; + case 1: /* maximum number of CUs sharing this cache */ + val = (uint64_t)info.cache[event->subvariant].max_num_cu_shared; + break; + case 2: /* number of cache instances */ + val = (uint64_t)info.cache[event->subvariant].num_cache_instance; + break; + default: + return PAPI_EINVAL; + } + event->value = val; + return PAPI_OK; +} + +int access_amdsmi_overdrive_level(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_overdrive_level_p) + return PAPI_ENOSUPP; + + uint32_t od = 0; + amdsmi_status_t st = amdsmi_get_gpu_overdrive_level_p(device_handles[event->device], &od); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)od; + return PAPI_OK; +} + +int access_amdsmi_mem_overdrive_level(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_mem_overdrive_level_p) + return 
PAPI_ENOSUPP; + + uint32_t od = 0; + amdsmi_status_t st = amdsmi_get_gpu_mem_overdrive_level_p(device_handles[event->device], &od); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)od; + return PAPI_OK; +} + +int access_amdsmi_od_volt_regions_count(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_od_volt_curve_regions_p) + return PAPI_ENOSUPP; + + /* Probe to get count; API requires a buffer, so do a two-call pattern */ + uint32_t num = 0; + amdsmi_freq_volt_region_t *buf = NULL; + + /* First call: ask for 0 (expect MORE_DATA/INSUFFICIENT_SIZE with num set) */ + amdsmi_status_t st = amdsmi_get_gpu_od_volt_curve_regions_p(device_handles[event->device], &num, buf); + if (st == AMDSMI_STATUS_INSUFFICIENT_SIZE || st == AMDSMI_STATUS_NO_DATA) { + if (num == 0) + return PAPI_EMISC; + buf = (amdsmi_freq_volt_region_t *)papi_calloc(num, sizeof(amdsmi_freq_volt_region_t)); + if (!buf) + return PAPI_ENOMEM; + st = amdsmi_get_gpu_od_volt_curve_regions_p(device_handles[event->device], &num, buf); + } + if (st != AMDSMI_STATUS_SUCCESS) { + if (buf) + papi_free(buf); + return PAPI_EMISC; + } + event->value = (uint64_t)num; + if (buf) + papi_free(buf); + return PAPI_OK; +} + +int access_amdsmi_od_volt_curve_range(int mode, void *arg) { + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_od_volt_curve_regions_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + + uint32_t num = 0; + amdsmi_status_t st = amdsmi_get_gpu_od_volt_curve_regions_p(device_handles[event->device], &num, NULL); + if (st != AMDSMI_STATUS_SUCCESS) + return 
PAPI_EMISC; + if (event->subvariant >= num) + return PAPI_EMISC; + + amdsmi_freq_volt_region_t *regs = (amdsmi_freq_volt_region_t *)papi_calloc(num, sizeof(amdsmi_freq_volt_region_t)); + if (!regs) + return PAPI_ENOMEM; + st = amdsmi_get_gpu_od_volt_curve_regions_p(device_handles[event->device], &num, regs); + if (st != AMDSMI_STATUS_SUCCESS) { + papi_free(regs); + return PAPI_EMISC; + } + + amdsmi_freq_volt_region_t r = regs[event->subvariant]; + papi_free(regs); + + switch (event->variant) { + case 0: + event->value = (int64_t)r.freq_range.lower_bound; + break; + case 1: + event->value = (int64_t)r.freq_range.upper_bound; + break; + case 2: + event->value = (int64_t)r.volt_range.lower_bound; + break; + case 3: + event->value = (int64_t)r.volt_range.upper_bound; + break; + default: + return PAPI_ENOSUPP; + } + return PAPI_OK; +} + +int access_amdsmi_od_volt_info(int mode, void *arg) { + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_od_volt_info_p) + return PAPI_ENOSUPP; + + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || + !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + + amdsmi_od_volt_freq_data_t info; + memset(&info, 0, sizeof(info)); + + amdsmi_status_t st = + amdsmi_get_gpu_od_volt_info_p(device_handles[event->device], &info); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + + switch (event->variant) { + case 0: event->value = (int64_t)info.curr_sclk_range.lower_bound; break; + case 1: event->value = (int64_t)info.curr_sclk_range.upper_bound; break; + case 2: event->value = (int64_t)info.curr_mclk_range.lower_bound; break; + case 3: event->value = (int64_t)info.curr_mclk_range.upper_bound; break; + case 4: event->value = (int64_t)info.sclk_freq_limits.lower_bound; break; + case 5: event->value = (int64_t)info.sclk_freq_limits.upper_bound; break; + case 6: event->value = (int64_t)info.mclk_freq_limits.lower_bound; break; + case 7: 
event->value = (int64_t)info.mclk_freq_limits.upper_bound; break; + case 8: + if (event->subvariant >= AMDSMI_NUM_VOLTAGE_CURVE_POINTS) return PAPI_EMISC; + event->value = (int64_t)info.curve.vc_points[event->subvariant].frequency; + break; + case 9: + if (event->subvariant >= AMDSMI_NUM_VOLTAGE_CURVE_POINTS) return PAPI_EMISC; + event->value = (int64_t)info.curve.vc_points[event->subvariant].voltage; + break; + default: + return PAPI_ENOSUPP; + } + return PAPI_OK; +} + + +int access_amdsmi_perf_level(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_perf_level_p) + return PAPI_ENOSUPP; + + amdsmi_dev_perf_level_t perf = AMDSMI_DEV_PERF_LEVEL_UNKNOWN; + amdsmi_status_t st = amdsmi_get_gpu_perf_level_p(device_handles[event->device], &perf); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)perf; + return PAPI_OK; +} + +int access_amdsmi_pm_metrics_count(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (amdsmi_lib_major < 25 || !amdsmi_get_gpu_pm_metrics_info_p) + return PAPI_ENOSUPP; + + amdsmi_name_value_t *metrics = NULL; + uint32_t count = 0; + amdsmi_status_t st = amdsmi_get_gpu_pm_metrics_info_p(device_handles[event->device], &metrics, &count); + if (metrics) + free(metrics); /* library allocates */ + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)count; + return PAPI_OK; +} + +int access_amdsmi_pm_metric_value(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || 
!device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (amdsmi_lib_major < 25 || !amdsmi_get_gpu_pm_metrics_info_p) + return PAPI_ENOSUPP; + + amdsmi_name_value_t *metrics = NULL; + uint32_t count = 0; + amdsmi_status_t st = amdsmi_get_gpu_pm_metrics_info_p(device_handles[event->device], &metrics, &count); + if (st != AMDSMI_STATUS_SUCCESS || event->variant >= count) { + if (metrics) + free(metrics); + return PAPI_EMISC; + } + event->value = (int64_t)metrics[event->variant].value; + free(metrics); + return PAPI_OK; +} + +int access_amdsmi_pm_enabled(int mode, void *arg) { + if (mode != PAPI_MODE_READ || !amdsmi_is_gpu_power_management_enabled_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || + !device_handles[event->device]) + return PAPI_EMISC; + bool enabled = false; + if (amdsmi_is_gpu_power_management_enabled_p(device_handles[event->device], + &enabled) != + AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = enabled ? 
1 : 0; + return PAPI_OK; +} + +int access_amdsmi_ras_ecc_schema(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_ras_feature_info_p) + return PAPI_ENOSUPP; + + amdsmi_ras_feature_t ras = {0}; + amdsmi_status_t st = amdsmi_get_gpu_ras_feature_info_p(device_handles[event->device], &ras); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)ras.ecc_correction_schema_flag; + return PAPI_OK; +} + +int access_amdsmi_ras_eeprom_version(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_ras_feature_info_p) + return PAPI_ENOSUPP; + + amdsmi_ras_feature_t ras = {0}; + amdsmi_status_t st = amdsmi_get_gpu_ras_feature_info_p(device_handles[event->device], &ras); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)ras.ras_eeprom_version; + return PAPI_OK; +} + +int access_amdsmi_ras_eeprom_validate(int mode, void *arg) { + if (mode != PAPI_MODE_READ || !amdsmi_gpu_validate_ras_eeprom_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || + !device_handles[event->device]) + return PAPI_EMISC; + amdsmi_status_t st = + amdsmi_gpu_validate_ras_eeprom_p(device_handles[event->device]); + event->value = (int64_t)st; + return PAPI_OK; +} + +int access_amdsmi_ras_block_state(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if 
(mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_ras_block_features_enabled_p) + return PAPI_ENOSUPP; + + amdsmi_ras_err_state_t state; + amdsmi_status_t st = + amdsmi_get_gpu_ras_block_features_enabled_p(device_handles[event->device], (amdsmi_gpu_block_t)event->variant, &state); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)state; + return PAPI_OK; +} + +int access_amdsmi_reg_count(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (amdsmi_lib_major < 25 || !amdsmi_get_gpu_reg_table_info_p) + return PAPI_ENOSUPP; + + amdsmi_reg_type_t reg_type = (amdsmi_reg_type_t)event->variant; /* set at registration */ + amdsmi_name_value_t *regs = NULL; + uint32_t num = 0; + amdsmi_status_t st = amdsmi_get_gpu_reg_table_info_p(device_handles[event->device], reg_type, ®s, &num); + if (regs) + free(regs); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)num; + return PAPI_OK; +} + +int access_amdsmi_reg_value(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (amdsmi_lib_major < 25 || !amdsmi_get_gpu_reg_table_info_p) + return PAPI_ENOSUPP; + + amdsmi_reg_type_t reg_type = (amdsmi_reg_type_t)event->variant; + amdsmi_name_value_t *regs = NULL; + uint32_t num = 0; + amdsmi_status_t st = amdsmi_get_gpu_reg_table_info_p(device_handles[event->device], reg_type, ®s, &num); + if (st != AMDSMI_STATUS_SUCCESS || event->subvariant >= num) { + if (regs) + free(regs); + return PAPI_EMISC; + } + event->value = (int64_t)regs[event->subvariant].value; + free(regs); + return 
PAPI_OK; +} + +int access_amdsmi_voltage(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_volt_metric_p) + return PAPI_ENOSUPP; + + amdsmi_voltage_type_t sensor = (amdsmi_voltage_type_t)event->subvariant; /* set at registration */ + amdsmi_voltage_metric_t metric = (amdsmi_voltage_metric_t)event->variant; /* e.g., AMDSMI_VOLT_CURRENT */ + int64_t mv = 0; + amdsmi_status_t st = amdsmi_get_gpu_volt_metric_p(device_handles[event->device], sensor, metric, &mv); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)mv; /* API reports mV */ + return PAPI_OK; +} + +int access_amdsmi_vram_width(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_vram_info_p) + return PAPI_ENOSUPP; + + amdsmi_vram_info_t info; + memset(&info, 0, sizeof(info)); + amdsmi_status_t st = amdsmi_get_gpu_vram_info_p(device_handles[event->device], &info); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)info.vram_bit_width; + return PAPI_OK; +} + +int access_amdsmi_vram_size(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_vram_info_p) + return PAPI_ENOSUPP; + + amdsmi_vram_info_t info; + memset(&info, 0, sizeof(info)); + amdsmi_status_t st = amdsmi_get_gpu_vram_info_p(device_handles[event->device], &info); + if (st != AMDSMI_STATUS_SUCCESS) + 
return PAPI_EMISC; + /* vram_size reported in MB */ + event->value = (uint64_t)info.vram_size * 1024ULL * 1024ULL; + return PAPI_OK; +} + +int access_amdsmi_vram_type(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_vram_info_p) + return PAPI_ENOSUPP; + + amdsmi_vram_info_t info; + memset(&info, 0, sizeof(info)); + amdsmi_status_t st = amdsmi_get_gpu_vram_info_p(device_handles[event->device], &info); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)info.vram_type; + return PAPI_OK; +} + +int access_amdsmi_vram_vendor(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_vram_info_p) + return PAPI_ENOSUPP; + + amdsmi_vram_info_t info; + memset(&info, 0, sizeof(info)); + amdsmi_status_t st = amdsmi_get_gpu_vram_info_p(device_handles[event->device], &info); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)info.vram_vendor; + return PAPI_OK; +} + +int access_amdsmi_vram_usage(int mode, void *arg) { + if (mode != PAPI_MODE_READ) return PAPI_ENOSUPP; + + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || + !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + + /* variant: 0 = total MB, 1 = used MB */ + if (event->variant == 0) { + /* TOTAL: prefer vram_info to avoid the buggy usage path */ + if (!amdsmi_get_gpu_vram_info_p) return PAPI_ENOSUPP; + + amdsmi_vram_info_t vinf; + memset(&vinf, 0, sizeof(vinf)); + if 
(amdsmi_get_gpu_vram_info_p(device_handles[event->device], &vinf) + != AMDSMI_STATUS_SUCCESS) { + event->value = 0; /* deterministic, not UB */ + return PAPI_OK; + } + /* vinf.vram_size is reported in MB by AMD SMI */ + event->value = (uint64_t)vinf.vram_size; + return PAPI_OK; + } + + /* USED: keep using vram_usage for the “used” number */ + if (!amdsmi_get_gpu_vram_usage_p) return PAPI_ENOSUPP; + + amdsmi_vram_usage_t u; + memset(&u, 0, sizeof(u)); + if (amdsmi_get_gpu_vram_usage_p(device_handles[event->device], &u) + != AMDSMI_STATUS_SUCCESS) { + event->value = 0; + return PAPI_OK; + } + event->value = (uint64_t)u.vram_used; /* MB */ + return PAPI_OK; +} + + +int access_amdsmi_soc_pstate_id(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_soc_pstate_p) + return PAPI_ENOSUPP; + + amdsmi_dpm_policy_t pol = {0}; + amdsmi_status_t st = amdsmi_get_soc_pstate_p(device_handles[event->device], &pol); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)pol.current; + return PAPI_OK; +} + +int access_amdsmi_soc_pstate_supported(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_soc_pstate_p) + return PAPI_ENOSUPP; + + amdsmi_dpm_policy_t pol = {0}; + amdsmi_status_t st = amdsmi_get_soc_pstate_p(device_handles[event->device], &pol); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)pol.num_supported; + return PAPI_OK; +} + +int access_amdsmi_xgmi_plpd_id(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || 
event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_xgmi_plpd_p) + return PAPI_ENOSUPP; + + amdsmi_dpm_policy_t pol = {0}; + amdsmi_status_t st = amdsmi_get_xgmi_plpd_p(device_handles[event->device], &pol); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)pol.current; + return PAPI_OK; +} + +int access_amdsmi_xgmi_plpd_supported(int mode, void *arg) { + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) { + return PAPI_EMISC; + } + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_xgmi_plpd_p) + return PAPI_ENOSUPP; + + amdsmi_dpm_policy_t pol = {0}; + amdsmi_status_t st = amdsmi_get_xgmi_plpd_p(device_handles[event->device], &pol); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (uint64_t)pol.num_supported; + return PAPI_OK; +} + +int access_amdsmi_process_isolation(int mode, void *arg) { + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_process_isolation_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles[event->device]) + return PAPI_EMISC; + uint32_t val = 0; + amdsmi_status_t st = amdsmi_get_gpu_process_isolation_p(device_handles[event->device], &val); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (int64_t)val; + return PAPI_OK; +} + +int access_amdsmi_xcd_counter(int mode, void *arg) { + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_xcd_counter_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles[event->device]) + return PAPI_EMISC; + uint16_t cnt = 0; + amdsmi_status_t st = 
amdsmi_get_gpu_xcd_counter_p(device_handles[event->device], &cnt); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (int64_t)cnt; + return PAPI_OK; +} + +int access_amdsmi_board_serial_hash(int mode, void *arg) { + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_board_info_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles[event->device]) + return PAPI_EMISC; + amdsmi_board_info_t info; + memset(&info, 0, sizeof(info)); + amdsmi_status_t st = amdsmi_get_gpu_board_info_p(device_handles[event->device], &info); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (int64_t)_str_to_u64_hash(info.product_serial); + return PAPI_OK; +} + +int access_amdsmi_fw_version(int mode, void *arg) { + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_fw_info_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles || !device_handles[event->device]) + return PAPI_EMISC; + + amdsmi_fw_info_t info; + memset(&info, 0, sizeof(info)); + amdsmi_status_t st = amdsmi_get_fw_info_p(device_handles[event->device], &info); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + + amdsmi_fw_block_t id = (amdsmi_fw_block_t)event->variant; + uint8_t n = info.num_fw_info; + if (n > AMDSMI_FW_ID__MAX) + n = AMDSMI_FW_ID__MAX; + for (uint8_t i = 0; i < n; ++i) { + if (info.fw_info_list[i].fw_id == id) { + event->value = (int64_t)info.fw_info_list[i].fw_version; + return PAPI_OK; + } + } + return PAPI_EMISC; +} + +#if AMDSMI_LIB_VERSION_MAJOR >= 25 +int access_amdsmi_vram_max_bandwidth(int mode, void *arg) { + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (amdsmi_lib_major < 25 || !amdsmi_get_gpu_vram_info_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || 
event->device >= device_count || !device_handles[event->device]) + return PAPI_EMISC; + amdsmi_vram_info_t info; + memset(&info, 0, sizeof(info)); + amdsmi_status_t st = amdsmi_get_gpu_vram_info_p(device_handles[event->device], &info); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (int64_t)info.vram_max_bandwidth; /* GB/s */ + return PAPI_OK; +} +#endif + +int access_amdsmi_memory_reserved_pages(int mode, void *arg) { + if (mode != PAPI_MODE_READ || !amdsmi_get_gpu_memory_reserved_pages_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || + !device_handles[event->device]) + return PAPI_EMISC; + uint32_t num = 0; + if (amdsmi_get_gpu_memory_reserved_pages_p(device_handles[event->device], &num, + NULL) != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (int64_t)num; + return PAPI_OK; +} + +int access_amdsmi_bad_page_count(int mode, void *arg) { + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_bad_page_info_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles[event->device]) + return PAPI_EMISC; + uint32_t num = 0; + amdsmi_status_t st = amdsmi_get_gpu_bad_page_info_p(device_handles[event->device], &num, NULL); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + event->value = (int64_t)num; + return PAPI_OK; +} + +int access_amdsmi_bad_page_threshold(int mode, void *arg) { + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_bad_page_threshold_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles[event->device]) + return PAPI_EMISC; + uint32_t thr = 0; + amdsmi_status_t st = amdsmi_get_gpu_bad_page_threshold_p(device_handles[event->device], &thr); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + 
event->value = (int64_t)thr; + return PAPI_OK; +} + +int access_amdsmi_bad_page_record(int mode, void *arg) { + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_gpu_bad_page_info_p) + return PAPI_ENOSUPP; + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || !device_handles[event->device]) + return PAPI_EMISC; + uint32_t num = 0; + amdsmi_status_t st = amdsmi_get_gpu_bad_page_info_p(device_handles[event->device], &num, NULL); + if (st != AMDSMI_STATUS_SUCCESS) + return PAPI_EMISC; + if (event->subvariant >= num) + return PAPI_EMISC; + amdsmi_retired_page_record_t *recs = (amdsmi_retired_page_record_t *)papi_calloc(num, sizeof(amdsmi_retired_page_record_t)); + if (!recs) + return PAPI_ENOMEM; + st = amdsmi_get_gpu_bad_page_info_p(device_handles[event->device], &num, recs); + if (st != AMDSMI_STATUS_SUCCESS) { + papi_free(recs); + return PAPI_EMISC; + } + amdsmi_retired_page_record_t rec = recs[event->subvariant]; + papi_free(recs); + switch (event->variant) { + case 0: + event->value = (int64_t)rec.page_address; + break; + case 1: + event->value = (int64_t)rec.page_size; + break; + case 2: + event->value = (int64_t)rec.status; + break; + default: + return PAPI_ENOSUPP; + } + return PAPI_OK; +} + +int access_amdsmi_power_sensor(int mode, void *arg) { + if (mode != PAPI_MODE_READ) + return PAPI_ENOSUPP; + if (!amdsmi_get_power_info_v2_p) + return PAPI_ENOSUPP; + + native_event_t *event = (native_event_t *)arg; + if (event->device < 0 || event->device >= device_count || + !device_handles[event->device]) + return PAPI_EMISC; + + amdsmi_power_info_t info; + memset(&info, 0, sizeof(info)); /* critical: avoid uninitialised fields */ + + amdsmi_status_t st = + amdsmi_get_power_info_v2_p(device_handles[event->device], + (uint32_t)event->subvariant, &info); + if (st != AMDSMI_STATUS_SUCCESS) { + event->value = 0; + return PAPI_OK; + } + + switch (event->variant) { + case 0: event->value = 
(int64_t)info.current_socket_power; break; /* W */
+        case 1: event->value = (int64_t)info.average_socket_power; break; /* W */
+#if AMDSMI_LIB_VERSION_MAJOR >= 25
+        case 2: event->value = (int64_t)info.socket_power; break; /* uW */
+#endif
+        case 3: event->value = (int64_t)info.gfx_voltage; break; /* mV */
+        case 4: event->value = (int64_t)info.soc_voltage; break; /* mV */
+        case 5: event->value = (int64_t)info.mem_voltage; break; /* mV */
+        case 6: event->value = (int64_t)info.power_limit; break; /* W */
+        default: return PAPI_ENOSUPP;
+    }
+    return PAPI_OK;
+}
+
+
+/**
+ * Accessor: read one field of the PCIe info record of the event's device.
+ *
+ * @param mode  must be PAPI_MODE_READ; any other mode yields PAPI_ENOSUPP.
+ * @param arg   the native_event_t being read; the result is stored in
+ *              event->value, the field is selected by event->variant.
+ * @return PAPI_OK on success, PAPI_ENOSUPP if the SMI symbol is missing or
+ *         the variant is unknown, PAPI_EMISC for a bad device or a failed
+ *         SMI call.
+ */
+int access_amdsmi_pcie_info(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ)
+        return PAPI_ENOSUPP;
+    if (!amdsmi_get_pcie_info_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    /* Check the handle array itself before indexing it, like the sibling accessors do. */
+    if (event->device < 0 || event->device >= device_count || !device_handles ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    amdsmi_pcie_info_t info;
+    memset(&info, 0, sizeof(info));
+    amdsmi_status_t st = amdsmi_get_pcie_info_p(device_handles[event->device], &info);
+    if (st != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    // Variant mapping:
+    // 0 max width, 1 max speed, 2 interface version, 3 slot type,
+    // 4 max interface version (lib >=25),
+    // 5 current width, 6 current speed, 7 bandwidth,
+    // 8 replay count, 9 L0->recovery count, 10 replay rollover count,
+    // 11 NAK sent count, 12 NAK received count,
+    // 13 other-end recovery count
+    switch (event->variant) {
+    case 0:
+        event->value = info.pcie_static.max_pcie_width;
+        break;
+    case 1:
+        event->value = (int64_t)info.pcie_static.max_pcie_speed;
+        break;
+    case 2:
+        event->value = (int64_t)info.pcie_static.pcie_interface_version;
+        break;
+    case 3:
+        event->value = (int64_t)info.pcie_static.slot_type;
+        break;
+#if AMDSMI_LIB_VERSION_MAJOR >= 25
+    case 4:
+        /* Built against a new header, but the library loaded at runtime may be older. */
+        if (amdsmi_lib_major < 25)
+            return PAPI_ENOSUPP;
+        event->value = (int64_t)info.pcie_static.max_pcie_interface_version;
+        break;
+#endif
+    case 5:
+        event->value = info.pcie_metric.pcie_width;
+        break;
+    case 6:
+        event->value = (int64_t)info.pcie_metric.pcie_speed;
+        break;
+    case 7:
+        event->value = (int64_t)info.pcie_metric.pcie_bandwidth;
+        break;
+    case 8:
+        event->value = (int64_t)info.pcie_metric.pcie_replay_count;
+        break;
+    case 9:
+        event->value = (int64_t)info.pcie_metric.pcie_l0_to_recovery_count;
+        break;
+    case 10:
+        event->value = (int64_t)info.pcie_metric.pcie_replay_roll_over_count;
+        break;
+    case 11:
+        event->value = (int64_t)info.pcie_metric.pcie_nak_sent_count;
+        break;
+    case 12:
+        event->value = (int64_t)info.pcie_metric.pcie_nak_received_count;
+        break;
+    case 13:
+        event->value = (int64_t)info.pcie_metric.pcie_lc_perf_other_end_recovery_count;
+        break;
+    default:
+        return PAPI_ENOSUPP;
+    }
+    return PAPI_OK;
+}
+
+/**
+ * Accessor: count pending GPU event notifications of one type.
+ *
+ * Each read initialises notification on the event's device, sets the mask
+ * derived from event->variant, fetches at most 8 pending notifications,
+ * counts those whose type equals event->variant, and stops notification again.
+ * A failed fetch is reported as a count of 0 with PAPI_OK.
+ *
+ * @param mode  must be PAPI_MODE_READ.
+ * @param arg   the native_event_t being read; the count goes to event->value.
+ * @return PAPI_OK, PAPI_ENOSUPP (wrong mode / missing SMI symbols) or
+ *         PAPI_EMISC (bad device, init or mask call failed).
+ */
+int access_amdsmi_event_notification(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ)
+        return PAPI_ENOSUPP;
+    if (!amdsmi_init_gpu_event_notification_p || !amdsmi_set_gpu_event_notification_mask_p ||
+        !amdsmi_get_gpu_event_notification_p || !amdsmi_stop_gpu_event_notification_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    /* Check the handle array itself before indexing it, like the sibling accessors do. */
+    if (event->device < 0 || event->device >= device_count || !device_handles ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    amdsmi_status_t st = amdsmi_init_gpu_event_notification_p(device_handles[event->device]);
+    if (st != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    uint64_t mask = AMDSMI_EVENT_MASK_FROM_INDEX(event->variant);
+    st = amdsmi_set_gpu_event_notification_mask_p(device_handles[event->device], mask);
+    if (st != AMDSMI_STATUS_SUCCESS) {
+        amdsmi_stop_gpu_event_notification_p(device_handles[event->device]);
+        return PAPI_EMISC;
+    }
+    amdsmi_evt_notification_data_t data[8];
+    const uint32_t capacity = (uint32_t)(sizeof(data) / sizeof(data[0]));
+    uint32_t num = capacity;
+    /* NOTE(review): the first argument (0) is presumably a timeout in ms — confirm against the AMD SMI API. */
+    st = amdsmi_get_gpu_event_notification_p(0, &num, data);
+    uint32_t cnt = 0;
+    if (st == AMDSMI_STATUS_SUCCESS) {
+        /* Never walk past the local buffer, whatever count the library reports back. */
+        if (num > capacity)
+            num = capacity;
+        for (uint32_t i = 0; i < num; ++i)
+            if (data[i].event == (amdsmi_evt_notification_type_t)event->variant)
+                cnt++;
+    }
+    amdsmi_stop_gpu_event_notification_p(device_handles[event->device]);
+    event->value = (int64_t)cnt;
+    return PAPI_OK;
+}
+
+/**
+ * Accessor: read a single utilization counter; event->variant selects the
+ * counter type and the counter value is stored in event->value.
+ *
+ * @return PAPI_OK, PAPI_ENOSUPP (wrong mode / missing SMI symbol) or
+ *         PAPI_EMISC (bad device or failed SMI call).
+ */
+int access_amdsmi_utilization_count(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ)
+        return PAPI_ENOSUPP;
+    if (!amdsmi_get_utilization_count_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count || !device_handles ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    amdsmi_utilization_counter_t cnt;
+    memset(&cnt, 0, sizeof(cnt));
+    cnt.type = (amdsmi_utilization_counter_type_t)event->variant;
+    uint64_t ts = 0; /* timestamp output of the SMI call; not exposed as an event value */
+    amdsmi_status_t st =
+        amdsmi_get_utilization_count_p(device_handles[event->device], &cnt, 1, &ts);
+    if (st != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    event->value = (int64_t)cnt.value;
+    return PAPI_OK;
+}
+
+/**
+ * Accessor: read one field of the violation-status record.
+ *
+ * event->variant selects the field: 0-2 acc_{ppt_pwr,socket_thrm,vr_thrm},
+ * 3-5 per_{...}, 6-8 active_{...}. The value is stored in event->value.
+ *
+ * @return PAPI_OK, PAPI_ENOSUPP (wrong mode / missing SMI symbol / unknown
+ *         variant) or PAPI_EMISC (bad device or failed SMI call).
+ */
+int access_amdsmi_violation_status(int mode, void *arg) {
+    if (mode != PAPI_MODE_READ)
+        return PAPI_ENOSUPP;
+    if (!amdsmi_get_violation_status_p)
+        return PAPI_ENOSUPP;
+    native_event_t *event = (native_event_t *)arg;
+    if (event->device < 0 || event->device >= device_count || !device_handles ||
+        !device_handles[event->device])
+        return PAPI_EMISC;
+    amdsmi_violation_status_t info;
+    memset(&info, 0, sizeof(info));
+    amdsmi_status_t st =
+        amdsmi_get_violation_status_p(device_handles[event->device], &info);
+    if (st != AMDSMI_STATUS_SUCCESS)
+        return PAPI_EMISC;
+    switch (event->variant) {
+    case 0:
+        event->value = (int64_t)info.acc_ppt_pwr;
+        break;
+    case 1:
+        event->value = (int64_t)info.acc_socket_thrm;
+        break;
+    case 2:
+        event->value = (int64_t)info.acc_vr_thrm;
+        break;
+    case 3:
+        event->value = (int64_t)info.per_ppt_pwr;
+        break;
+    case 4:
+        event->value = (int64_t)info.per_socket_thrm;
+        break;
+    case 5:
+        event->value = (int64_t)info.per_vr_thrm;
+        break;
+    case 6:
+        event->value = (int64_t)info.active_ppt_pwr;
+        break;
+    case 7:
+        event->value = (int64_t)info.active_socket_thrm;
+        break;
+    case 8:
+        event->value = (int64_t)info.active_vr_thrm;
+        break;
+    default:
+        return PAPI_ENOSUPP;
+    }
+    return PAPI_OK;
+}
diff --git a/src/components/amd_smi/amds_ctx.c b/src/components/amd_smi/amds_ctx.c
new file mode 100644
index 000000000..a8a65d9f2
--- /dev/null
+++ b/src/components/amd_smi/amds_ctx.c
@@ -0,0 +1,188 @@
+/**
+ * @file amds_ctx.c
+ * @author Dong Jun Woun
+ * djwoun@gmail.com
+ *
+ */
+
+#include "amds.h"
+#include "amds_priv.h"
+#include "papi.h"
+#include "papi_memory.h"
+#include "papi_internal.h"
+
+unsigned int _amd_smi_lock;
+/* One bit per device index currently claimed by an open context. */
+static uint64_t device_mask = 0;
+
+/* Number of device indices representable in the 64-bit in-use masks. */
+#define AMDS_DEVICE_MASK_BITS 64
+
+/**
+ * Claim every device referenced by the given events.
+ *
+ * Events with a negative device index are not tied to a device and are
+ * skipped. The claim is all-or-nothing: if any requested device is already
+ * claimed, nothing is acquired.
+ *
+ * @param events_id   indices into the native event table.
+ * @param num_events  number of entries in events_id.
+ * @param bitmask     out: mask of the devices claimed by this call.
+ * @return PAPI_OK, PAPI_ECNFLCT if a device is already in use, or PAPI_EMISC
+ *         if a device index does not fit in the mask.
+ */
+static int acquire_devices(unsigned int *events_id, int num_events, uint64_t *bitmask) {
+    uint64_t mask_acq = 0;
+    for (int i = 0; i < num_events; ++i) {
+        int32_t dev_id = ntv_table_p->events[events_id[i]].device;
+        if (dev_id < 0)
+            continue;
+        /* Shifting by >= the type width is undefined; refuse instead of corrupting the mask. */
+        if (dev_id >= AMDS_DEVICE_MASK_BITS)
+            return PAPI_EMISC;
+        mask_acq |= ((uint64_t)1 << dev_id);
+    }
+    _papi_hwi_lock(_amd_smi_lock);
+    if (mask_acq & device_mask) {
+        _papi_hwi_unlock(_amd_smi_lock);
+        return PAPI_ECNFLCT; // conflict: device already in use
+    }
+    device_mask |= mask_acq;
+    _papi_hwi_unlock(_amd_smi_lock);
+    *bitmask = mask_acq;
+    return PAPI_OK;
+}
+
+/**
+ * Return the devices in *bitmask to the free pool and clear *bitmask.
+ *
+ * @return PAPI_OK, or PAPI_EMISC if some of the bits are not currently
+ *         claimed (nothing is released in that case).
+ */
+static int release_devices(uint64_t *bitmask) {
+    uint64_t mask_rel = *bitmask;
+    _papi_hwi_lock(_amd_smi_lock);
+    if ((mask_rel & device_mask) != mask_rel) {
+        _papi_hwi_unlock(_amd_smi_lock);
+        return PAPI_EMISC;
+    }
+    device_mask &= ~mask_rel;
+    _papi_hwi_unlock(_amd_smi_lock);
+    *bitmask = 0;
+    return PAPI_OK;
+}
+
+/* Context management: open/close, start/stop, read/write, reset */
+struct amds_ctx {
+    int state;               /* AMDS_EVENTS_RUNNING flag while started */
+    unsigned int *events_id; /* borrowed from the caller, not copied or freed here */
+    int num_events;
+    long long *counters;     /* owned; one slot per event */
+    uint64_t device_mask;    /* devices claimed by this context */
+};
+
+/**
+ * Create a context for the given events: allocate the counter buffer, claim
+ * the devices involved and run each event's open hook. On any failure all
+ * hooks already opened are closed, devices are released and nothing leaks.
+ *
+ * @param event_ids   indices into the native event table; the pointer is
+ *                    stored, so it must outlive the context.
+ * @param num_events  number of entries in event_ids.
+ * @param ctx         out: the new context on success.
+ * @return PAPI_OK, PAPI_EINVAL for bad arguments, PAPI_ENOMEM, or the error
+ *         reported by device acquisition or an open hook.
+ */
+int amds_ctx_open(unsigned int *event_ids, int num_events, amds_ctx_t *ctx) {
+    if (!ctx || num_events < 0 || (num_events > 0 && !event_ids))
+        return PAPI_EINVAL;
+    amds_ctx_t new_ctx = (amds_ctx_t)papi_calloc(1, sizeof(struct amds_ctx));
+    if (new_ctx == NULL) {
+        return PAPI_ENOMEM;
+    }
+    new_ctx->events_id = event_ids; // Store pointer
+    new_ctx->num_events = num_events;
+    new_ctx->counters = (long long *)papi_calloc(num_events, sizeof(long long));
+    if (new_ctx->counters == NULL) {
+        papi_free(new_ctx);
+        return PAPI_ENOMEM;
+    }
+    // Acquire devices needed by these events to avoid conflicts
+    int papi_errno = acquire_devices(event_ids, num_events, &new_ctx->device_mask);
+    if (papi_errno != PAPI_OK) {
+        papi_free(new_ctx->counters);
+        papi_free(new_ctx);
+        return papi_errno;
+    }
+    for (int i = 0; i < num_events; ++i) {
+        native_event_t *ev = &ntv_table_p->events[event_ids[i]];
+        if (ev->open_func) {
+            papi_errno = ev->open_func(ev);
+            if (papi_errno != PAPI_OK) {
+                /* Unwind only the events opened so far. */
+                for (int j = 0; j < i; ++j) {
+                    native_event_t *prev = &ntv_table_p->events[event_ids[j]];
+                    if (prev->close_func)
+                        prev->close_func(prev);
+                }
+                release_devices(&new_ctx->device_mask);
+                papi_free(new_ctx->counters);
+                papi_free(new_ctx);
+                return papi_errno;
+            }
+        }
+    }
+    *ctx = new_ctx;
+    return PAPI_OK;
+}
+
+/**
+ * Destroy a context: run each event's close hook, release its devices and
+ * free its memory. A NULL context is accepted and ignored.
+ */
+int amds_ctx_close(amds_ctx_t ctx) {
+    if (!ctx)
+        return PAPI_OK;
+    for (int i = 0; i < ctx->num_events; ++i) {
+        native_event_t *ev = &ntv_table_p->events[ctx->events_id[i]];
+        if (ev->close_func)
+            ev->close_func(ev);
+    }
+    // release device usage
+    release_devices(&ctx->device_mask);
+    papi_free(ctx->counters);
+    papi_free(ctx);
+    return PAPI_OK;
+}
+
+/**
+ * Run the start hook of every event and mark the context as running.
+ *
+ * @return PAPI_OK, PAPI_EINVAL for a NULL context, or the first start-hook
+ *         error; in that case the events started so far are stopped again.
+ */
+int amds_ctx_start(amds_ctx_t ctx) {
+    if (!ctx)
+        return PAPI_EINVAL;
+    for (int i = 0; i < ctx->num_events; ++i) {
+        native_event_t *ev = &ntv_table_p->events[ctx->events_id[i]];
+        if (!ev->start_func)
+            continue;
+        int papi_errno = ev->start_func(ev);
+        if (papi_errno != PAPI_OK) {
+            /* Undo the events already started: amds_ctx_stop would skip them
+             * because the RUNNING flag is never set on this path. */
+            for (int j = 0; j < i; ++j) {
+                native_event_t *prev = &ntv_table_p->events[ctx->events_id[j]];
+                if (prev->stop_func)
+                    prev->stop_func(prev);
+            }
+            return papi_errno;
+        }
+    }
+    ctx->state |= AMDS_EVENTS_RUNNING;
+    return PAPI_OK;
+}
+
+/**
+ * Run the stop hook of every event and clear the running flag. All hooks are
+ * invoked even if one fails; the first error is the one returned. Stopping a
+ * context that is not running is a no-op.
+ */
+int amds_ctx_stop(amds_ctx_t ctx) {
+    if (!ctx)
+        return PAPI_EINVAL;
+    if (!(ctx->state & AMDS_EVENTS_RUNNING)) {
+        return PAPI_OK;
+    }
+    int papi_errno = PAPI_OK;
+    for (int i = 0; i < ctx->num_events; ++i) {
+        native_event_t *ev = &ntv_table_p->events[ctx->events_id[i]];
+        if (ev->stop_func) {
+            int papi_errno_stop = ev->stop_func(ev);
+            if (papi_errno == PAPI_OK)
+                papi_errno = papi_errno_stop;
+        }
+    }
+    ctx->state &= ~AMDS_EVENTS_RUNNING;
+    return papi_errno;
+}
+/**
+ * Read every event of the context through its accessor.
+ *
+ * The counter buffer is zeroed first, so a failed read leaves 0 in that slot.
+ * All events are attempted even after a failure; the first error is returned.
+ * An event without an accessor reports its current stored value.
+ *
+ * @param ctx     context to read.
+ * @param counts  out: pointer to the context-owned counter buffer.
+ * @return PAPI_OK, PAPI_EINVAL for NULL arguments, or the first accessor error.
+ */
+int amds_ctx_read(amds_ctx_t ctx, long long **counts) {
+    if (!ctx || !counts) return PAPI_EINVAL;
+
+    /* Always produce a fully defined buffer */
+    for (int i = 0; i < ctx->num_events; ++i) {
+        ctx->counters[i] = 0; /* default if read fails */
+    }
+
+    /* Optional: track first error, but don't bail early */
+    int papi_errno = PAPI_OK;
+
+    for (int i = 0; i < ctx->num_events; ++i) {
+        unsigned int id = ctx->events_id[i];
+        native_event_t *ev = &ntv_table_p->events[id];
+
+        int papi_errno_access = PAPI_OK;
+        if (ev->access_func) {
+            papi_errno_access = ev->access_func(PAPI_MODE_READ, ev);
+        }
+        if (papi_errno_access == PAPI_OK) {
+            ctx->counters[i] = (long long)ev->value;
+        } else if (papi_errno == PAPI_OK) {
+            papi_errno = papi_errno_access; /* remember, but keep going */
+        }
+    }
+
+    *counts = ctx->counters;
+
+    return papi_errno;
+}
+
+/**
+ * Write counts[i] to event i through its accessor, stopping at the first
+ * failure.
+ *
+ * @param ctx     context whose events are written.
+ * @param counts  one value per event, in event order.
+ * @return PAPI_OK, PAPI_EINVAL for NULL arguments, PAPI_ENOSUPP if an event
+ *         has no accessor, or the first accessor error.
+ */
+int amds_ctx_write(amds_ctx_t ctx, long long *counts) {
+    if (!ctx || !counts)
+        return PAPI_EINVAL;
+    for (int i = 0; i < ctx->num_events; ++i) {
+        native_event_t *ev = &ntv_table_p->events[ctx->events_id[i]];
+        /* amds_ctx_read tolerates a missing accessor; a write cannot be carried out without one. */
+        if (!ev->access_func)
+            return PAPI_ENOSUPP;
+        ev->value = counts[i];
+        int papi_errno = ev->access_func(PAPI_MODE_WRITE, ev);
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }
+    }
+    return PAPI_OK;
+}
+
+/**
+ * Zero the stored value of every event in the context and the context's
+ * counter buffer. No accessor is called.
+ */
+int amds_ctx_reset(amds_ctx_t ctx) {
+    if (!ctx)
+        return PAPI_EINVAL;
+    for (int i = 0; i < ctx->num_events; ++i) {
+        unsigned int id = ctx->events_id[i];
+        ntv_table_p->events[id].value = 0;
+        ctx->counters[i] = 0;
+    }
+    return PAPI_OK;
+}
diff --git a/src/components/amd_smi/amds_evtapi.c b/src/components/amd_smi/amds_evtapi.c
new file mode 100644
index 000000000..28d63be88
--- /dev/null
+++ b/src/components/amd_smi/amds_evtapi.c
@@ -0,0 +1,55 @@
+/**
+ * @file amds_evtapi.c
+ * @author Dong Jun Woun
+ * djwoun@gmail.com
+ *
+ */
+
+#include "amds.h"
+#include "amds_priv.h"
+#include "htable.h"
+#include "papi.h"
+#include <string.h>
+
+/**
+ * Event enumeration: iterate over native events.
+ *
+ * PAPI_ENUM_FIRST yields code 0; PAPI_ENUM_EVENTS advances *EventCode by one.
+ *
+ * @return PAPI_OK, PAPI_ENOEVNT when the table is empty or exhausted,
+ *         PAPI_EINVAL for a NULL pointer or an unsupported modifier.
+ */
+int amds_evt_enum(unsigned int *EventCode, int modifier) {
+    if (!EventCode)
+        return PAPI_EINVAL;
+    if (modifier == PAPI_ENUM_FIRST) {
+        if (ntv_table_p->count == 0) {
+            return PAPI_ENOEVNT;
+        }
+        *EventCode = 0;
+        return PAPI_OK;
+    } else if (modifier == PAPI_ENUM_EVENTS) {
+        if (*EventCode + 1 < (unsigned int)ntv_table_p->count) {
+            *EventCode = *EventCode + 1;
+            return PAPI_OK;
+        } else {
+            return PAPI_ENOEVNT;
+        }
+    }
+    return PAPI_EINVAL;
+}
+
+/**
+ * Copy the name of the event with the given code into name (at most len
+ * bytes, always NUL-terminated).
+ *
+ * @return PAPI_OK, or PAPI_EINVAL for an unknown code, a NULL buffer or a
+ *         non-positive length.
+ */
+int amds_evt_code_to_name(unsigned int EventCode, char *name, int len) {
+    /* len <= 0 would make the terminator write below land before the buffer. */
+    if (!name || len <= 0)
+        return PAPI_EINVAL;
+    if (EventCode >= (unsigned int)ntv_table_p->count) {
+        return PAPI_EINVAL;
+    }
+    strncpy(name, ntv_table_p->events[EventCode].name, len);
+    name[len-1] = '\0';
+    return PAPI_OK;
+}
+
+/**
+ * Look an event name up in the hash table and return its code.
+ *
+ * @return PAPI_OK, PAPI_ENOEVNT if the name is unknown, PAPI_EINVAL for NULL
+ *         arguments, PAPI_ECMP for any other hash-table failure.
+ */
+int amds_evt_name_to_code(const char *name, unsigned int *EventCode) {
+    if (!name || !EventCode)
+        return PAPI_EINVAL;
+    native_event_t *event = NULL;
+    int hret = htable_find(htable, name, (void **)&event);
+    if (hret != HTABLE_SUCCESS) {
+        return (hret == HTABLE_ENOVAL) ? PAPI_ENOEVNT : PAPI_ECMP;
+    }
+    *EventCode = event->id;
+    return PAPI_OK;
+}
+
+/**
+ * Copy the description of the event with the given code into descr (at most
+ * len bytes, always NUL-terminated).
+ *
+ * @return PAPI_OK, or PAPI_EINVAL for an unknown code, a NULL buffer or a
+ *         non-positive length.
+ */
+int amds_evt_code_to_descr(unsigned int EventCode, char *descr, int len) {
+    /* len <= 0 would make the terminator write below land before the buffer. */
+    if (!descr || len <= 0)
+        return PAPI_EINVAL;
+    if (EventCode >= (unsigned int)ntv_table_p->count) {
+        return PAPI_EINVAL;
+    }
+    strncpy(descr, ntv_table_p->events[EventCode].descr, len);
+    descr[len-1] = '\0';
+    return PAPI_OK;
+}
diff --git a/src/components/amd_smi/amds_funcs.h b/src/components/amd_smi/amds_funcs.h
new file mode 100644
index 000000000..de7a480ff
--- /dev/null
+++ b/src/components/amd_smi/amds_funcs.h
@@ -0,0 +1,294 @@
+/**
+ * @file amds_funcs.h
+ * @author Dong Jun Woun
+ * djwoun@gmail.com
+ *
+ */
+
+#ifndef AMDS_FUNCS_H
+#define AMDS_FUNCS_H
+
+/* X-macro table of AMD SMI entry points. Each row is
+ * _(pointer name, return type, (argument types)); the includer supplies `_`
+ * to expand every row, e.g. into a function-pointer declaration. */
+#define AMD_SMI_GPU_FUNCTIONS_BASE(_) \
+  _(amdsmi_init_p, amdsmi_status_t, (uint64_t)) \
+  _(amdsmi_shut_down_p, amdsmi_status_t, (void)) \
+  _(amdsmi_get_socket_handles_p, amdsmi_status_t, \
+    (uint32_t *, amdsmi_socket_handle *)) \
+  _(amdsmi_get_processor_handles_by_type_p, amdsmi_status_t, \
+    (amdsmi_socket_handle, processor_type_t, amdsmi_processor_handle *, \
+     uint32_t *)) \
+  _(amdsmi_get_processor_handles_p, amdsmi_status_t, \
+    (amdsmi_socket_handle, uint32_t *, amdsmi_processor_handle *)) \
+
_(amdsmi_get_processor_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, size_t, char *)) \ + _(amdsmi_get_processor_type_p, amdsmi_status_t, \ + (amdsmi_processor_handle, processor_type_t *)) \ + _(amdsmi_get_socket_info_p, amdsmi_status_t, \ + (amdsmi_socket_handle, size_t, char *)) \ + _(amdsmi_get_utilization_count_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_utilization_counter_t *, uint32_t, \ + uint64_t *)) \ + _(amdsmi_get_violation_status_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_violation_status_t *)) \ + _(amdsmi_get_temp_metric_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_temperature_type_t, \ + amdsmi_temperature_metric_t, int64_t *)) \ + _(amdsmi_get_gpu_fan_rpms_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t, int64_t *)) \ + _(amdsmi_get_gpu_fan_speed_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t, int64_t *)) \ + _(amdsmi_get_gpu_fan_speed_max_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t, int64_t *)) \ + _(amdsmi_get_total_memory_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_memory_type_t, uint64_t *)) \ + _(amdsmi_get_memory_usage_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_memory_type_t, uint64_t *)) \ + _(amdsmi_get_gpu_activity_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_engine_usage_t *)) \ + _(amdsmi_get_power_cap_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t, amdsmi_power_cap_info_t *)) \ + _(amdsmi_get_gpu_power_cap_set_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t, uint64_t)) \ + _(amdsmi_get_power_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_power_info_t *)) \ + _(amdsmi_set_power_cap_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t, uint64_t)) \ + _(amdsmi_get_gpu_pci_throughput_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint64_t *, uint64_t *, uint64_t *)) \ + _(amdsmi_get_gpu_pci_replay_counter_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint64_t *)) 
\ + _(amdsmi_get_clk_freq_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_clk_type_t, amdsmi_frequencies_t *)) \ + _(amdsmi_get_clock_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_clk_type_t, amdsmi_clk_info_t *)) \ + _(amdsmi_set_clk_freq_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_clk_type_t, uint64_t)) \ + _(amdsmi_get_gpu_metrics_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_gpu_metrics_t *)) \ + _(amdsmi_get_lib_version_p, amdsmi_status_t, (amdsmi_version_t *)) \ + _(amdsmi_get_gpu_driver_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_driver_info_t *)) \ + _(amdsmi_get_gpu_asic_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_asic_info_t *)) \ + _(amdsmi_get_gpu_board_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_board_info_t *)) \ + _(amdsmi_get_fw_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_fw_info_t *)) \ + _(amdsmi_get_gpu_vbios_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_vbios_info_t *)) \ + _(amdsmi_get_gpu_device_uuid_p, amdsmi_status_t, \ + (amdsmi_processor_handle, unsigned int *, char *)) \ + _(amdsmi_get_gpu_vendor_name_p, amdsmi_status_t, \ + (amdsmi_processor_handle, char *, size_t)) \ + _(amdsmi_get_gpu_vram_vendor_p, amdsmi_status_t, \ + (amdsmi_processor_handle, char *, uint32_t)) \ + _(amdsmi_get_gpu_subsystem_name_p, amdsmi_status_t, \ + (amdsmi_processor_handle, char *, size_t)) \ + _(amdsmi_get_link_metrics_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_link_metrics_t *)) \ + _(amdsmi_get_minmax_bandwidth_between_processors_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_processor_handle, uint64_t *, \ + uint64_t *)) \ + _(amdsmi_get_gpu_process_list_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *, amdsmi_proc_info_t *)) \ + _(amdsmi_get_gpu_ecc_enabled_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint64_t *)) \ + _(amdsmi_get_gpu_total_ecc_count_p, amdsmi_status_t, \ + 
(amdsmi_processor_handle, amdsmi_error_count_t *)) \ + _(amdsmi_get_gpu_ecc_count_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_gpu_block_t, amdsmi_error_count_t *)) \ + _(amdsmi_get_gpu_ecc_status_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_gpu_block_t, amdsmi_ras_err_state_t *)) \ + _(amdsmi_get_gpu_compute_partition_p, amdsmi_status_t, \ + (amdsmi_processor_handle, char *, uint32_t)) \ + _(amdsmi_get_gpu_memory_partition_p, amdsmi_status_t, \ + (amdsmi_processor_handle, char *, uint32_t)) \ + _(amdsmi_get_gpu_accelerator_partition_profile_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_accelerator_partition_profile_t *, \ + uint32_t *)) \ + _(amdsmi_get_gpu_id_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint16_t *)) \ + _(amdsmi_get_gpu_revision_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint16_t *)) \ + _(amdsmi_get_gpu_subsystem_id_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint16_t *)) \ + _(amdsmi_get_gpu_process_isolation_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_get_gpu_xcd_counter_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint16_t *)) \ + _(amdsmi_get_gpu_pci_bandwidth_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_pcie_bandwidth_t *)) \ + _(amdsmi_get_gpu_bdf_id_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint64_t *)) \ + _(amdsmi_get_gpu_device_bdf_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_bdf_t *)) \ + _(amdsmi_get_gpu_topo_numa_affinity_p, amdsmi_status_t, \ + (amdsmi_processor_handle, int32_t *)) \ + _(amdsmi_topo_get_numa_node_number_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_topo_get_link_weight_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_processor_handle, uint64_t *)) \ + _(amdsmi_topo_get_link_type_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_processor_handle, uint64_t *, \ + amdsmi_io_link_type_t *)) \ + _(amdsmi_topo_get_p2p_status_p, amdsmi_status_t, \ + 
(amdsmi_processor_handle, amdsmi_processor_handle, amdsmi_io_link_type_t *,\ + amdsmi_p2p_capability_t *)) \ + _(amdsmi_is_P2P_accessible_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_processor_handle, bool *)) \ + _(amdsmi_get_link_topology_nearest_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_link_type_t, \ + amdsmi_topology_nearest_t *)) \ + _(amdsmi_get_energy_count_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint64_t *, float *, uint64_t *)) \ + _(amdsmi_get_gpu_power_profile_presets_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t, amdsmi_power_profile_status_t *)) \ + _(amdsmi_get_gpu_cache_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_gpu_cache_info_t *)) \ + _(amdsmi_get_gpu_mem_overdrive_level_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_get_gpu_od_volt_curve_regions_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *, amdsmi_freq_volt_region_t *)) \ + _(amdsmi_get_gpu_od_volt_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_od_volt_freq_data_t *)) \ + _(amdsmi_get_gpu_overdrive_level_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_get_gpu_perf_level_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_dev_perf_level_t *)) \ + _(amdsmi_get_gpu_pm_metrics_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_name_value_t **, uint32_t *)) \ + _(amdsmi_get_gpu_ras_feature_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_ras_feature_t *)) \ + _(amdsmi_get_gpu_ras_block_features_enabled_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_gpu_block_t, amdsmi_ras_err_state_t *)) \ + _(amdsmi_get_gpu_reg_table_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_reg_type_t, amdsmi_name_value_t **, \ + uint32_t *)) \ + _(amdsmi_get_gpu_volt_metric_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_voltage_type_t, amdsmi_voltage_metric_t, \ + int64_t *)) \ + _(amdsmi_get_gpu_vram_info_p, 
amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_vram_info_t *)) \ + _(amdsmi_get_gpu_vram_usage_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_vram_usage_t *)) \ + _(amdsmi_get_pcie_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_pcie_info_t *)) \ + _(amdsmi_get_processor_count_from_handles_p, amdsmi_status_t, \ + (amdsmi_processor_handle *, uint32_t *, uint32_t *, uint32_t *, \ + uint32_t *)) \ + _(amdsmi_get_soc_pstate_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_dpm_policy_t *)) \ + _(amdsmi_get_xgmi_plpd_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_dpm_policy_t *)) \ + _(amdsmi_get_gpu_bad_page_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *, amdsmi_retired_page_record_t *)) \ + _(amdsmi_get_gpu_bad_page_threshold_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_get_power_info_v2_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t, amdsmi_power_info_t *)) \ + _(amdsmi_init_gpu_event_notification_p, amdsmi_status_t, \ + (amdsmi_processor_handle)) \ + _(amdsmi_set_gpu_event_notification_mask_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint64_t)) \ + _(amdsmi_get_gpu_event_notification_p, amdsmi_status_t, \ + (int, uint32_t *, amdsmi_evt_notification_data_t *)) \ + _(amdsmi_stop_gpu_event_notification_p, amdsmi_status_t, \ + (amdsmi_processor_handle)) \ + _(amdsmi_gpu_counter_group_supported_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_event_group_t)) \ + _(amdsmi_get_gpu_available_counters_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_event_group_t, uint32_t *)) \ + _(amdsmi_gpu_create_counter_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_event_type_t, \ + amdsmi_event_handle_t *)) \ + _(amdsmi_gpu_control_counter_p, amdsmi_status_t, \ + (amdsmi_event_handle_t, amdsmi_counter_command_t, void *)) \ + _(amdsmi_gpu_read_counter_p, amdsmi_status_t, \ + (amdsmi_event_handle_t, amdsmi_counter_value_t *)) \ + 
_(amdsmi_get_gpu_kfd_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_kfd_info_t *)) \ + _(amdsmi_is_gpu_memory_partition_supported_p, amdsmi_status_t, \ + (amdsmi_processor_handle, bool *)) \ + _(amdsmi_get_gpu_memory_reserved_pages_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *, amdsmi_retired_page_record_t *)) \ + _(amdsmi_get_gpu_metrics_header_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amd_metrics_table_header_t *)) \ + _(amdsmi_get_xgmi_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_xgmi_info_t *)) \ + _(amdsmi_gpu_xgmi_error_status_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_xgmi_status_t *)) \ + _(amdsmi_is_gpu_power_management_enabled_p, amdsmi_status_t, \ + (amdsmi_processor_handle, bool *)) \ + _(amdsmi_gpu_validate_ras_eeprom_p, amdsmi_status_t, \ + (amdsmi_processor_handle)) \ + _(amdsmi_gpu_destroy_counter_p, amdsmi_status_t, \ + (amdsmi_event_handle_t)) + +#if AMDSMI_LIB_VERSION_MAJOR >= 25 +#define AMD_SMI_GPU_FUNCTIONS(_) \ + AMD_SMI_GPU_FUNCTIONS_BASE(_) \ + _(amdsmi_get_gpu_memory_partition_config_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_memory_partition_config_t *)) \ + _(amdsmi_get_gpu_xgmi_link_status_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_xgmi_link_status_t *)) \ + _(amdsmi_get_gpu_enumeration_info_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_enumeration_info_t *)) \ + _(amdsmi_get_gpu_virtualization_mode_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_virtualization_mode_t *)) +#else +#define AMD_SMI_GPU_FUNCTIONS(_) AMD_SMI_GPU_FUNCTIONS_BASE(_) +#endif + +#define AMD_SMI_CPU_FUNCTIONS(_) \ + _(amdsmi_get_cpu_handles_p, amdsmi_status_t, \ + (uint32_t *, amdsmi_processor_handle *)) \ + _(amdsmi_get_cpucore_handles_p, amdsmi_status_t, \ + (uint32_t *, amdsmi_processor_handle *)) \ + _(amdsmi_get_cpu_socket_power_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_get_cpu_socket_power_cap_p, 
amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_get_cpu_socket_power_cap_max_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_get_cpu_core_energy_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint64_t *)) \ + _(amdsmi_get_cpu_socket_energy_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint64_t *)) \ + _(amdsmi_get_cpu_smu_fw_version_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_smu_fw_version_t *)) \ + _(amdsmi_get_threads_per_core_p, amdsmi_status_t, (uint32_t *)) \ + _(amdsmi_get_cpu_family_p, amdsmi_status_t, (uint32_t *)) \ + _(amdsmi_get_cpu_model_p, amdsmi_status_t, (uint32_t *)) \ + _(amdsmi_get_cpu_core_boostlimit_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_get_cpu_socket_current_active_freq_limit_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint16_t *, char **)) \ + _(amdsmi_get_cpu_socket_freq_range_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint16_t *, uint16_t *)) \ + _(amdsmi_get_cpu_core_current_freq_limit_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_get_cpu_cclk_limit_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_get_cpu_current_io_bandwidth_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_link_id_bw_type_t, uint32_t *)) \ + _(amdsmi_get_cpu_current_xgmi_bw_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_link_id_bw_type_t, uint32_t *)) \ + _(amdsmi_get_cpu_ddr_bw_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_ddr_bw_metrics_t *)) \ + _(amdsmi_get_cpu_fclk_mclk_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *, uint32_t *)) \ + _(amdsmi_get_cpu_hsmp_driver_version_p, amdsmi_status_t, \ + (amdsmi_processor_handle, amdsmi_hsmp_driver_version_t *)) \ + _(amdsmi_get_cpu_hsmp_proto_ver_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_get_cpu_prochot_status_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ 
+ _(amdsmi_get_cpu_pwr_svi_telemetry_all_rails_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint32_t *)) \ + _(amdsmi_get_cpu_dimm_temp_range_and_refresh_rate_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint8_t, amdsmi_temp_range_refresh_rate_t *)) \ + _(amdsmi_get_cpu_dimm_power_consumption_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint8_t, amdsmi_dimm_power_t *)) \ + _(amdsmi_get_cpu_dimm_thermal_sensor_p, amdsmi_status_t, \ + (amdsmi_processor_handle, uint8_t, amdsmi_dimm_thermal_t *)) + +#endif /* AMDS_FUNCS_H */ diff --git a/src/components/amd_smi/amds_priv.h b/src/components/amd_smi/amds_priv.h new file mode 100644 index 000000000..2d62c5322 --- /dev/null +++ b/src/components/amd_smi/amds_priv.h @@ -0,0 +1,201 @@ +/** + * @file amds_priv.h + * @author Dong Jun Woun + * djwoun@gmail.com + * + */ + +#ifndef __AMDS_PRIV_H__ +#define __AMDS_PRIV_H__ + +#define AMDSMI_DISABLE_ESMI + +#include +#include + +#ifndef AMDSMI_LIB_VERSION_MAJOR +#define AMDSMI_LIB_VERSION_MAJOR 0 +#endif + +/* Mode enumeration used by accessors */ +typedef enum { + PAPI_MODE_READ = 1, + PAPI_MODE_WRITE, + PAPI_MODE_RDWR, +} rocs_access_mode_e; + +typedef int (*amds_accessor_t)(int mode, void *arg); + +/* Native event descriptor */ +typedef struct native_event { + unsigned int id; + char *name, *descr; + int32_t device; + uint64_t value; + uint32_t mode, variant, subvariant; + void *priv; + int (*open_func)(struct native_event *); + int (*close_func)(struct native_event *); + int (*start_func)(struct native_event *); + int (*stop_func)(struct native_event *); + amds_accessor_t access_func; +} native_event_t; + +typedef struct { + native_event_t *events; + int count; +} native_event_table_t; + +/* Global state accessors */ +int32_t amds_get_device_count(void); +amdsmi_processor_handle *amds_get_device_handles(void); +int32_t amds_get_gpu_count(void); +int32_t amds_get_cpu_count(void); +amdsmi_processor_handle **amds_get_cpu_core_handles(void); +uint32_t 
*amds_get_cores_per_socket(void); +void *amds_get_htable(void); +native_event_table_t *amds_get_ntv_table(void); +uint32_t amds_get_lib_major(void); + +#ifndef AMDS_PRIV_IMPL +#define device_handles (amds_get_device_handles()) +#define device_count (amds_get_device_count()) +#define gpu_count (amds_get_gpu_count()) +#define cpu_count (amds_get_cpu_count()) +#define cpu_core_handles (amds_get_cpu_core_handles()) +#define cores_per_socket (amds_get_cores_per_socket()) +#define htable (amds_get_htable()) +#define ntv_table_p (amds_get_ntv_table()) +#define amdsmi_lib_major (amds_get_lib_major()) +#endif + +/* AMD SMI function pointers */ +#include "amds_funcs.h" +#define DECLARE_AMDSMI(name, ret, args) extern ret(*name) args; +AMD_SMI_GPU_FUNCTIONS(DECLARE_AMDSMI) +#ifndef AMDSMI_DISABLE_ESMI +AMD_SMI_CPU_FUNCTIONS(DECLARE_AMDSMI) +#endif +#undef DECLARE_AMDSMI + +/* Accessor prototypes */ +int access_amdsmi_temp_metric(int mode, void *arg); +int access_amdsmi_fan_speed(int mode, void *arg); +int access_amdsmi_fan_rpms(int mode, void *arg); +int access_amdsmi_mem_total(int mode, void *arg); +int access_amdsmi_mem_usage(int mode, void *arg); +int access_amdsmi_power_cap(int mode, void *arg); +int access_amdsmi_power_cap_range(int mode, void *arg); +int access_amdsmi_power_average(int mode, void *arg); +int access_amdsmi_pci_throughput(int mode, void *arg); +int access_amdsmi_pci_replay_counter(int mode, void *arg); +int access_amdsmi_clk_freq(int mode, void *arg); +int access_amdsmi_clock_info(int mode, void *arg); +int access_amdsmi_gpu_metrics(int mode, void *arg); +int access_amdsmi_gpu_info(int mode, void *arg); +int access_amdsmi_gpu_activity(int mode, void *arg); +int access_amdsmi_fan_speed_max(int mode, void *arg); +int access_amdsmi_pci_bandwidth(int mode, void *arg); +int access_amdsmi_energy_count(int mode, void *arg); +int access_amdsmi_power_profile_status(int mode, void *arg); +int access_amdsmi_uuid_hash(int mode, void *arg); +int 
access_amdsmi_gpu_string_hash(int mode, void *arg); +int access_amdsmi_asic_info(int mode, void *arg); +int access_amdsmi_link_metrics(int mode, void *arg); +int access_amdsmi_link_weight(int mode, void *arg); +int access_amdsmi_link_type(int mode, void *arg); +int access_amdsmi_p2p_status(int mode, void *arg); +int access_amdsmi_p2p_accessible(int mode, void *arg); +int access_amdsmi_link_topology_nearest(int mode, void *arg); +int access_amdsmi_topo_numa(int mode, void *arg); +int access_amdsmi_device_bdf(int mode, void *arg); +int access_amdsmi_kfd_info(int mode, void *arg); +int access_amdsmi_xgmi_info(int mode, void *arg); +int access_amdsmi_process_info(int mode, void *arg); +int access_amdsmi_ecc_total(int mode, void *arg); +int access_amdsmi_ecc_block(int mode, void *arg); +int access_amdsmi_ecc_status(int mode, void *arg); +int access_amdsmi_ecc_enabled_mask(int mode, void *arg); +int access_amdsmi_compute_partition_hash(int mode, void *arg); +int access_amdsmi_memory_partition_hash(int mode, void *arg); +int access_amdsmi_memory_reserved_pages(int mode, void *arg); +int access_amdsmi_accelerator_num_partitions(int mode, void *arg); +int access_amdsmi_lib_version(int mode, void *arg); +int access_amdsmi_cache_stat(int mode, void *arg); +int access_amdsmi_overdrive_level(int mode, void *arg); +int access_amdsmi_mem_overdrive_level(int mode, void *arg); +int access_amdsmi_od_volt_regions_count(int mode, void *arg); +int access_amdsmi_od_volt_curve_range(int mode, void *arg); +int access_amdsmi_od_volt_info(int mode, void *arg); +int access_amdsmi_perf_level(int mode, void *arg); +int access_amdsmi_pm_metrics_count(int mode, void *arg); +int access_amdsmi_pm_metric_value(int mode, void *arg); +int access_amdsmi_pm_enabled(int mode, void *arg); +int access_amdsmi_ras_ecc_schema(int mode, void *arg); +int access_amdsmi_ras_eeprom_version(int mode, void *arg); +int access_amdsmi_ras_eeprom_validate(int mode, void *arg); +int access_amdsmi_ras_block_state(int 
mode, void *arg); +int access_amdsmi_reg_count(int mode, void *arg); +int access_amdsmi_reg_value(int mode, void *arg); +int access_amdsmi_voltage(int mode, void *arg); +int access_amdsmi_vram_width(int mode, void *arg); +int access_amdsmi_vram_size(int mode, void *arg); +int access_amdsmi_vram_type(int mode, void *arg); +int access_amdsmi_vram_vendor(int mode, void *arg); +int access_amdsmi_vram_usage(int mode, void *arg); +int access_amdsmi_soc_pstate_id(int mode, void *arg); +int access_amdsmi_soc_pstate_supported(int mode, void *arg); +int access_amdsmi_metrics_header_info(int mode, void *arg); +int access_amdsmi_xgmi_error_status(int mode, void *arg); +int access_amdsmi_xgmi_plpd_id(int mode, void *arg); +int access_amdsmi_xgmi_plpd_supported(int mode, void *arg); +int access_amdsmi_process_isolation(int mode, void *arg); +int access_amdsmi_xcd_counter(int mode, void *arg); +int access_amdsmi_board_serial_hash(int mode, void *arg); +int access_amdsmi_fw_version(int mode, void *arg); +int access_amdsmi_bad_page_count(int mode, void *arg); +int access_amdsmi_bad_page_threshold(int mode, void *arg); +int access_amdsmi_bad_page_record(int mode, void *arg); +int access_amdsmi_power_sensor(int mode, void *arg); +int access_amdsmi_pcie_info(int mode, void *arg); +int access_amdsmi_event_notification(int mode, void *arg); +int access_amdsmi_xgmi_bandwidth(int mode, void *arg); +int access_amdsmi_utilization_count(int mode, void *arg); +int access_amdsmi_violation_status(int mode, void *arg); + +/* Consolidated AMDSMI_LIB_VERSION_MAJOR >= 25 block */ +#if AMDSMI_LIB_VERSION_MAJOR >= 25 +int access_amdsmi_enumeration_info(int mode, void *arg); +int access_amdsmi_memory_partition_config(int mode, void *arg); +int access_amdsmi_xgmi_link_status(int mode, void *arg); +int access_amdsmi_vram_max_bandwidth(int mode, void *arg); +#endif + +#ifndef AMDSMI_DISABLE_ESMI +int access_amdsmi_cpu_socket_power(int mode, void *arg); +int access_amdsmi_cpu_socket_energy(int mode, void 
*arg); +int access_amdsmi_cpu_socket_freq_limit(int mode, void *arg); +int access_amdsmi_cpu_socket_freq_range(int mode, void *arg); +int access_amdsmi_cpu_power_cap(int mode, void *arg); +int access_amdsmi_cpu_core_energy(int mode, void *arg); +int access_amdsmi_cpu_core_freq_limit(int mode, void *arg); +int access_amdsmi_cpu_core_boostlimit(int mode, void *arg); +int access_amdsmi_cpu_cclk_limit(int mode, void *arg); +int access_amdsmi_cpu_io_bw(int mode, void *arg); +int access_amdsmi_cpu_xgmi_bw(int mode, void *arg); +int access_amdsmi_cpu_ddr_bw(int mode, void *arg); +int access_amdsmi_cpu_fclk_mclk(int mode, void *arg); +int access_amdsmi_cpu_hsmp_driver_version(int mode, void *arg); +int access_amdsmi_cpu_hsmp_proto_ver(int mode, void *arg); +int access_amdsmi_cpu_prochot_status(int mode, void *arg); +int access_amdsmi_cpu_svi_power(int mode, void *arg); +int access_amdsmi_dimm_temp(int mode, void *arg); +int access_amdsmi_dimm_power(int mode, void *arg); +int access_amdsmi_dimm_range_refresh(int mode, void *arg); +int access_amdsmi_threads_per_core(int mode, void *arg); +int access_amdsmi_cpu_family(int mode, void *arg); +int access_amdsmi_cpu_model(int mode, void *arg); +int access_amdsmi_smu_fw_version(int mode, void *arg); +#endif + +#endif /* __AMDS_PRIV_H__ */ diff --git a/src/components/amd_smi/htable.h b/src/components/amd_smi/htable.h new file mode 100644 index 000000000..1d82f4906 --- /dev/null +++ b/src/components/amd_smi/htable.h @@ -0,0 +1,392 @@ +/** + * @file htable.h + * @author Giuseppe Congiu + * gcongiu@icl.utk.edu + * @author Dong Jun Woun + * djwoun@gmail.com + * + */ + +#ifndef __HTABLE_H__ +#define __HTABLE_H__ + +#include +#include +#include "papi.h" +#include "papi_internal.h" +#include "papi_memory.h" + +/* Return codes for hash table operations */ +#define HTABLE_SUCCESS 0 /* Operation successful */ +#define HTABLE_ENOVAL -1 /* Key not found in table */ +#define HTABLE_EINVAL -2 /* Invalid argument (e.g., NULL handle or key) */ 
+#define HTABLE_ENOMEM -3 /* Allocation failure */ + +#define HTABLE_MIN_SIZE 8 /* Minimum number of buckets */ +#define HTABLE_NEEDS_TO_GROW(table) ((table)->size > 0 && (table)->capacity / (table)->size < 2) +#define HTABLE_NEEDS_TO_SHRINK(table) ((table)->size > 0 && (table)->capacity / (table)->size > 8) + +/* Hash table entry definition (separate chaining) */ +struct hash_table_entry { + char *key; /* Dynamically allocated key string */ + void *val; /* Value associated with the key */ + struct hash_table_entry *next; /* Next entry in the bucket's linked list */ +}; + +/* Hash table structure */ +struct hash_table { + uint32_t capacity; /* Number of buckets (table size) */ + uint32_t size; /* Number of entries currently stored */ + struct hash_table_entry **buckets; /* Array of bucket heads for separate chaining */ +}; + +/* Internal helper function prototypes (not part of public API) */ +static uint64_t hash_func(const char *str); +static int create_table(uint64_t capacity, struct hash_table **table); +static int destroy_table(struct hash_table *table); +static int rehash_table(struct hash_table *old_table, struct hash_table *new_table); +static int destroy_table_entries(struct hash_table *table); +static int create_table_entry(const char *key, void *val, struct hash_table_entry **entry); +static int destroy_table_entry(struct hash_table_entry *entry); +static int insert_table_entry(struct hash_table *table, struct hash_table_entry *entry); +static int delete_table_entry(struct hash_table *table, struct hash_table_entry *entry); +static int find_table_entry(struct hash_table *table, const char *key, struct hash_table_entry **entry); + +/* Initialize a new hash table. Handle is an out-parameter for the table pointer. 
*/ +static inline int htable_init(void **handle) +{ + if (handle == NULL) { + return HTABLE_EINVAL; + } + int htable_errno = HTABLE_SUCCESS; + struct hash_table *table = NULL; + /* Create initial table with minimum capacity */ + htable_errno = create_table(HTABLE_MIN_SIZE, &table); + if (htable_errno != HTABLE_SUCCESS) { + *handle = NULL; + return htable_errno; + } + *handle = table; + return HTABLE_SUCCESS; +} + +/* Shutdown an existing hash table, freeing all allocated memory. */ +static inline int htable_shutdown(void *handle) +{ + struct hash_table *table = (struct hash_table *) handle; + if (table == NULL) { + return HTABLE_EINVAL; + } + /* Free all entries and the table itself */ + destroy_table_entries(table); + destroy_table(table); + return HTABLE_SUCCESS; +} + +/* Insert a key-value pair into the hash table. Updates value if key already exists. */ +static inline int htable_insert(void *handle, const char *key, void *in) +{ + struct hash_table *table = (struct hash_table *) handle; + if (table == NULL || key == NULL) { + return HTABLE_EINVAL; + } + int htable_errno; + struct hash_table_entry *entry = NULL; + /* Check if key already exists */ + htable_errno = find_table_entry(table, key, &entry); + if (htable_errno == HTABLE_SUCCESS) { + /* Key exists: update its value */ + entry->val = in; + return HTABLE_SUCCESS; + } + /* Key not found: create a new entry */ + htable_errno = create_table_entry(key, in, &entry); + if (htable_errno != HTABLE_SUCCESS) { + return htable_errno; + } + /* Link the new entry into the table */ + htable_errno = insert_table_entry(table, entry); + if (htable_errno != HTABLE_SUCCESS) { + /* Insertion failed: free the entry and return error */ + papi_free(entry->key); + papi_free(entry); + return htable_errno; + } + /* Check if rehash (grow table) is needed after insertion */ + htable_errno = rehash_table(table, NULL); /* use NULL to indicate self-resize (growth) */ + if (htable_errno != HTABLE_SUCCESS) { + return htable_errno; + } + 
return HTABLE_SUCCESS; +} + +/* Remove an entry by key from the hash table. No effect if key not found. */ +static inline int htable_delete(void *handle, const char *key) +{ + struct hash_table *table = (struct hash_table *) handle; + if (table == NULL || key == NULL) { + return HTABLE_EINVAL; + } + struct hash_table_entry *entry = NULL; + int htable_errno = find_table_entry(table, key, &entry); + if (htable_errno != HTABLE_SUCCESS) { + /* Key not found or other error */ + return htable_errno; + } + /* Unlink the entry from the table (does not free memory yet) */ + htable_errno = delete_table_entry(table, entry); + if (htable_errno != HTABLE_SUCCESS) { + return htable_errno; + } + /* Free the removed entry structure */ + htable_errno = destroy_table_entry(entry); + if (htable_errno != HTABLE_SUCCESS) { + return htable_errno; + } + /* Check if rehash (shrink table) is needed after deletion */ + htable_errno = rehash_table(table, NULL); /* attempt shrink after deletion */ + if (htable_errno == HTABLE_ENOMEM) { + return htable_errno; + } + return htable_errno; +} + +/* Find an entry by key in the hash table. + * Returns HTABLE_SUCCESS and sets *out if found, else HTABLE_ENOVAL. 
*/ +static inline int htable_find(void *handle, const char *key, void **out) +{ + struct hash_table *table = (struct hash_table *) handle; + if (table == NULL || key == NULL || out == NULL) { + return HTABLE_EINVAL; + } + struct hash_table_entry *entry = NULL; + int htable_errno = find_table_entry(table, key, &entry); + if (htable_errno != HTABLE_SUCCESS) { + *out = NULL; /* ensure output is NULL if not found */ + return htable_errno; + } + *out = entry->val; + return HTABLE_SUCCESS; +} + +/* djb2 string hash function – returns a 64-bit hash for the given string */ +static uint64_t hash_func(const char *str) +{ + uint64_t hash = 5381ULL; + int c; + while ((c = *str++) != 0) { + hash = ((hash << 5) + hash) + (uint8_t)c; /* hash * 33 + c */ + } + return hash; +} + +/* Allocate and initialize a new hash_table structure with the given capacity. */ +static int create_table(uint64_t capacity, struct hash_table **table) +{ + if (capacity < 1 || table == NULL) { + return HTABLE_EINVAL; + } + int htable_errno = HTABLE_SUCCESS; + struct hash_table *t = papi_calloc(1, sizeof(struct hash_table)); + if (t == NULL) { + return HTABLE_ENOMEM; + } + t->buckets = papi_calloc(capacity, sizeof(struct hash_table_entry *)); + if (t->buckets == NULL) { + papi_free(t); + return HTABLE_ENOMEM; + } + t->capacity = (uint32_t) capacity; + t->size = 0; + *table = t; + return HTABLE_SUCCESS; +} + +/* Free the memory associated with a hash_table (structure and bucket array). */ +static int destroy_table(struct hash_table *table) +{ + if (table == NULL) { + return HTABLE_SUCCESS; + } + if (table->buckets != NULL) { + papi_free(table->buckets); + } + papi_free(table); + return HTABLE_SUCCESS; +} + +/* Rehash the entries from old_table into new_table or perform in-place resizing. + If new_table is NULL, this function checks old_table and resizes it if needed. 
*/ +static int rehash_table(struct hash_table *old_table, struct hash_table *new_table) +{ + int htable_errno = HTABLE_SUCCESS; + if (new_table == NULL) { + /* Self-resizing mode: determine if growth or shrink is needed */ + char resize = (HTABLE_NEEDS_TO_GROW(old_table) << 1) | HTABLE_NEEDS_TO_SHRINK(old_table); + if (!resize) { + return HTABLE_SUCCESS; /* no resizing needed */ + } + /* Determine new capacity (double or half) */ + uint64_t new_capacity = (resize & 0x2) ? + (uint64_t)old_table->capacity * 2 + : (uint64_t)old_table->capacity / 2; + if (new_capacity < HTABLE_MIN_SIZE) { + new_capacity = HTABLE_MIN_SIZE; + } + /* Allocate a new table structure and buckets */ + htable_errno = create_table(new_capacity, &new_table); + if (htable_errno != HTABLE_SUCCESS) { + return htable_errno; + } + /* Move all entries from old_table into new_table */ + for (uint64_t i = 0; i < old_table->capacity; ++i) { + struct hash_table_entry *entry = old_table->buckets[i]; + while (entry != NULL) { + struct hash_table_entry *next_entry = entry->next; + /* Compute new bucket index (capacity is always power-of-2) */ + uint64_t new_index = hash_func(entry->key) & (new_table->capacity - 1); + /* Insert entry at head of new_table's bucket list */ + entry->next = new_table->buckets[new_index]; + new_table->buckets[new_index] = entry; + entry = next_entry; + } + } + new_table->size = old_table->size; + /* Replace old_table's data with new_table's data */ + struct hash_table_entry **old_buckets = old_table->buckets; + old_table->capacity = new_table->capacity; + old_table->size = new_table->size; + old_table->buckets = new_table->buckets; + new_table->buckets = NULL; /* avoid double-free */ + /* Free old bucket array and temporary table structure */ + papi_free(old_buckets); + destroy_table(new_table); + return HTABLE_SUCCESS; + } + /* Explicit rehash into a provided new_table (for manual resizing, if needed) */ + for (uint64_t j = 0; j < old_table->capacity; ++j) { + struct 
hash_table_entry *entry = old_table->buckets[j]; + while (entry != NULL) { + struct hash_table_entry *next_entry = entry->next; + uint64_t new_index = hash_func(entry->key) & (new_table->capacity - 1); + entry->next = new_table->buckets[new_index]; + new_table->buckets[new_index] = entry; + entry = next_entry; + } + } + new_table->size = old_table->size; + return HTABLE_SUCCESS; +} + +/* Free all entries in the hash table (but not the table or buckets themselves). */ +static int destroy_table_entries(struct hash_table *table) +{ + if (table == NULL) { + return HTABLE_SUCCESS; + } + for (uint64_t i = 0; i < table->capacity; ++i) { + struct hash_table_entry *entry = table->buckets[i]; + while (entry != NULL) { + struct hash_table_entry *tmp = entry; + entry = entry->next; + papi_free(tmp->key); + papi_free(tmp); + } + table->buckets[i] = NULL; + } + table->size = 0; + return HTABLE_SUCCESS; +} + +/* Create a new hash_table_entry with the given key and value. Copies the key string. */ +static int create_table_entry(const char *key, void *val, struct hash_table_entry **entry) +{ + if (key == NULL || entry == NULL) { + return HTABLE_EINVAL; + } + struct hash_table_entry *e = papi_calloc(1, sizeof(struct hash_table_entry)); + if (e == NULL) { + return HTABLE_ENOMEM; + } + e->key = papi_strdup(key); + if (e->key == NULL) { /* strdup failure */ + papi_free(e); + return HTABLE_ENOMEM; + } + e->val = val; + e->next = NULL; + *entry = e; + return HTABLE_SUCCESS; +} + +/* Destroy a single hash_table_entry (free its key and memory). */ +static int destroy_table_entry(struct hash_table_entry *entry) +{ + if (entry == NULL) { + return HTABLE_EINVAL; + } + papi_free(entry->key); + papi_free(entry); + return HTABLE_SUCCESS; +} + +/* Insert a hash_table_entry into the table (at the head of its bucket list). 
*/ +static int insert_table_entry(struct hash_table *table, struct hash_table_entry *entry) +{ + if (table == NULL || entry == NULL) { + return HTABLE_EINVAL; + } + /* Compute bucket index and insert at head of list */ + uint64_t index = hash_func(entry->key) & (table->capacity - 1); + entry->next = table->buckets[index]; + table->buckets[index] = entry; + table->size += 1; + return HTABLE_SUCCESS; +} + +/* Remove a hash_table_entry from its bucket list (does not free the entry). */ +static int delete_table_entry(struct hash_table *table, struct hash_table_entry *entry) +{ + if (table == NULL || entry == NULL) { + return HTABLE_EINVAL; + } + uint64_t index = hash_func(entry->key) & (table->capacity - 1); + struct hash_table_entry *curr = table->buckets[index]; + struct hash_table_entry *prev = NULL; + while (curr != NULL) { + if (curr == entry) { + /* Found the entry to remove */ + if (prev == NULL) { + /* Entry is at head of the list */ + table->buckets[index] = curr->next; + } else { + /* Entry is in the middle or end of the list */ + prev->next = curr->next; + } + entry->next = NULL; + table->size -= 1; + return HTABLE_SUCCESS; + } + prev = curr; + curr = curr->next; + } + /* Entry not found (should not happen if a valid pointer was provided) */ + return HTABLE_ENOVAL; +} + +/* Find a hash_table_entry by key in the table. Sets *entry if found. */ +static int find_table_entry(struct hash_table *table, const char *key, struct hash_table_entry **entry) +{ + if (table == NULL || key == NULL || entry == NULL) { + return HTABLE_EINVAL; + } + uint64_t index = hash_func(key) & (table->capacity - 1); + struct hash_table_entry *curr = table->buckets[index]; + while (curr != NULL && strcmp(curr->key, key) != 0) { + curr = curr->next; + } + *entry = curr; + return (curr != NULL ? 
HTABLE_SUCCESS : HTABLE_ENOVAL); +} + +#endif /* __HTABLE_H__ */ diff --git a/src/components/amd_smi/linux-amd-smi.c b/src/components/amd_smi/linux-amd-smi.c new file mode 100644 index 000000000..3f5d02179 --- /dev/null +++ b/src/components/amd_smi/linux-amd-smi.c @@ -0,0 +1,357 @@ +/** + * @file linux-amd-smi.c + * @author Dong Jun Woun + * djwoun@gmail.com + * + */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" +#include "extras.h" +#include "amds.h" +#include "amds_priv.h" +extern unsigned int _amd_smi_lock; + +typedef struct { + int initialized; + int state; + int component_id; +} amdsmi_context_t; + +typedef struct { + unsigned int *events_id; + int num_events; + int component_id; + amds_ctx_t amds_ctx; +} amdsmi_control_t; + +papi_vector_t _amd_smi_vector; + +static int _amd_smi_init_private(void); + +static int _amd_smi_check_n_initialize(void) { + if (!_amd_smi_vector.cmp_info.initialized) + return _amd_smi_init_private(); + return _amd_smi_vector.cmp_info.disabled; +} + +static int _amd_smi_init_thread(hwd_context_t *ctx) { + amdsmi_context_t *amdsmi_ctx = (amdsmi_context_t *) ctx; + memset(amdsmi_ctx, 0, sizeof(*amdsmi_ctx)); + amdsmi_ctx->initialized = 1; + amdsmi_ctx->component_id = _amd_smi_vector.cmp_info.CmpIdx; + return PAPI_OK; +} + +static int _amd_smi_init_component(int cidx) { + _amd_smi_vector.cmp_info.CmpIdx = cidx; + _amd_smi_vector.cmp_info.num_native_events = -1; + _amd_smi_vector.cmp_info.num_cntrs = -1; + _amd_smi_vector.cmp_info.num_mpx_cntrs = -1; + _amd_smi_lock = PAPI_NUM_LOCK + NUM_INNER_LOCK + cidx; + + sprintf(_amd_smi_vector.cmp_info.disabled_reason, + "Not initialized. 
Access an AMD SMI event to initialize."); + _amd_smi_vector.cmp_info.disabled = PAPI_EDELAY_INIT; + + return PAPI_EDELAY_INIT; +} + +static int evt_get_count(int *count) { + unsigned int event_code = 0; + if (amds_evt_enum(&event_code, PAPI_ENUM_FIRST) == PAPI_OK) { + ++(*count); + } + while (amds_evt_enum(&event_code, PAPI_ENUM_EVENTS) == PAPI_OK) { + ++(*count); + } + return PAPI_OK; +} + +static int _amd_smi_init_private(void) { + int papi_errno = PAPI_OK; + PAPI_lock(COMPONENT_LOCK); + + if (_amd_smi_vector.cmp_info.initialized) { + papi_errno = _amd_smi_vector.cmp_info.disabled; + goto fn_exit; + } + + papi_errno = amds_init(); // initialize AMD SMI library and events + if (papi_errno != PAPI_OK) { + _amd_smi_vector.cmp_info.disabled = papi_errno; + const char *error_str; + amds_err_get_last(&error_str); + sprintf(_amd_smi_vector.cmp_info.disabled_reason, "%s", error_str); + goto fn_fail; + } + + int count = 0; + papi_errno = evt_get_count(&count); + _amd_smi_vector.cmp_info.num_native_events = count; + _amd_smi_vector.cmp_info.num_cntrs = count; + _amd_smi_vector.cmp_info.num_mpx_cntrs = count; + +fn_exit: + _amd_smi_vector.cmp_info.initialized = 1; + _amd_smi_vector.cmp_info.disabled = papi_errno; + PAPI_unlock(COMPONENT_LOCK); + return papi_errno; +fn_fail: + goto fn_exit; +} + +static int _amd_smi_init_control_state(hwd_control_state_t *ctrl) { + (void) ctrl; + return _amd_smi_check_n_initialize(); +} + +static int update_native_events(amdsmi_control_t *ctl, NativeInfo_t *ntvInfo, int ntvCount) +{ + if (!ctl) return PAPI_EINVAL; + if (ntvCount < 0) return PAPI_EINVAL; + + if (ntvCount == 0) { + if (ctl->events_id) papi_free(ctl->events_id); + ctl->events_id = NULL; + ctl->num_events = 0; + return PAPI_OK; + } + + if (!ntvInfo) return PAPI_EINVAL; + + // Allocate a new array; leave ctl unchanged until success. 
+ unsigned int *events = papi_calloc((size_t)ntvCount, sizeof(*events)); + if (!events) { + // Old ctl->events_id/num_events remain intact on allocation failure. + return PAPI_ENOMEM; + } + + for (int i = 0; i < ntvCount; ++i) { + events[i] = ntvInfo[i].ni_event; + ntvInfo[i].ni_position = i; + } + + // Swap in the new array atomically. + if (ctl->events_id) papi_free(ctl->events_id); + ctl->events_id = events; + ctl->num_events = ntvCount; + + return PAPI_OK; +} + +static int _amd_smi_update_control_state(hwd_control_state_t *ctrl, NativeInfo_t *nativeInfo, + int nativeCount, hwd_context_t *ctx) { + int papi_errno = _amd_smi_check_n_initialize(); + if (papi_errno != PAPI_OK) { + return papi_errno; + } + amdsmi_control_t *amdsmi_ctl = (amdsmi_control_t *) ctrl; + amdsmi_context_t *amdsmi_ctx = (amdsmi_context_t *) ctx; + if (amdsmi_ctx->state & AMDS_EVENTS_RUNNING) { + return PAPI_EMISC; + } + papi_errno = update_native_events(amdsmi_ctl, nativeInfo, nativeCount); + if (papi_errno != PAPI_OK) { + return papi_errno; + } + return PAPI_OK; +} + +static int _amd_smi_start(hwd_context_t *ctx, hwd_control_state_t *ctrl) { + int papi_errno = PAPI_OK; + amdsmi_context_t *amdsmi_ctx = (amdsmi_context_t *) ctx; + amdsmi_control_t *amdsmi_ctl = (amdsmi_control_t *) ctrl; + + if (amdsmi_ctx->state & AMDS_EVENTS_RUNNING) { + return PAPI_EMISC; + } + papi_errno = amds_ctx_open(amdsmi_ctl->events_id, amdsmi_ctl->num_events, &amdsmi_ctl->amds_ctx); + if (papi_errno != PAPI_OK) { + return papi_errno; + } + amdsmi_ctx->state = AMDS_EVENTS_OPENED; + + papi_errno = amds_ctx_start(amdsmi_ctl->amds_ctx); + if (papi_errno != PAPI_OK) { + // If start fails, close the context and reset state + amds_ctx_close(amdsmi_ctl->amds_ctx); + amdsmi_ctx->state = 0; + amdsmi_ctl->amds_ctx = NULL; + return papi_errno; + } + amdsmi_ctx->state |= AMDS_EVENTS_RUNNING; + return PAPI_OK; +} + +static int _amd_smi_read(hwd_context_t *ctx, hwd_control_state_t *ctrl, + long long **values, int flags) { + 
(void)ctx; (void)flags; + amdsmi_context_t *amdsmi_ctx = (amdsmi_context_t *)ctx; + amdsmi_control_t *amdsmi_ctl = (amdsmi_control_t *)ctrl; + if (!(amdsmi_ctx->state & AMDS_EVENTS_RUNNING) || !amdsmi_ctl->amds_ctx) // fail only if ctx is gone + return PAPI_EMISC; + return amds_ctx_read(amdsmi_ctl->amds_ctx, values); +} + + +static int _amd_smi_write(hwd_context_t *ctx, hwd_control_state_t *ctrl, long long *values) { + amdsmi_context_t *amdsmi_ctx = (amdsmi_context_t *) ctx; + amdsmi_control_t *amdsmi_ctl = (amdsmi_control_t *) ctrl; + if (!(amdsmi_ctx->state & AMDS_EVENTS_RUNNING)) { + return PAPI_EMISC; + } + return amds_ctx_write(amdsmi_ctl->amds_ctx, values); +} + +static int _amd_smi_stop(hwd_context_t *ctx, hwd_control_state_t *ctrl) { + amdsmi_context_t *amdsmi_ctx = (amdsmi_context_t *)ctx; + amdsmi_control_t *amdsmi_ctl = (amdsmi_control_t *)ctrl; + if (!(amdsmi_ctx->state & AMDS_EVENTS_RUNNING)) return PAPI_EMISC; + + int papi_errno = amds_ctx_stop(amdsmi_ctl->amds_ctx); + amdsmi_ctx->state &= ~AMDS_EVENTS_RUNNING; + return papi_errno; +} + +static int _amd_smi_reset(hwd_context_t *ctx, hwd_control_state_t *ctrl) { + amdsmi_context_t *amdsmi_ctx = (amdsmi_context_t *) ctx; + amdsmi_control_t *amdsmi_ctl = (amdsmi_control_t *) ctrl; + if (!(amdsmi_ctx->state & AMDS_EVENTS_RUNNING)) { + return PAPI_EMISC; + } + return amds_ctx_reset(amdsmi_ctl->amds_ctx); +} + +static int _amd_smi_cleanup_eventset(hwd_control_state_t *ctrl) { + amdsmi_control_t *amdsmi_ctl = (amdsmi_control_t *)ctrl; + + if (amdsmi_ctl->amds_ctx) { + (void)amds_ctx_stop(amdsmi_ctl->amds_ctx); // safe if not running + (void)amds_ctx_close(amdsmi_ctl->amds_ctx); + amdsmi_ctl->amds_ctx = NULL; + } + + if (amdsmi_ctl->events_id) { + papi_free(amdsmi_ctl->events_id); + amdsmi_ctl->events_id = NULL; + amdsmi_ctl->num_events = 0; + } + return PAPI_OK; +} + + + +static int _amd_smi_shutdown_thread(hwd_context_t *ctx) { + amdsmi_context_t *amdsmi_ctx = (amdsmi_context_t *) ctx; + amdsmi_ctx->state = 
0; + amdsmi_ctx->initialized = 0; + return PAPI_OK; +} + +static int _amd_smi_shutdown_component(void) { + if (!_amd_smi_vector.cmp_info.initialized) { + return PAPI_EMISC; + } + if (_amd_smi_vector.cmp_info.disabled != PAPI_OK) { + return PAPI_EMISC; + } + int papi_errno = amds_shutdown(); + _amd_smi_vector.cmp_info.initialized = 0; + return papi_errno; +} + +static int _amd_smi_ctrl(hwd_context_t *ctx, int code, _papi_int_option_t *option) { + (void) ctx; (void) code; (void) option; + // No special control actions needed for this component + return PAPI_OK; +} + +static int _amd_smi_set_domain(hwd_control_state_t *ctrl, int domain) { + (void) ctrl; (void) domain; + // Only default user/kernel domain is supported + return PAPI_OK; +} + +/* Native event API functions */ +static int _amd_smi_ntv_enum_events(unsigned int *EventCode, int modifier) { + int papi_errno = _amd_smi_check_n_initialize(); + if (papi_errno != PAPI_OK) { + return papi_errno; + } + return amds_evt_enum(EventCode, modifier); +} + +static int _amd_smi_ntv_code_to_name(unsigned int EventCode, char *name, int len) { + int papi_errno = _amd_smi_check_n_initialize(); + if (papi_errno != PAPI_OK) { + return papi_errno; + } + return amds_evt_code_to_name(EventCode, name, len); +} + +static int _amd_smi_ntv_name_to_code(const char *name, unsigned int *EventCode) { + int papi_errno = _amd_smi_check_n_initialize(); + if (papi_errno != PAPI_OK) { + return papi_errno; + } + return amds_evt_name_to_code(name, EventCode); +} + +static int _amd_smi_ntv_code_to_descr(unsigned int EventCode, char *desc, int len) { + int papi_errno = _amd_smi_check_n_initialize(); + if (papi_errno != PAPI_OK) { + return papi_errno; + } + return amds_evt_code_to_descr(EventCode, desc, len); +} + +/* Export the component interface */ +papi_vector_t _amd_smi_vector = { + .cmp_info = { + .name = "amd_smi", + .short_name = "amd_smi", + .version = "1.0", + .description = "AMD GPU System Management Interface via AMD SMI library", + 
.default_domain = PAPI_DOM_USER, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + .hardware_intr_sig = PAPI_INT_SIGNAL, + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + }, + .size = { + .context = sizeof(amdsmi_context_t), + .control_state = sizeof(amdsmi_control_t), + .reg_value = 1, + .reg_alloc = 1, + }, + .init_thread = _amd_smi_init_thread, + .init_component = _amd_smi_init_component, + .init_control_state = _amd_smi_init_control_state, + .update_control_state = _amd_smi_update_control_state, + .start = _amd_smi_start, + .stop = _amd_smi_stop, + .read = _amd_smi_read, + .write = _amd_smi_write, + .reset = _amd_smi_reset, + .cleanup_eventset = _amd_smi_cleanup_eventset, + .shutdown_thread = _amd_smi_shutdown_thread, + .shutdown_component = _amd_smi_shutdown_component, + .ctl = _amd_smi_ctrl, + .set_domain = _amd_smi_set_domain, + .ntv_enum_events = _amd_smi_ntv_enum_events, + .ntv_code_to_name = _amd_smi_ntv_code_to_name, + .ntv_name_to_code = _amd_smi_ntv_name_to_code, + .ntv_code_to_descr = _amd_smi_ntv_code_to_descr, +}; diff --git a/src/components/amd_smi/tests/Makefile b/src/components/amd_smi/tests/Makefile new file mode 100644 index 000000000..1d58a32d3 --- /dev/null +++ b/src/components/amd_smi/tests/Makefile @@ -0,0 +1,136 @@ +# ROCm + PAPI build settings +# Set PAPI_AMDSMI_ROOT to your ROCm install prefix (e.g., /opt/rocm or /opt/rocm-6.4.0). +# Used to find AMD SMI / HIP / ROCm headers and libraries. You can override on the command line. 
+PAPI_AMDSMI_ROOT ?= /opt/rocm-6.4.0 +HIP_PATH := $(PAPI_AMDSMI_ROOT) +HIPCC := $(HIP_PATH)/bin/hipcc + +# Toolchain / flags +CFLAGS ?= -DPAPI_NUM_COMP=3 +CSTD ?= -std=c11 +HIPFLAGS ?= $(CFLAGS) -std=c++17 +OPTFLAGS ?= -O2 + +# Libraries in the PAPI tree (override if your layout differs) +UTILOBJS ?= ../../../testlib/libtestlib.a +PAPILIB ?= ../../../libpapi.a + +# Include paths (adjust if your PAPI install lives elsewhere) +BASE_INCLUDES_FROM_LOG ?= -I. -I../../.. -I../../../testlib -I../../../validation_tests +PAPI_DIR_INCLUDE ?= ../../../include + +EFFECTIVE_INCLUDE := $(BASE_INCLUDES_FROM_LOG) +EFFECTIVE_INCLUDE += -I$(PAPI_DIR_INCLUDE) +EFFECTIVE_INCLUDE += -I$(PAPI_AMDSMI_ROOT)/include +EFFECTIVE_INCLUDE += -I$(PAPI_AMDSMI_ROOT)/include/rocm_smi +EFFECTIVE_INCLUDE += -I$(PAPI_AMDSMI_ROOT)/include/hip +EFFECTIVE_INCLUDE += -I$(PAPI_AMDSMI_ROOT)/include/hsa +EFFECTIVE_INCLUDE += -I$(PAPI_AMDSMI_ROOT)/include/rocprofiler +EFFECTIVE_INCLUDE += -I$(PAPI_AMDSMI_ROOT)/include/rocblas + +# Linker flags +COMMON_LDFLAGS = -ldl -g -lpthread +LDFLAGS_AMDSMI = $(UTILOBJS) $(PAPILIB) -L$(PAPI_AMDSMI_ROOT)/lib -lamd_smi $(COMMON_LDFLAGS) +LDFLAGS_GEMM = $(UTILOBJS) $(PAPILIB) -L$(PAPI_AMDSMI_ROOT)/lib -lrocblas $(COMMON_LDFLAGS) + +# AMD SMI tests to build with 'make tests' +TESTS_AMDSMI := amdsmi_energy_monotonic amdsmi_ctx_conflict + +# Default target +all: amdsmi_hello amdsmi_basics amdsmi_gemm $(TESTS_AMDSMI) + +# ------------------------ +# Compile rules +# ------------------------ + +# .c -> .o +%.o: %.c + @echo "Compiling C source $< to $@..." + @echo "HIPCC: $(HIPCC)" + @echo "CFLAGS: $(CFLAGS) CSTD: $(CSTD) OPTFLAGS: $(OPTFLAGS)" + @echo "INCLUDES: $(EFFECTIVE_INCLUDE)" + $(HIPCC) $(CFLAGS) $(CSTD) $(OPTFLAGS) $(EFFECTIVE_INCLUDE) -c $< -o $@ + +# amdsmi_gemm.c is compiled as HIP +amdsmi_gemm.o: amdsmi_gemm.c + @echo "Compiling HIP source $< to $@..." 
+ @echo "HIPCC: $(HIPCC)" + @echo "HIPFLAGS: $(HIPFLAGS) OPTFLAGS: $(OPTFLAGS)" + @echo "INCLUDES: $(EFFECTIVE_INCLUDE)" + $(HIPCC) $(HIPFLAGS) $(OPTFLAGS) $(EFFECTIVE_INCLUDE) -x hip -c $< -o $@ + +# ------------------------ +# Link rules +# ------------------------ + +# amdsmi_hello replaces the older amdsmi_example +amdsmi_hello: amdsmi_hello.o $(UTILOBJS) $(PAPILIB) + @echo "Linking $@..." + @echo "Using LDFLAGS_AMDSMI: $(LDFLAGS_AMDSMI)" + $(HIPCC) $(CFLAGS) $(CSTD) $(OPTFLAGS) $(EFFECTIVE_INCLUDE) -o $@ amdsmi_hello.o $(LDFLAGS_AMDSMI) + +# Back-compat alias: allow 'make amdsmi_example' to build amdsmi_hello +amdsmi_example: amdsmi_hello + @true + +amdsmi_basics: amdsmi_basics.o $(UTILOBJS) $(PAPILIB) + @echo "Linking $@..." + @echo "Using LDFLAGS_AMDSMI: $(LDFLAGS_AMDSMI)" + $(HIPCC) $(CFLAGS) $(CSTD) $(OPTFLAGS) $(EFFECTIVE_INCLUDE) -o $@ amdsmi_basics.o $(LDFLAGS_AMDSMI) + +# GEMM sample linked against rocBLAS +amdsmi_gemm: amdsmi_gemm.o $(UTILOBJS) $(PAPILIB) + @echo "Linking $@..." + @echo "Using LDFLAGS_GEMM: $(LDFLAGS_GEMM)" + $(HIPCC) $(HIPFLAGS) $(OPTFLAGS) $(EFFECTIVE_INCLUDE) -o $@ amdsmi_gemm.o $(LDFLAGS_GEMM) + +# Tests +amdsmi_energy_monotonic: amdsmi_energy_monotonic.o $(UTILOBJS) $(PAPILIB) + @echo "Linking $@..." + @echo "Using LDFLAGS_AMDSMI: $(LDFLAGS_AMDSMI)" + $(HIPCC) $(CFLAGS) $(CSTD) $(OPTFLAGS) $(EFFECTIVE_INCLUDE) -o $@ amdsmi_energy_monotonic.o $(LDFLAGS_AMDSMI) + +amdsmi_ctx_conflict: amdsmi_ctx_conflict.o $(UTILOBJS) $(PAPILIB) + @echo "Linking $@..." + @echo "Using LDFLAGS_AMDSMI: $(LDFLAGS_AMDSMI)" + $(HIPCC) $(CFLAGS) $(CSTD) $(OPTFLAGS) $(EFFECTIVE_INCLUDE) -o $@ amdsmi_ctx_conflict.o $(LDFLAGS_AMDSMI) + +$(UTILOBJS): + $(MAKE) -C ../../../testlib libtestlib.a + +# Convenience meta-target +tests: $(TESTS_AMDSMI) + +# ------------------------ +# Cleanup and diagnostics +# ------------------------ + +clean: + @echo "Cleaning up..." 
+ rm -f \ + amdsmi_hello amdsmi_hello.o \ + amdsmi_basics amdsmi_basics.o \ + amdsmi_gemm amdsmi_gemm.o \ + amdsmi_energy_monotonic amdsmi_energy_monotonic.o \ + amdsmi_ctx_conflict amdsmi_ctx_conflict.o \ + *.exe + +checkpath: + @echo "--- Variables ---" + @echo "PAPI_AMDSMI_ROOT = $(PAPI_AMDSMI_ROOT)" + @echo "HIPCC = $(HIPCC)" + @echo "INCLUDES = $(EFFECTIVE_INCLUDE)" + @echo "CFLAGS = $(CFLAGS)" + @echo "CSTD = $(CSTD)" + @echo "HIPFLAGS = $(HIPFLAGS)" + @echo "OPTFLAGS = $(OPTFLAGS)" + @echo "UTILOBJS = $(UTILOBJS)" + @echo "PAPILIB = $(PAPILIB)" + @echo "COMMON_LDFLAGS = $(COMMON_LDFLAGS)" + @echo "LDFLAGS_AMDSMI = $(LDFLAGS_AMDSMI)" + @echo "LDFLAGS_GEMM = $(LDFLAGS_GEMM)" + @echo "TESTS_AMDSMI = $(TESTS_AMDSMI)" + +.PHONY: all clean checkpath tests \ + amdsmi_hello amdsmi_example amdsmi_basics amdsmi_gemm \ + amdsmi_energy_monotonic amdsmi_ctx_conflict diff --git a/src/components/amd_smi/tests/amdsmi_basics.c b/src/components/amd_smi/tests/amdsmi_basics.c new file mode 100644 index 000000000..ada77b593 --- /dev/null +++ b/src/components/amd_smi/tests/amdsmi_basics.c @@ -0,0 +1,173 @@ +/** + * @file amdsmi_basics.c + * @author Dong Jun Woun + * djwoun@gmail.com + * @brief Enumerates every native AMD-SMI event exposed through PAPI and measures + * them one at a time. + */ + +#include "test_harness.h" +#include "papi.h" +#include +#include +#include +#include + +// Return true if papi_errno is a "warning, not failure" status for add/start/stop. +static inline bool is_warning_papi_errno(int papi_errno) { + return (papi_errno == PAPI_ENOEVNT) || (papi_errno == PAPI_ECNFLCT) || + (papi_errno == PAPI_EPERM); +} + +int main(int argc, char *argv[]) { + // Unbuffer stdout so the final status line shows promptly. + setvbuf(stdout, NULL, _IONBF, 0); + + harness_accept_tests_quiet(&argc, argv); + HarnessOpts opts = parse_harness_cli(argc, argv); + + // 1) Initialize PAPI. 
+ int papi_errno = PAPI_library_init(PAPI_VER_CURRENT); + if (papi_errno != PAPI_VER_CURRENT) { + NOTE("PAPI_library_init failed: %s", PAPI_strerror(papi_errno)); + return eval_result(opts, 1); + } + + // 2) Locate the AMD-SMI component. + int cid = -1; + const int ncomps = PAPI_num_components(); + for (int i = 0; i < ncomps && cid < 0; ++i) { + const PAPI_component_info_t *cinfo = PAPI_get_component_info(i); + if (cinfo && strcmp(cinfo->name, "amd_smi") == 0) { + cid = i; + } + } + if (cid < 0) { + // Can't run this test on this build/platform (likely PAPI built without ROCm) — skip with warning. + SKIP("Unable to locate the amd_smi component (PAPI built without ROCm?)"); + } + + NOTE("Using AMD-SMI component id %d\n", cid); + + // 3) Enumerate every native event. + int ev_code = PAPI_NATIVE_MASK; + if (PAPI_enum_cmp_event(&ev_code, PAPI_ENUM_FIRST, cid) != PAPI_OK) { + // No events — treat as "nothing to do" (warning instead of failing). + SKIP("No native events found for AMD-SMI component"); + } + + int event_index = 0; + int passed = 0, warned = 0, failed = 0, skipped = 0; + + do { + char ev_name[PAPI_MAX_STR_LEN] = {0}; + if (PAPI_event_code_to_name(ev_code, ev_name) != PAPI_OK) { + // Shouldn't happen; skip silently. + ++skipped; + continue; + } + + // Skip process* events; these aren't testable in this harness. + if (strncmp(ev_name, "amd_smi:::process", 17) == 0 || + strncmp(ev_name, "process", 7) == 0) { + ++skipped; + NOTE("[%4d] Skipping %s (process events not testable)\n", event_index++, ev_name); + continue; + } + + NOTE("[%4d] Testing %s...", event_index, ev_name); + + // 4–7) Create a fresh EventSet, add the event, start, stop/read, print, cleanup. + int eventSet = PAPI_NULL; + papi_errno = PAPI_create_eventset(&eventSet); + if (papi_errno != PAPI_OK) { + // Hard failure to create an EventSet. + NOTE(" ? create_eventset failed: %s", PAPI_strerror(papi_errno)); + ++failed; ++event_index; + continue; + } + + // Explicitly assign the component. 
+ papi_errno = PAPI_assign_eventset_component(eventSet, cid); + if (papi_errno != PAPI_OK) { + NOTE(" ? assign_eventset_component failed: %s", + PAPI_strerror(papi_errno)); + (void)PAPI_destroy_eventset(&eventSet); + ++failed; ++event_index; + continue; + } + + papi_errno = PAPI_add_event(eventSet, ev_code); + if (papi_errno != PAPI_OK) { + if (is_warning_papi_errno(papi_errno)) { + WARNF("Could not add %-50s (%s)", ev_name, + PAPI_strerror(papi_errno)); + (void)PAPI_destroy_eventset(&eventSet); + ++warned; ++event_index; + } else { + NOTE(" ? Could not add %s (%s)", ev_name, + PAPI_strerror(papi_errno)); + (void)PAPI_destroy_eventset(&eventSet); + ++failed; ++event_index; + } + continue; + } + + long long value = 0; + papi_errno = PAPI_start(eventSet); + if (papi_errno != PAPI_OK) { + if (is_warning_papi_errno(papi_errno)) { + WARNF("start %-54s (%s)", ev_name, PAPI_strerror(papi_errno)); + (void)PAPI_cleanup_eventset(eventSet); + (void)PAPI_destroy_eventset(&eventSet); + ++warned; ++event_index; + } else { + NOTE(" ? start failed for %s (%s)", ev_name, + PAPI_strerror(papi_errno)); + (void)PAPI_cleanup_eventset(eventSet); + (void)PAPI_destroy_eventset(&eventSet); + ++failed; ++event_index; + } + continue; + } + + // Read once via stop(). + papi_errno = PAPI_stop(eventSet, &value); + if (papi_errno != PAPI_OK) { + if (is_warning_papi_errno(papi_errno)) { + WARNF("stop %-54s (%s)", ev_name, PAPI_strerror(papi_errno)); + ++warned; + } else { + NOTE(" ? stop failed for %s (%s)", ev_name, + PAPI_strerror(papi_errno)); + ++failed; + } + (void)PAPI_cleanup_eventset(eventSet); + (void)PAPI_destroy_eventset(&eventSet); + ++event_index; + continue; + } + + // Success path. 
+ ++passed; + if (opts.print) { + printf(" %-60s = %lld\n\n", ev_name, value); + } + + (void)PAPI_cleanup_eventset(eventSet); + (void)PAPI_destroy_eventset(&eventSet); + ++event_index; + + } while (PAPI_enum_cmp_event(&ev_code, PAPI_ENUM_EVENTS, cid) == PAPI_OK); + + if (opts.print) { + printf("Summary: passed=%d warned=%d skipped=%d failed=%d\n", + passed, warned, skipped, failed); + } + + PAPI_shutdown(); + + // Final: fail only if we had real failures; warnings/skips are allowed. + int exit_status = (failed == 0) ? 0 : 1; + return eval_result(opts, exit_status); +} diff --git a/src/components/amd_smi/tests/amdsmi_ctx_conflict.c b/src/components/amd_smi/tests/amdsmi_ctx_conflict.c new file mode 100644 index 000000000..2e72b5c51 --- /dev/null +++ b/src/components/amd_smi/tests/amdsmi_ctx_conflict.c @@ -0,0 +1,163 @@ +/** + * @file amdsmi_ctx_conflict.c + * @author Dong Jun Woun + * djwoun@gmail.com + * @brief Validates that an AMD-SMI native event exposed via PAPI is context-exclusive + * by attempting to start the same event in two threads. Expected result: + * thread 1 starts successfully; thread 2 fails with PAPI_ECNFLCT. + * + * Usage: + * ./amdsmi_ctx_conflict [<event>] [harness options] + * If no event is provided, defaults to "amd_smi:::temp_current:device=0:sensor=1". + */ + +#include "test_harness.h" +#include "papi.h" + +#include <pthread.h> +#include <stdatomic.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/** PAPI thread-id callback. */ +static unsigned long get_tid(void) { return (unsigned long)pthread_self(); } + +struct ThreadState { + int start_papi_errno; +}; + +static _Atomic bool t1_started = false; + +/* Default event; can be overridden by argv[1] (any AMD-SMI native event string). */ +static const char* g_event = "amd_smi:::temp_current:device=0:sensor=1"; + +/** + * Thread 1: + * - Creates an EventSet, adds the selected event, and starts it. + * - Keeps it running briefly so thread 2 collides on start. + * Expected: PAPI_start succeeds. 
+ */ +static void* thread_fn1(void* arg) { + PAPI_register_thread(); + struct ThreadState* st = (struct ThreadState*)arg; + + int EventSet = PAPI_NULL; + int papi_errno = PAPI_create_eventset(&EventSet); + if (papi_errno != PAPI_OK) { NOTE("t1 create: %s", PAPI_strerror(papi_errno)); st->start_papi_errno = papi_errno; /* release t2's spin-wait so the test fails instead of hanging */ atomic_store_explicit(&t1_started, true, memory_order_release); PAPI_unregister_thread(); return NULL; } + + papi_errno = PAPI_add_named_event(EventSet, g_event); + if (papi_errno == PAPI_ENOEVNT) { SKIP("Event not supported on this platform"); } + if (papi_errno == PAPI_ECNFLCT || papi_errno == PAPI_EPERM) { SKIP("Cannot add event due to HW/resource limits"); } + if (papi_errno != PAPI_OK) { NOTE("t1 add: %s", PAPI_strerror(papi_errno)); st->start_papi_errno = papi_errno; /* release t2's spin-wait so the test fails instead of hanging */ atomic_store_explicit(&t1_started, true, memory_order_release); PAPI_destroy_eventset(&EventSet); PAPI_unregister_thread(); return NULL; } + + papi_errno = PAPI_start(EventSet); + st->start_papi_errno = papi_errno; + if (papi_errno == PAPI_OK) { + /* Publish that t1 is actively running the event so t2 can attempt to collide. */ + atomic_store_explicit(&t1_started, true, memory_order_release); + long long v = 0; (void)PAPI_read(EventSet, &v); + usleep(100000); /* run long enough for thread 2 to attempt start */ + (void)PAPI_stop(EventSet, &v); + } else { + /* If t1 cannot start, the test cannot be executed cleanly: skip due to HW/resource limits. */ + SKIP("Cannot start thread1 due to HW/resource limits"); + } + + (void)PAPI_cleanup_eventset(EventSet); + (void)PAPI_destroy_eventset(&EventSet); + PAPI_unregister_thread(); + return NULL; +} + +/** + * Thread 2: + * - Waits until t1 is running, then attempts to start the same event. + * Expected: PAPI_start fails with PAPI_ECNFLCT (resource conflict). 
+ */ +static void* thread_fn2(void* arg) { + PAPI_register_thread(); + struct ThreadState* st = (struct ThreadState*)arg; + + int EventSet = PAPI_NULL; + int papi_errno = PAPI_create_eventset(&EventSet); + if (papi_errno != PAPI_OK) { NOTE("t2 create: %s", PAPI_strerror(papi_errno)); st->start_papi_errno = papi_errno; PAPI_unregister_thread(); return NULL; } + + papi_errno = PAPI_add_named_event(EventSet, g_event); + if (papi_errno == PAPI_ENOEVNT) { SKIP("Event not supported on this platform"); } + if (papi_errno == PAPI_ECNFLCT || papi_errno == PAPI_EPERM) { SKIP("Cannot add event due to HW/resource limits"); } + if (papi_errno != PAPI_OK) { NOTE("t2 add: %s", PAPI_strerror(papi_errno)); st->start_papi_errno = papi_errno; (void)PAPI_destroy_eventset(&EventSet); PAPI_unregister_thread(); return NULL; } + + /* Busy-wait until t1 has started the event (adequate for a short test). */ + while (!atomic_load_explicit(&t1_started, memory_order_acquire)) { /* spin */ } + + papi_errno = PAPI_start(EventSet); + st->start_papi_errno = papi_errno; + if (papi_errno != PAPI_OK) { + NOTE("t2 start expected fail: %s", PAPI_strerror(papi_errno)); + } else { + NOTE("t2 start unexpectedly succeeded"); + long long v = 0; (void)PAPI_stop(EventSet, &v); + } + + (void)PAPI_cleanup_eventset(EventSet); + (void)PAPI_destroy_eventset(&EventSet); + PAPI_unregister_thread(); + return NULL; +} + +/** + * Program entry: + * - Parses harness options and optional event override. + * - Ensures PAPI_AMDSMI_ROOT is set and PAPI is initialized for threading. + * - Runs the two-thread contention test and evaluates pass/fail: + * PASS => t1 start == PAPI_OK and t2 start == PAPI_ECNFLCT + * FAIL => any other combination. + */ +int main(int argc, char** argv) { + /* Unbuffer stdout so the final status line always shows promptly. 
*/ + setvbuf(stdout, NULL, _IONBF, 0); + + harness_accept_tests_quiet(&argc, argv); + HarnessOpts opts = parse_harness_cli(argc, argv); + + /* Optional override of the event: ./amdsmi_ctx_conflict "<event>" */ + if (argc > 1 && strncmp(argv[1], "--", 2) != 0) g_event = argv[1]; + + const char* root = getenv("PAPI_AMDSMI_ROOT"); + if (!root || !*root) SKIP("PAPI_AMDSMI_ROOT not set"); + + int papi_errno = PAPI_library_init(PAPI_VER_CURRENT); + if (papi_errno != PAPI_VER_CURRENT) { NOTE("PAPI_library_init failed: %s", PAPI_strerror(papi_errno)); int e = eval_result(opts, 1); fflush(stdout); return e; } + + if (PAPI_thread_init(&get_tid) != PAPI_OK) { NOTE("PAPI_thread_init failed"); int e = eval_result(opts, 1); fflush(stdout); return e; } + + atomic_store_explicit(&t1_started, false, memory_order_relaxed); + + struct ThreadState s1; + struct ThreadState s2; + s1.start_papi_errno = PAPI_OK; + s2.start_papi_errno = PAPI_OK; + + pthread_t th1, th2; + if (pthread_create(&th1, NULL, thread_fn1, &s1) != 0) { /* never join a thread that was not created */ NOTE("pthread_create(t1) failed"); int e = eval_result(opts, 1); fflush(stdout); return e; } + if (pthread_create(&th2, NULL, thread_fn2, &s2) != 0) { /* t1 is already running: reap it before failing */ NOTE("pthread_create(t2) failed"); pthread_join(th1, NULL); int e = eval_result(opts, 1); fflush(stdout); return e; } + pthread_join(th1, NULL); + pthread_join(th2, NULL); + + if (opts.print) { + printf("event: %s\n", g_event); + printf("t1 start papi_errno: %d (%s)\n", s1.start_papi_errno, PAPI_strerror(s1.start_papi_errno)); + printf("t2 start papi_errno: %d (%s)\n", s2.start_papi_errno, PAPI_strerror(s2.start_papi_errno)); + } + + /* PASS when expected contention occurred; else FAIL. */ + int final_status = (s1.start_papi_errno == PAPI_OK && s2.start_papi_errno == PAPI_ECNFLCT) ? 
0 : 1; + if (final_status != 0) NOTE("Unexpected results (wanted t1 OK, t2 PAPI_ECNFLCT)."); + + int exit_code = eval_result(opts, final_status); + fflush(stdout); + return exit_code; +} diff --git a/src/components/amd_smi/tests/amdsmi_energy_monotonic.c b/src/components/amd_smi/tests/amdsmi_energy_monotonic.c new file mode 100644 index 000000000..b1cd2f9b9 --- /dev/null +++ b/src/components/amd_smi/tests/amdsmi_energy_monotonic.c @@ -0,0 +1,106 @@ +/** + * @file amdsmi_energy_monotonic.c + * @author Dong Jun Woun + * djwoun@gmail.com + * @brief Verifies that the AMD SMI energy counter exposed via PAPI increases + * monotonically by sampling twice about one second apart. + * + * @details This small harnessed test: + * 1) Ensures PAPI + AMD-SMI are available (via PAPI_AMDSMI_ROOT). + * 2) Adds the "amd_smi:::energy_consumed:device=0" event to an event set. + * 3) Starts counting, reads once, then polls for up to ~1s for an increase. + * 4) Reports PASS if the second sample is greater than the first. + * + * The NOTE/SKIP macros come from the project test harness. + */ + +#include "test_harness.h" +#include "papi.h" + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +int main(int argc, char **argv) { + // Parse common test harness options (quiet/print/exit codes, etc.). + harness_accept_tests_quiet(&argc, argv); + HarnessOpts opts = parse_harness_cli(argc, argv); + + // Ensure the AMD-SMI PAPI component is configured. + const char* root = getenv("PAPI_AMDSMI_ROOT"); + if (!root || !*root) { + SKIP("PAPI_AMDSMI_ROOT not set"); + } + + // Initialize the PAPI library. + int papi_errno = PAPI_library_init(PAPI_VER_CURRENT); + if (papi_errno != PAPI_VER_CURRENT) { + NOTE("PAPI_library_init failed: %s", PAPI_strerror(papi_errno)); + return eval_result(opts, 1); + } + + // Create an empty event set and add the AMD-SMI energy counter for device 0. 
+ int EventSet = PAPI_NULL; + papi_errno = PAPI_create_eventset(&EventSet); + if (papi_errno != PAPI_OK) { + NOTE("PAPI_create_eventset: %s", PAPI_strerror(papi_errno)); + return eval_result(opts, 1); + } + + const char *ev = "amd_smi:::energy_consumed:device=0"; + papi_errno = PAPI_add_named_event(EventSet, ev); + if (papi_errno == PAPI_ENOEVNT) { + SKIP("energy_consumed:device=0 not supported"); + } else if (papi_errno != PAPI_OK) { + NOTE("PAPI_add_named_event(%s): %s", ev, PAPI_strerror(papi_errno)); + return eval_result(opts, 1); + } + + // Begin counting. + papi_errno = PAPI_start(EventSet); + if (papi_errno != PAPI_OK) { + NOTE("PAPI_start: %s", PAPI_strerror(papi_errno)); + return eval_result(opts, 1); + } + + long long v1 = 0, v2 = 0; + + // First sample. + papi_errno = PAPI_read(EventSet, &v1); + if (papi_errno != PAPI_OK) { + NOTE("PAPI_read(1): %s", PAPI_strerror(papi_errno)); + long long dummy = 0; PAPI_stop(EventSet, &dummy); + return eval_result(opts, 1); + } + + // Poll for up to ~1 second for the energy counter to advance. + for (int i = 0; i < 10; ++i) { + usleep(100000); // 100 ms + + papi_errno = PAPI_read(EventSet, &v2); + if (papi_errno != PAPI_OK) { + NOTE("PAPI_read(2): %s", PAPI_strerror(papi_errno)); + long long dummy = 0; PAPI_stop(EventSet, &dummy); + return eval_result(opts, 1); + } + if (v2 > v1) break; // monotonic increase observed + } + + // Clean up PAPI resources. + long long dummy = 0; + PAPI_stop(EventSet, &dummy); + PAPI_cleanup_eventset(EventSet); + PAPI_destroy_eventset(&EventSet); + PAPI_shutdown(); + + if (opts.print) { + printf("energy_consumed: first=%lld second=%lld delta=%lld\n", + v1, v2, (v2 - v1)); + } + + // Fail if we never observed an increase. + int failed = (v2 <= v1) ? 
1 : 0; + if (failed) NOTE("Energy did not increase"); + + return eval_result(opts, failed); +} diff --git a/src/components/amd_smi/tests/amdsmi_gemm.c b/src/components/amd_smi/tests/amdsmi_gemm.c new file mode 100644 index 000000000..eefa00755 --- /dev/null +++ b/src/components/amd_smi/tests/amdsmi_gemm.c @@ -0,0 +1,306 @@ +/** + * @file amdsmi_gemm.c + * @author Dong Jun Woun + * djwoun@gmail.com + * @brief Launches a large HIP DGEMM workload (on device 1) while sampling a + * small set of AMD SMI counters via PAPI (temperature, VRAM, power). + * + * The monitor thread polls the PAPI EventSet ~3 times per second while the kernel runs. + * This is intended for simple integration/soak testing rather than performance tuning. + * + * NOTE: The sampled AMD SMI events below target device=0, while the HIP workload + * runs on device 1. If you want the metrics for the same device that runs + * the kernel, change `device=0` to `device=1` in the event strings. + */ + +#include "test_harness.h" + +#include "papi.h" +#include "hip/hip_runtime.h" +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <unistd.h> +#include <sys/time.h> + +/* ----------------------------- Configuration ----------------------------- */ + +#define M_DIM 7296 +#define K_DIM 14592 +#define N_DIM 7296 + +#define NUM_STREAMS 1 +#define ITERATIONS_PER_STREAM 1 + +/* --------------------------- HIP error helpers --------------------------- */ + +#define HIP_CHECK(cmd) do { \ + hipError_t e = cmd; \ + if (e != hipSuccess) { \ + fprintf(stderr, "Failed: HIP error %s:%d '%s' (code: %d)\n", \ + __FILE__, __LINE__, hipGetErrorString(e), e); \ + return 1; \ + } \ +} while(0) + +#define HIP_CHECK_CLEANUP(cmd) do { \ + hipError_t e = cmd; \ + if (e != hipSuccess) { \ + fprintf(stderr, "Warning: HIP cleanup error %s:%d '%s' (code: %d)\n", \ + __FILE__, __LINE__, hipGetErrorString(e), e); \ + } \ +} while(0) + +/* --------------------------- Monitoring thread --------------------------- */ + +/** + * @brief Background poller for PAPI EventSet values. 
+ * + * If params->print is 1, it writes one line per sample to stdout with a timestamp. + */ +static volatile int stop_monitor = 0; + +struct monitor_params { + int EventSet; + struct timeval start_time; + int print; // 0/1: whether to print readings (controls stdout chatter) +}; + +static void *monitor_events(void *args) { + struct monitor_params *params = (struct monitor_params *)args; + int statusFlag; + long long values[5]; + + while (!stop_monitor) { + statusFlag = PAPI_read(params->EventSet, values); + if (statusFlag != PAPI_OK) { + fprintf(stderr, "PAPI read failed in monitor: %s\n", PAPI_strerror(statusFlag)); + break; + } + + struct timeval current_time; + gettimeofday(&current_time, NULL); + double elapsed = (current_time.tv_sec - params->start_time.tv_sec) + + (current_time.tv_usec - params->start_time.tv_usec) / 1e6; + + if (params->print) { + fprintf(stdout, + "Time: %.6f sec -> event1: %lld, event2: %lld, event3: %lld, event4: %lld, event5: %lld\n", + elapsed, values[0], values[1], values[2], values[3], values[4]); + fflush(stdout); + } + + usleep(300000); // ~3 Hz + } + return NULL; +} + +/* ------------------------------- Workload -------------------------------- */ + +/** + * @brief Naive DGEMM: C = alpha * A * B + beta * C + * A: MxK, B: KxN, C: MxN (row-major) + */ +__global__ void dgemm_kernel(const double *A, const double *B, double *C, + int M, int N, int K, double alpha, double beta) { + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + if (row < M && col < N) { + double sum = 0.0; + for (int k = 0; k < K; k++) { + sum += A[row * K + k] * B[k * N + col]; + } + C[row * N + col] = alpha * sum + beta * C[row * N + col]; + } +} + +/* ------------------------------- Test body -------------------------------- */ + +static int real_main(const HarnessOpts *opts) { + /* Gracefully skip if the PAPI AMD SMI component isn't available. 
*/ + const char* root = getenv("PAPI_AMDSMI_ROOT"); + if (!root || !*root) { + SKIP("PAPI_AMDSMI_ROOT not set"); + } + + /* Initialize PAPI */ + int statusFlag = PAPI_library_init(PAPI_VER_CURRENT); + if (statusFlag != PAPI_VER_CURRENT) { + fprintf(stderr, "PAPI shared library version error: %s\n", PAPI_strerror(statusFlag)); + return 1; + } + + /* Create EventSet */ + int EventSet = PAPI_NULL; + statusFlag = PAPI_create_eventset(&EventSet); + if (statusFlag != PAPI_OK) { + fprintf(stderr, "PAPI create eventset: %s\n", PAPI_strerror(statusFlag)); + return 1; + } + + /* AMD SMI events to sample. + * NOTE: These target device=0. See the NOTE in the file header regarding device selection. + */ + const char *event1 = "amd_smi:::temp_current:device=0:sensor=1"; + const char *event2 = "amd_smi:::temp_current:device=0:sensor=2"; + const char *event3 = "amd_smi:::mem_total_VRAM:device=0"; + const char *event4 = "amd_smi:::mem_usage_VRAM:device=0"; + const char *event5 = "amd_smi:::power_average:device=0"; + + /* Add events; treat ENOEVNT as SKIP so the test suite stays portable. 
*/ + statusFlag = PAPI_add_named_event(EventSet, event1); + if (statusFlag == PAPI_ENOEVNT) SKIP("Event not supported: temp_current:device=0:sensor=1"); + if (statusFlag != PAPI_OK) { fprintf(stderr, "add %s: %s\n", event1, PAPI_strerror(statusFlag)); return 1; } + + statusFlag = PAPI_add_named_event(EventSet, event2); + if (statusFlag == PAPI_ENOEVNT) SKIP("Event not supported: temp_current:device=0:sensor=2"); + if (statusFlag != PAPI_OK) { fprintf(stderr, "add %s: %s\n", event2, PAPI_strerror(statusFlag)); return 1; } + + statusFlag = PAPI_add_named_event(EventSet, event3); + if (statusFlag == PAPI_ENOEVNT) SKIP("Event not supported: mem_total_VRAM:device=0"); + if (statusFlag != PAPI_OK) { fprintf(stderr, "add %s: %s\n", event3, PAPI_strerror(statusFlag)); return 1; } + + statusFlag = PAPI_add_named_event(EventSet, event4); + if (statusFlag == PAPI_ENOEVNT) SKIP("Event not supported: mem_usage_VRAM:device=0"); + if (statusFlag != PAPI_OK) { fprintf(stderr, "add %s: %s\n", event4, PAPI_strerror(statusFlag)); return 1; } + + statusFlag = PAPI_add_named_event(EventSet, event5); + if (statusFlag == PAPI_ENOEVNT) SKIP("Event not supported: power_average:device=0"); + if (statusFlag != PAPI_OK) { fprintf(stderr, "add %s: %s\n", event5, PAPI_strerror(statusFlag)); return 1; } + + /* HIP runtime preflight so HIP_CHECK won't hard-exit. */ + int device_count = 0; + if (hipGetDeviceCount(&device_count) != hipSuccess || device_count <= 1) { + SKIP("HIP device 1 not available"); + } + + /* Use device 1 and (optionally) print basic properties. 
*/ + HIP_CHECK(hipSetDevice(1)); /* must match the device-1 preflight and property query */ + hipDeviceProp_t deviceProp; + HIP_CHECK(hipGetDeviceProperties(&deviceProp, 1)); + if (opts->print) { + printf("Device Name: %s\n", deviceProp.name); + printf("Compute Units: %d\n", deviceProp.multiProcessorCount); + printf("Max Threads Per Block: %d\n", deviceProp.maxThreadsPerBlock); + } + + /* Host buffers (pinned) */ + size_t size_A = ((size_t)M_DIM * K_DIM * sizeof(double)); + size_t size_B = ((size_t)K_DIM * N_DIM * sizeof(double)); + size_t size_C = ((size_t)M_DIM * N_DIM * sizeof(double)); + + double *h_A = NULL, *h_B = NULL, *h_C = NULL; + HIP_CHECK(hipHostMalloc(&h_A, size_A, hipHostMallocDefault)); + HIP_CHECK(hipHostMalloc(&h_B, size_B, hipHostMallocDefault)); + HIP_CHECK(hipHostMalloc(&h_C, size_C, hipHostMallocDefault)); + if (!h_A || !h_B || !h_C) { + fprintf(stderr, "Host memory allocation failed.\n"); + if (h_A) HIP_CHECK_CLEANUP(hipHostFree(h_A)); + if (h_B) HIP_CHECK_CLEANUP(hipHostFree(h_B)); + if (h_C) HIP_CHECK_CLEANUP(hipHostFree(h_C)); + return 1; + } + + for (int i = 0; i < M_DIM * K_DIM; i++) h_A[i] = (double)(i % 100); + for (int i = 0; i < K_DIM * N_DIM; i++) h_B[i] = (double)(i % 100); + for (int i = 0; i < M_DIM * N_DIM; i++) h_C[i] = 0.0; + + /* Device buffers per stream */ + double *d_A[NUM_STREAMS], *d_B[NUM_STREAMS], *d_C[NUM_STREAMS]; + for (int s = 0; s < NUM_STREAMS; s++) { + HIP_CHECK(hipMalloc((void**)&d_A[s], size_A)); + HIP_CHECK(hipMalloc((void**)&d_B[s], size_B)); + HIP_CHECK(hipMalloc((void**)&d_C[s], size_C)); + } + + hipStream_t streams[NUM_STREAMS]; + hipEvent_t events[NUM_STREAMS]; + for (int s = 0; s < NUM_STREAMS; s++) { + HIP_CHECK(hipStreamCreateWithFlags(&streams[s], hipStreamNonBlocking)); + HIP_CHECK(hipEventCreate(&events[s])); + } + + /* H2D copies */ + for (int s = 0; s < NUM_STREAMS; s++) { + HIP_CHECK(hipMemcpyAsync(d_A[s], h_A, size_A, hipMemcpyHostToDevice, streams[s])); + HIP_CHECK(hipMemcpyAsync(d_B[s], h_B, size_B, hipMemcpyHostToDevice, streams[s])); + 
HIP_CHECK(hipMemcpyAsync(d_C[s], h_C, size_C, hipMemcpyHostToDevice, streams[s])); + } + + /* Start counters */ + statusFlag = PAPI_start(EventSet); + if (statusFlag != PAPI_OK) { + fprintf(stderr, "PAPI_start: %s\n", PAPI_strerror(statusFlag)); + return 1; + } + + /* Launch monitor thread (prints unless suppressed) */ + pthread_t monitor_thread; + struct monitor_params params; + params.EventSet = EventSet; + params.print = opts->print ? 1 : 0; + gettimeofday(&params.start_time, NULL); + statusFlag = pthread_create(&monitor_thread, NULL, monitor_events, &params); + if (statusFlag != 0) { + fprintf(stderr, "pthread_create failed\n"); + return 1; + } + + /* Ensure copies are done */ + for (int s = 0; s < NUM_STREAMS; s++) HIP_CHECK(hipStreamSynchronize(streams[s])); + + double alpha = 0.75; + double beta = 0.5; + + dim3 blockDim(32, 32); + dim3 gridDim((N_DIM + blockDim.x - 1) / blockDim.x, + (M_DIM + blockDim.y - 1) / blockDim.y); + + for (int iter = 0; iter < ITERATIONS_PER_STREAM; iter++) { + for (int s = 0; s < NUM_STREAMS; s++) { + hipLaunchKernelGGL(dgemm_kernel, gridDim, blockDim, 0, streams[s], + d_A[s], d_B[s], d_C[s], + M_DIM, N_DIM, K_DIM, alpha, beta); + HIP_CHECK(hipEventRecord(events[s], streams[s])); + HIP_CHECK(hipStreamSynchronize(streams[s])); + usleep(3000000); // Allow the monitor to capture a few samples + } + } + + /* Stop the monitor and clean up */ + stop_monitor = 1; + pthread_join(monitor_thread, NULL); + + for (int s = 0; s < NUM_STREAMS; s++) { + HIP_CHECK_CLEANUP(hipEventDestroy(events[s])); + HIP_CHECK_CLEANUP(hipStreamDestroy(streams[s])); + HIP_CHECK_CLEANUP(hipFree(d_A[s])); + HIP_CHECK_CLEANUP(hipFree(d_B[s])); + HIP_CHECK_CLEANUP(hipFree(d_C[s])); + } + HIP_CHECK_CLEANUP(hipHostFree(h_A)); + HIP_CHECK_CLEANUP(hipHostFree(h_B)); + HIP_CHECK_CLEANUP(hipHostFree(h_C)); + + long long stop_values[5] = {0}; // five events were added + statusFlag = PAPI_stop(EventSet, stop_values); + if (statusFlag != PAPI_OK) { fprintf(stderr, "PAPI_stop: %s\n", 
PAPI_strerror(statusFlag)); return 1; } + statusFlag = PAPI_cleanup_eventset(EventSet); + if (statusFlag != PAPI_OK) { fprintf(stderr, "PAPI_cleanup_eventset: %s\n", PAPI_strerror(statusFlag)); return 1; } + statusFlag = PAPI_destroy_eventset(&EventSet); + if (statusFlag != PAPI_OK) { fprintf(stderr, "PAPI_destroy_eventset: %s\n", PAPI_strerror(statusFlag)); return 1; } + + HIP_CHECK_CLEANUP(hipDeviceReset()); // Optional; reduces "still reachable" reports from HIP in leak checkers + PAPI_shutdown(); // Triggers component cleanup and AMD SMI shutdown + return 0; +} + +/* --------------------------- Test harness glue --------------------------- */ + +int main(int argc, char *argv[]) { + harness_accept_tests_quiet(&argc, argv); + HarnessOpts opts = parse_harness_cli(argc, argv); + int papi_errno = real_main(&opts); + return eval_result(opts, papi_errno); +} diff --git a/src/components/amd_smi/tests/amdsmi_hello.c b/src/components/amd_smi/tests/amdsmi_hello.c new file mode 100644 index 000000000..5fcdf7acf --- /dev/null +++ b/src/components/amd_smi/tests/amdsmi_hello.c @@ -0,0 +1,97 @@ +/** + * @file amdsmi_hello.c + * @author Dong Jun Woun + * @brief Minimal example that reads a single AMD-SMI event via PAPI's AMD-SMI component. + * @details Selects the event from argv[1] if provided; otherwise defaults to + * "amd_smi:::temp_current:device=0:sensor=1". Requires PAPI_AMDSMI_ROOT + * so the component can dlopen the AMD-SMI library. Uses the test harness + * (test_harness.h) for consistent output and skip handling. + */ + +#include "test_harness.h" + +#include "papi.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +int main(int argc, char** argv) { + // Disable stdout buffering so the harness status line appears immediately. 
+ setvbuf(stdout, NULL, _IONBF, 0); + + harness_accept_tests_quiet(&argc, argv); + HarnessOpts opts = parse_harness_cli(argc, argv); + + // Event to measure (override with argv[1], e.g.: + // ./amdsmi_hello amd_smi:::power_average:device=0 + // ) + const char* ev = "amd_smi:::temp_current:device=0:sensor=1"; + if (argc > 1 && strncmp(argv[1], "--", 2) != 0) ev = argv[1]; + + // Check AMD-SMI root so the component can dlopen the library. + const char* root = getenv("PAPI_AMDSMI_ROOT"); + if (!root || !*root) { + SKIP("PAPI_AMDSMI_ROOT not set"); + } + + // Initialize PAPI. + int papi_errno = PAPI_library_init(PAPI_VER_CURRENT); + if (papi_errno != PAPI_VER_CURRENT) { + NOTE("PAPI_library_init failed: %s", PAPI_strerror(papi_errno)); + return eval_result(opts, 1); + } + + // Create an EventSet. + int EventSet = PAPI_NULL; + papi_errno = PAPI_create_eventset(&EventSet); + if (papi_errno != PAPI_OK) { + NOTE("PAPI_create_eventset: %s", PAPI_strerror(papi_errno)); + return eval_result(opts, 1); + } + + // Add the selected event. + papi_errno = PAPI_add_named_event(EventSet, ev); + if (papi_errno == PAPI_ENOEVNT || papi_errno == PAPI_ECNFLCT || + papi_errno == PAPI_EPERM) { + NOTE("Event unavailable or HW/resource-limited: %s (%s)", ev, + PAPI_strerror(papi_errno)); + SKIP("Event unavailable or HW/resource-limited"); + } else if (papi_errno != PAPI_OK) { + NOTE("PAPI_add_named_event(%s): %s", ev, PAPI_strerror(papi_errno)); + PAPI_destroy_eventset(&EventSet); + return eval_result(opts, 1); + } + + // Start counters -> short wait -> stop/read. + papi_errno = PAPI_start(EventSet); + if (papi_errno == PAPI_ECNFLCT || papi_errno == PAPI_EPERM) { + NOTE("Cannot start counters: %s", PAPI_strerror(papi_errno)); + SKIP("Cannot start counters"); + } else if (papi_errno != PAPI_OK) { + NOTE("PAPI_start: %s", PAPI_strerror(papi_errno)); + PAPI_destroy_eventset(&EventSet); + return eval_result(opts, 1); + } + + usleep(100000); // ~100 ms sampling interval for this simple demo. 
+ + long long val = 0; + papi_errno = PAPI_stop(EventSet, &val); + if (papi_errno != PAPI_OK) { + NOTE("PAPI_stop: %s", PAPI_strerror(papi_errno)); + PAPI_destroy_eventset(&EventSet); + return eval_result(opts, 1); + } + + (void)PAPI_cleanup_eventset(EventSet); + (void)PAPI_destroy_eventset(&EventSet); + PAPI_shutdown(); + + // If --print was requested via the harness, emit the event name and value. + if (opts.print) { + printf("Event: %s\nValue: %lld\n", ev, val); + } + + return eval_result(opts, 0); +} diff --git a/src/components/amd_smi/tests/runtest.sh b/src/components/amd_smi/tests/runtest.sh new file mode 100644 index 000000000..10b6716ba --- /dev/null +++ b/src/components/amd_smi/tests/runtest.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# Quiet by default; use -v/--verbose to see output from the tests. + +set -e +set -u +( set -o pipefail ) 2>/dev/null || true + +# Try to infer AMD SMI root if not set +: "${PAPI_AMDSMI_ROOT:=${PAPI_ROCM_ROOT:-/opt/rocm-6.4.0}}" + +banner() { printf "Running: \033[36m%s\033[0m %s\n" "$1" "${2-}"; } +sep() { printf "%s\n\n" "-------------------------------------"; } + +VERBOSE=0 + +usage() { + cat < +#include +#include +#include + +#include "papi.h" /* for PAPI_* error codes used by helper macros */ + +/** Options controlling harness behavior. */ +typedef struct HarnessOpts { + bool print; /**< Whether to print normal output. */ + bool expect_fail; /**< If true, a nonzero return is considered PASS. */ + int had_warning; /**< Set to 1 if ENOEVNT/ECNFLCT/EPERM or any warning occurred. */ +} HarnessOpts; + +/** Global harness state used by macros. */ +static HarnessOpts harness_opts; + +/** + * @brief Accept and normalize the positional quiet token. + * + * Recognizes the literal tokens "TESTS_QUIET" or "QUIET" on the command line, + * removes them from @p argv so they aren't misinterpreted as positional args, + * and sets TESTS_QUIET=1. 
If the TESTS_QUIET environment variable is set to a
+ * non-literal value, that value is filtered out of @p argv and the variable is
+ * unset so tests do not treat it as an argument.
+ *
+ * The literal tokens "TESTS_QUIET" and "QUIET" are also removed from @p argv;
+ * seeing either one sets TESTS_QUIET=1 in the environment. When @p argc or
+ * @p argv is NULL, or the argument count is below 1, the vector is left
+ * untouched (the environment clean-up above still happens).
+ *
+ * @param[in,out] argc Argument count; updated to the number of entries kept.
+ * @param[in,out] argv Argument vector; compacted in place and NULL-terminated.
+ */
+static inline void harness_accept_tests_quiet(int *argc, char **argv) {
+    /* The PAPI test harness invokes each test with a single positional token
+       holding the value of the TESTS_QUIET environment variable. Only the
+       literal string "TESTS_QUIET" should trigger quiet mode. Any other value
+       is dropped from argv and the environment variable is ignored. */
+
+    char *badarg = NULL;
+    const char *tq_env = getenv("TESTS_QUIET");
+    if (tq_env && strcmp(tq_env, "TESTS_QUIET") != 0) {
+        badarg = strdup(tq_env);   /* remember stray value to filter from argv */
+        unsetenv("TESTS_QUIET");   /* ignore non-literal TESTS_QUIET */
+    }
+
+    /* Without a program-name slot there is nothing to compact, and the
+       terminator store below (argv[1]) would land outside the vector. */
+    if (!argc || !argv || *argc < 1) {
+        free(badarg);
+        return;
+    }
+
+    int w = 1;          /* next write position; argv[0] is always kept */
+    int saw_quiet = 0;
+    for (int r = 1; r < *argc; ++r) {
+        const char *a = argv[r];
+        if (a && (!strcmp(a, "TESTS_QUIET") || !strcmp(a, "QUIET"))) {
+            saw_quiet = 1;
+            continue;
+        }
+        if (badarg && a && strcmp(a, badarg) == 0) {
+            /* discard unexpected TESTS_QUIET value */
+            continue;
+        }
+        argv[w++] = argv[r];
+    }
+    argv[w] = NULL;     /* w <= *argc here, so this stays inside the vector */
+    *argc = w;
+    if (saw_quiet) setenv("TESTS_QUIET", "1", 1);
+    free(badarg);       /* free(NULL) is a no-op */
+}
+
+/**
+ * @brief Parse common harness CLI/environment options.
+ *
+ * Defaults to printing unless TESTS_QUIET is present. Mirrors src/run_tests.sh
+ * behavior where invoking with -v unsets TESTS_QUIET (tests should emit output).
+ *
+ * Also sets/clears PAPI_AMDSMI_TEST_QUIET so individual tests can key off it.
+ *
+ * Recognizes "--expect=<value>": the value "fail" turns on expect_fail, any
+ * other value turns it off; when repeated, the last occurrence wins.
+ *
+ * @param argc Argument count.
+ * @param argv Argument vector.
+ * @return Populated HarnessOpts (also stored in @ref harness_opts).
+ */
+static inline HarnessOpts parse_harness_cli(int argc, char **argv) {
+    /* Default to printing unless the TESTS_QUIET token is present.
+       This mirrors src/run_tests.sh where invoking with -v unsets
+       TESTS_QUIET, signalling that tests should emit output. */
+    harness_opts.print = true;
+    harness_opts.expect_fail = false;
+    harness_opts.had_warning = 0;
+
+    for (int i = 1; argv && i < argc; ++i) {
+        /* The compaction done by harness_accept_tests_quiet copies NULL
+           entries through unchanged, so guard before strncmp. */
+        if (!argv[i]) continue;
+        if (strncmp(argv[i], "--expect=", 9) == 0) {
+            const char *v = argv[i] + 9;
+            harness_opts.expect_fail = (strcmp(v, "fail") == 0);
+        }
+    }
+
+    /* Suppress output only if TESTS_QUIET is explicitly set. When
+       run_tests.sh is invoked without -v it passes the literal token
+       "TESTS_QUIET", which harness_accept_tests_quiet converts into
+       this environment variable. */
+    const char *tq = getenv("TESTS_QUIET");
+    if (tq && *tq) harness_opts.print = false;
+
+    if (!harness_opts.print) {
+        const char* q = getenv("PAPI_AMDSMI_TEST_QUIET");
+        if (!q || q[0] != '1') setenv("PAPI_AMDSMI_TEST_QUIET", "1", 1);
+    } else {
+        unsetenv("PAPI_AMDSMI_TEST_QUIET");
+    }
+    return harness_opts;
+}
+
+/**
+ * @brief Evaluate the test result and print a final status line.
+ *
+ * A zero @p result_code is PASS unless @ref HarnessOpts::expect_fail is true,
+ * in which case nonzero indicates PASS. If any warnings were recorded, output
+ * "PASSED with WARNING".
+ *
+ * @param opts        The harness options in effect (warning flag may be
+ *                    updated from the global state).
+ * @param result_code The test's return code.
+ * @return 0 on PASS (per @p opts), 1 on FAIL.
+ */
+static inline int eval_result(HarnessOpts opts, int result_code) {
+    /* opts is a by-value copy; pick up warnings recorded in the global
+       harness_opts after that copy was taken. */
+    if (harness_opts.had_warning) {
+        opts.had_warning = harness_opts.had_warning;
+    }
+
+    bool passed = opts.expect_fail ? (result_code != 0) : (result_code == 0);
+    if (passed) {
+        if (opts.had_warning) printf("PASSED with WARNING\n");
+        else printf("PASSED\n");
+    } else {
+        printf("FAILED!!!\n");
+    }
+    return passed ? 0 : 1;
+}
+
+/* ---------- Output helpers ---------- */
+
+/** Print a note (followed by a newline) only when normal output is enabled. */
+#define NOTE(...) do { \
+    if (harness_opts.print) { fprintf(stdout, __VA_ARGS__); fprintf(stdout, "\n"); } \
+} while (0)
+
+/** Mark a warning (does not exit); the message is printed only when output is enabled. */
+#define WARNF(...) do { \
+    harness_opts.had_warning = 1; \
+    if (harness_opts.print) { fprintf(stdout, "WARNING: "); fprintf(stdout, __VA_ARGS__); fprintf(stdout, "\n"); } \
+} while (0)
+
+/* ---------- Cannot-conduct helpers ---------- */
+/* Treat certain hardware/resource limitations as success-with-warning. */
+
+/**
+ * @brief Exit immediately as "PASSED with WARNING".
+ * Prints an optional formatted warning message when output is enabled.
+ *
+ * `*#__VA_ARGS__` stringifies the argument list and tests its first character,
+ * so the message branch is taken only for a non-empty argument list. The
+ * status line is always printed and flushed before exit(0).
+ */
+#define EXIT_WARNING(...) do { \
+    harness_opts.had_warning = 1; \
+    if (harness_opts.print && *#__VA_ARGS__) { fprintf(stdout, "WARNING: "); fprintf(stdout, __VA_ARGS__); fprintf(stdout, "\n"); } \
+    printf("PASSED with WARNING\n"); fflush(stdout); exit(0); \
+} while (0)
+
+/**
+ * @brief If adding the event set fails due to unsupported or hardware/resource
+ *        limits, exit as "PASSED with WARNING".
+ *
+ * Recognizes PAPI_ENOEVNT, PAPI_ECNFLCT, and PAPI_EPERM. Any other @p rc falls
+ * through without exiting. Note that @p rc is evaluated more than once.
+ */
+#define EXIT_WARNING_ON_ADD(rc, evname) do { \
+    if ((rc) == PAPI_ENOEVNT || (rc) == PAPI_ECNFLCT || (rc) == PAPI_EPERM) { \
+        EXIT_WARNING("Event unavailable (%s): %s", \
+            ((rc) == PAPI_ENOEVNT ? "ENOEVNT" : (rc) == PAPI_ECNFLCT ? "ECNFLCT" : "EPERM"), (evname)); \
+    } \
+} while (0)
+
+/**
+ * @brief If starting counters fails due to hardware/resource limits,
+ *        exit as "PASSED with WARNING".
+ *
+ * Recognizes PAPI_ECNFLCT and PAPI_EPERM. Any other @p rc falls through
+ * without exiting. Note that @p rc is evaluated more than once.
+ */
+#define EXIT_WARNING_ON_START(rc, ctx) do { \
+    if ((rc) == PAPI_ECNFLCT || (rc) == PAPI_EPERM) { \
+        EXIT_WARNING("Cannot start counters (%s): %s", (ctx), PAPI_strerror(rc)); \
+    } \
+} while (0)
+
+/** Keep SKIP as a cannot-conduct success-with-warning.
*/
+#define SKIP(reason) EXIT_WARNING("%s", (reason))
+
+#endif /* TEST_HARNESS_H */
diff --git a/src/configure b/src/configure
index c9141fedf..3b4d50a32 100755
--- a/src/configure
+++ b/src/configure
@@ -7010,6 +7010,10 @@ for comp in $components; do
     if test "x`find $PAPI_ROCMSMI_ROOT -name "rocm_smi.h"`" != "x" ; then
       CFLAGS="$CFLAGS -DHAVE_ROCM_SMI"
     fi
+
+    if test "x`find $PAPI_AMDSMI_ROOT -name "amdsmi.h"`" != "x" ; then
+      CFLAGS="$CFLAGS -DHAVE_AMD_SMI"
+    fi
   fi
 done
 
diff --git a/src/configure.in b/src/configure.in
index 04d1eedbe..ce7f86844 100644
--- a/src/configure.in
+++ b/src/configure.in
@@ -2001,6 +2001,10 @@ for comp in $components; do
     if test "x`find $PAPI_ROCMSMI_ROOT -name "rocm_smi.h"`" != "x" ; then
       CFLAGS="$CFLAGS -DHAVE_ROCM_SMI"
     fi
+
+    if test "x`find $PAPI_AMDSMI_ROOT -name "amdsmi.h"`" != "x" ; then
+      CFLAGS="$CFLAGS -DHAVE_AMD_SMI"
+    fi
   fi
 done