From f754781dcf84351d9826c13e177b8ae1dac3b300 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Mon, 10 Nov 2025 13:56:58 +0000 Subject: [PATCH 1/3] Flatten build variants to `build/{variant}` Prior to this change, kernels were stored in `build/{variant}/{extensionName}`. However, this was fragile because the extension name had to correspond to the repository name. This change flattens kernels to be stored directly inside `build/{variant}`. For compatibility with older versions of `kernels`, we add a module `build/{variant}/{extensionName}` that loads `build/{variant}`. This compatibility module will be removed once the updated `kernels` has been around for a while. --- lib/torch-extension/arch.nix | 17 +++++++++++------ lib/torch-extension/compat.py | 26 ++++++++++++++++++++++++++ lib/torch-extension/no-arch.nix | 4 +++- 3 files changed, 40 insertions(+), 7 deletions(-) create mode 100644 lib/torch-extension/compat.py diff --git a/lib/torch-extension/arch.nix b/lib/torch-extension/arch.nix index 6e90702..d495f06 100644 --- a/lib/torch-extension/arch.nix +++ b/lib/torch-extension/arch.nix @@ -223,21 +223,26 @@ stdenv.mkDerivation (prevAttrs: { postInstall = '' ( cd .. - cp -r torch-ext/${extensionName} $out/ + cp -r torch-ext/${extensionName}/* $out/ ) - cp $out/_${extensionName}_*/* $out/${extensionName} - rm -rf $out/_${extensionName}_* + mv $out/_${extensionName}_*/* $out/ + rm -d $out/_${extensionName}_${rev} + + # Set up a compatibility module for older kernels versions, remove when + # the updated kernels has been around for a while. + mkdir $out/${extensionName} + cp ${./compat.py} $out/${extensionName}/__init__.py '' + (lib.optionalString (stripRPath && stdenv.hostPlatform.isLinux)) '' - find $out/${extensionName} -name '*.so' \ + find $out/ -name '*.so' \ -exec patchelf --set-rpath "" {} \; '' + (lib.optionalString (stripRPath && stdenv.hostPlatform.isDarwin)) '' - find $out/${extensionName} -name '*.so' \ + find $out/ -name '*.so' \ -exec rewrite-nix-paths-macho {} \; # Stub some rpath. 
- find $out/${extensionName} -name '*.so' \ + find $out/ -name '*.so' \ -exec install_name_tool -add_rpath "@loader_path/lib" {} \; ''; diff --git a/lib/torch-extension/compat.py b/lib/torch-extension/compat.py new file mode 100644 index 0000000..03dbc1a --- /dev/null +++ b/lib/torch-extension/compat.py @@ -0,0 +1,26 @@ +import ctypes +import sys + +import importlib.util +from pathlib import Path +from types import ModuleType + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None or spec.loader is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/lib/torch-extension/no-arch.nix b/lib/torch-extension/no-arch.nix index c591c85..82b7549 100644 --- a/lib/torch-extension/no-arch.nix +++ b/lib/torch-extension/no-arch.nix @@ -48,7 +48,9 @@ stdenv.mkDerivation (prevAttrs: { installPhase = '' mkdir -p $out - cp -r torch-ext/${extensionName} $out/ + cp -r torch-ext/${extensionName}/* $out/ + mkdir $out/${extensionName} + cp ${./compat.py} $out/${extensionName}/__init__.py ''; doInstallCheck = true; From c82c5409e4f1d346e74ed195f787696f1bb70760 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Tue, 11 Nov 2025 10:02:20 +0000 Subject: [PATCH 2/3] Add some kernel layout checks - Validate 
that the kernel does not have a file/directory `torch-ext/{extensionName}/{extensionName}` since we need that directory for the compat module. - Validate that the kernel has `torch-ext/{extensionName}/__init__.py`. This is also verified by get-kernel-check, but that only fires *after* the kernel is already compiled. --- lib/torch-extension/arch.nix | 13 ++-- lib/torch-extension/no-arch.nix | 6 +- overlay.nix | 2 + .../get-kernel-check/get-kernel-check-hook.sh | 60 ++++++++++--------- pkgs/kernel-layout-check/default.nix | 5 ++ .../kernel-layout-check-hook.sh | 27 +++++++++ 6 files changed, 78 insertions(+), 35 deletions(-) create mode 100644 pkgs/kernel-layout-check/default.nix create mode 100755 pkgs/kernel-layout-check/kernel-layout-check-hook.sh diff --git a/lib/torch-extension/arch.nix b/lib/torch-extension/arch.nix index d495f06..0a4b4e0 100644 --- a/lib/torch-extension/arch.nix +++ b/lib/torch-extension/arch.nix @@ -13,6 +13,7 @@ cuda_nvcc, get-kernel-check, kernel-abi-check, + kernel-layout-check, ninja, python3, remove-bytecode-hook, @@ -79,7 +80,12 @@ in stdenv.mkDerivation (prevAttrs: { name = "${extensionName}-torch-ext"; - inherit doAbiCheck nvccThreads src; + inherit + doAbiCheck + extensionName + nvccThreads + src + ; # Generate build files. postPatch = '' @@ -123,10 +129,11 @@ stdenv.mkDerivation (prevAttrs: { ''; nativeBuildInputs = [ - kernel-abi-check cmake ninja build2cmake + kernel-abi-check + kernel-layout-check remove-bytecode-hook ] ++ lib.optionals doGetKernelCheck [ @@ -248,8 +255,6 @@ stdenv.mkDerivation (prevAttrs: { doInstallCheck = true; - getKernelCheck = extensionName; - # We need access to the host system on Darwin for the Metal compiler. 
__noChroot = metalSupport; diff --git a/lib/torch-extension/no-arch.nix b/lib/torch-extension/no-arch.nix index 82b7549..783a5ac 100644 --- a/lib/torch-extension/no-arch.nix +++ b/lib/torch-extension/no-arch.nix @@ -4,6 +4,7 @@ build2cmake, get-kernel-check, + kernel-layout-check, remove-bytecode-hook, torch, }: @@ -23,7 +24,7 @@ stdenv.mkDerivation (prevAttrs: { name = "${extensionName}-torch-ext"; - inherit src; + inherit extensionName src; # Add Torch as a dependency, so that devshells for universal kernels # also get torch as a build input. @@ -31,6 +32,7 @@ stdenv.mkDerivation (prevAttrs: { nativeBuildInputs = [ build2cmake + kernel-layout-check remove-bytecode-hook ] ++ lib.optionals doGetKernelCheck [ @@ -54,6 +56,4 @@ stdenv.mkDerivation (prevAttrs: { ''; doInstallCheck = true; - - getKernelCheck = extensionName; }) diff --git a/overlay.nix b/overlay.nix index 9797639..ccd54c7 100644 --- a/overlay.nix +++ b/overlay.nix @@ -9,6 +9,8 @@ final: prev: { kernel-abi-check = prev.callPackage ./pkgs/kernel-abi-check { }; + kernel-layout-check = prev.callPackage ./pkgs/kernel-layout-check { }; + rewrite-nix-paths-macho = prev.callPackage ./pkgs/rewrite-nix-paths-macho { }; remove-bytecode-hook = prev.callPackage ./pkgs/remove-bytecode-hook { }; diff --git a/pkgs/get-kernel-check/get-kernel-check-hook.sh b/pkgs/get-kernel-check/get-kernel-check-hook.sh index d59810b..73915f7 100755 --- a/pkgs/get-kernel-check/get-kernel-check-hook.sh +++ b/pkgs/get-kernel-check/get-kernel-check-hook.sh @@ -3,35 +3,39 @@ echo "Sourcing get-kernel-check-hook.sh" _getKernelCheckHook() { - if [ ! -z "${getKernelCheck}" ]; then - echo "Checking loading kernel with get_kernel" - echo "Check whether the kernel can be loaded with get-kernel: ${getKernelCheck}" - - # We strip the full library paths from the extension. Unfortunately, - # in a Nix environment, the library dependencies cannot be found - # anymore. 
So we have to add the Torch library directory to the - # dynamic linker path to get it to pick it up. - if [ $(uname -s) == "Darwin" ]; then - TORCH_DIR=$(python -c "from pathlib import Path; import torch; print(Path(torch.__file__).parent)") - export DYLD_LIBRARY_PATH="${TORCH_DIR}/lib:${DYLD_LIBRARY_PATH}" - fi - - TMPDIR=$(mktemp -d -t test.XXXXXX) || exit 1 - trap "rm -rf '$TMPDIR'" EXIT - - # Some kernels want to write stuff (especially when they use Triton). - HOME=$(mktemp -d -t test.XXXXXX) || exit 1 - trap "rm -rf '$HOME'" EXIT - - # Emulate the bundle layout that kernels expects. This even works - # for universal kernels, since kernels checks the non-universal - # path first. - BUILD_VARIANT=$(python -c "from kernels.utils import build_variant; print(build_variant())") - mkdir -p "${TMPDIR}/build" - ln -s "$out" "${TMPDIR}/build/${BUILD_VARIANT}" - - python -c "from pathlib import Path; import kernels; kernels.get_local_kernel(Path('${TMPDIR}'), '${getKernelCheck}')" + echo "Checking loading kernel with get_kernel" + + if [ -z ${extensionName+x} ]; then + echo "extensionName must be set in derivation" + exit 1 + fi + + echo "Check whether the kernel can be loaded with get-kernel: ${extensionName}" + + # We strip the full library paths from the extension. Unfortunately, + # in a Nix environment, the library dependencies cannot be found + # anymore. So we have to add the Torch library directory to the + # dynamic linker path to get it to pick it up. + if [ $(uname -s) == "Darwin" ]; then + TORCH_DIR=$(python -c "from pathlib import Path; import torch; print(Path(torch.__file__).parent)") + export DYLD_LIBRARY_PATH="${TORCH_DIR}/lib:${DYLD_LIBRARY_PATH}" fi + + TMPDIR=$(mktemp -d -t test.XXXXXX) || exit 1 + trap "rm -rf '$TMPDIR'" EXIT + + # Some kernels want to write stuff (especially when they use Triton). + HOME=$(mktemp -d -t test.XXXXXX) || exit 1 + trap "rm -rf '$TMPDIR' '$HOME'" EXIT + + # Emulate the bundle layout that kernels expects. 
This even works + # for universal kernels, since kernels checks the non-universal + # path first. + BUILD_VARIANT=$(python -c "from kernels.utils import build_variant; print(build_variant())") + mkdir -p "${TMPDIR}/build" + ln -s "$out" "${TMPDIR}/build/${BUILD_VARIANT}" + + python -c "from pathlib import Path; import kernels; kernels.get_local_kernel(Path('${TMPDIR}'), '${extensionName}')" } postInstallCheckHooks+=(_getKernelCheckHook) diff --git a/pkgs/kernel-layout-check/default.nix b/pkgs/kernel-layout-check/default.nix new file mode 100644 index 0000000..259fa33 --- /dev/null +++ b/pkgs/kernel-layout-check/default.nix @@ -0,0 +1,5 @@ +{ makeSetupHook, python3 }: + +makeSetupHook { + name = "kernel-layout-check-hook"; +} ./kernel-layout-check-hook.sh diff --git a/pkgs/kernel-layout-check/kernel-layout-check-hook.sh b/pkgs/kernel-layout-check/kernel-layout-check-hook.sh new file mode 100755 index 0000000..1a72b55 --- /dev/null +++ b/pkgs/kernel-layout-check/kernel-layout-check-hook.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +echo "Sourcing kernel-layout-check-hook.sh" + +kernelLayoutCheckHook() { + echo "Checking kernel layout" + + if [ -z ${extensionName+x} ]; then + echo "extensionName must be set in derivation" + exit 1 + fi + + if [ ! -f source/torch-ext/${extensionName}/__init__.py ]; then + echo "Python module at source/torch-ext/${extensionName} must contain __init__.py" + exit 1 + fi + + # TODO: remove once the old location is removed from kernels. + if [ -e source/torch-ext/${extensionName}/${extensionName} ]; then + echo "Python module at source/torch-ext/${extensionName} must not have ${extensionName} file or directory." 
+ exit 1 + fi +} + +if [ -z "${dontCheckLayout-}" ]; then + postUnpackHooks+=(kernelLayoutCheckHook) +fi From 25cddd9a98e431a0de4d2fdd1f8281f001bb1773 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Tue, 11 Nov 2025 10:18:22 +0000 Subject: [PATCH 3/3] Fix ABI check directory --- pkgs/kernel-abi-check/kernel-abi-check-hook.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/kernel-abi-check/kernel-abi-check-hook.sh b/pkgs/kernel-abi-check/kernel-abi-check-hook.sh index de7cf67..3bdcda6 100755 --- a/pkgs/kernel-abi-check/kernel-abi-check-hook.sh +++ b/pkgs/kernel-abi-check/kernel-abi-check-hook.sh @@ -5,7 +5,7 @@ _checkAbiHook() { echo "Skipping ABI check" else echo "Checking of ABI compatibility" - find "$out/${extensionName}" -name '*.so' -print0 | \ + find "$out/" -name '*.so' -print0 | \ xargs -0 -n1 kernel-abi-check fi }