From e01e0d7ce4eba23e366448482ef8af139303171f Mon Sep 17 00:00:00 2001
From: AsbjornOlling
Date: Mon, 1 Dec 2025 16:24:01 +0100
Subject: [PATCH 1/5] use cpu features configuration from cargo

---
 llama-cpp-sys-2/build.rs | 68 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 2 deletions(-)

diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
index aa70bee5..fdd32d46 100644
--- a/llama-cpp-sys-2/build.rs
+++ b/llama-cpp-sys-2/build.rs
@@ -508,6 +508,70 @@ fn main() {
         }
     }
 
+    // in this next bit, we select which cpu-specific features to compile for
+    // first check for target-cpu=native
+    let has_native_target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
+        .map(|rustflags| {
+            rustflags
+                .split('\x1f')
+                .any(|f| f.contains("target-cpu=native"))
+        })
+        .unwrap_or(false);
+    if has_native_target_cpu {
+        debug_log!("Detected target-cpu=native, compiling with GGML_NATIVE");
+        config.define("GGML_NATIVE", "ON");
+    }
+    // if native isn't specified, enable specific features for ggml
+    // Get the target features as a comma-separated string
+    else if let Ok(features) = std::env::var("CARGO_CFG_TARGET_FEATURE") {
+        debug_log!("Compiling with target features: {}", features);
+        // list of rust target_features here:
+        // https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute
+        // GGML config flags have been found by looking at:
+        // llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
+        for feature in features.split(',') {
+            match feature {
+                "avx" => {
+                    config.define("GGML_AVX", "ON");
+                }
+                "avx2" => {
+                    config.define("GGML_AVX2", "ON");
+                }
+                "avx512bf16" => {
+                    config.define("GGML_AVX512_BF16", "ON");
+                }
+                "avx512vbmi" => {
+                    config.define("GGML_AVX512_VBMI", "ON");
+                }
+                "avx512vnni" => {
+                    config.define("GGML_AVX512_VNNI", "ON");
+                }
+                "avxvnni" => {
+                    config.define("GGML_AVX_VNNI", "ON");
+                }
+                "bmi2" => {
+                    config.define("GGML_BMI2", "ON");
+                }
+                "f16c" => {
+                    config.define("GGML_F16C", "ON");
+                }
+                "fma" => {
+                    config.define("GGML_FMA", "ON");
+                }
+                "sse4.2" => {
+                    config.define("GGML_SSE42", "ON");
+                }
+                _ => {
+                    debug_log!(
+                        "Unrecognized cpu feature: '{}' - skipping GGML config for it.",
+                        feature
+                    );
+                    continue;
+                }
+            };
+        }
+    }
+
     config.define(
         "BUILD_SHARED_LIBS",
         if build_shared_libs { "ON" } else { "OFF" },
@@ -627,9 +691,9 @@ fn main() {
 
     if matches!(target_os, TargetOs::Linux)
         && target_triple.contains("aarch64")
-        && env::var(format!("CARGO_FEATURE_{}", "native".to_uppercase())).is_err()
+        && has_native_target_cpu
     {
-        // If the native feature is not enabled, we take off the native ARM64 support.
+        // If the target-cpu is not specified as native, we take off the native ARM64 support.
         // It is useful in docker environments where the native feature is not enabled.
         config.define("GGML_NATIVE", "OFF");
         config.define("GGML_CPU_ARM_ARCH", "armv8-a");
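The match in this patch is the crate's feature-to-define table. A minimal standalone sketch of the same mapping, runnable outside the build script — the helper name `ggml_defines_for` and the sample feature string are illustrative, not part of the patch:

fn ggml_defines_for(features: &str) -> Vec<&'static str> {
    // mirrors the match in build.rs: one GGML define per recognized rustc target feature
    features
        .split(',')
        .filter_map(|feature| match feature {
            "avx" => Some("GGML_AVX"),
            "avx2" => Some("GGML_AVX2"),
            "avx512bf16" => Some("GGML_AVX512_BF16"),
            "avx512vbmi" => Some("GGML_AVX512_VBMI"),
            "avx512vnni" => Some("GGML_AVX512_VNNI"),
            "avxvnni" => Some("GGML_AVX_VNNI"),
            "bmi2" => Some("GGML_BMI2"),
            "f16c" => Some("GGML_F16C"),
            "fma" => Some("GGML_FMA"),
            "sse4.2" => Some("GGML_SSE42"),
            _ => None, // unrecognized features are skipped, as in the build script
        })
        .collect()
}

fn main() {
    // a plausible CARGO_CFG_TARGET_FEATURE value for a baseline x86-64 target
    let features = "fxsr,sse,sse2,sse3,sse4.1,sse4.2,ssse3";
    assert_eq!(ggml_defines_for(features), vec!["GGML_SSE42"]);
}

CARGO_CFG_TARGET_FEATURE is the comma-separated list cargo exposes to build scripts, which is what the patch iterates over.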
From 711d3e8d902994ed45a3ed951aed446e336a7865 Mon Sep 17 00:00:00 2001
From: AsbjornOlling
Date: Tue, 2 Dec 2025 09:31:17 +0100
Subject: [PATCH 2/5] pass target cpu to cmake cflags march

---
 llama-cpp-sys-2/build.rs | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
index fdd32d46..5f69828f 100644
--- a/llama-cpp-sys-2/build.rs
+++ b/llama-cpp-sys-2/build.rs
@@ -517,6 +517,18 @@ fn main() {
                 .any(|f| f.contains("target-cpu=native"))
         })
         .unwrap_or(false);
+
+    // Also extract the target-cpu value if specified (e.g., x86-64, x86-64-v2, etc.)
+    let target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
+        .ok()
+        .and_then(|rustflags| {
+            rustflags
+                .split('\x1f')
+                .find(|f| f.contains("target-cpu=") && !f.contains("target-cpu=native"))
+                .and_then(|f| f.split("target-cpu=").nth(1))
+                .map(|s| s.to_string())
+        });
+
     if has_native_target_cpu {
         debug_log!("Detected target-cpu=native, compiling with GGML_NATIVE");
         config.define("GGML_NATIVE", "ON");
@@ -525,6 +537,17 @@ fn main() {
     // Get the target features as a comma-separated string
     else if let Ok(features) = std::env::var("CARGO_CFG_TARGET_FEATURE") {
         debug_log!("Compiling with target features: {}", features);
+        config.define("GGML_NATIVE", "OFF");
+
+        // Set baseline architecture from target-cpu if specified
+        // This is critical to prevent the compiler from auto-vectorizing to the build host's capabilities
+        if let Some(ref cpu) = target_cpu {
+            debug_log!("Setting baseline architecture: -march={}", cpu);
+            // Pass the baseline architecture to CMake's C and CXX compilers
+            config.cflag(&format!("-march={}", cpu));
+            config.cxxflag(&format!("-march={}", cpu));
+        }
+
         // list of rust target_features here:
         // https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute
         // GGML config flags have been found by looking at:

From 9d39309509a4d9b9ac7b81839575d391ebf9be57 Mon Sep 17 00:00:00 2001
From: AsbjornOlling
Date: Tue, 2 Dec 2025 14:40:56 +0100
Subject: [PATCH 3/5] remove 'native' feature

---
 examples/embeddings/Cargo.toml | 1 -
 examples/mtmd/Cargo.toml       | 1 -
 examples/reranker/Cargo.toml   | 1 -
 examples/simple/Cargo.toml     | 1 -
 llama-cpp-2/Cargo.toml         | 1 -
 llama-cpp-sys-2/Cargo.toml     | 1 -
 6 files changed, 6 deletions(-)

diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml
index 13858693..e5866549 100644
--- a/examples/embeddings/Cargo.toml
+++ b/examples/embeddings/Cargo.toml
@@ -13,7 +13,6 @@ anyhow = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
diff --git a/examples/mtmd/Cargo.toml b/examples/mtmd/Cargo.toml
index 426ddaa6..3863fc65 100644
--- a/examples/mtmd/Cargo.toml
+++ b/examples/mtmd/Cargo.toml
@@ -11,7 +11,6 @@ clap = { workspace = true, features = ["derive"] }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
diff --git a/examples/reranker/Cargo.toml b/examples/reranker/Cargo.toml
index dfce8e37..2bedf9fb 100644
--- a/examples/reranker/Cargo.toml
+++ b/examples/reranker/Cargo.toml
@@ -14,7 +14,6 @@ encoding_rs = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml
index 964b061e..f8376c65 100644
--- a/examples/simple/Cargo.toml
+++ b/examples/simple/Cargo.toml
@@ -17,7 +17,6 @@ tracing-subscriber = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml
index 026487ef..54d41444 100644
--- a/llama-cpp-2/Cargo.toml
+++ b/llama-cpp-2/Cargo.toml
@@ -26,7 +26,6 @@ cuda-no-vmm = ["cuda", "llama-cpp-sys-2/cuda-no-vmm"]
 metal = ["llama-cpp-sys-2/metal"]
 dynamic-link = ["llama-cpp-sys-2/dynamic-link"]
 vulkan = ["llama-cpp-sys-2/vulkan"]
-native = ["llama-cpp-sys-2/native"]
 openmp = ["llama-cpp-sys-2/openmp"]
 sampler = []
 # Only has an impact on Android.
diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml
index e7f03ef2..e0b17000 100644
--- a/llama-cpp-sys-2/Cargo.toml
+++ b/llama-cpp-sys-2/Cargo.toml
@@ -78,7 +78,6 @@ cuda-no-vmm = ["cuda"]
 metal = []
 dynamic-link = []
 vulkan = []
-native = []
 openmp = []
 # Only has an impact on Android.
 shared-stdcxx = []

From b08e6ecc172eab8d2e86ff7715cba21ffd054a94 Mon Sep 17 00:00:00 2001
From: AsbjornOlling
Date: Tue, 2 Dec 2025 17:32:21 +0100
Subject: [PATCH 4/5] un-invert target_cpu check on linux/aarch64 builds, clean up code

---
 llama-cpp-sys-2/build.rs | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
index 5f69828f..c70ae5fe 100644
--- a/llama-cpp-sys-2/build.rs
+++ b/llama-cpp-sys-2/build.rs
@@ -508,17 +508,7 @@ fn main() {
         }
     }
 
-    // in this next bit, we select which cpu-specific features to compile for
-    // first check for target-cpu=native
-    let has_native_target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
-        .map(|rustflags| {
-            rustflags
-                .split('\x1f')
-                .any(|f| f.contains("target-cpu=native"))
-        })
-        .unwrap_or(false);
-
-    // Also extract the target-cpu value if specified (e.g., x86-64, x86-64-v2, etc.)
+    // extract the target-cpu config value, if specified
     let target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
         .ok()
         .and_then(|rustflags| {
@@ -529,25 +519,27 @@ fn main() {
                 .map(|s| s.to_string())
         });
 
-    if has_native_target_cpu {
+    if target_cpu == Some("native".into()) {
         debug_log!("Detected target-cpu=native, compiling with GGML_NATIVE");
         config.define("GGML_NATIVE", "ON");
     }
-    // if native isn't specified, enable specific features for ggml
-    // Get the target features as a comma-separated string
-    else if let Ok(features) = std::env::var("CARGO_CFG_TARGET_FEATURE") {
-        debug_log!("Compiling with target features: {}", features);
+    // if native isn't specified, enable specific features for ggml instead
+    else {
+        // rust code isn't using `target-cpu=native`, so llama.cpp shouldn't use GGML_NATIVE either
         config.define("GGML_NATIVE", "OFF");
 
-        // Set baseline architecture from target-cpu if specified
-        // This is critical to prevent the compiler from auto-vectorizing to the build host's capabilities
+        // if `target-cpu` is set, also set -march for llama.cpp to the same value
        if let Some(ref cpu) = target_cpu {
             debug_log!("Setting baseline architecture: -march={}", cpu);
-            // Pass the baseline architecture to CMake's C and CXX compilers
             config.cflag(&format!("-march={}", cpu));
             config.cxxflag(&format!("-march={}", cpu));
         }
 
+        // I expect this env var to always be present
+        let features = std::env::var("CARGO_CFG_TARGET_FEATURE")
+            .expect("Env var CARGO_CFG_TARGET_FEATURE not found.");
+        debug_log!("Compiling with target features: {}", features);
+
         // list of rust target_features here:
         // https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute
         // GGML config flags have been found by looking at:
@@ -714,7 +706,7 @@ fn main() {
 
     if matches!(target_os, TargetOs::Linux)
         && target_triple.contains("aarch64")
-        && has_native_target_cpu
+        && target_cpu != Some("native".into())
     {
         // If the target-cpu is not specified as native, we take off the native ARM64 support.
         // It is useful in docker environments where the native feature is not enabled.
From b078bc95dc30e9967c1e7720aef7bae2e51b0405 Mon Sep 17 00:00:00 2001
From: AsbjornOlling
Date: Tue, 2 Dec 2025 17:36:59 +0100
Subject: [PATCH 5/5] fix: don't skip 'target-cpu=native' when extracting target_cpu

---
 llama-cpp-sys-2/build.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
index c70ae5fe..ba96a2f1 100644
--- a/llama-cpp-sys-2/build.rs
+++ b/llama-cpp-sys-2/build.rs
@@ -514,7 +514,7 @@ fn main() {
         .and_then(|rustflags| {
             rustflags
                 .split('\x1f')
-                .find(|f| f.contains("target-cpu=") && !f.contains("target-cpu=native"))
+                .find(|f| f.contains("target-cpu="))
                 .and_then(|f| f.split("target-cpu=").nth(1))
                 .map(|s| s.to_string())
         });
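With the `native` cargo feature gone, CPU tuning for the bundled llama.cpp follows rustc's `target-cpu` setting. As an illustrative usage (not from the patches): building with `RUSTFLAGS="-Ctarget-cpu=native" cargo build` turns on GGML_NATIVE, while a portable baseline such as `rustflags = ["-C", "target-cpu=x86-64-v2"]` in `.cargo/config.toml` reaches the build script via CARGO_ENCODED_RUSTFLAGS, is extracted by the code above, and is forwarded to llama.cpp as `-march=x86-64-v2` plus the matching GGML feature defines.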