From e01e0d7ce4eba23e366448482ef8af139303171f Mon Sep 17 00:00:00 2001
From: AsbjornOlling
Date: Mon, 1 Dec 2025 16:24:01 +0100
Subject: [PATCH 1/5] use cpu features configuration from cargo

---
 llama-cpp-sys-2/build.rs | 68 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 2 deletions(-)

diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
index aa70bee5..fdd32d46 100644
--- a/llama-cpp-sys-2/build.rs
+++ b/llama-cpp-sys-2/build.rs
@@ -508,6 +508,70 @@ fn main() {
         }
     }
 
+    // in this next bit, we select which cpu-specific features to compile for
+    // first check for target-cpu=native
+    let has_native_target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
+        .map(|rustflags| {
+            rustflags
+                .split('\x1f')
+                .any(|f| f.contains("target-cpu=native"))
+        })
+        .unwrap_or(false);
+    if has_native_target_cpu {
+        debug_log!("Detected target-cpu=native, compiling with GGML_NATIVE");
+        config.define("GGML_NATIVE", "ON");
+    }
+    // if native isn't specified, enable specific features for ggml
+    // Get the target features as a comma-separated string
+    else if let Ok(features) = std::env::var("CARGO_CFG_TARGET_FEATURE") {
+        debug_log!("Compiling with target features: {}", features);
+        // list of rust target_features here:
+        // https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute
+        // GGML config flags have been found by looking at:
+        // llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
+        for feature in features.split(',') {
+            match feature {
+                "avx" => {
+                    config.define("GGML_AVX", "ON");
+                }
+                "avx2" => {
+                    config.define("GGML_AVX2", "ON");
+                }
+                "avx512bf16" => {
+                    config.define("GGML_AVX512_BF16", "ON");
+                }
+                "avx512vbmi" => {
+                    config.define("GGML_AVX512_VBMI", "ON");
+                }
+                "avx512vnni" => {
+                    config.define("GGML_AVX512_VNNI", "ON");
+                }
+                "avxvnni" => {
+                    config.define("GGML_AVX_VNNI", "ON");
+                }
+                "bmi2" => {
+                    config.define("GGML_BMI2", "ON");
+                }
+                "f16c" => {
+                    config.define("GGML_F16C", "ON");
+                }
+                "fma" => {
+                    config.define("GGML_FMA", "ON");
+                }
+                "sse4.2" => {
+                    config.define("GGML_SSE42", "ON");
+                }
+                _ => {
+                    debug_log!(
+                        "Unrecognized cpu feature: '{}' - skipping GGML config for it.",
+                        feature
+                    );
+                    continue;
+                }
+            };
+        }
+    }
+
     config.define(
         "BUILD_SHARED_LIBS",
         if build_shared_libs { "ON" } else { "OFF" },
@@ -627,9 +691,9 @@ fn main() {
 
     if matches!(target_os, TargetOs::Linux)
         && target_triple.contains("aarch64")
-        && env::var(format!("CARGO_FEATURE_{}", "native".to_uppercase())).is_err()
+        && has_native_target_cpu
     {
-        // If the native feature is not enabled, we take off the native ARM64 support.
+        // If the target-cpu is not specified as native, we take off the native ARM64 support.
         // It is useful in docker environments where the native feature is not enabled.
         config.define("GGML_NATIVE", "OFF");
         config.define("GGML_CPU_ARM_ARCH", "armv8-a");
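The match in this patch is the crate's feature-to-define table. A minimal standalone sketch of the same mapping, runnable outside the build script — the helper name `ggml_defines_for` and the sample feature string are illustrative, not part of the patch:

fn ggml_defines_for(features: &str) -> Vec<&'static str> {
    // mirrors the match in build.rs: one GGML define per recognized rustc target feature
    features
        .split(',')
        .filter_map(|feature| match feature {
            "avx" => Some("GGML_AVX"),
            "avx2" => Some("GGML_AVX2"),
            "avx512bf16" => Some("GGML_AVX512_BF16"),
            "avx512vbmi" => Some("GGML_AVX512_VBMI"),
            "avx512vnni" => Some("GGML_AVX512_VNNI"),
            "avxvnni" => Some("GGML_AVX_VNNI"),
            "bmi2" => Some("GGML_BMI2"),
            "f16c" => Some("GGML_F16C"),
            "fma" => Some("GGML_FMA"),
            "sse4.2" => Some("GGML_SSE42"),
            _ => None, // unrecognized features are skipped, as in the build script
        })
        .collect()
}

fn main() {
    // a plausible CARGO_CFG_TARGET_FEATURE value for a baseline x86-64 target
    let features = "fxsr,sse,sse2,sse3,sse4.1,sse4.2,ssse3";
    assert_eq!(ggml_defines_for(features), vec!["GGML_SSE42"]);
}

CARGO_CFG_TARGET_FEATURE is the comma-separated list cargo exposes to build scripts, which is what the patch iterates over.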
From 711d3e8d902994ed45a3ed951aed446e336a7865 Mon Sep 17 00:00:00 2001
From: AsbjornOlling
Date: Tue, 2 Dec 2025 09:31:17 +0100
Subject: [PATCH 2/5] pass target cpu to cmake cflags march

---
 llama-cpp-sys-2/build.rs | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
index fdd32d46..5f69828f 100644
--- a/llama-cpp-sys-2/build.rs
+++ b/llama-cpp-sys-2/build.rs
@@ -517,6 +517,18 @@ fn main() {
                 .any(|f| f.contains("target-cpu=native"))
         })
         .unwrap_or(false);
+
+    // Also extract the target-cpu value if specified (e.g., x86-64, x86-64-v2, etc.)
+    let target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
+        .ok()
+        .and_then(|rustflags| {
+            rustflags
+                .split('\x1f')
+                .find(|f| f.contains("target-cpu=") && !f.contains("target-cpu=native"))
+                .and_then(|f| f.split("target-cpu=").nth(1))
+                .map(|s| s.to_string())
+        });
+
     if has_native_target_cpu {
         debug_log!("Detected target-cpu=native, compiling with GGML_NATIVE");
         config.define("GGML_NATIVE", "ON");
@@ -525,6 +537,17 @@ fn main() {
     // Get the target features as a comma-separated string
     else if let Ok(features) = std::env::var("CARGO_CFG_TARGET_FEATURE") {
         debug_log!("Compiling with target features: {}", features);
+        config.define("GGML_NATIVE", "OFF");
+
+        // Set baseline architecture from target-cpu if specified
+        // This is critical to prevent the compiler from auto-vectorizing to the build host's capabilities
+        if let Some(ref cpu) = target_cpu {
+            debug_log!("Setting baseline architecture: -march={}", cpu);
+            // Pass the baseline architecture to CMake's C and CXX compilers
+            config.cflag(&format!("-march={}", cpu));
+            config.cxxflag(&format!("-march={}", cpu));
+        }
+
         // list of rust target_features here:
         // https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute
         // GGML config flags have been found by looking at:

From 9d39309509a4d9b9ac7b81839575d391ebf9be57 Mon Sep 17 00:00:00 2001
From: AsbjornOlling
Date: Tue, 2 Dec 2025 14:40:56 +0100
Subject: [PATCH 3/5] remove 'native' feature

---
 examples/embeddings/Cargo.toml | 1 -
 examples/mtmd/Cargo.toml       | 1 -
 examples/reranker/Cargo.toml   | 1 -
 examples/simple/Cargo.toml     | 1 -
 llama-cpp-2/Cargo.toml         | 1 -
 llama-cpp-sys-2/Cargo.toml     | 1 -
 6 files changed, 6 deletions(-)

diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml
index 13858693..e5866549 100644
--- a/examples/embeddings/Cargo.toml
+++ b/examples/embeddings/Cargo.toml
@@ -13,7 +13,6 @@ anyhow = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
diff --git a/examples/mtmd/Cargo.toml b/examples/mtmd/Cargo.toml
index 426ddaa6..3863fc65 100644
--- a/examples/mtmd/Cargo.toml
+++ b/examples/mtmd/Cargo.toml
@@ -11,7 +11,6 @@ clap = { workspace = true, features = ["derive"] }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
diff --git a/examples/reranker/Cargo.toml b/examples/reranker/Cargo.toml
index dfce8e37..2bedf9fb 100644
--- a/examples/reranker/Cargo.toml
+++ b/examples/reranker/Cargo.toml
@@ -14,7 +14,6 @@ encoding_rs = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml
index 964b061e..f8376c65 100644
--- a/examples/simple/Cargo.toml
+++ b/examples/simple/Cargo.toml
@@ -17,7 +17,6 @@ tracing-subscriber = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml
index 026487ef..54d41444 100644
--- a/llama-cpp-2/Cargo.toml
+++ b/llama-cpp-2/Cargo.toml
@@ -26,7 +26,6 @@ cuda-no-vmm = ["cuda", "llama-cpp-sys-2/cuda-no-vmm"]
 metal = ["llama-cpp-sys-2/metal"]
 dynamic-link = ["llama-cpp-sys-2/dynamic-link"]
 vulkan = ["llama-cpp-sys-2/vulkan"]
-native = ["llama-cpp-sys-2/native"]
 openmp = ["llama-cpp-sys-2/openmp"]
 sampler = []
 # Only has an impact on Android.
diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml
index e7f03ef2..e0b17000 100644
--- a/llama-cpp-sys-2/Cargo.toml
+++ b/llama-cpp-sys-2/Cargo.toml
@@ -78,7 +78,6 @@ cuda-no-vmm = ["cuda"]
 metal = []
 dynamic-link = []
 vulkan = []
-native = []
 openmp = []
 # Only has an impact on Android.
 shared-stdcxx = []

From b08e6ecc172eab8d2e86ff7715cba21ffd054a94 Mon Sep 17 00:00:00 2001
From: AsbjornOlling
Date: Tue, 2 Dec 2025 17:32:21 +0100
Subject: [PATCH 4/5] un-invert target_cpu check on linux/aarch64 builds, clean up code

---
 llama-cpp-sys-2/build.rs | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
index 5f69828f..c70ae5fe 100644
--- a/llama-cpp-sys-2/build.rs
+++ b/llama-cpp-sys-2/build.rs
@@ -508,17 +508,7 @@ fn main() {
         }
     }
 
-    // in this next bit, we select which cpu-specific features to compile for
-    // first check for target-cpu=native
-    let has_native_target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
-        .map(|rustflags| {
-            rustflags
-                .split('\x1f')
-                .any(|f| f.contains("target-cpu=native"))
-        })
-        .unwrap_or(false);
-
-    // Also extract the target-cpu value if specified (e.g., x86-64, x86-64-v2, etc.)
+    // extract the target-cpu config value, if specified
     let target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
         .ok()
         .and_then(|rustflags| {
@@ -529,25 +519,27 @@ fn main() {
                 .map(|s| s.to_string())
         });
 
-    if has_native_target_cpu {
+    if target_cpu == Some("native".into()) {
         debug_log!("Detected target-cpu=native, compiling with GGML_NATIVE");
         config.define("GGML_NATIVE", "ON");
     }
-    // if native isn't specified, enable specific features for ggml
-    // Get the target features as a comma-separated string
-    else if let Ok(features) = std::env::var("CARGO_CFG_TARGET_FEATURE") {
-        debug_log!("Compiling with target features: {}", features);
+    // if native isn't specified, enable specific features for ggml instead
+    else {
+        // rust code isn't using `target-cpu=native`, so llama.cpp shouldn't use GGML_NATIVE either
         config.define("GGML_NATIVE", "OFF");
 
-        // Set baseline architecture from target-cpu if specified
-        // This is critical to prevent the compiler from auto-vectorizing to the build host's capabilities
+        // if `target-cpu` is set, also set -march for llama.cpp to the same value
        if let Some(ref cpu) = target_cpu {
             debug_log!("Setting baseline architecture: -march={}", cpu);
-            // Pass the baseline architecture to CMake's C and CXX compilers
             config.cflag(&format!("-march={}", cpu));
             config.cxxflag(&format!("-march={}", cpu));
         }
 
+        // I expect this env var to always be present
+        let features = std::env::var("CARGO_CFG_TARGET_FEATURE")
+            .expect("Env var CARGO_CFG_TARGET_FEATURE not found.");
+        debug_log!("Compiling with target features: {}", features);
+
         // list of rust target_features here:
         // https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute
         // GGML config flags have been found by looking at:
@@ -714,7 +706,7 @@ fn main() {
 
     if matches!(target_os, TargetOs::Linux)
         && target_triple.contains("aarch64")
-        && has_native_target_cpu
+        && target_cpu != Some("native".into())
     {
         // If the target-cpu is not specified as native, we take off the native ARM64 support.
         // It is useful in docker environments where the native feature is not enabled.
From b078bc95dc30e9967c1e7720aef7bae2e51b0405 Mon Sep 17 00:00:00 2001
From: AsbjornOlling
Date: Tue, 2 Dec 2025 17:36:59 +0100
Subject: [PATCH 5/5] fix: don't skip 'target-cpu=native' when extracting target_cpu

---
 llama-cpp-sys-2/build.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
index c70ae5fe..ba96a2f1 100644
--- a/llama-cpp-sys-2/build.rs
+++ b/llama-cpp-sys-2/build.rs
@@ -514,7 +514,7 @@ fn main() {
         .and_then(|rustflags| {
             rustflags
                 .split('\x1f')
-                .find(|f| f.contains("target-cpu=") && !f.contains("target-cpu=native"))
+                .find(|f| f.contains("target-cpu="))
                 .and_then(|f| f.split("target-cpu=").nth(1))
                 .map(|s| s.to_string())
         });
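With the `native` cargo feature gone, CPU tuning for the bundled llama.cpp follows rustc's `target-cpu` setting. As an illustrative usage (not from the patches): building with `RUSTFLAGS="-Ctarget-cpu=native" cargo build` turns on GGML_NATIVE, while a portable baseline such as `rustflags = ["-C", "target-cpu=x86-64-v2"]` in `.cargo/config.toml` reaches the build script via CARGO_ENCODED_RUSTFLAGS, is extracted by the code above, and is forwarded to llama.cpp as `-march=x86-64-v2` plus the matching GGML feature defines.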