Skip to content

Commit 5eb9442

Browse files
committed
feat: rustify LiteScorer — last unrustified compute function (1.8× speedup)
Add rust_lite_score and rust_lite_score_batch to backfire-kernel compute module. Implements word tokenisation, Jaccard overlap, entity extraction, and negation asymmetry matching Python LiteScorer.score() exactly. 5 Rust unit tests + 8 Python parity tests. Benchmark: single 47→26µs, batch (100 pairs) 1599→719µs. Co-Authored-By: Arcane Sapience <protoscience@anulum.li>
1 parent 2e3cf14 commit 5eb9442

File tree

6 files changed

+388
-9
lines changed

6 files changed

+388
-9
lines changed

CHANGELOG.md

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
8282
- `rust_softmax` — row-wise softmax (wired into `_softmax_np()`, threshold ≥100 elements)
8383
- `rust_probs_to_divergence` — NLI divergence (wired into `_probs_to_divergence()`, threshold ≥10 rows)
8484
- `rust_probs_to_confidence` — NLI confidence (wired into `_probs_to_confidence()`, threshold ≥10 rows)
85-
- 34 Rust unit tests + 38 Python parity tests.
85+
- `rust_lite_score` — heuristic divergence scorer (wired into `LiteScorer.score()`)
86+
- `rust_lite_score_batch` — batch heuristic scorer (wired into `LiteScorer.score_batch()`)
87+
- 34 Rust unit tests + 44 Python parity tests.
8688
- **Rust compute benchmark** (`benchmarks/rust_compute_bench.py`): measures all
87-
10 Rust accelerators vs Python fallbacks. Geometric mean **11.1× speedup**;
88-
best: sanitizer_score (benign) 53.8×, worst: word_overlap 0.8× (FFI overhead
89-
dominates for trivial string ops). Key results (median µs, 5000 iterations):
90-
sanitizer_score 57→2.4µs, temporal_freshness 53→2.5µs, softmax(200×3)
91-
204→20µs, probs_to_confidence(200×3) 539→16µs.
89+
12 Rust accelerators vs Python fallbacks. Geometric mean **9.4× speedup**;
90+
best: sanitizer_score (benign) 63.5×, lite_score 1.8×, lite_score_batch
91+
(100 pairs) 2.2×. Key results (median µs, 5000 iterations):
92+
sanitizer_score 58→2.1µs, temporal_freshness 51→2.9µs, softmax(200×3)
93+
352→21µs, probs_to_confidence(200×3) 486→15µs, lite_score 47→26µs.
9294

9395
### Changed
9496
- **God File refactoring** — four large modules split into focused sub-modules

backfire-kernel/crates/backfire-core/src/compute.rs

Lines changed: 144 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@
2525
//! - [`softmax`] — row-wise softmax for NLI logits
2626
//! - [`probs_to_divergence`] — NLI probability → divergence score
2727
//! - [`probs_to_confidence`] — NLI probability → confidence score
28+
//! - [`lite_score`] — lightweight heuristic divergence (no-NLI fallback)
29+
//! - [`lite_score_batch`] — batch version of lite_score
30+
31+
use std::collections::HashSet;
2832

2933
use once_cell::sync::Lazy;
3034
use regex::Regex;
@@ -538,8 +542,6 @@ pub fn extract_reasoning_steps(text: &str) -> Vec<String> {
538542
///
539543
/// Mirrors `_word_overlap()` from `reasoning_verifier.py`.
540544
pub fn word_overlap(text_a: &str, text_b: &str) -> f64 {
541-
use std::collections::HashSet;
542-
543545
let words_a: HashSet<String> = text_a
544546
.split_whitespace()
545547
.map(|w| w.to_lowercase())
@@ -661,6 +663,104 @@ pub fn probs_to_confidence(probs: &[f64], cols: usize) -> Vec<f64> {
661663
result
662664
}
663665

666+
// ── Lite scorer ────────────────────────────────────────────────────
667+
668+
static LITE_WORD_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\b\w+\b").unwrap());
669+
670+
static LITE_ENTITY_RE: Lazy<Regex> =
671+
Lazy::new(|| Regex::new(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b").unwrap());
672+
673+
static LITE_NEGATION_WORDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
674+
[
675+
"not", "no", "never", "neither", "nobody", "nothing", "nowhere", "nor",
676+
"cannot", "can't", "don't", "doesn't", "didn't", "won't", "wouldn't",
677+
"shouldn't", "isn't", "aren't", "wasn't", "weren't", "hasn't",
678+
"haven't", "hadn't",
679+
]
680+
.into_iter()
681+
.collect()
682+
});
683+
684+
/// Lightweight divergence scorer using word overlap, length ratio,
685+
/// named entity heuristics, and negation asymmetry.
686+
///
687+
/// Returns divergence in [0, 1]. 0 = aligned, 1 = contradicted.
688+
/// Mirrors `LiteScorer.score()` from `lite_scorer.py`.
689+
pub fn lite_score(premise: &str, hypothesis: &str) -> f64 {
690+
if premise.is_empty() || hypothesis.is_empty() {
691+
return 0.5;
692+
}
693+
694+
let p_words: HashSet<String> = LITE_WORD_RE
695+
.find_iter(&premise.to_lowercase())
696+
.map(|m| m.as_str().to_string())
697+
.collect();
698+
let h_words: HashSet<String> = LITE_WORD_RE
699+
.find_iter(&hypothesis.to_lowercase())
700+
.map(|m| m.as_str().to_string())
701+
.collect();
702+
703+
if p_words.is_empty() || h_words.is_empty() {
704+
return 0.5;
705+
}
706+
707+
// Jaccard overlap
708+
let intersection = p_words.intersection(&h_words).count();
709+
let union = p_words.union(&h_words).count();
710+
let jaccard = intersection as f64 / union as f64;
711+
712+
// Length ratio penalty
713+
let len_ratio =
714+
premise.len().min(hypothesis.len()) as f64 / premise.len().max(hypothesis.len()) as f64;
715+
716+
// Named entity overlap
717+
let p_ents: HashSet<String> = LITE_ENTITY_RE
718+
.find_iter(premise)
719+
.map(|m| m.as_str().to_string())
720+
.collect();
721+
let h_ents: HashSet<String> = LITE_ENTITY_RE
722+
.find_iter(hypothesis)
723+
.map(|m| m.as_str().to_string())
724+
.collect();
725+
let ent_overlap = if !p_ents.is_empty() && !h_ents.is_empty() {
726+
let ei = p_ents.intersection(&h_ents).count();
727+
let eu = p_ents.union(&h_ents).count();
728+
ei as f64 / eu as f64
729+
} else if !p_ents.is_empty() || !h_ents.is_empty() {
730+
0.0
731+
} else {
732+
0.5
733+
};
734+
735+
// Negation asymmetry
736+
let p_neg = p_words
737+
.iter()
738+
.filter(|w| LITE_NEGATION_WORDS.contains(w.as_str()))
739+
.count();
740+
let h_neg = h_words
741+
.iter()
742+
.filter(|w| LITE_NEGATION_WORDS.contains(w.as_str()))
743+
.count();
744+
let neg_penalty = if (p_neg == 0) != (h_neg == 0) {
745+
0.3
746+
} else {
747+
0.0
748+
};
749+
750+
let similarity = 0.4 * jaccard + 0.2 * len_ratio + 0.2 * ent_overlap + 0.2 * (1.0 - neg_penalty);
751+
(1.0 - similarity).clamp(0.0, 1.0)
752+
}
753+
754+
/// Batch lite scoring for multiple (premise, hypothesis) pairs.
755+
///
756+
/// Mirrors `LiteScorer.score_batch()` from `lite_scorer.py`.
757+
pub fn lite_score_batch(pairs: &[(String, String)]) -> Vec<f64> {
758+
pairs
759+
.iter()
760+
.map(|(p, h)| lite_score(p, h))
761+
.collect()
762+
}
763+
664764
// ── Tests ───────────────────────────────────────────────────────────
665765

666766
#[cfg(test)]
@@ -927,4 +1027,46 @@ mod tests {
9271027
let text = "\u{202E}\u{202E}\u{202E}ab";
9281028
assert!(has_suspicious_unicode(text)); // 3/5 = 60%
9291029
}
1030+
1031+
// -- lite_score --
1032+
1033+
#[test]
1034+
fn test_lite_score_identical() {
1035+
let s = lite_score("The sky is blue today.", "The sky is blue today.");
1036+
assert!(s < 0.15, "identical texts should have low divergence: {s}");
1037+
}
1038+
1039+
#[test]
1040+
fn test_lite_score_contradicted() {
1041+
let s = lite_score(
1042+
"The company never ships products late.",
1043+
"The company always ships products extremely late.",
1044+
);
1045+
// Negation asymmetry should raise divergence above identical-text baseline
1046+
assert!(s > 0.2, "contradicted should have higher divergence: {s}");
1047+
}
1048+
1049+
#[test]
1050+
fn test_lite_score_empty() {
1051+
assert!((lite_score("", "something") - 0.5).abs() < 1e-9);
1052+
assert!((lite_score("hello", "") - 0.5).abs() < 1e-9);
1053+
}
1054+
1055+
#[test]
1056+
fn test_lite_score_entity_mismatch() {
1057+
let s = lite_score("Apple released a new product.", "Samsung released a new product.");
1058+
// Same structure, different entity → entity overlap < 1
1059+
assert!(s > 0.1, "entity mismatch should increase divergence: {s}");
1060+
}
1061+
1062+
#[test]
1063+
fn test_lite_score_batch() {
1064+
let pairs = vec![
1065+
("The sky is blue.".to_string(), "The sky is blue.".to_string()),
1066+
("Yes it works.".to_string(), "No it does not work.".to_string()),
1067+
];
1068+
let results = lite_score_batch(&pairs);
1069+
assert_eq!(results.len(), 2);
1070+
assert!(results[0] < results[1], "identical < contradicted");
1071+
}
9301072
}

backfire-kernel/crates/backfire-ffi/src/lib.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,6 +1281,16 @@ fn rust_probs_to_confidence(probs: Vec<f64>, cols: usize) -> Vec<f64> {
12811281
backfire_core::compute::probs_to_confidence(&probs, cols)
12821282
}
12831283

1284+
/// Python binding: heuristic lite-scorer divergence for a single pair.
///
/// Thin delegation to `backfire_core::compute::lite_score`; returns a
/// divergence in [0, 1] (0 = aligned, 1 = contradicted).
#[pyfunction]
fn rust_lite_score(premise: &str, hypothesis: &str) -> f64 {
    backfire_core::compute::lite_score(premise, hypothesis)
}

/// Python binding: batch lite scoring of (premise, hypothesis) pairs.
///
/// Thin delegation to `backfire_core::compute::lite_score_batch`;
/// returns one divergence per pair, in input order.
#[pyfunction]
fn rust_lite_score_batch(pairs: Vec<(String, String)>) -> Vec<f64> {
    backfire_core::compute::lite_score_batch(&pairs)
}
1293+
12841294
#[pymodule]
12851295
fn backfire_kernel(m: &Bound<'_, PyModule>) -> PyResult<()> {
12861296
// Core safety gate
@@ -1322,5 +1332,7 @@ fn backfire_kernel(m: &Bound<'_, PyModule>) -> PyResult<()> {
13221332
m.add_function(wrap_pyfunction!(rust_softmax, m)?)?;
13231333
m.add_function(wrap_pyfunction!(rust_probs_to_divergence, m)?)?;
13241334
m.add_function(wrap_pyfunction!(rust_probs_to_confidence, m)?)?;
1335+
m.add_function(wrap_pyfunction!(rust_lite_score, m)?)?;
1336+
m.add_function(wrap_pyfunction!(rust_lite_score_batch, m)?)?;
13251337
Ok(())
13261338
}

benchmarks/rust_compute_bench.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,25 @@
9393
OVERLAP_A = "The quick brown fox jumps over the lazy dog near the river bank"
9494
OVERLAP_B = "A quick brown fox leaps over a lazy dog by the river bank"
9595

96+
# Realistic premise/hypothesis pair for the single-call lite_score benchmark:
# overlapping pricing/compliance claims with partial contradictions.
LITE_PREMISE = (
    "The Team Plan costs $19 per user per month and supports up to "
    "25 users with email support. Phone support is available for all "
    "paid plans. We are SOC 2 Type II, ISO 27001, HIPAA, and FedRAMP certified."
)
LITE_HYPOTHESIS = (
    "Team Plan costs $19 per user per month, up to 25 users. "
    "Phone support is Enterprise only. "
    "All paid plans include a 14-day free trial. "
    "SOC 2 Type II and ISO 27001 certified."
)
# Five pair shapes (long realistic pair, contradicted, entity swap,
# negation flip, unrelated) tiled to 100 pairs for the batch benchmark.
LITE_BATCH_PAIRS = [
    (LITE_PREMISE, LITE_HYPOTHESIS),
    ("The sky is blue.", "The sky is green."),
    ("Apple released a new product.", "Samsung released a new product."),
    ("The company never ships late.", "The company always ships late."),
    ("Quantum computing uses qubits.", "The recipe calls for flour and sugar."),
] * 20 # 100 pairs
114+
96115

97116
def _make_softmax_data(rows: int, cols: int = 3) -> np.ndarray:
98117
rng = np.random.default_rng(42)
@@ -425,6 +444,69 @@ def py_verify_numeric(text: str) -> tuple[int, list[tuple[str, str, str, str]],
425444
return count, issues, count == 0
426445

427446

447+
# ─── Lite scorer (Python path) ────────────────────────────────────────

# Tokeniser and entity heuristics mirroring LiteScorer from lite_scorer.py.
_PY_LITE_WORD_RE = re.compile(r"\b\w+\b")
_PY_LITE_ENTITY_RE = re.compile(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b")
# English negation markers checked for asymmetry between the two texts.
_PY_LITE_NEG = frozenset(
    "not no never neither nobody nothing nowhere nor cannot".split()
    + [
        "can't",
        "don't",
        "doesn't",
        "didn't",
        "won't",
        "wouldn't",
        "shouldn't",
        "isn't",
        "aren't",
        "wasn't",
        "weren't",
        "hasn't",
        "haven't",
        "hadn't",
    ]
)


def py_lite_score(premise: str, hypothesis: str) -> float:
    """Heuristic divergence in [0, 1] (0 = aligned, 1 = contradicted).

    Python reference path benchmarked against ``rust_lite_score``: a
    weighted blend of Jaccard word overlap (0.4), character length ratio
    (0.2), capitalised-entity overlap (0.2), and a negation-asymmetry
    slot (0.2), inverted into a divergence.
    """
    # Degenerate inputs carry no signal: return the neutral midpoint.
    if not premise or not hypothesis:
        return 0.5

    premise_words = set(_PY_LITE_WORD_RE.findall(premise.lower()))
    hypothesis_words = set(_PY_LITE_WORD_RE.findall(hypothesis.lower()))
    if not premise_words or not hypothesis_words:
        return 0.5

    # Jaccard overlap of the lowercased word-token sets.
    shared = premise_words & hypothesis_words
    combined = premise_words | hypothesis_words
    jaccard = len(shared) / len(combined)

    # Character-length ratio penalises very different text sizes.
    shorter, longer = sorted((len(premise), len(hypothesis)))
    len_ratio = shorter / longer

    # Capitalised-phrase "entities", matched on the original-case inputs.
    premise_ents = set(_PY_LITE_ENTITY_RE.findall(premise))
    hypothesis_ents = set(_PY_LITE_ENTITY_RE.findall(hypothesis))
    if premise_ents and hypothesis_ents:
        ent_overlap = len(premise_ents & hypothesis_ents) / len(
            premise_ents | hypothesis_ents
        )
    elif premise_ents or hypothesis_ents:
        # Entities on exactly one side: treat as a total mismatch.
        ent_overlap = 0.0
    else:
        # No entities anywhere: neutral contribution.
        ent_overlap = 0.5

    # Negation asymmetry: penalise when exactly one side is negated.
    premise_negated = bool(premise_words & _PY_LITE_NEG)
    hypothesis_negated = bool(hypothesis_words & _PY_LITE_NEG)
    neg_penalty = 0.3 if premise_negated != hypothesis_negated else 0.0

    similarity = (
        0.4 * jaccard
        + 0.2 * len_ratio
        + 0.2 * ent_overlap
        + 0.2 * (1.0 - neg_penalty)
    )
    return max(0.0, min(1.0, 1.0 - similarity))


def py_lite_score_batch(pairs: list[tuple[str, str]]) -> list[float]:
    """Score each (premise, hypothesis) pair with :func:`py_lite_score`."""
    return [py_lite_score(premise, hypothesis) for premise, hypothesis in pairs]
508+
509+
428510
# ─── Benchmark runner ────────────────────────────────────────────────
429511

430512

@@ -451,6 +533,8 @@ def _try_import_rust():
451533
rust_detect_task_type,
452534
rust_extract_reasoning_steps,
453535
rust_has_suspicious_unicode,
536+
rust_lite_score,
537+
rust_lite_score_batch,
454538
rust_probs_to_confidence,
455539
rust_probs_to_divergence,
456540
rust_sanitizer_score,
@@ -471,6 +555,8 @@ def _try_import_rust():
471555
"softmax": rust_softmax,
472556
"probs_to_divergence": rust_probs_to_divergence,
473557
"probs_to_confidence": rust_probs_to_confidence,
558+
"lite_score": rust_lite_score,
559+
"lite_score_batch": rust_lite_score_batch,
474560
}
475561
except ImportError:
476562
return None
@@ -631,6 +717,22 @@ def main():
631717
"rs_fn": rust_fns["probs_to_confidence"] if rust_fns else None,
632718
"rs_args": (pr_large_flat, 3),
633719
},
720+
{
721+
"name": "lite_score",
722+
"description": "Heuristic divergence scorer",
723+
"py_fn": py_lite_score,
724+
"py_args": (LITE_PREMISE, LITE_HYPOTHESIS),
725+
"rs_fn": rust_fns["lite_score"] if rust_fns else None,
726+
"rs_args": (LITE_PREMISE, LITE_HYPOTHESIS),
727+
},
728+
{
729+
"name": "lite_score_batch (100 pairs)",
730+
"description": "Batch heuristic scorer",
731+
"py_fn": py_lite_score_batch,
732+
"py_args": (LITE_BATCH_PAIRS,),
733+
"rs_fn": rust_fns["lite_score_batch"] if rust_fns else None,
734+
"rs_args": (LITE_BATCH_PAIRS,),
735+
},
634736
]
635737

636738
print(f"Rust Compute vs Python Benchmark ({iters} iterations)")

src/director_ai/core/scoring/lite_scorer.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,26 @@
2525

2626
__all__ = ["LiteScorer"]
2727

28+
try:
29+
from backfire_kernel import rust_lite_score, rust_lite_score_batch
30+
31+
_RUST_LITE = True
32+
except ImportError:
33+
_RUST_LITE = False
34+
2835

2936
class LiteScorer:
30-
"""Fast divergence scorer without any ML model dependency."""
37+
"""Fast divergence scorer without any ML model dependency.
38+
39+
Uses Rust accelerator when available for regex tokenisation
40+
and set operations.
41+
"""
3142

3243
def score(self, premise: str, hypothesis: str) -> float:
3344
"""Compute divergence in [0, 1]. 0 = aligned, 1 = contradicted."""
45+
if _RUST_LITE:
46+
return float(rust_lite_score(premise, hypothesis))
47+
3448
if not premise or not hypothesis:
3549
return 0.5
3650

@@ -77,6 +91,8 @@ def score(self, premise: str, hypothesis: str) -> float:
7791

7892
def score_batch(self, pairs: list[tuple[str, str]]) -> list[float]:
7993
"""Score multiple (premise, hypothesis) pairs."""
94+
if _RUST_LITE:
95+
return [float(v) for v in rust_lite_score_batch(pairs)]
8096
return [self.score(p, h) for p, h in pairs]
8197

8298
def review(

0 commit comments

Comments
 (0)