Skip to content

Commit 5eb9442

Browse files
committed
feat: rustify LiteScorer — last unrustified compute function (1.8× speedup)
Add rust_lite_score and rust_lite_score_batch to backfire-kernel compute module. Implements word tokenisation, Jaccard overlap, entity extraction, and negation asymmetry matching Python LiteScorer.score() exactly. 5 Rust unit tests + 8 Python parity tests. Benchmark: single 47→26µs, batch (100 pairs) 1599→719µs. Co-Authored-By: Arcane Sapience <protoscience@anulum.li>
1 parent 2e3cf14 commit 5eb9442

File tree

6 files changed

+388
-9
lines changed

6 files changed

+388
-9
lines changed

CHANGELOG.md

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
8282
- `rust_softmax` — row-wise softmax (wired into `_softmax_np()`, threshold ≥100 elements)
8383
- `rust_probs_to_divergence` — NLI divergence (wired into `_probs_to_divergence()`, threshold ≥10 rows)
8484
- `rust_probs_to_confidence` — NLI confidence (wired into `_probs_to_confidence()`, threshold ≥10 rows)
85-
- 34 Rust unit tests + 38 Python parity tests.
85+
- `rust_lite_score` — heuristic divergence scorer (wired into `LiteScorer.score()`)
86+
- `rust_lite_score_batch` — batch heuristic scorer (wired into `LiteScorer.score_batch()`)
87+
- 34 Rust unit tests + 44 Python parity tests.
8688
- **Rust compute benchmark** (`benchmarks/rust_compute_bench.py`): measures all
87-
10 Rust accelerators vs Python fallbacks. Geometric mean **11.1× speedup**;
88-
best: sanitizer_score (benign) 53.8×, worst: word_overlap 0.8× (FFI overhead
89-
dominates for trivial string ops). Key results (median µs, 5000 iterations):
90-
sanitizer_score 57→2.4µs, temporal_freshness 53→2.5µs, softmax(200×3)
91-
204→20µs, probs_to_confidence(200×3) 539→16µs.
89+
12 Rust accelerators vs Python fallbacks. Geometric mean **9.4× speedup**;
90+
best: sanitizer_score (benign) 63.5×, lite_score 1.8×, lite_score_batch
91+
(100 pairs) 2.2×. Key results (median µs, 5000 iterations):
92+
sanitizer_score 58→2.1µs, temporal_freshness 51→2.9µs, softmax(200×3)
93+
352→21µs, probs_to_confidence(200×3) 486→15µs, lite_score 47→26µs.
9294

9395
### Changed
9496
- **God File refactoring** — four large modules split into focused sub-modules

backfire-kernel/crates/backfire-core/src/compute.rs

Lines changed: 144 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@
2525
//! - [`softmax`] — row-wise softmax for NLI logits
2626
//! - [`probs_to_divergence`] — NLI probability → divergence score
2727
//! - [`probs_to_confidence`] — NLI probability → confidence score
28+
//! - [`lite_score`] — lightweight heuristic divergence (no-NLI fallback)
29+
//! - [`lite_score_batch`] — batch version of lite_score
30+
31+
use std::collections::HashSet;
2832

2933
use once_cell::sync::Lazy;
3034
use regex::Regex;
@@ -538,8 +542,6 @@ pub fn extract_reasoning_steps(text: &str) -> Vec<String> {
538542
///
539543
/// Mirrors `_word_overlap()` from `reasoning_verifier.py`.
540544
pub fn word_overlap(text_a: &str, text_b: &str) -> f64 {
541-
use std::collections::HashSet;
542-
543545
let words_a: HashSet<String> = text_a
544546
.split_whitespace()
545547
.map(|w| w.to_lowercase())
@@ -661,6 +663,104 @@ pub fn probs_to_confidence(probs: &[f64], cols: usize) -> Vec<f64> {
661663
result
662664
}
663665

666+
// ── Lite scorer ────────────────────────────────────────────────────
667+
668+
static LITE_WORD_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\b\w+\b").unwrap());
669+
670+
static LITE_ENTITY_RE: Lazy<Regex> =
671+
Lazy::new(|| Regex::new(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b").unwrap());
672+
673+
static LITE_NEGATION_WORDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
674+
[
675+
"not", "no", "never", "neither", "nobody", "nothing", "nowhere", "nor",
676+
"cannot", "can't", "don't", "doesn't", "didn't", "won't", "wouldn't",
677+
"shouldn't", "isn't", "aren't", "wasn't", "weren't", "hasn't",
678+
"haven't", "hadn't",
679+
]
680+
.into_iter()
681+
.collect()
682+
});
683+
684+
/// Lightweight divergence scorer using word overlap, length ratio,
685+
/// named entity heuristics, and negation asymmetry.
686+
///
687+
/// Returns divergence in [0, 1]. 0 = aligned, 1 = contradicted.
688+
/// Mirrors `LiteScorer.score()` from `lite_scorer.py`.
689+
pub fn lite_score(premise: &str, hypothesis: &str) -> f64 {
690+
if premise.is_empty() || hypothesis.is_empty() {
691+
return 0.5;
692+
}
693+
694+
let p_words: HashSet<String> = LITE_WORD_RE
695+
.find_iter(&premise.to_lowercase())
696+
.map(|m| m.as_str().to_string())
697+
.collect();
698+
let h_words: HashSet<String> = LITE_WORD_RE
699+
.find_iter(&hypothesis.to_lowercase())
700+
.map(|m| m.as_str().to_string())
701+
.collect();
702+
703+
if p_words.is_empty() || h_words.is_empty() {
704+
return 0.5;
705+
}
706+
707+
// Jaccard overlap
708+
let intersection = p_words.intersection(&h_words).count();
709+
let union = p_words.union(&h_words).count();
710+
let jaccard = intersection as f64 / union as f64;
711+
712+
// Length ratio penalty
713+
let len_ratio =
714+
premise.len().min(hypothesis.len()) as f64 / premise.len().max(hypothesis.len()) as f64;
715+
716+
// Named entity overlap
717+
let p_ents: HashSet<String> = LITE_ENTITY_RE
718+
.find_iter(premise)
719+
.map(|m| m.as_str().to_string())
720+
.collect();
721+
let h_ents: HashSet<String> = LITE_ENTITY_RE
722+
.find_iter(hypothesis)
723+
.map(|m| m.as_str().to_string())
724+
.collect();
725+
let ent_overlap = if !p_ents.is_empty() && !h_ents.is_empty() {
726+
let ei = p_ents.intersection(&h_ents).count();
727+
let eu = p_ents.union(&h_ents).count();
728+
ei as f64 / eu as f64
729+
} else if !p_ents.is_empty() || !h_ents.is_empty() {
730+
0.0
731+
} else {
732+
0.5
733+
};
734+
735+
// Negation asymmetry
736+
let p_neg = p_words
737+
.iter()
738+
.filter(|w| LITE_NEGATION_WORDS.contains(w.as_str()))
739+
.count();
740+
let h_neg = h_words
741+
.iter()
742+
.filter(|w| LITE_NEGATION_WORDS.contains(w.as_str()))
743+
.count();
744+
let neg_penalty = if (p_neg == 0) != (h_neg == 0) {
745+
0.3
746+
} else {
747+
0.0
748+
};
749+
750+
let similarity = 0.4 * jaccard + 0.2 * len_ratio + 0.2 * ent_overlap + 0.2 * (1.0 - neg_penalty);
751+
(1.0 - similarity).clamp(0.0, 1.0)
752+
}
753+
754+
/// Batch lite scoring for multiple (premise, hypothesis) pairs.
755+
///
756+
/// Mirrors `LiteScorer.score_batch()` from `lite_scorer.py`.
757+
pub fn lite_score_batch(pairs: &[(String, String)]) -> Vec<f64> {
758+
pairs
759+
.iter()
760+
.map(|(p, h)| lite_score(p, h))
761+
.collect()
762+
}
763+
664764
// ── Tests ───────────────────────────────────────────────────────────
665765

666766
#[cfg(test)]
@@ -927,4 +1027,46 @@ mod tests {
9271027
let text = "\u{202E}\u{202E}\u{202E}ab";
9281028
assert!(has_suspicious_unicode(text)); // 3/5 = 60%
9291029
}
1030+
1031+
// -- lite_score --
1032+
1033+
#[test]
1034+
fn test_lite_score_identical() {
1035+
let s = lite_score("The sky is blue today.", "The sky is blue today.");
1036+
assert!(s < 0.15, "identical texts should have low divergence: {s}");
1037+
}
1038+
1039+
#[test]
1040+
fn test_lite_score_contradicted() {
1041+
let s = lite_score(
1042+
"The company never ships products late.",
1043+
"The company always ships products extremely late.",
1044+
);
1045+
// Negation asymmetry should raise divergence above identical-text baseline
1046+
assert!(s > 0.2, "contradicted should have higher divergence: {s}");
1047+
}
1048+
1049+
#[test]
1050+
fn test_lite_score_empty() {
1051+
assert!((lite_score("", "something") - 0.5).abs() < 1e-9);
1052+
assert!((lite_score("hello", "") - 0.5).abs() < 1e-9);
1053+
}
1054+
1055+
#[test]
1056+
fn test_lite_score_entity_mismatch() {
1057+
let s = lite_score("Apple released a new product.", "Samsung released a new product.");
1058+
// Same structure, different entity → entity overlap < 1
1059+
assert!(s > 0.1, "entity mismatch should increase divergence: {s}");
1060+
}
1061+
1062+
#[test]
1063+
fn test_lite_score_batch() {
1064+
let pairs = vec![
1065+
("The sky is blue.".to_string(), "The sky is blue.".to_string()),
1066+
("Yes it works.".to_string(), "No it does not work.".to_string()),
1067+
];
1068+
let results = lite_score_batch(&pairs);
1069+
assert_eq!(results.len(), 2);
1070+
assert!(results[0] < results[1], "identical < contradicted");
1071+
}
9301072
}

backfire-kernel/crates/backfire-ffi/src/lib.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,6 +1281,16 @@ fn rust_probs_to_confidence(probs: Vec<f64>, cols: usize) -> Vec<f64> {
12811281
backfire_core::compute::probs_to_confidence(&probs, cols)
12821282
}
12831283

1284+
/// Python binding: heuristic lite-scorer divergence for a single pair.
///
/// Thin delegation to `backfire_core::compute::lite_score`; returns a
/// divergence in [0, 1] (0 = aligned, 1 = contradicted).
#[pyfunction]
fn rust_lite_score(premise: &str, hypothesis: &str) -> f64 {
    backfire_core::compute::lite_score(premise, hypothesis)
}

/// Python binding: batch lite scoring of (premise, hypothesis) pairs.
///
/// Thin delegation to `backfire_core::compute::lite_score_batch`;
/// returns one divergence per pair, in input order.
#[pyfunction]
fn rust_lite_score_batch(pairs: Vec<(String, String)>) -> Vec<f64> {
    backfire_core::compute::lite_score_batch(&pairs)
}
1293+
12841294
#[pymodule]
12851295
fn backfire_kernel(m: &Bound<'_, PyModule>) -> PyResult<()> {
12861296
// Core safety gate
@@ -1322,5 +1332,7 @@ fn backfire_kernel(m: &Bound<'_, PyModule>) -> PyResult<()> {
13221332
m.add_function(wrap_pyfunction!(rust_softmax, m)?)?;
13231333
m.add_function(wrap_pyfunction!(rust_probs_to_divergence, m)?)?;
13241334
m.add_function(wrap_pyfunction!(rust_probs_to_confidence, m)?)?;
1335+
m.add_function(wrap_pyfunction!(rust_lite_score, m)?)?;
1336+
m.add_function(wrap_pyfunction!(rust_lite_score_batch, m)?)?;
13251337
Ok(())
13261338
}

benchmarks/rust_compute_bench.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,25 @@
9393
OVERLAP_A = "The quick brown fox jumps over the lazy dog near the river bank"
9494
OVERLAP_B = "A quick brown fox leaps over a lazy dog by the river bank"
9595

96+
# Realistic premise/hypothesis pair for the single-call lite_score benchmark:
# overlapping pricing/compliance claims with partial contradictions.
LITE_PREMISE = (
    "The Team Plan costs $19 per user per month and supports up to "
    "25 users with email support. Phone support is available for all "
    "paid plans. We are SOC 2 Type II, ISO 27001, HIPAA, and FedRAMP certified."
)
LITE_HYPOTHESIS = (
    "Team Plan costs $19 per user per month, up to 25 users. "
    "Phone support is Enterprise only. "
    "All paid plans include a 14-day free trial. "
    "SOC 2 Type II and ISO 27001 certified."
)
# Five pair shapes (long realistic pair, contradicted, entity swap,
# negation flip, unrelated) tiled to 100 pairs for the batch benchmark.
LITE_BATCH_PAIRS = [
    (LITE_PREMISE, LITE_HYPOTHESIS),
    ("The sky is blue.", "The sky is green."),
    ("Apple released a new product.", "Samsung released a new product."),
    ("The company never ships late.", "The company always ships late."),
    ("Quantum computing uses qubits.", "The recipe calls for flour and sugar."),
] * 20 # 100 pairs
114+
96115

97116
def _make_softmax_data(rows: int, cols: int = 3) -> np.ndarray:
98117
rng = np.random.default_rng(42)
@@ -425,6 +444,69 @@ def py_verify_numeric(text: str) -> tuple[int, list[tuple[str, str, str, str]],
425444
return count, issues, count == 0
426445

427446

447+
# ─── Lite scorer (Python path) ────────────────────────────────────────

# Tokeniser and entity heuristics mirroring LiteScorer from lite_scorer.py.
_PY_LITE_WORD_RE = re.compile(r"\b\w+\b")
_PY_LITE_ENTITY_RE = re.compile(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b")
# English negation markers checked for asymmetry between the two texts.
_PY_LITE_NEG = frozenset(
    "not no never neither nobody nothing nowhere nor cannot".split()
    + [
        "can't",
        "don't",
        "doesn't",
        "didn't",
        "won't",
        "wouldn't",
        "shouldn't",
        "isn't",
        "aren't",
        "wasn't",
        "weren't",
        "hasn't",
        "haven't",
        "hadn't",
    ]
)


def py_lite_score(premise: str, hypothesis: str) -> float:
    """Heuristic divergence in [0, 1] (0 = aligned, 1 = contradicted).

    Python reference path benchmarked against ``rust_lite_score``: a
    weighted blend of Jaccard word overlap (0.4), character length ratio
    (0.2), capitalised-entity overlap (0.2), and a negation-asymmetry
    slot (0.2), inverted into a divergence.
    """
    # Degenerate inputs carry no signal: return the neutral midpoint.
    if not premise or not hypothesis:
        return 0.5

    premise_words = set(_PY_LITE_WORD_RE.findall(premise.lower()))
    hypothesis_words = set(_PY_LITE_WORD_RE.findall(hypothesis.lower()))
    if not premise_words or not hypothesis_words:
        return 0.5

    # Jaccard overlap of the lowercased word-token sets.
    shared = premise_words & hypothesis_words
    combined = premise_words | hypothesis_words
    jaccard = len(shared) / len(combined)

    # Character-length ratio penalises very different text sizes.
    shorter, longer = sorted((len(premise), len(hypothesis)))
    len_ratio = shorter / longer

    # Capitalised-phrase "entities", matched on the original-case inputs.
    premise_ents = set(_PY_LITE_ENTITY_RE.findall(premise))
    hypothesis_ents = set(_PY_LITE_ENTITY_RE.findall(hypothesis))
    if premise_ents and hypothesis_ents:
        ent_overlap = len(premise_ents & hypothesis_ents) / len(
            premise_ents | hypothesis_ents
        )
    elif premise_ents or hypothesis_ents:
        # Entities on exactly one side: treat as a total mismatch.
        ent_overlap = 0.0
    else:
        # No entities anywhere: neutral contribution.
        ent_overlap = 0.5

    # Negation asymmetry: penalise when exactly one side is negated.
    premise_negated = bool(premise_words & _PY_LITE_NEG)
    hypothesis_negated = bool(hypothesis_words & _PY_LITE_NEG)
    neg_penalty = 0.3 if premise_negated != hypothesis_negated else 0.0

    similarity = (
        0.4 * jaccard
        + 0.2 * len_ratio
        + 0.2 * ent_overlap
        + 0.2 * (1.0 - neg_penalty)
    )
    return max(0.0, min(1.0, 1.0 - similarity))


def py_lite_score_batch(pairs: list[tuple[str, str]]) -> list[float]:
    """Score each (premise, hypothesis) pair with :func:`py_lite_score`."""
    return [py_lite_score(premise, hypothesis) for premise, hypothesis in pairs]
508+
509+
428510
# ─── Benchmark runner ────────────────────────────────────────────────
429511

430512

@@ -451,6 +533,8 @@ def _try_import_rust():
451533
rust_detect_task_type,
452534
rust_extract_reasoning_steps,
453535
rust_has_suspicious_unicode,
536+
rust_lite_score,
537+
rust_lite_score_batch,
454538
rust_probs_to_confidence,
455539
rust_probs_to_divergence,
456540
rust_sanitizer_score,
@@ -471,6 +555,8 @@ def _try_import_rust():
471555
"softmax": rust_softmax,
472556
"probs_to_divergence": rust_probs_to_divergence,
473557
"probs_to_confidence": rust_probs_to_confidence,
558+
"lite_score": rust_lite_score,
559+
"lite_score_batch": rust_lite_score_batch,
474560
}
475561
except ImportError:
476562
return None
@@ -631,6 +717,22 @@ def main():
631717
"rs_fn": rust_fns["probs_to_confidence"] if rust_fns else None,
632718
"rs_args": (pr_large_flat, 3),
633719
},
720+
{
721+
"name": "lite_score",
722+
"description": "Heuristic divergence scorer",
723+
"py_fn": py_lite_score,
724+
"py_args": (LITE_PREMISE, LITE_HYPOTHESIS),
725+
"rs_fn": rust_fns["lite_score"] if rust_fns else None,
726+
"rs_args": (LITE_PREMISE, LITE_HYPOTHESIS),
727+
},
728+
{
729+
"name": "lite_score_batch (100 pairs)",
730+
"description": "Batch heuristic scorer",
731+
"py_fn": py_lite_score_batch,
732+
"py_args": (LITE_BATCH_PAIRS,),
733+
"rs_fn": rust_fns["lite_score_batch"] if rust_fns else None,
734+
"rs_args": (LITE_BATCH_PAIRS,),
735+
},
634736
]
635737

636738
print(f"Rust Compute vs Python Benchmark ({iters} iterations)")

src/director_ai/core/scoring/lite_scorer.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,26 @@
2525

2626
__all__ = ["LiteScorer"]
2727

28+
try:
29+
from backfire_kernel import rust_lite_score, rust_lite_score_batch
30+
31+
_RUST_LITE = True
32+
except ImportError:
33+
_RUST_LITE = False
34+
2835

2936
class LiteScorer:
30-
"""Fast divergence scorer without any ML model dependency."""
37+
"""Fast divergence scorer without any ML model dependency.
38+
39+
Uses Rust accelerator when available for regex tokenisation
40+
and set operations.
41+
"""
3142

3243
def score(self, premise: str, hypothesis: str) -> float:
3344
"""Compute divergence in [0, 1]. 0 = aligned, 1 = contradicted."""
45+
if _RUST_LITE:
46+
return float(rust_lite_score(premise, hypothesis))
47+
3448
if not premise or not hypothesis:
3549
return 0.5
3650

@@ -77,6 +91,8 @@ def score(self, premise: str, hypothesis: str) -> float:
7791

7892
def score_batch(self, pairs: list[tuple[str, str]]) -> list[float]:
7993
"""Score multiple (premise, hypothesis) pairs."""
94+
if _RUST_LITE:
95+
return [float(v) for v in rust_lite_score_batch(pairs)]
8096
return [self.score(p, h) for p, h in pairs]
8197

8298
def review(

0 commit comments

Comments
 (0)