Skip to content

Commit

Permalink
attach normalize_case to SimpleString and rename it to mimic_ascii_case
Browse files Browse the repository at this point in the history
  • Loading branch information
Fogapod committed Nov 8, 2023
1 parent 2826b35 commit 0cb7429
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 82 deletions.
4 changes: 2 additions & 2 deletions src/replacement.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::utils::{normalize_case, SimpleString};
use crate::utils::SimpleString;

use rand::seq::SliceRandom;
use regex::{Captures, Regex};
Expand Down Expand Up @@ -62,7 +62,7 @@ impl ReplacementCallback {
Self::Noop => caps[0].to_owned(),
Self::Simple(string) => {
if normalize_case_ {
normalize_case(&caps[0], string)
string.mimic_ascii_case(&caps[0])
} else {
string.body.clone()
}
Expand Down
165 changes: 85 additions & 80 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,126 +20,131 @@ impl SimpleString {
}
}

/// try to learn something about strings and adjust case accordingly. all logic is currently
/// ascii only
/// tried using Cows but my computer exploded. TODO: try that again
pub(crate) fn normalize_case(old: &str, new: &SimpleString) -> String {
let mut body = new.body.clone();

// assume lowercase ascii is "weakest" form. anything else returns as is
if !new.is_ascii_lowercase {
return body;
}
impl SimpleString {
/// Returns a copy of this string's body with its letter case adjusted to mimic `original`.
/// All logic is currently ASCII-only.
// tried using Cows but my computer exploded. TODO: try that again
pub(crate) fn mimic_ascii_case(&self, original: &str) -> String {
let mut body = self.body.clone();

// assume lowercase ascii is "weakest" form. anything else returns as is
if !self.is_ascii_lowercase {
return body;
}

// if original was all uppercase we force all uppercase for replacement. this is likely to
// give false positives on short inputs like "I" or abbreviations
if old.chars().all(|c| c.is_ascii_uppercase()) {
return body.to_ascii_uppercase();
}
// if original was all uppercase we force all uppercase for replacement. this is likely to
// give false positives on short inputs like "I" or abbreviations
if original.chars().all(|c| c.is_ascii_uppercase()) {
return body.to_ascii_uppercase();
}

// no constraints if original was all lowercase
if old.chars().all(|c| !c.is_ascii() || c.is_ascii_lowercase()) {
return body;
}
// no constraints if original was all lowercase
if original
.chars()
.all(|c| !c.is_ascii() || c.is_ascii_lowercase())
{
return body;
}

if old.chars().count() == new.char_count {
for (i, c_old) in old.chars().enumerate() {
if c_old.is_ascii_lowercase() {
body.get_mut(i..i + 1)
.expect("strings have same len")
.make_ascii_lowercase()
} else if c_old.is_ascii_uppercase() {
body.get_mut(i..i + 1)
.expect("strings have same len")
.make_ascii_uppercase()
// TODO: SIMD this
if original.chars().count() == self.char_count {
for (i, c_old) in original.chars().enumerate() {
if c_old.is_ascii_lowercase() {
body.get_mut(i..i + 1)
.expect("strings have same len")
.make_ascii_lowercase()
} else if c_old.is_ascii_uppercase() {
body.get_mut(i..i + 1)
.expect("strings have same len")
.make_ascii_uppercase()
}
}
}
}

body
body
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn normalize_case_input_lowercase() {
assert_eq!(normalize_case("hello", &SimpleString::new("bye")), "bye");
assert_eq!(normalize_case("hello", &SimpleString::new("Bye")), "Bye");
assert_eq!(normalize_case("hello", &SimpleString::new("bYE")), "bYE");
fn string_counts_chars() {
assert_eq!(SimpleString::new("hello").char_count, 5);
assert_eq!(SimpleString::new("привет").char_count, 6);
}

#[test]
fn string_detects_ascii_only() {
    // ascii letters, as well as ascii digits and punctuation, count as ascii only
    assert!(SimpleString::new("Hello").is_ascii_only);
    assert!(SimpleString::new("1!@$#$").is_ascii_only);
    // cyrillic text is not ascii
    assert!(!SimpleString::new("Привет").is_ascii_only);
}

#[test]
fn string_detects_ascii_lowercase() {
    assert!(SimpleString::new("hello").is_ascii_lowercase);
    // a single uppercase letter disqualifies the string
    assert!(!SimpleString::new("Hello").is_ascii_lowercase);
    // ascii without any letters is not considered lowercase
    assert!(!SimpleString::new("1!@$#$").is_ascii_lowercase);
    // lowercase, but not ascii
    assert!(!SimpleString::new("привет").is_ascii_lowercase);
}

#[test]
fn string_detects_ascii_uppercase() {
    assert!(SimpleString::new("HELLO").is_ascii_uppercase);
    // a single lowercase letter disqualifies the string
    assert!(!SimpleString::new("Hello").is_ascii_uppercase);
    // ascii without any letters is not considered uppercase
    assert!(!SimpleString::new("1!@$#$").is_ascii_uppercase);
    // uppercase, but not ascii
    assert!(!SimpleString::new("ПРИВЕТ").is_ascii_uppercase);
}

#[test]
fn mimic_case_input_lowercase() {
assert_eq!(SimpleString::new("bye").mimic_ascii_case("hello"), "bye");
assert_eq!(SimpleString::new("Bye").mimic_ascii_case("hello"), "Bye");
assert_eq!(SimpleString::new("bYE").mimic_ascii_case("hello"), "bYE");
}

// questionable rule, becomes overcomplicated
// #[test]
// fn normalize_case_input_titled() {
// fn mimic_case_input_titled() {
// assert_eq!(
// normalize_case("Hello", &SimpleString::new("bye")),
// SimpleString::new("bye").mimic_ascii_case("Hello"),
// "Bye"
// );
// // has case variation -- do not touch it
// assert_eq!(
// normalize_case("Hello", &SimpleString::new("bYe")),
// SimpleString::new("bYe").mimic_ascii_case("Hello"),
// "bYe"
// );
// // not ascii uppercase
// assert_eq!(
// normalize_case("Привет", &SimpleString::new("bye")),
// SimpleString::new("bye").mimic_ascii_case("Привет"),
// "bye"
// );
// }

#[test]
fn normalize_case_input_uppercase() {
assert_eq!(normalize_case("HELLO", &SimpleString::new("bye")), "BYE");
fn mimic_case_input_uppercase() {
assert_eq!(SimpleString::new("bye").mimic_ascii_case("HELLO"), "BYE");
// has case variation -- do not touch it
assert_eq!(normalize_case("HELLO", &SimpleString::new("bYE")), "bYE");
assert_eq!(SimpleString::new("bYE").mimic_ascii_case("HELLO"), "bYE");
// not ascii uppercase
assert_eq!(normalize_case("ПРИВЕТ", &SimpleString::new("bye")), "bye");
assert_eq!(normalize_case("HELLO", &SimpleString::new("пока")), "пока");
assert_eq!(SimpleString::new("bye").mimic_ascii_case("ПРИВЕТ"), "bye");
assert_eq!(SimpleString::new("пока").mimic_ascii_case("HELLO"), "пока");
}

#[test]
fn normalize_case_input_different_case() {
assert_eq!(normalize_case("hELLO", &SimpleString::new("bye")), "bye");
fn mimic_case_input_different_case() {
assert_eq!(SimpleString::new("bye").mimic_ascii_case("hELLO"), "bye");
}

#[test]
fn normalize_case_input_different_case_same_len() {
fn mimic_case_input_different_case_same_len() {
assert_eq!(
normalize_case("hELLO", &SimpleString::new("byeee")),
SimpleString::new("byeee").mimic_ascii_case("hELLO"),
"bYEEE"
);
assert_eq!(normalize_case("hI!", &SimpleString::new("bye")), "bYe");
assert_eq!(normalize_case("hI!", &SimpleString::new("Bye")), "Bye");
}

#[test]
fn string_counts_chars() {
assert_eq!(SimpleString::new("hello").char_count, 5);
assert_eq!(SimpleString::new("привет").char_count, 6);
}

#[test]
fn string_detects_ascii_only() {
assert_eq!(SimpleString::new("Hello").is_ascii_only, true);
assert_eq!(SimpleString::new("1!@$#$").is_ascii_only, true);
assert_eq!(SimpleString::new("Привет").is_ascii_only, false);
}

#[test]
fn string_detects_ascii_lowercase() {
assert_eq!(SimpleString::new("hello").is_ascii_lowercase, true);
assert_eq!(SimpleString::new("Hello").is_ascii_lowercase, false);
assert_eq!(SimpleString::new("1!@$#$").is_ascii_lowercase, false);
assert_eq!(SimpleString::new("привет").is_ascii_lowercase, false);
}

#[test]
fn string_detects_ascii_uppercase() {
assert_eq!(SimpleString::new("HELLO").is_ascii_uppercase, true);
assert_eq!(SimpleString::new("Hello").is_ascii_uppercase, false);
assert_eq!(SimpleString::new("1!@$#$").is_ascii_uppercase, false);
assert_eq!(SimpleString::new("ПРИВЕТ").is_ascii_uppercase, false);
assert_eq!(SimpleString::new("bye").mimic_ascii_case("hI!"), "bYe");
assert_eq!(SimpleString::new("Bye").mimic_ascii_case("hI!"), "Bye");
}
}

0 comments on commit 0cb7429

Please sign in to comment.