From 9ca79ead541715afef0242232ef6733db37db6c7 Mon Sep 17 00:00:00 2001 From: Eugene Date: Tue, 27 Feb 2024 00:16:36 +0400 Subject: [PATCH] case mimicking improvements 2 separate literal string implementations - lazy and precomputed to avoid string clone no longer limited to ascii added examples bench sprinkled must_use --- Cargo.toml | 4 + benches/accents.rs | 3 +- benches/examples.rs | 55 ++++++ benches/literal_string.rs | 35 +--- src/accent.rs | 3 +- src/deserialize.rs | 19 +- src/intensity.rs | 1 + src/lib.rs | 3 +- src/match.rs | 47 ++++- src/pass.rs | 7 +- src/tag_impls.rs | 39 +++-- src/utils.rs | 354 +++++++++++++++++++++++++------------- tests/json.rs | 2 +- tests/ron.rs | 2 +- 14 files changed, 396 insertions(+), 178 deletions(-) create mode 100644 benches/examples.rs diff --git a/Cargo.toml b/Cargo.toml index 940cecf..ef05f9a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,10 @@ required-features = ["cli"] name = "accents" harness = false +[[bench]] +name = "examples" +harness = false + [[bench]] name = "literal_string" harness = false diff --git a/benches/accents.rs b/benches/accents.rs index 14f872f..ac43abc 100644 --- a/benches/accents.rs +++ b/benches/accents.rs @@ -23,6 +23,7 @@ fn accents(c: &mut Criterion) { let lines = read_sample_file_lines(); let mut g = c.benchmark_group("accents"); + g.sampling_mode(criterion::SamplingMode::Linear); for name in [ "original", "literal", "any", "weights", "upper", "lower", "concat", @@ -32,7 +33,7 @@ fn accents(c: &mut Criterion) { g.bench_function(name, |b| { b.iter(|| { for line in &lines { - accent.say_it(line, 0); + let _ = accent.say_it(line, 0); } }) }); diff --git a/benches/examples.rs b/benches/examples.rs new file mode 100644 index 0000000..2e6f6f9 --- /dev/null +++ b/benches/examples.rs @@ -0,0 +1,55 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use sayit::Accent; +use std::{fs, path::PathBuf}; + +pub fn read_accent(filename: &PathBuf) -> Accent { + let content = 
fs::read_to_string(filename).expect("reading accent definition"); + ron::from_str::(&content) + .unwrap_or_else(|_| panic!("parsing accent {}", filename.display())) +} + +pub fn read_sample_file() -> String { + fs::read_to_string("tests/sample_text.txt").expect("reading sample text") +} + +pub fn read_sample_file_lines() -> Vec { + read_sample_file() + .lines() + .filter(|&l| !(l.is_empty() || l.eq(" :"))) + .map(|s| s.to_owned()) + .collect() +} + +fn examples(c: &mut Criterion) { + let lines = read_sample_file_lines(); + + let mut g = c.benchmark_group("examples"); + g.sampling_mode(criterion::SamplingMode::Linear); + + for entry in fs::read_dir("examples").unwrap() { + let path = entry.unwrap().path(); + + if !path.is_file() { + continue; + } + + if !path.extension().is_some_and(|ext| ext == "ron") { + continue; + } + + let accent = read_accent(&path); + let accent_name = path.file_stem().unwrap().to_string_lossy(); + + g.bench_function(accent_name, |b| { + b.iter(|| { + for line in &lines { + let _ = accent.say_it(line, 0); + } + }) + }); + } + g.finish(); +} + +criterion_group!(benches, examples); +criterion_main!(benches); diff --git a/benches/literal_string.rs b/benches/literal_string.rs index 2b5fd5f..107c66c 100644 --- a/benches/literal_string.rs +++ b/benches/literal_string.rs @@ -1,7 +1,5 @@ -use criterion::criterion_group; -use criterion::criterion_main; -use criterion::Criterion; -use sayit::utils::LiteralString; +use criterion::{criterion_group, criterion_main, Criterion, SamplingMode}; +use sayit::utils::{LiteralString, PrecomputedLiteral}; use std::fs; pub fn read_sample_file() -> String { @@ -19,43 +17,26 @@ fn read_sample_words() -> Vec { .collect() } -// this is 100 times slower than _fast test -fn literal_string_slow(c: &mut Criterion) { +fn literal_string(c: &mut Criterion) { let mut g = c.benchmark_group("literal_string"); - g.sample_size(500); - - g.bench_function("creation", |b| { - let words = read_sample_words(); - - b.iter(|| { - for word 
in &words { - let _ = LiteralString::from(word.as_str()); - } - }) - }); - g.finish(); -} - -fn literal_string_fast(c: &mut Criterion) { - let mut g = c.benchmark_group("literal_string"); - g.sample_size(300); + g.sampling_mode(SamplingMode::Linear); g.bench_function("mimic_case", |b| { let words = read_sample_words(); - let strings: Vec = words + let strings: Vec = words .iter() - .map(|w| LiteralString::from(w.as_str())) + .map(|w| PrecomputedLiteral::new(w.to_string())) .collect(); let reversed_words: Vec = words.into_iter().rev().collect(); b.iter(|| { for (string, word) in strings.iter().zip(&reversed_words) { - let _ = string.mimic_ascii_case(word); + let _ = string.mimic_case_action(word); } }) }); g.finish(); } -criterion_group!(benches, literal_string_slow, literal_string_fast); +criterion_group!(benches, literal_string); criterion_main!(benches); diff --git a/src/accent.rs b/src/accent.rs index 1062e05..74151de 100644 --- a/src/accent.rs +++ b/src/accent.rs @@ -42,6 +42,7 @@ impl Accent { } /// Walks rules for given intensity from top to bottom and applies them + #[must_use] pub fn say_it<'a>(&self, text: &'a str, intensity: u64) -> Cow<'a, str> { // Go from the end and pick first intensity that is less or eaual to requested. 
This is // guaranteed to return something because base intensity 0 is always present at the bottom @@ -101,7 +102,7 @@ mod tests { "".to_owned(), Pass::new(vec![ ("(?-i)[a-z]".to_string(), Literal::new_boxed("e")), - ("[A-Z]".to_string(), Literal::new_boxed("E")), + ("(?-i)[A-Z]".to_string(), Literal::new_boxed("E")), ]) .unwrap(), )], diff --git a/src/deserialize.rs b/src/deserialize.rs index b5d7294..13fad65 100644 --- a/src/deserialize.rs +++ b/src/deserialize.rs @@ -1,4 +1,8 @@ -use crate::{pass::Pass, tag::Tag, utils::runtime_format_single_value}; +use crate::{ + pass::Pass, + tag::Tag, + utils::{runtime_format_single_value, PrecomputedLiteral}, +}; use std::{fmt, marker::PhantomData}; use serde::{ @@ -75,7 +79,7 @@ where } impl<'de> Deserialize<'de> for Any { - fn deserialize(deserializer: D) -> Result + fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { @@ -87,6 +91,17 @@ impl<'de> Deserialize<'de> for Any { } } +impl<'de> Deserialize<'de> for PrecomputedLiteral { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + + Ok(Self::new(s)) + } +} + impl TryFrom, false>> for Weights { type Error = WeightsError; diff --git a/src/intensity.rs b/src/intensity.rs index ce3ebcf..d28f687 100644 --- a/src/intensity.rs +++ b/src/intensity.rs @@ -62,6 +62,7 @@ impl Intensity { } /// Runs all inner passes against text + #[must_use] pub fn apply<'a>(&self, text: &'a str) -> Cow<'a, str> { self.passes.iter().fold(Cow::Borrowed(text), |acc, pass| { Cow::Owned(pass.apply(&acc).into_owned()) diff --git a/src/lib.rs b/src/lib.rs index 4016da0..35fad46 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -131,8 +131,7 @@ pub mod pass; pub mod tag; pub mod tag_impls; -// pub for bench -#[doc(hidden)] +#[doc(hidden)] // pub for bench pub mod utils; #[cfg(feature = "deserialize")] diff --git a/src/match.rs b/src/match.rs index 1900200..da24aa1 100644 --- a/src/match.rs +++ b/src/match.rs @@ -1,21 
+1,51 @@ +use std::ops::Range; + use regex_automata::util::captures::Captures; -use crate::utils::LiteralString; +use crate::utils::{LazyLiteral, LiteralString}; /// Holds [`regex_automata::util::captures::Captures`] and full input #[derive(Debug)] pub struct Match<'a> { - pub captures: Captures, - pub input: &'a str, + pub(crate) captures: Captures, + pub(crate) input: &'a str, } impl<'a> Match<'a> { + /// # Safety + /// + /// Constructing with invalid Captures will cause UB in [`Match::get_range`] and + /// [`Match::get_match`] + pub unsafe fn new(captures: Captures, input: &'a str) -> Self { + Self { captures, input } + } + + /// Returns full match range (regex group 0) + #[inline] + pub fn get_range(&self) -> Range { + // SAFETY: Match is guaranteed to be created from valid Captures and input or via unsafe + // constructor + unsafe { self.captures.get_match().unwrap_unchecked() }.range() + } + /// Returns full match (regex group 0) + #[inline] pub fn get_match(&self) -> &'a str { - &self.input[self.captures.get_match().expect("this matched").range()] + // SAFETY: Match is guaranteed to be created from valid Captures and input or via unsafe + // constructor + unsafe { self.input.get_unchecked(self.get_range()) } + } + + pub fn get_captures(&self) -> &Captures { + &self.captures + } + + pub fn get_input(&self) -> &'a str { + self.input } /// Uses regex interpolation syntax to use current match in template + #[must_use] pub fn interpolate(&self, template: &str) -> String { let mut dst = String::new(); @@ -26,7 +56,12 @@ impl<'a> Match<'a> { } /// Tries to match string case for current match - pub fn mimic_ascii_case(&self, template: &str) -> String { - LiteralString::from(template).mimic_ascii_case(self.get_match()) + #[must_use] + pub fn mimic_case(&self, template: String) -> String { + let len = self.get_range().len(); + let literal = LazyLiteral::new(template, len); + let action = literal.mimic_case_action(self.get_match()); + + 
literal.handle_mimic_action(action) } } diff --git a/src/pass.rs b/src/pass.rs index dabd02d..1bd90a8 100644 --- a/src/pass.rs +++ b/src/pass.rs @@ -16,6 +16,7 @@ pub struct Pass { } // skips 20 pages of debug output of `multi_regex` field +#[allow(clippy::missing_fields_in_debug)] impl fmt::Debug for Pass { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Pass") @@ -83,6 +84,7 @@ impl Pass { } /// Produces string with all non-overlapping regexes replaced by corresponding tags + #[must_use] pub fn apply<'a>(&self, text: &'a str) -> Cow<'a, str> { let all_captures: Vec<_> = self.multi_regex.captures_iter(text).collect(); @@ -94,7 +96,10 @@ impl Pass { let mut output = String::with_capacity(text.len()); for caps in all_captures { - let caps_match = caps.get_match().expect("this matched"); + // SAFETY: these captures come from matches. The only way this can fail is if they were + // created manually with Captures::empty() + let caps_match = unsafe { caps.get_match().unwrap_unchecked() }; + let range = caps_match.range(); let tag = &self.tags[caps_match.pattern()]; diff --git a/src/tag_impls.rs b/src/tag_impls.rs index 21dbf11..f984f78 100644 --- a/src/tag_impls.rs +++ b/src/tag_impls.rs @@ -3,7 +3,11 @@ use crate::deserialize::SortedMap; use std::{borrow::Cow, error::Error, fmt::Display}; -use crate::{tag::Tag, utils::LiteralString, Match}; +use crate::{ + tag::Tag, + utils::{LiteralString, PrecomputedLiteral}, + Match, +}; /// Same as [`Literal`] with `"$0"` argument: returns entire match. 
/// @@ -64,15 +68,16 @@ impl Tag for Delete { derive(serde::Deserialize), serde(transparent) )] -pub struct Literal(LiteralString); +pub struct Literal(PrecomputedLiteral); impl Literal { - pub fn new(s: &str) -> Self { - Self(LiteralString::from(s)) + pub fn new(s: String) -> Self { + Self(PrecomputedLiteral::new(s)) } + // reference to simplify tests pub fn new_boxed(s: &str) -> Box { - Box::new(Self::new(s)) + Box::new(Self::new(s.to_string())) } } @@ -82,9 +87,11 @@ impl Tag for Literal { if self.0.has_template { let interpolated = m.interpolate(&self.0.body); - m.mimic_ascii_case(&interpolated) + m.mimic_case(interpolated) } else { - self.0.mimic_ascii_case(m.get_match()) + let action = self.0.mimic_case_action(m.get_match()); + + self.0.handle_mimic_action(action) } .into() } @@ -345,24 +352,24 @@ mod tests { #[test] fn literal() { - let tag = Literal::new("bar"); + let tag = Literal::new_boxed("bar"); - assert_eq!(apply(&tag, "foo"), "bar"); - assert_eq!(apply(&tag, "bar"), "bar"); + assert_eq!(apply(tag.as_ref(), "foo"), "bar"); + assert_eq!(apply(tag.as_ref(), "bar"), "bar"); } #[test] fn literal_templates() { - let tag = Literal::new("$0"); + let tag = Literal::new_boxed("$0"); - assert_eq!(apply(&tag, "foo"), "foo"); + assert_eq!(apply(tag.as_ref(), "foo"), "foo"); } #[test] fn literal_mimics_case() { - let tag = Literal::new("bar"); + let tag = Literal::new_boxed("bar"); - assert_eq!(apply(&tag, "FOO"), "BAR"); + assert_eq!(apply(tag.as_ref(), "FOO"), "BAR"); } #[test] @@ -453,7 +460,7 @@ mod tests { #[test] fn expansion() { - let swap_words_tag = Literal::new("$2 $1"); + let swap_words_tag = Literal::new_boxed("$2 $1"); let two_words_regex = Regex::new(r"(\w+) (\w+)").unwrap(); let mut caps = two_words_regex.create_captures(); @@ -468,7 +475,7 @@ mod tests { ); // nonexistent goup results in empty string - let delete_word_tag = Literal::new("$nonexistent $2"); + let delete_word_tag = Literal::new_boxed("$nonexistent $2"); let mut caps = 
two_words_regex.create_captures(); two_words_regex.captures("DELETE US", &mut caps); diff --git a/src/utils.rs b/src/utils.rs index 8feef3e..47c2eb8 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -2,108 +2,165 @@ use std::sync::OnceLock; use regex_automata::meta::Regex; -/// Wrapper around string, precomputing some metadata to speed up operations -/// -/// NOTE: this is very expensive to initialyze -#[doc(hidden)] // pub for bench -#[derive(Debug, Clone)] -#[cfg_attr( - feature = "deserialize", - derive(serde::Deserialize), - serde(from = "&str") -)] -pub struct LiteralString { - pub(crate) body: String, - // templating is expensive, it is important to skip it if possible - pub(crate) has_template: bool, - // saves time in mimic_case - char_count: usize, - is_ascii_lowercase: bool, -} +static TEMPLATE_REGEX: OnceLock = OnceLock::new(); -impl PartialEq for LiteralString { - fn eq(&self, other: &Self) -> bool { - self.body == other.body +// https://stackoverflow.com/a/38406885 +fn title(s: &str) -> String { + let mut c = s.chars(); + match c.next() { + None => String::new(), + Some(f) => f.to_uppercase().collect::() + c.as_str(), } } -fn case(char_count: usize, string: &str) -> (bool, bool, bool) { - let (lower, upper) = string.chars().fold((0, 0), |(lower, upper), c| { +fn count_cases(string: &str) -> (usize, usize) { + string.chars().fold((0, 0), |(lower, upper), c| { + let is_lower = c.is_lowercase(); + let is_upper = c.is_uppercase(); + + (lower + usize::from(is_lower), upper + usize::from(is_upper)) + }) +} + +fn count_chars_and_cases(string: &str) -> (usize, usize, usize) { + string.chars().fold((0, 0, 0), |(total, lower, upper), c| { + let is_lower = c.is_lowercase(); + let is_upper = c.is_uppercase(); + ( - lower + usize::from(c.is_ascii_lowercase()), - upper + usize::from(c.is_ascii_uppercase()), + total + 1, + lower + usize::from(is_lower), + upper + usize::from(is_upper), ) - }); + }) +} - ( - lower == char_count, - upper == char_count, - lower > 0 && 
upper > 0, - ) +#[doc(hidden)] // pub for bench +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum MimicAction { + Title, + Uppercase, + Nothing, } -static TEMPLATE_REGEX: OnceLock = OnceLock::new(); +/// Allows examining string case when provided with info about characters +#[doc(hidden)] // pub for bench +pub trait LiteralString { + fn chars(&self) -> (usize, bool, bool); + + /// Examine given string and tell which action to take to match its case + #[must_use] + fn mimic_case_action(&self, from: &str) -> MimicAction { + let (self_char_count, self_has_lowercase, self_has_uppercase) = self.chars(); + + // do nothing if current string: + // - has at least one uppercase letter + // - has no lowercase letters + if self_has_uppercase || !self_has_lowercase { + return MimicAction::Nothing; + } -impl From<&str> for LiteralString { - fn from(body: &str) -> Self { - let char_count = body.chars().count(); + let (char_count, lowercase, uppercase) = count_chars_and_cases(from); - let (is_ascii_lowercase, _is_ascii_uppercase, _is_ascii_mixed_case) = - case(char_count, body); + // uppercase: has no lowercase letters and at least one uppercase letter + if (lowercase == 0 && uppercase != 0) + // either current string is 1 letter or string is upper and is long + && (self_char_count == 1 || char_count > 1) + { + return MimicAction::Uppercase; + } - Self { - body: body.to_owned(), - char_count, - is_ascii_lowercase, - has_template: TEMPLATE_REGEX - // https://docs.rs/regex-automata/latest/regex_automata/util/interpolate/index.html - // this is not 100% accurate but should never result in false negatives - .get_or_init(|| Regex::new(r"(:?^|[^$])\$(:?[0-9A-Za-z_]|\{.+?\})").unwrap()) - .is_match(body), + // there is exactly one uppercase letter + if uppercase == 1 + // either one letter long or first letter is upper + && (char_count == 1 || from.chars().next().is_some_and(char::is_uppercase)) + { + return MimicAction::Title; } + + MimicAction::Nothing } } -impl LiteralString { - /// 
Examine given string and try to adjust to it's case. ascii only - #[doc(hidden)] // pub for bench - pub fn mimic_ascii_case(&self, source: &str) -> String { - // only entirely lowercased string is changed. assume case has meaning for everything else - if !self.is_ascii_lowercase { - return self.body.clone(); - } +/// Wrapper around string. Performs single case mimicking, does not precompute anything +pub(crate) struct LazyLiteral { + body: String, + length_hint: usize, +} - // if source was all uppercase we force all uppercase for replacement. this is likely to - // give false positives on short inputs like "I" or abbreviations - if source.chars().all(|c| c.is_ascii_uppercase()) { - return self.body.to_ascii_uppercase(); - } +impl LazyLiteral { + pub(crate) fn new(body: String, length_hint: usize) -> Self { + Self { body, length_hint } + } - // no constraints if source was all lowercase - if source.chars().all(|c| c.is_ascii_lowercase()) || !source.is_ascii() { - return self.body.clone(); + pub(crate) fn handle_mimic_action(self, action: MimicAction) -> String { + match action { + MimicAction::Title => title(&self.body), + MimicAction::Uppercase => self.body.to_uppercase(), + MimicAction::Nothing => self.body, } + } +} - // TODO: SIMD this - if source.chars().count() == self.char_count { - let mut body = self.body.clone(); - - for (i, c_old) in source.chars().enumerate() { - if c_old.is_ascii_lowercase() { - body.get_mut(i..=i) - .expect("strings have same len") - .make_ascii_lowercase(); - } else if c_old.is_ascii_uppercase() { - body.get_mut(i..=i) - .expect("strings have same len") - .make_ascii_uppercase(); - } - } +impl LiteralString for LazyLiteral { + fn chars(&self) -> (usize, bool, bool) { + let (lowercase, uppercase) = count_cases(&self.body); + + (self.length_hint, lowercase != 0, uppercase != 0) + } +} + +/// Wrapper around string. 
Optionally precomputes information for fast case mimicking +#[doc(hidden)] // pub for bench +#[derive(Debug, Clone)] +pub struct PrecomputedLiteral { + pub(crate) body: String, + body_upper: String, + body_title: String, + pub(crate) has_template: bool, + char_count: usize, + has_lowercase: bool, + has_uppercase: bool, +} + +impl PrecomputedLiteral { + #[doc(hidden)] // pub for bench + pub fn new(body: String) -> Self { + let (char_count, lowercase, uppercase) = count_chars_and_cases(&body); + + Self { + char_count, + has_lowercase: lowercase != 0, + has_uppercase: uppercase != 0, + body_upper: body.to_uppercase(), + body_title: title(&body), + // https://docs.rs/regex-automata/latest/regex_automata/util/interpolate/index.html + // this is not 100% accurate but should never result in false negatives + has_template: TEMPLATE_REGEX + .get_or_init(|| Regex::new(r"(:?^|[^$])\$(:?[0-9A-Za-z_]|\{.+?\})").unwrap()) + .is_match(&body), + body, + } + } - return body; + pub(crate) fn handle_mimic_action(&self, action: MimicAction) -> String { + match action { + MimicAction::Title => self.body_title.clone(), + MimicAction::Uppercase => self.body_upper.clone(), + MimicAction::Nothing => self.body.clone(), } + } +} - self.body.clone() +impl LiteralString for PrecomputedLiteral { + fn chars(&self) -> (usize, bool, bool) { + (self.char_count, self.has_lowercase, self.has_uppercase) + } +} + +impl PartialEq for PrecomputedLiteral { + fn eq(&self, other: &Self) -> bool { + self.body == other.body } } @@ -159,74 +216,131 @@ pub(crate) fn runtime_format_single_value(template: &str, value: &str) -> Result mod tests { use super::*; + impl From<&str> for PrecomputedLiteral { + fn from(body: &str) -> Self { + Self::new(body.to_string()) + } + } + #[test] fn string_detects_template() { - assert!(!LiteralString::from("hello").has_template); - assert!(LiteralString::from("$hello").has_template); - assert!(LiteralString::from("hello $1 world").has_template); - 
assert!(!LiteralString::from("hello $$1 world").has_template); - assert!(!LiteralString::from("hello $$$1 world").has_template); - assert!(LiteralString::from("hello ${foo[bar].baz} world").has_template); - assert!(!LiteralString::from("hello $${foo[bar].baz} world").has_template); + assert!(!PrecomputedLiteral::from("hello").has_template); + assert!(PrecomputedLiteral::from("$hello").has_template); + assert!(PrecomputedLiteral::from("hello $1 world").has_template); + assert!(!PrecomputedLiteral::from("hello $$1 world").has_template); + assert!(!PrecomputedLiteral::from("hello $$$1 world").has_template); + assert!(PrecomputedLiteral::from("hello ${foo[bar].baz} world").has_template); + assert!(!PrecomputedLiteral::from("hello $${foo[bar].baz} world").has_template); } #[test] fn string_counts_chars() { - assert_eq!(LiteralString::from("hello").char_count, 5); - assert_eq!(LiteralString::from("привет").char_count, 6); + assert_eq!(PrecomputedLiteral::from("hello").chars().0, 5); + assert_eq!(PrecomputedLiteral::from("привет").chars().0, 6); } #[test] - fn string_detects_ascii_lowercase() { - assert_eq!(LiteralString::from("hello").is_ascii_lowercase, true); - assert_eq!(LiteralString::from("Hello").is_ascii_lowercase, false); - assert_eq!(LiteralString::from("1!@$#$").is_ascii_lowercase, false); - assert_eq!(LiteralString::from("привет").is_ascii_lowercase, false); + fn string_detects_lowercase() { + assert_eq!(PrecomputedLiteral::from("hello").chars().1, true); + assert_eq!(PrecomputedLiteral::from("Hello").chars().1, true); + assert_eq!(PrecomputedLiteral::from("1!@$#$").chars().1, false); + assert_eq!(PrecomputedLiteral::from("1!@$H#$").chars().1, false); + assert_eq!(PrecomputedLiteral::from("1!@$Hh#$").chars().1, true); + assert_eq!(PrecomputedLiteral::from("привет").chars().1, true); + assert_eq!(PrecomputedLiteral::from("ПРИВЕТ").chars().1, false); + } + #[test] + fn string_detects_uppercase() { + assert_eq!(PrecomputedLiteral::from("hello").chars().2, false); 
+ assert_eq!(PrecomputedLiteral::from("Hello").chars().2, true); + assert_eq!(PrecomputedLiteral::from("1!@$#$").chars().2, false); + assert_eq!(PrecomputedLiteral::from("1!@$H#$").chars().2, true); + assert_eq!(PrecomputedLiteral::from("1!@$Hh#$").chars().2, true); + assert_eq!(PrecomputedLiteral::from("привет").chars().2, false); + assert_eq!(PrecomputedLiteral::from("ПРИВЕТ").chars().2, true); } #[test] fn mimic_case_input_lowercase() { - assert_eq!(LiteralString::from("bye").mimic_ascii_case("hello"), "bye"); - assert_eq!(LiteralString::from("Bye").mimic_ascii_case("hello"), "Bye"); - assert_eq!(LiteralString::from("bYE").mimic_ascii_case("hello"), "bYE"); + assert_eq!( + PrecomputedLiteral::from("bye").mimic_case_action("hello"), + MimicAction::Nothing + ); + assert_eq!( + PrecomputedLiteral::from("Bye").mimic_case_action("hello"), + MimicAction::Nothing + ); + assert_eq!( + PrecomputedLiteral::from("bYE").mimic_case_action("hello"), + MimicAction::Nothing + ); } - // questionable rule, becomes overcomplicated - // #[test] - // fn mimic_case_input_titled() { - // assert_eq!(LiteralString::from("bye").mimic_ascii_case("Hello"), "Bye"); - // // has case variation -- do not touch it - // assert_eq!(LiteralString::from("bYe").mimic_ascii_case("Hello"), "bYe"); - // // not ascii uppercase - // assert_eq!(LiteralString::from("bye").mimic_ascii_case("Привет"), "bye"); - // } - #[test] - fn mimic_case_input_uppercase() { - assert_eq!(LiteralString::from("bye").mimic_ascii_case("HELLO"), "BYE"); + fn mimic_case_input_titled() { + assert_eq!( + PrecomputedLiteral::from("bye").mimic_case_action("Hello"), + MimicAction::Title + ); // has case variation -- do not touch it - assert_eq!(LiteralString::from("bYE").mimic_ascii_case("HELLO"), "bYE"); - // not ascii uppercase - assert_eq!(LiteralString::from("bye").mimic_ascii_case("ПРИВЕТ"), "bye"); assert_eq!( - LiteralString::from("пока").mimic_ascii_case("HELLO"), - "пока" + 
PrecomputedLiteral::from("bYe").mimic_case_action("Hello"), + MimicAction::Nothing + ); + // non ascii title + assert_eq!( + PrecomputedLiteral::from("bye").mimic_case_action("Привет"), + MimicAction::Title + ); + } + #[test] + fn mimic_case_input_titled_single_letter() { + assert_eq!( + PrecomputedLiteral::from("je").mimic_case_action("I"), + MimicAction::Title ); } #[test] - fn mimic_case_input_different_case() { - assert_eq!(LiteralString::from("bye").mimic_ascii_case("hELLO"), "bye"); + fn mimic_case_input_uppercase() { + assert_eq!( + PrecomputedLiteral::from("bye").mimic_case_action("HELLO"), + MimicAction::Uppercase + ); + // has case variation -- do not touch it + assert_eq!( + PrecomputedLiteral::from("bbbbYE").mimic_case_action("HELLO"), + MimicAction::Nothing + ); + // non ascii uppercase + assert_eq!( + PrecomputedLiteral::from("bye").mimic_case_action("ПРИВЕТ"), + MimicAction::Uppercase + ); + assert_eq!( + PrecomputedLiteral::from("пока").mimic_case_action("HELLO"), + MimicAction::Uppercase + ); } #[test] - fn mimic_case_input_different_case_same_len() { + fn mimic_case_input_mixed_case() { + assert_eq!( + PrecomputedLiteral::from("bye").mimic_case_action("hELLO"), + MimicAction::Nothing + ); + assert_eq!( + PrecomputedLiteral::from("пока").mimic_case_action("HEllo"), + MimicAction::Nothing + ); + assert_eq!( + PrecomputedLiteral::from("пока").mimic_case_action("HELlo"), + MimicAction::Nothing + ); assert_eq!( - LiteralString::from("byeee").mimic_ascii_case("hELLO"), - "bYEEE" + PrecomputedLiteral::from("bye").mimic_case_action("heLlo"), + MimicAction::Nothing ); - assert_eq!(LiteralString::from("bye").mimic_ascii_case("hI!"), "bYe"); - assert_eq!(LiteralString::from("Bye").mimic_ascii_case("hI!"), "Bye"); } #[test] diff --git a/tests/json.rs b/tests/json.rs index 7abed0f..a071b7e 100644 --- a/tests/json.rs +++ b/tests/json.rs @@ -62,7 +62,7 @@ fn json_accents_work() { let accent = read_accent(path); for line in &lines { for intensity in 
accent.intensities() { - accent.say_it(line, intensity); + let _ = accent.say_it(line, intensity); } } tested_at_least_one = true; diff --git a/tests/ron.rs b/tests/ron.rs index e3c844d..b61c64a 100644 --- a/tests/ron.rs +++ b/tests/ron.rs @@ -61,7 +61,7 @@ fn ron_accents_work() { let accent = read_accent(path); for line in &lines { for intensity in accent.intensities() { - accent.say_it(line, intensity); + let _ = accent.say_it(line, intensity); } } tested_at_least_one = true;