From f601ba3269e7d9ce6df43ca4c3a39cd995d4a02e Mon Sep 17 00:00:00 2001 From: Dan Nasman Date: Fri, 19 Jul 2024 16:53:52 +0300 Subject: [PATCH 1/4] disallow consecutive separators when parsing floats --- literal/src/float.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/literal/src/float.rs b/literal/src/float.rs index 5c14fcbc..cb6c0ff3 100644 --- a/literal/src/float.rs +++ b/literal/src/float.rs @@ -31,6 +31,7 @@ fn parse_inner(literal: &[u8]) -> Option { // lexical-core's format::PYTHON_STRING is inaccurate const PYTHON_STRING: u128 = NumberFormatBuilder::rebuild(PYTHON3_LITERAL) .no_special(false) + .consecutive_digit_separator(false) .build(); f64::from_lexical_with_options::(literal, &Options::new()).ok() } From 8d28ecdc3e09b72e73046faefe5aa4e9548a2e7f Mon Sep 17 00:00:00 2001 From: Dan Nasman Date: Sat, 20 Jul 2024 19:16:29 +0300 Subject: [PATCH 2/4] add underline handling to float parsing --- literal/src/float.rs | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/literal/src/float.rs b/literal/src/float.rs index cb6c0ff3..224e3490 100644 --- a/literal/src/float.rs +++ b/literal/src/float.rs @@ -6,6 +6,33 @@ pub fn parse_str(literal: &str) -> Option { parse_inner(literal.trim().as_bytes()) } +fn strip_separators(literal: &[u8]) -> Option>{ + let mut prev = b'\0'; + let mut dup = Vec::::new(); + for p in literal { + if *p == b'_' { + // Underscores are only allowed after digits. + if !prev.is_ascii_digit() { + return None; + } + } else { + dup.push(*p); + // Underscores are only allowed before digits. + if prev == b'_' && !p.is_ascii_digit() { + return None; + } + } + prev = *p; + } + + // Underscores are not allowed at the end. + if prev == b'_' { + return None; + } + + Some(dup) +} + pub fn parse_bytes(literal: &[u8]) -> Option { parse_inner(trim_slice(literal, |b| b.is_ascii_whitespace())) } @@ -28,12 +55,16 @@ fn parse_inner(literal: &[u8]) -> Option { use lexical_parse_float::{ format::PYTHON3_LITERAL, FromLexicalWithOptions, NumberFormatBuilder, Options, }; + + // Use custom function for underline handling for now. + // For further information see https://github.com/Alexhuszagh/rust-lexical/issues/96. + let stripped = strip_separators(literal)?; + // lexical-core's format::PYTHON_STRING is inaccurate const PYTHON_STRING: u128 = NumberFormatBuilder::rebuild(PYTHON3_LITERAL) .no_special(false) - .consecutive_digit_separator(false) .build(); - f64::from_lexical_with_options::(literal, &Options::new()).ok() + f64::from_lexical_with_options::(&stripped, &Options::new()).ok() } pub fn is_integer(v: f64) -> bool { From a6f3aac369f009cf785acbeff895664324cdc1fe Mon Sep 17 00:00:00 2001 From: Dan Nasman Date: Sat, 20 Jul 2024 19:27:08 +0300 Subject: [PATCH 3/4] change function name --- literal/src/float.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/literal/src/float.rs b/literal/src/float.rs index 224e3490..d94a2261 100644 --- a/literal/src/float.rs +++ b/literal/src/float.rs @@ -6,7 +6,7 @@ pub fn parse_str(literal: &str) -> Option { parse_inner(literal.trim().as_bytes()) } -fn strip_separators(literal: &[u8]) -> Option>{ +fn strip_underlines(literal: &[u8]) -> Option>{ let mut prev = b'\0'; let mut dup = Vec::::new(); for p in literal { @@ -58,7 +58,7 @@ fn parse_inner(literal: &[u8]) -> Option { // Use custom function for underline handling for now. // For further information see https://github.com/Alexhuszagh/rust-lexical/issues/96. - let stripped = strip_separators(literal)?; + let stripped = strip_underlines(literal)?; // lexical-core's format::PYTHON_STRING is inaccurate const PYTHON_STRING: u128 = NumberFormatBuilder::rebuild(PYTHON3_LITERAL) From 746b24f04ca7b8a967377c057a8ce94a79833ccf Mon Sep 17 00:00:00 2001 From: Dan Nasman Date: Sat, 20 Jul 2024 19:33:23 +0300 Subject: [PATCH 4/4] fix formatting issues --- literal/src/float.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/literal/src/float.rs b/literal/src/float.rs index d94a2261..c8224fa5 100644 --- a/literal/src/float.rs +++ b/literal/src/float.rs @@ -6,7 +6,7 @@ pub fn parse_str(literal: &str) -> Option { parse_inner(literal.trim().as_bytes()) } -fn strip_underlines(literal: &[u8]) -> Option>{ +fn strip_underlines(literal: &[u8]) -> Option> { let mut prev = b'\0'; let mut dup = Vec::::new(); for p in literal { @@ -59,7 +59,7 @@ fn parse_inner(literal: &[u8]) -> Option { // Use custom function for underline handling for now. // For further information see https://github.com/Alexhuszagh/rust-lexical/issues/96. let stripped = strip_underlines(literal)?; - + // lexical-core's format::PYTHON_STRING is inaccurate const PYTHON_STRING: u128 = NumberFormatBuilder::rebuild(PYTHON3_LITERAL) .no_special(false)