From 95b4a05687ec6c98fa64851095d0f2c304fcd067 Mon Sep 17 00:00:00 2001 From: Roberto Vidal Date: Sat, 11 May 2024 18:14:15 +0200 Subject: [PATCH] fix string functions based on character offsets (#205) * fix string functions based on character offsets * fix * make end optional, more tests --- cogs/r5rs.scm | 7 ++- crates/steel-core/src/primitives/strings.rs | 61 ++++++++++++++++++--- 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/cogs/r5rs.scm b/cogs/r5rs.scm index 8e3c1093f..f3e2cb25b 100644 --- a/cogs/r5rs.scm +++ b/cogs/r5rs.scm @@ -386,12 +386,17 @@ (check-equal? "string length correctly reported for standard string" 3 (string-length "abc")) (check-equal? "string indexing into first character" #\a (string-ref "abc" 0)) - +(check-equal? "string indexing with multibyte characters" #\a (string-ref "λa" 1)) (check-equal? "string indexing into last character" #\c (string-ref "abc" 2)) (check-equal? "empty substring" "" (substring "abc" 0 0)) +(check-equal? "empty substring at the end" "" (substring "abc" 3 3)) +(check-equal? "substring without end" "bc" (substring "abc" 1)) +(check-equal? "empty substring in the middle" "" (substring "abc" 1 1)) (check-equal? "substring just the first character" "a" (substring "abc" 0 1)) (check-equal? "substring a larger chunk" "bc" (substring "abc" 1 3)) +(check-equal? "substring with multibyte characters" "λμ" (substring "λλμν" 1 3)) +(check-equal? "full substring with multibyte characters" "千葉市" (substring "千葉市" 0 3)) (check-equal? "Basic functionality of make-string" "aaa" (make-string 3 #\a)) (check-equal? "make-string with no character" "\0\0\0" (make-string 3)) diff --git a/crates/steel-core/src/primitives/strings.rs b/crates/steel-core/src/primitives/strings.rs index a204c12c9..0844e630d 100644 --- a/crates/steel-core/src/primitives/strings.rs +++ b/crates/steel-core/src/primitives/strings.rs @@ -221,24 +221,67 @@ pub fn string_ci_equals(left: &SteelString, right: &SteelString) -> bool { #[function(name = "string-ref", constant = true)] pub fn string_ref(value: &SteelString, index: usize) -> Result { - if index >= value.len() { - stop!(Generic => "string-ref: index out of bounds: index: {}, string length: {}", index, value); - } + let res = if index < value.len() { + value.chars().nth(index) + } else { + None + }; - Ok(SteelVal::CharV(value.as_str().chars().nth(index).unwrap())) + if let Some(ch) = res { + Ok(SteelVal::CharV(ch)) + } else { + stop!(Generic => "string-ref: index out of bounds: index: {}, string length: {}", index, value.len()); + } } #[function(name = "substring", constant = true)] -pub fn substring(value: &SteelString, i: usize, j: usize) -> Result { - if i >= value.len() { +pub fn substring( + value: &SteelString, + i: usize, + mut rest: RestArgsIter<'_, isize>, +) -> Result { + use std::iter::once; + + if i > value.len() { stop!(Generic => "substring: index out of bounds: left bound: {}, string length: {}", i, value.len()); } - if i > j { - stop!(Generic => "substring: left bound must be less than or equal to the right bound: left: {}, right: {}", i, j); + let j = rest.next().transpose()?.map(|j| j as usize); + + if rest.next().is_some() { + stop!(ArityMismatch => "substring expects 1 or 2 arguments"); + } + + if let Some(j) = j { + if i > j { + stop!(Generic => "substring: left bound must be less than or equal to the right bound: left: {}, right: {}", i, j); + } + } + + if value.is_empty() { + return Ok(SteelVal::StringV("".into())); } - Ok(SteelVal::StringV(value[i..j].into())) + let mut char_offsets = value + .char_indices() + .map(|(offset, _)| offset) + .chain(once(value.len())); + + let Some(start) = char_offsets.nth(i) else { + stop!(Generic => "substring: index out of bounds: left bound: {}", i); + }; + + let Some(j) = j else { + return Ok(SteelVal::StringV(value[start..].into())); + }; + + let mut char_offsets = once(start).chain(char_offsets); + + let Some(end) = char_offsets.nth(j - i) else { + stop!(Generic => "substring: index out of bounds: right bound: {}", j); + }; + + Ok(SteelVal::StringV(value[start..end].into())) } #[function(name = "make-string")]