Skip to content

Commit 07d255a

Browse files
committed
ENH: add substr string operation
The substr operation allows retrieving a substring by index, with an optional limit. The index behaves reasonably if negative, and while I'm not sure the behavior of a negative limit makes sense to me, it behaves as the specification indicates it should.
1 parent 7a8069b commit 07d255a

File tree

3 files changed

+217
-1
lines changed

3 files changed

+217
-1
lines changed

src/lib.rs

+82
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,83 @@ mod jsonlogic_tests {
863863
]
864864
}
865865

866+
fn substr_cases() -> Vec<(Value, Value, Result<Value, ()>)> {
867+
vec![
868+
// Wrong number of arguments
869+
(json!({"substr": []}), json!({}), Err(())),
870+
(json!({"substr": ["foo"]}), json!({}), Err(())),
871+
(json!({"substr": ["foo", 1, 2, 3]}), json!({}), Err(())),
872+
// Wrong argument types
873+
(json!({"substr": [12, 1]}), json!({}), Err(())),
874+
(json!({"substr": ["foo", "12"]}), json!({}), Err(())),
875+
// Non-negative indices
876+
(json!({"substr": ["foo", 0]}), json!({}), Ok(json!("foo"))),
877+
(json!({"substr": ["foo", 1]}), json!({}), Ok(json!("oo"))),
878+
(json!({"substr": ["foo", 2]}), json!({}), Ok(json!("o"))),
879+
// Negative indices
880+
(json!({"substr": ["foo", -1]}), json!({}), Ok(json!("o"))),
881+
(json!({"substr": ["foo", -2]}), json!({}), Ok(json!("oo"))),
882+
(json!({"substr": ["foo", -3]}), json!({}), Ok(json!("foo"))),
883+
// Out-of-bounds indices
884+
(json!({"substr": ["foo", 3]}), json!({}), Ok(json!(""))),
885+
(json!({"substr": ["foo", 20]}), json!({}), Ok(json!(""))),
886+
(json!({"substr": ["foo", -4]}), json!({}), Ok(json!("foo"))),
887+
// Non-negative Limits
888+
(json!({"substr": ["foo", 0, 1]}), json!({}), Ok(json!("f"))),
889+
(
890+
json!({"substr": ["foo", 0, 3]}),
891+
json!({}),
892+
Ok(json!("foo")),
893+
),
894+
(json!({"substr": ["foo", 0, 0]}), json!({}), Ok(json!(""))),
895+
(json!({"substr": ["foo", 1, 1]}), json!({}), Ok(json!("o"))),
896+
// Negative Limits
897+
(
898+
json!({"substr": ["foo", 0, -1]}),
899+
json!({}),
900+
Ok(json!("fo")),
901+
),
902+
(json!({"substr": ["foo", 0, -2]}), json!({}), Ok(json!("f"))),
903+
(json!({"substr": ["foo", 0, -3]}), json!({}), Ok(json!(""))),
904+
// Out-of-bounds limits
905+
(
906+
json!({"substr": ["foo", 0, 10]}),
907+
json!({}),
908+
Ok(json!("foo")),
909+
),
910+
(json!({"substr": ["foo", 0, -10]}), json!({}), Ok(json!(""))),
911+
// Negative indices with negative limits
912+
(
913+
json!({"substr": ["foo", -3, -2]}),
914+
json!({}),
915+
Ok(json!("f")),
916+
),
917+
// Negative indices with positive limits
918+
(
919+
json!({"substr": ["foo", -3, 2]}),
920+
json!({}),
921+
Ok(json!("fo")),
922+
),
923+
// Out-of-bounds indices with out-of-bounds limits
924+
(json!({"substr": ["foo", 10, 10]}), json!({}), Ok(json!(""))),
925+
(
926+
json!({"substr": ["foo", 10, -10]}),
927+
json!({}),
928+
Ok(json!("")),
929+
),
930+
(
931+
json!({"substr": ["foo", -10, 10]}),
932+
json!({}),
933+
Ok(json!("foo")),
934+
),
935+
(
936+
json!({"substr": ["foo", -10, -10]}),
937+
json!({}),
938+
Ok(json!("")),
939+
),
940+
]
941+
}
942+
866943
fn lt_cases() -> Vec<(Value, Value, Result<Value, ()>)> {
867944
vec![
868945
(json!({"<": [1, 2]}), json!({}), Ok(json!(true))),
@@ -1192,6 +1269,11 @@ mod jsonlogic_tests {
11921269
cat_cases().into_iter().for_each(assert_jsonlogic)
11931270
}
11941271

1272+
/// Runs every `substr` case through `assert_jsonlogic`.
#[test]
fn test_substr_op() {
    for case in substr_cases() {
        assert_jsonlogic(case);
    }
}
1276+
11951277
#[test]
11961278
fn test_lt_op() {
11971279
lt_cases().into_iter().for_each(assert_jsonlogic)

src/op/mod.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,11 @@ pub const OPERATOR_MAP: phf::Map<&'static str, Operator> = phf_map! {
178178
operator: string::cat,
179179
num_params: NumParams::Any,
180180
},
181+
"substr" => Operator {
182+
symbol: "substr",
183+
operator: string::substr,
184+
num_params: NumParams::Variadic(2..4)
185+
},
181186
};
182187

183188
pub const LAZY_OPERATOR_MAP: phf::Map<&'static str, LazyOperator> = phf_map! {
@@ -235,7 +240,7 @@ pub enum NumParams {
235240
Unary,
236241
Exactly(usize),
237242
AtLeast(usize),
238-
Variadic(std::ops::Range<usize>),
243+
Variadic(std::ops::Range<usize>), // [inclusive, exclusive)
239244
}
240245
impl NumParams {
241246
fn is_valid_len(&self, len: &usize) -> bool {

src/op/string.rs

+129
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
//! String Operations
22
3+
use crate::NULL;
34
use serde_json::Value;
5+
use std::cmp;
6+
use std::convert::TryInto;
47

58
use crate::error::Error;
69

@@ -30,3 +33,129 @@ pub fn cat(items: &Vec<&Value>) -> Result<Value, Error> {
3033
})?;
3134
Ok(Value::String(rv))
3235
}
36+
37+
/// Get a substring by index
38+
///
39+
/// Note: the reference implementation casts the first argument to a string,
40+
/// but since the specification explicitly defines this as a string operation,
41+
/// the argument types are enforced here to avoid unpredictable behavior.
42+
pub fn substr(items: &Vec<&Value>) -> Result<Value, Error> {
43+
// We can only have 2 or 3 arguments. Number of arguments is validated elsewhere.
44+
let (string_arg, idx_arg) = (items[0], items[1]);
45+
let limit_opt: Option<&Value>;
46+
if items.len() > 2 {
47+
limit_opt = Some(items[2]);
48+
} else {
49+
limit_opt = None;
50+
}
51+
52+
let string = match string_arg {
53+
Value::String(s) => s,
54+
_ => {
55+
return Err(Error::InvalidArgument {
56+
value: string_arg.clone(),
57+
operation: "substr".into(),
58+
reason: "First argument to substr must be a string".into(),
59+
})
60+
}
61+
};
62+
let idx = match idx_arg {
63+
Value::Number(n) => {
64+
if let Some(int) = n.as_i64() {
65+
int
66+
} else {
67+
return Err(Error::InvalidArgument {
68+
value: idx_arg.clone(),
69+
operation: "substr".into(),
70+
reason: "Second argument to substr must be an integer".into(),
71+
});
72+
}
73+
}
74+
_ => {
75+
return Err(Error::InvalidArgument {
76+
value: idx_arg.clone(),
77+
operation: "substr".into(),
78+
reason: "Second argument to substr must be a number".into(),
79+
})
80+
}
81+
};
82+
let limit = limit_opt
83+
.map(|limit_arg| match limit_arg {
84+
Value::Number(n) => {
85+
if let Some(int) = n.as_i64() {
86+
Ok(int)
87+
} else {
88+
Err(Error::InvalidArgument {
89+
value: limit_arg.clone(),
90+
operation: "substr".into(),
91+
reason: "Optional third argument to substr must be an integer".into(),
92+
})
93+
}
94+
}
95+
_ => Err(Error::InvalidArgument {
96+
value: limit_arg.clone(),
97+
operation: "substr".into(),
98+
reason: "Optional third argument to substr must be a number".into(),
99+
}),
100+
})
101+
.transpose()?;
102+
103+
let string_len = string.len();
104+
105+
let idx_abs: usize = idx.abs().try_into().map_err(|e| Error::InvalidArgument {
106+
value: idx_arg.clone(),
107+
operation: "substr".into(),
108+
reason: format!(
109+
"The number {} is too large to index strings on this system",
110+
e
111+
),
112+
})?;
113+
let start_idx = match idx {
114+
// If the index is negative it means "number of characters prior to the
115+
// end of the string from which to start", and corresponds to the string
116+
// length minus the index.
117+
idx if idx < 0 => string_len.checked_sub(idx_abs).unwrap_or(0),
118+
// A positive index is simply the starting point. Max starting point
119+
// is the length, which will yield an empty string.
120+
_ => cmp::min(string_len, idx_abs),
121+
};
122+
123+
let end_idx = match limit {
124+
None => string_len,
125+
Some(l) => {
126+
let limit_abs: usize = l.abs().try_into().map_err(|e| Error::InvalidArgument {
127+
value: limit_opt.or(Some(&NULL)).map(|v| v.clone()).unwrap(),
128+
operation: "substr".into(),
129+
reason: format!(
130+
"The number {} is too large to index strings on this system",
131+
e
132+
),
133+
})?;
134+
match l {
135+
// If the limit is negative, it means "characters before the end
136+
// at which to stop", corresponding to an index of either 0 or
137+
// the length of the string minus the limit.
138+
l if l < 0 => string_len.checked_sub(limit_abs).unwrap_or(0),
139+
// A positive limit indicates the number of characters to take,
140+
// so it corresponds to an index of the start index plus the
141+
// limit (with a maximum value of the string length).
142+
_ => cmp::min(
143+
string_len,
144+
start_idx.checked_add(limit_abs).unwrap_or(string_len),
145+
),
146+
}
147+
}
148+
};
149+
150+
let count_in_substr = end_idx.checked_sub(start_idx).unwrap_or(0);
151+
152+
// Iter over our expected count rather than indexing directly to avoid
153+
// potential panics if any of our math is wrong.
154+
Ok(Value::String(
155+
string
156+
.chars()
157+
.skip(start_idx)
158+
.take(count_in_substr)
159+
.collect(),
160+
))
161+
}

0 commit comments

Comments
 (0)