Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 22 additions & 10 deletions datafusion/optimizer/src/simplify_expressions/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,20 +283,23 @@ fn partial_anchored_literal_to_like(v: &[Hir]) -> Option<String> {

/// Extracts a string literal expression assuming that [`is_anchored_literal`]
/// returned true.
fn anchored_literal_to_expr(v: &[Hir]) -> Option<Expr> {
fn anchored_literal_to_expr(v: &[Hir], string_scalar: &StringScalar) -> Option<Expr> {
match v.len() {
2 => Some(lit("")),
2 => Some(string_scalar.to_expr("")),
3 => {
let HirKind::Literal(l) = v[1].kind() else {
return None;
};
like_str_from_literal(l).map(lit)
like_str_from_literal(l).map(|s| string_scalar.to_expr(s))
}
_ => None,
}
}

fn anchored_alternation_to_exprs(v: &[Hir]) -> Option<Vec<Expr>> {
fn anchored_alternation_to_exprs(
v: &[Hir],
string_scalar: &StringScalar,
) -> Option<Vec<Expr>> {
if 3 != v.len() {
return None;
}
Expand All @@ -308,7 +311,8 @@ fn anchored_alternation_to_exprs(v: &[Hir]) -> Option<Vec<Expr>> {
for hir in alters {
let mut is_safe = false;
if let HirKind::Literal(l) = hir.kind()
&& let Some(safe_literal) = str_from_literal(l).map(lit)
&& let Some(safe_literal) =
str_from_literal(l).map(|s| string_scalar.to_expr(s))
{
literals.push(safe_literal);
is_safe = true;
Expand All @@ -321,7 +325,9 @@ fn anchored_alternation_to_exprs(v: &[Hir]) -> Option<Vec<Expr>> {

return Some(literals);
} else if let HirKind::Literal(l) = sub.kind() {
if let Some(safe_literal) = str_from_literal(l).map(lit) {
if let Some(safe_literal) =
str_from_literal(l).map(|s| string_scalar.to_expr(s))
{
return Some(vec![safe_literal]);
}
return None;
Expand Down Expand Up @@ -351,12 +357,18 @@ fn lower_simple(
));
}
HirKind::Concat(inner) if is_anchored_literal(inner) => {
return anchored_literal_to_expr(inner).map(|right| {
mode.expr_matches_literal(Box::new(left.clone()), Box::new(right))
return anchored_literal_to_expr(inner, string_scalar).map(|right| {
if mode.i {
// Case-insensitive: use ILIKE for exact match (no wildcards)
mode.expr(Box::new(left.clone()), Box::new(right))
} else {
// Case-sensitive: use Eq / NotEq
mode.expr_matches_literal(Box::new(left.clone()), Box::new(right))
}
});
}
HirKind::Concat(inner) if is_anchored_capture(inner) => {
return anchored_alternation_to_exprs(inner)
HirKind::Concat(inner) if !mode.i && is_anchored_capture(inner) => {
return anchored_alternation_to_exprs(inner, string_scalar)
.map(|right| left.clone().in_list(right, mode.not));
}
HirKind::Concat(inner) => {
Expand Down
53 changes: 53 additions & 0 deletions datafusion/sqllogictest/test_files/predicates.slt
Original file line number Diff line number Diff line change
Expand Up @@ -204,12 +204,65 @@ SELECT * FROM test WHERE column1 ~ 'z'
----
Bazzz

query T
SELECT * FROM test WHERE column1 ~ '^Bazzz$'
----
Bazzz

query T
SELECT * FROM test WHERE column1 ~ '^(foo|Bazzz)$'
----
foo
Bazzz

statement ok
CREATE TABLE test_regex_utf8view(s VARCHAR) AS VALUES ('foo'), ('Bazzz');

query T
SELECT * FROM test_regex_utf8view WHERE s ~ '^Bazzz$'
----
Bazzz

query T
SELECT * FROM test_regex_utf8view WHERE s ~ '^(foo|Bazzz)$'
----
foo
Bazzz

# Case-insensitive anchored match over Utf8View: must be simplified to ILIKE
# (not a case-sensitive Eq) and must keep operand types as Utf8View.
query T
SELECT * FROM test_regex_utf8view WHERE s ~* '^bazzz$'
----
Bazzz

# Case-insensitive anchored alternation over Utf8View
query T rowsort
SELECT * FROM test_regex_utf8view WHERE s ~* '^(foo|bazzz)$'
----
Bazzz
foo

statement ok
DROP TABLE test_regex_utf8view;

query T
SELECT * FROM test WHERE column1 ~* 'z'
----
Bazzz
ZZZZZ

query T
SELECT * FROM test WHERE column1 ~* '^barrr$'
----
Barrr

query T
SELECT * FROM test WHERE column1 ~* '^(barrr|bazzz)$'
----
Barrr
Bazzz

query T
SELECT * FROM test WHERE column1 !~ 'z'
----
Expand Down
Loading