Skip to content

Commit

Permalink
[SQL] support split_part for postgres string
Browse files Browse the repository at this point in the history
Signed-off-by: qzhu <[email protected]>
  • Loading branch information
zhuqi-lucas committed Nov 17, 2024
1 parent f6fb45f commit 846f673
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 1 deletion.
20 changes: 20 additions & 0 deletions crates/sqllib/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,26 @@ pub fn split1_(source: String) -> Vec<String> {

some_function1!(split1, String, Vec<String>);

/// SQL `SPLIT_PART(s, delimiter, n)`: returns the `n`-th field obtained by
/// splitting `s` with `split2__(s, delimiter)`.
///
/// Fields are numbered from 1. A negative `n` counts from the end, so `-1`
/// selects the last field. If `n` is 0 or points outside the list of fields,
/// the empty string is returned.
pub fn split_part___(s: String, delimiter: String, n: i32) -> String {
    let parts: Vec<String> = split2__(s, delimiter);

    // Translate the 1-based, possibly negative position into a 0-based index.
    // The arithmetic is done in `usize` with checked operations so that
    // `parts.len()` never has to be narrowed to `i32`.
    let index = match n {
        0 => None,
        n if n > 0 => usize::try_from(n - 1).ok(),
        n => usize::try_from(n.unsigned_abs())
            .ok()
            .and_then(|from_end| parts.len().checked_sub(from_end)),
    };

    // Move the selected field out of the vector instead of cloning it;
    // `nth` yields `None` when the index is past the last field.
    index
        .and_then(|i| parts.into_iter().nth(i))
        .unwrap_or_default()
}

// Macro invocation for `split_part`; the listed types match the signature of
// `split_part___` (String, String, i32 arguments and a String result).
// NOTE(review): presumably this generates the NULL-aware variants of the
// function — confirm against the definition of `some_function3!`.
some_function3!(split_part, String, String, i32, String);

/// Joins the elements of `value` into a single string, placing `separator`
/// between each pair of neighbouring elements.
pub fn array_to_string2_vec__(value: Vec<String>, separator: String) -> String {
    let pieces: &[String] = value.as_slice();
    pieces.join(separator.as_str())
}
Expand Down
8 changes: 8 additions & 0 deletions docs/sql/string.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,14 @@ addition to the normal way of `''`.
If delimiter is absent assume it is the string <code>','</code>.</td>
<td><code>SPLIT('a|b|c|', '|')</code> => { 'a', 'b', 'c', '' }</td>
</tr>
<tr>
<td><code>SPLIT_PART(string, delimiter, n)</code></td>
<td>Extracts the <code>n</code>'th part of the string, splitting it at each occurrence of the delimiter.
If <code>n</code> is negative, returns the <code>abs(n)</code>'th part counting from the end of the string. If <code>n</code> is 0 or out of bounds, returns an empty string.</td>
<td><code>SPLIT_PART('a|b|c|', '|', 2)</code> => <code>b</code><br></br>
<code>SPLIT_PART('a|b|c|', '|', -2)</code> => <code>c</code><br></br>
<code>SPLIT_PART('a|b|c|', '|', 5)</code> => <code>''</code></td>
</tr>
<tr>
<td><code>SUBSTRING (</code> string <code>[ FROM</code> start <code>] [ FOR</code> count<code> ] )</code></td>
<td>Extracts the substring of string starting at the "start"'th character if that is specified, and stopping after "count" characters if the value is specified. At least one of "start" or "count" must be provided. If "start" is negative, it is replaced with 1. If "count" is negative the empty string is returned. The index of the first character is 1.</td>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
import java.util.List;
import java.util.Objects;

import static org.apache.calcite.util.BuiltInMethod.SPLIT_PART;
import static org.dbsp.sqlCompiler.ir.type.DBSPTypeCode.NULL;
import static org.dbsp.sqlCompiler.ir.type.DBSPTypeCode.USER;

Expand Down Expand Up @@ -1047,6 +1048,12 @@ node, new DBSPTypeBool(CalciteObject.EMPTY, false), DBSPOpcode.EQ,
for (int i = 0; i < ops.size(); i++)
this.ensureString(ops, i);
return compileFunction(call, node, type, ops, 1, 2);
case "split_part": {
this.ensureString(ops, 0);
this.ensureString(ops, 1);
this.ensureInteger(ops, 2, 32);
return compileFunction(call, node, type, ops, 3);
}
case "overlay": {
// case "regexp_replace":
String module_prefix;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1065,7 +1065,90 @@ public void testReplace() {
yaoo""");
}

// TODO: split_part
/**
 * Checks SPLIT_PART on constant inputs: positions 1..3 and -1..-3 of a
 * three-field string, the out-of-range positions 4, -4 and 0 (empty result),
 * a delimiter equal to the whole input (empty fields), and a delimiter that
 * does not occur in the input (the whole string is field 1).
 * Empty results are written as {@code \s} in the expected output.
 * NOTE(review): presumably so the otherwise blank line survives text-block
 * whitespace stripping; confirm against the parsing done by {@code qs}.
 */
@Test
public void testSplitPart() {
this.qs("""
SELECT split_part('abc~@~def~@~ghi', '~@~', 1) AS result;
result
-------
abc
(1 row)
SELECT split_part('abc~@~def~@~ghi', '~@~', -1) AS result;
result
-------
ghi
(1 row)
SELECT split_part('abc~@~def~@~ghi', '~@~', 2) AS result;
result
-------
def
(1 row)
SELECT split_part('abc~@~def~@~ghi', '~@~', -2) AS result;
result
-------
def
(1 row)
SELECT split_part('abc~@~def~@~ghi', '~@~', 3) AS result;
result
-------
ghi
(1 row)
SELECT split_part('abc~@~def~@~ghi', '~@~', -3) AS result;
result
-------
abc
(1 row)
SELECT split_part('abc~@~def~@~ghi', '~@~', 4) AS result;
result
-------
\s
(1 row)
SELECT split_part('abc~@~def~@~ghi', '~@~', -4) AS result;
result
-------
\s
(1 row)
SELECT split_part('abc~@~def~@~ghi', '~@~', 0) AS result;
result
-------
\s
(1 row)
SELECT split_part('abc~@~def~@~ghi', 'abc~@~def~@~ghi', 1) AS result;
result
-------
\s
(1 row)
SELECT split_part('abc', 'abc', 1) AS result;
result
-------
\s
(1 row)
SELECT split_part('abc', 'abc', 2) AS result;
result
-------
\s
(1 row)
SELECT split_part('abc', 'n', 1) AS result;
result
-------
abc
(1 row)
""");
}


// TODO: to_hex
// TODO: sha, encode, decode

Expand Down

0 comments on commit 846f673

Please sign in to comment.