Skip to content

Commit f73fb82

Browse files
authored
feat: add respective json_is UDFs for JSON type (GreptimeTeam#4726)
* feat: add respective json_is UDFs * refactor: rename to_json to parse_json * chore: happy clippy * chore: some rename * fix: small fixes
1 parent 50b3bb4 commit f73fb82

File tree

12 files changed

+688
-144
lines changed

12 files changed

+688
-144
lines changed

src/common/function/src/scalars/json.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,16 @@
1414

1515
use std::sync::Arc;
1616
mod json_get;
17+
mod json_is;
1718
mod json_to_string;
18-
mod to_json;
19+
mod parse_json;
1920

2021
use json_get::{JsonGetBool, JsonGetFloat, JsonGetInt, JsonGetString};
22+
use json_is::{
23+
JsonIsArray, JsonIsBool, JsonIsFloat, JsonIsInt, JsonIsNull, JsonIsObject, JsonIsString,
24+
};
2125
use json_to_string::JsonToStringFunction;
22-
use to_json::ToJsonFunction;
26+
use parse_json::ParseJsonFunction;
2327

2428
use crate::function_registry::FunctionRegistry;
2529

@@ -28,11 +32,19 @@ pub(crate) struct JsonFunction;
2832
impl JsonFunction {
2933
pub fn register(registry: &FunctionRegistry) {
3034
registry.register(Arc::new(JsonToStringFunction));
31-
registry.register(Arc::new(ToJsonFunction));
35+
registry.register(Arc::new(ParseJsonFunction));
3236

3337
registry.register(Arc::new(JsonGetInt));
3438
registry.register(Arc::new(JsonGetFloat));
3539
registry.register(Arc::new(JsonGetString));
3640
registry.register(Arc::new(JsonGetBool));
41+
42+
registry.register(Arc::new(JsonIsNull));
43+
registry.register(Arc::new(JsonIsInt));
44+
registry.register(Arc::new(JsonIsFloat));
45+
registry.register(Arc::new(JsonIsString));
46+
registry.register(Arc::new(JsonIsBool));
47+
registry.register(Arc::new(JsonIsArray));
48+
registry.register(Arc::new(JsonIsObject));
3749
}
3850
}

src/common/function/src/scalars/json/json_get.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
4747
/// If the path does not exist or the value is not the type specified, return `NULL`.
4848
macro_rules! json_get {
4949
// e.g. name = JsonGetInt, type = Int64, rust_type = i64, doc = "Get the value from the JSONB by the given path and return it as an integer."
50-
($name: ident, $type: ident, $rust_type: ident, $doc:expr) => {
50+
($name:ident, $type:ident, $rust_type:ident, $doc:expr) => {
5151
paste::paste! {
5252
#[doc = $doc]
5353
#[derive(Clone, Debug, Default)]
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
// Copyright 2023 Greptime Team
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::fmt::{self, Display};
16+
17+
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
18+
use common_query::prelude::Signature;
19+
use datafusion::logical_expr::Volatility;
20+
use datatypes::data_type::ConcreteDataType;
21+
use datatypes::prelude::VectorRef;
22+
use datatypes::scalars::ScalarVectorBuilder;
23+
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
24+
use snafu::ensure;
25+
26+
use crate::function::{Function, FunctionContext};
27+
28+
/// Generates a scalar function that checks whether a JSONB value is of a given JSON type.
///
/// Arguments:
/// - `$name`: identifier of the generated unit struct. Its snake_case form is used as the
///   function name (e.g. `JsonIsInt` -> `json_is_int`) and, upper-cased, as its `Display` output.
/// - `$json_type`: suffix of the `jsonb::is_*` predicate to call (e.g. `i64` -> `jsonb::is_i64`).
/// - `$doc`: documentation string attached to the generated struct.
///
/// The generated function accepts exactly one JSON column and returns a boolean vector of the
/// same length. A row for which `as_binary` does not yield a value produces a `None` entry.
macro_rules! json_is {
    ($name:ident, $json_type:ident, $doc:expr) => {
        paste::paste! {
            #[doc = $doc]
            #[derive(Clone, Debug, Default)]
            pub struct $name;

            impl Function for $name {
                fn name(&self) -> &str {
                    stringify!([<$name:snake>])
                }

                fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
                    Ok(ConcreteDataType::boolean_datatype())
                }

                fn signature(&self) -> Signature {
                    Signature::exact(vec![ConcreteDataType::json_datatype()], Volatility::Immutable)
                }

                fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
                    ensure!(
                        columns.len() == 1,
                        InvalidFuncArgsSnafu {
                            err_msg: format!(
                                "The length of the args is not correct, expect exactly one, have: {}",
                                columns.len()
                            ),
                        }
                    );

                    let jsons = &columns[0];
                    let size = jsons.len();
                    let mut results = BooleanVectorBuilder::with_capacity(size);

                    match jsons.data_type() {
                        // JSON data type uses binary vector
                        ConcreteDataType::Binary(_) => {
                            for i in 0..size {
                                let value = jsons.get_ref(i);
                                // Anything other than a successfully read binary value maps to `None`.
                                let result = match value.as_binary() {
                                    Ok(Some(bytes)) => Some(jsonb::[<is_ $json_type>](bytes)),
                                    _ => None,
                                };
                                results.push(result);
                            }
                        }
                        _ => {
                            return UnsupportedInputDataTypeSnafu {
                                function: stringify!([<$name:snake>]),
                                datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
                            }
                            .fail();
                        }
                    }

                    Ok(results.to_vector())
                }
            }

            impl Display for $name {
                fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                    write!(f, "{}", stringify!([<$name:snake>]).to_ascii_uppercase())
                }
            }
        }
    }
}
100+
101+
json_is!(JsonIsNull, null, "Checks if the input JSONB is null");
102+
json_is!(
103+
JsonIsBool,
104+
boolean,
105+
"Checks if the input JSONB is a boolean type JSON value"
106+
);
107+
json_is!(
108+
JsonIsInt,
109+
i64,
110+
"Checks if the input JSONB is a integer type JSON value"
111+
);
112+
json_is!(
113+
JsonIsFloat,
114+
number,
115+
"Checks if the input JSONB is a JSON float"
116+
);
117+
json_is!(
118+
JsonIsString,
119+
string,
120+
"Checks if the input JSONB is a JSON string"
121+
);
122+
json_is!(
123+
JsonIsArray,
124+
array,
125+
"Checks if the input JSONB is a JSON array"
126+
);
127+
json_is!(
128+
JsonIsObject,
129+
object,
130+
"Checks if the input JSONB is a JSON object"
131+
);
132+
133+
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datatypes::scalars::ScalarVector;
    use datatypes::vectors::BinaryVector;

    use super::*;

    /// Verifies name, return type, signature and per-row results of every `json_is_*` function.
    #[test]
    fn test_json_is_functions() {
        // The three tables below are index-aligned: entry `k` of each one refers to the same
        // function. Explicit array lengths make a mismatch a compile error instead of letting
        // `zip` silently drop trailing entries.
        let json_is_functions: [&dyn Function; 7] = [
            &JsonIsNull,
            &JsonIsBool,
            &JsonIsInt,
            &JsonIsFloat,
            &JsonIsString,
            &JsonIsArray,
            &JsonIsObject,
        ];
        let expected_names: [&str; 7] = [
            "json_is_null",
            "json_is_bool",
            "json_is_int",
            "json_is_float",
            "json_is_string",
            "json_is_array",
            "json_is_object",
        ];
        for (func, expected_name) in json_is_functions.iter().zip(expected_names.iter()) {
            assert_eq!(func.name(), *expected_name);
            assert_eq!(
                func.return_type(&[ConcreteDataType::json_datatype()])
                    .unwrap(),
                ConcreteDataType::boolean_datatype()
            );
            assert_eq!(
                func.signature(),
                Signature::exact(
                    vec![ConcreteDataType::json_datatype()],
                    Volatility::Immutable
                )
            );
        }

        // One input per JSON type, in the same order as the functions above.
        let json_strings: [&str; 7] = [
            r#"null"#,
            r#"true"#,
            r#"1"#,
            r#"1.0"#,
            r#""The pig fly through a castle, and has been attracted by the princess.""#,
            r#"[1, 2]"#,
            r#"{"a": 1}"#,
        ];
        // Row `k` holds the expected output of function `k` for each input in `json_strings`.
        let expected_results: [[bool; 7]; 7] = [
            [true, false, false, false, false, false, false],
            [false, true, false, false, false, false, false],
            [false, false, true, false, false, false, false],
            // Integers are also floats
            [false, false, true, true, false, false, false],
            [false, false, false, false, true, false, false],
            [false, false, false, false, false, true, false],
            [false, false, false, false, false, false, true],
        ];

        let jsonbs = json_strings
            .iter()
            .map(|s| {
                let value = jsonb::parse_value(s.as_bytes()).unwrap();
                value.to_vec()
            })
            .collect::<Vec<_>>();
        let json_vector = BinaryVector::from_vec(jsonbs);
        let args: Vec<VectorRef> = vec![Arc::new(json_vector)];

        for (func, expected_result) in json_is_functions.iter().zip(expected_results.iter()) {
            let vector = func.eval(FunctionContext::default(), &args).unwrap();
            assert_eq!(vector.len(), json_strings.len());

            for (i, expected) in expected_result.iter().enumerate() {
                let result = vector.get_ref(i);
                let result = result.as_boolean().unwrap().unwrap();
                assert_eq!(
                    result,
                    *expected,
                    "{} on input {}",
                    func.name(),
                    json_strings[i]
                );
            }
        }
    }
}

src/common/function/src/scalars/json/json_to_string.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ mod tests {
119119
use super::*;
120120

121121
#[test]
122-
fn test_get_by_path_function() {
122+
fn test_json_to_string_function() {
123123
let json_to_string = JsonToStringFunction;
124124

125125
assert_eq!("json_to_string", json_to_string.name());

src/common/function/src/scalars/json/to_json.rs renamed to src/common/function/src/scalars/json/parse_json.rs

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ use crate::function::{Function, FunctionContext};
2727

2828
/// Parses the `String` into `JSONB`.
2929
#[derive(Clone, Debug, Default)]
30-
pub struct ToJsonFunction;
30+
pub struct ParseJsonFunction;
3131

32-
const NAME: &str = "to_json";
32+
const NAME: &str = "parse_json";
3333

34-
impl Function for ToJsonFunction {
34+
impl Function for ParseJsonFunction {
3535
fn name(&self) -> &str {
3636
NAME
3737
}
@@ -101,9 +101,9 @@ impl Function for ToJsonFunction {
101101
}
102102
}
103103

104-
impl Display for ToJsonFunction {
104+
impl Display for ParseJsonFunction {
105105
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
106-
write!(f, "TO_JSON")
106+
write!(f, "PARSE_JSON")
107107
}
108108
}
109109

@@ -119,17 +119,17 @@ mod tests {
119119

120120
#[test]
121121
fn test_get_by_path_function() {
122-
let to_json = ToJsonFunction;
122+
let parse_json = ParseJsonFunction;
123123

124-
assert_eq!("to_json", to_json.name());
124+
assert_eq!("parse_json", parse_json.name());
125125
assert_eq!(
126126
ConcreteDataType::json_datatype(),
127-
to_json
127+
parse_json
128128
.return_type(&[ConcreteDataType::json_datatype()])
129129
.unwrap()
130130
);
131131

132-
assert!(matches!(to_json.signature(),
132+
assert!(matches!(parse_json.signature(),
133133
Signature {
134134
type_signature: TypeSignature::Exact(valid_types),
135135
volatility: Volatility::Immutable
@@ -152,13 +152,12 @@ mod tests {
152152

153153
let json_string_vector = StringVector::from_vec(json_strings.to_vec());
154154
let args: Vec<VectorRef> = vec![Arc::new(json_string_vector)];
155-
let vector = to_json.eval(FunctionContext::default(), &args).unwrap();
155+
let vector = parse_json.eval(FunctionContext::default(), &args).unwrap();
156156

157157
assert_eq!(3, vector.len());
158158
for (i, gt) in jsonbs.iter().enumerate() {
159159
let result = vector.get_ref(i);
160160
let result = result.as_binary().unwrap().unwrap();
161-
// remove whitespaces
162161
assert_eq!(gt, result);
163162
}
164163
}

tests/cases/standalone/common/function/json.sql

Lines changed: 0 additions & 56 deletions
This file was deleted.

0 commit comments

Comments
 (0)