Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add json data type #4619

Merged
merged 30 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
7836e03
feat: add json type and vector
CookiePieWw Aug 22, 2024
ed2f07d
fix: allow to create and insert json data
CookiePieWw Aug 23, 2024
f910b37
feat: udf to query json as string
CookiePieWw Aug 26, 2024
c334919
refactor: remove JsonbValue and JsonVector
CookiePieWw Aug 26, 2024
b2585ac
feat: show json value as strings
CookiePieWw Aug 27, 2024
45c853b
chore: make ci happy
CookiePieWw Aug 27, 2024
5e9e417
test: adunit test and sqlness test
CookiePieWw Aug 28, 2024
092ee0c
refactor: use binary as grpc value of json
CookiePieWw Aug 28, 2024
6d5241d
Merge branch 'main' into json
CookiePieWw Aug 28, 2024
cb88312
fix: use non-preserve-order jsonb
CookiePieWw Aug 29, 2024
7ceddd0
test: revert changed test
CookiePieWw Aug 29, 2024
26e8d1a
refactor: change udf get_by_path to jq
CookiePieWw Aug 29, 2024
d3752b3
chore: make ci happy
CookiePieWw Aug 29, 2024
601f647
fix: distinguish binary and json in proto
CookiePieWw Aug 30, 2024
13c370c
chore: delete udf for future pr
CookiePieWw Aug 30, 2024
619f490
refactor: remove Value(Json)
CookiePieWw Aug 30, 2024
9f07158
chore: follow review comments
CookiePieWw Aug 30, 2024
1c279e4
test: some tests and checks
CookiePieWw Aug 30, 2024
ae85538
test: fix unit tests
CookiePieWw Aug 31, 2024
411cf8f
chore: follow review comments
CookiePieWw Sep 2, 2024
3ca3d42
chore: corresponding changes to proto
CookiePieWw Sep 4, 2024
1f2d751
fix: change grpc and pgsql server behavior alongside with sqlness/cru…
CookiePieWw Sep 4, 2024
5645c63
chore: follow review comments
CookiePieWw Sep 5, 2024
d908a46
feat: udf of conversions between json and strings, used for grpc server
CookiePieWw Sep 5, 2024
34c15b0
refactor: rename to_string to json_to_string
CookiePieWw Sep 5, 2024
94e57be
chore: resolve merge conflict
CookiePieWw Sep 5, 2024
315b1e6
test: add more sqlness test for json
CookiePieWw Sep 5, 2024
ac3f9e1
chore: thanks for review :)
CookiePieWw Sep 5, 2024
65b71ce
chore: merge main into json
CookiePieWw Sep 9, 2024
9141eaa
Apply suggestions from code review
WenyXu Sep 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,11 @@ etcd-client = { version = "0.13" }
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "c437b55725b7f5224fe9d46db21072b4a682ee4b" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "c7247fbc2c6058a5cdba47182e02b4943e54e220" }
humantime = "2.1"
humantime-serde = "1.1"
itertools = "0.10"
jsonb = { git = "https://github.com/CookiePieWw/jsonb.git", rev = "d0166c130fce903bf6c58643417a3173a6172d31", default-features = false }
lazy_static = "1.4"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" }
mockall = "0.11.4"
Expand Down
41 changes: 35 additions & 6 deletions src/api/src/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ use greptime_proto::v1::greptime_request::Request;
use greptime_proto::v1::query_request::Query;
use greptime_proto::v1::value::ValueData;
use greptime_proto::v1::{
ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, QueryRequest, Row, SemanticType,
ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonTypeExtension, QueryRequest,
Row, SemanticType,
};
use paste::paste;
use snafu::prelude::*;
Expand Down Expand Up @@ -103,7 +104,17 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
ColumnDataType::Uint64 => ConcreteDataType::uint64_datatype(),
ColumnDataType::Float32 => ConcreteDataType::float32_datatype(),
ColumnDataType::Float64 => ConcreteDataType::float64_datatype(),
ColumnDataType::Binary => ConcreteDataType::binary_datatype(),
ColumnDataType::Binary => {
if let Some(TypeExt::JsonType(_)) = datatype_wrapper
.datatype_ext
.as_ref()
.and_then(|datatype_ext| datatype_ext.type_ext.as_ref())
{
ConcreteDataType::json_datatype()
} else {
ConcreteDataType::binary_datatype()
}
}
ColumnDataType::String => ConcreteDataType::string_datatype(),
ColumnDataType::Date => ConcreteDataType::date_datatype(),
ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
Expand Down Expand Up @@ -236,7 +247,7 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
ConcreteDataType::UInt64(_) => ColumnDataType::Uint64,
ConcreteDataType::Float32(_) => ColumnDataType::Float32,
ConcreteDataType::Float64(_) => ColumnDataType::Float64,
ConcreteDataType::Binary(_) => ColumnDataType::Binary,
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => ColumnDataType::Binary,
ConcreteDataType::String(_) => ColumnDataType::String,
ConcreteDataType::Date(_) => ColumnDataType::Date,
ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
Expand Down Expand Up @@ -276,6 +287,16 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
})),
})
}
ColumnDataType::Binary => {
if datatype == ConcreteDataType::json_datatype() {
// Json is the same as binary in proto. The extension marks the binary in proto is actually a json.
Some(ColumnDataTypeExtension {
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
})
} else {
None
}
}
_ => None,
};
Ok(Self {
Expand Down Expand Up @@ -649,7 +670,8 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
ConcreteDataType::Null(_)
| ConcreteDataType::List(_)
| ConcreteDataType::Dictionary(_)
| ConcreteDataType::Duration(_) => {
| ConcreteDataType::Duration(_)
| ConcreteDataType::Json(_) => {
WenyXu marked this conversation as resolved.
Show resolved Hide resolved
unreachable!()
}
}
Expand Down Expand Up @@ -813,7 +835,8 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
ConcreteDataType::Null(_)
| ConcreteDataType::List(_)
| ConcreteDataType::Dictionary(_)
| ConcreteDataType::Duration(_) => {
| ConcreteDataType::Duration(_)
| ConcreteDataType::Json(_) => {
WenyXu marked this conversation as resolved.
Show resolved Hide resolved
unreachable!()
}
}
Expand All @@ -831,7 +854,13 @@ pub fn is_column_type_value_eq(
expect_type: &ConcreteDataType,
) -> bool {
ColumnDataTypeWrapper::try_new(type_value, type_extension)
.map(|wrapper| ConcreteDataType::from(wrapper) == *expect_type)
.map(|wrapper| {
let datatype = ConcreteDataType::from(wrapper);
(datatype == *expect_type)
// Json type leverage binary type in pb, so this is valid.
|| (datatype == ConcreteDataType::binary_datatype()
&& *expect_type == ConcreteDataType::json_datatype())
WenyXu marked this conversation as resolved.
Show resolved Hide resolved
})
.unwrap_or(false)
}

Expand Down
1 change: 1 addition & 0 deletions src/common/function/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ common-time.workspace = true
common-version.workspace = true
datafusion.workspace = true
datatypes.workspace = true
jsonb.workspace = true
num = "0.4"
num-traits = "0.2"
once_cell.workspace = true
Expand Down
1 change: 1 addition & 0 deletions src/common/function/src/scalars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pub mod expression;
pub mod matches;
pub mod math;
pub mod numpy;

#[cfg(test)]
pub(crate) mod test;
pub(crate) mod timestamp;
Expand Down
2 changes: 1 addition & 1 deletion src/common/grpc/src/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ macro_rules! convert_arrow_array_to_grpc_vals {
return Ok(vals);
},
)+
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) | ConcreteDataType::Duration(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) | ConcreteDataType::Duration(_) | ConcreteDataType::Json(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
}
}};
}
Expand Down
2 changes: 1 addition & 1 deletion src/datanode/src/region_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ impl FlightCraft for RegionServer {
.trace(tracing_context.attach(info_span!("RegionServer::handle_read")))
.await?;

let stream = Box::pin(FlightRecordBatchStream::new(result, tracing_context));
let stream = Box::pin(FlightRecordBatchStream::new(result, tracing_context, false));
Ok(Response::new(stream))
}
}
Expand Down
1 change: 1 addition & 0 deletions src/datatypes/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ common-telemetry.workspace = true
common-time.workspace = true
datafusion-common.workspace = true
enum_dispatch = "0.3"
jsonb.workspace = true
num = "0.4"
num-traits = "0.2"
ordered-float = { version = "3.0", features = ["serde"] }
Expand Down
15 changes: 12 additions & 3 deletions src/datatypes/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ use crate::types::{
BinaryType, BooleanType, DateTimeType, DateType, Decimal128Type, DictionaryType,
DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, DurationSecondType,
DurationType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, ListType,
NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType,
ListType, NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
UInt16Type, UInt32Type, UInt64Type, UInt8Type,
};
Expand Down Expand Up @@ -81,6 +81,9 @@ pub enum ConcreteDataType {
// Compound types:
List(ListType),
Dictionary(DictionaryType),

// JSON type:
Json(JsonType),
}

impl fmt::Display for ConcreteDataType {
Expand Down Expand Up @@ -128,6 +131,7 @@ impl fmt::Display for ConcreteDataType {
ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
ConcreteDataType::List(v) => write!(f, "{}", v.name()),
ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
}
}
}
Expand Down Expand Up @@ -162,6 +166,7 @@ impl ConcreteDataType {
| ConcreteDataType::Duration(_)
| ConcreteDataType::Decimal128(_)
| ConcreteDataType::Binary(_)
| ConcreteDataType::Json(_)
)
}

Expand Down Expand Up @@ -216,6 +221,10 @@ impl ConcreteDataType {
matches!(self, ConcreteDataType::Decimal128(_))
}

pub fn is_json(&self) -> bool {
matches!(self, ConcreteDataType::Json(_))
}

pub fn numerics() -> Vec<ConcreteDataType> {
vec![
ConcreteDataType::int8_datatype(),
Expand Down Expand Up @@ -404,7 +413,7 @@ macro_rules! impl_new_concrete_type_functions {

impl_new_concrete_type_functions!(
Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
Binary, Date, DateTime, String
Binary, Date, DateTime, String, Json
);

impl ConcreteDataType {
Expand Down
11 changes: 10 additions & 1 deletion src/datatypes/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use datafusion_common::DFSchemaRef;
use snafu::{ensure, ResultExt};

use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result};
use crate::prelude::DataType;
pub use crate::schema::column_schema::{
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, COMMENT_KEY, FULLTEXT_KEY,
TIME_INDEX_KEY,
Expand All @@ -34,6 +35,8 @@ pub use crate::schema::raw::RawSchema;

/// Key used to store version number of the schema in metadata.
pub const VERSION_KEY: &str = "greptime:version";
/// Key used to store actual column type in field metadata.
pub const TYPE_KEY: &str = "greptime:type";

/// A common schema, should be immutable.
#[derive(Clone, PartialEq, Eq)]
Expand Down Expand Up @@ -256,7 +259,13 @@ fn collect_fields(column_schemas: &[ColumnSchema]) -> Result<FieldsAndIndices> {
if column_schema.is_time_index() && timestamp_index.is_none() {
timestamp_index = Some(index);
}
let field = Field::try_from(column_schema)?;
let mut field = Field::try_from(column_schema)?;

// Json column performs the same as binary column in Arrow, so we need to mark it
if column_schema.data_type.is_json() {
let metadata = HashMap::from([(TYPE_KEY.to_string(), column_schema.data_type.name())]);
field = field.with_metadata(metadata);
}
fields.push(field);
ensure!(
name_to_index
Expand Down
40 changes: 39 additions & 1 deletion src/datatypes/src/schema/column_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use snafu::{ensure, ResultExt};
use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Error, Result};
use crate::schema::constraint::ColumnDefaultConstraint;
use crate::schema::TYPE_KEY;
use crate::value::Value;
use crate::vectors::VectorRef;

Expand All @@ -34,6 +35,8 @@ pub const COMMENT_KEY: &str = "greptime:storage:comment";
const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
/// Key used to store fulltext options in arrow field's metadata.
pub const FULLTEXT_KEY: &str = "greptime:fulltext";
/// Value used to identify column data type from arrow field's metadata. It should be the same as `ConcreteDataType::json_datatype().name()`.
pub const JSON_TYPE_VALUE: &str = "Json";
discord9 marked this conversation as resolved.
Show resolved Hide resolved

/// Schema of a column, used as an immutable struct.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
Expand Down Expand Up @@ -268,7 +271,14 @@ impl TryFrom<&Field> for ColumnSchema {
type Error = Error;

fn try_from(field: &Field) -> Result<ColumnSchema> {
let data_type = ConcreteDataType::try_from(field.data_type())?;
let mut data_type = ConcreteDataType::try_from(field.data_type())?;
// Override the data type if it is specified in the metadata.
if field.metadata().contains_key(TYPE_KEY) {
data_type = match field.metadata().get(TYPE_KEY).unwrap().as_str() {
JSON_TYPE_VALUE => ConcreteDataType::json_datatype(),
_ => data_type,
};
}
let mut metadata = field.metadata().clone();
let default_constraint = match metadata.remove(DEFAULT_CONSTRAINT_KEY) {
Some(json) => {
Expand Down Expand Up @@ -528,4 +538,32 @@ mod tests {
assert_eq!(formatted_int8, "test_column_1 Int8 null");
assert_eq!(formatted_int32, "test_column_2 Int32 not null");
}

#[test]
fn test_from_field_to_column_schema() {
let field = Field::new("test", ArrowDataType::Int32, true);
let column_schema = ColumnSchema::try_from(&field).unwrap();
assert_eq!("test", column_schema.name);
assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type);
assert!(column_schema.is_nullable);
assert!(!column_schema.is_time_index);
assert!(column_schema.default_constraint.is_none());
assert!(column_schema.metadata.is_empty());

let field = Field::new("test", ArrowDataType::Binary, true);
let field = field.with_metadata(Metadata::from([(
TYPE_KEY.to_string(),
ConcreteDataType::json_datatype().name(),
)]));
let column_schema = ColumnSchema::try_from(&field).unwrap();
assert_eq!("test", column_schema.name);
assert_eq!(ConcreteDataType::json_datatype(), column_schema.data_type);
assert!(column_schema.is_nullable);
assert!(!column_schema.is_time_index);
assert!(column_schema.default_constraint.is_none());
assert_eq!(
column_schema.metadata.get(TYPE_KEY).unwrap(),
&ConcreteDataType::json_datatype().name()
);
}
}
Loading