From 9ece92679df3cd0d111130731daf19a404ba15da Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Sun, 3 May 2026 12:20:47 +0200 Subject: [PATCH 1/6] Add nullability support for data type in c++ --- bindings/cpp/include/fluss.hpp | 13 +- bindings/cpp/src/ffi_converter.hpp | 64 ++++-- bindings/cpp/src/lib.rs | 3 + bindings/cpp/src/types.rs | 210 +++++++++++++------ bindings/cpp/test/test_ffi_converter.cpp | 176 +++++++++++++++- bindings/cpp/test/test_log_table.cpp | 63 ++++++ website/docs/user-guide/cpp/api-reference.md | 2 + website/docs/user-guide/cpp/data-types.md | 34 +++ 8 files changed, 479 insertions(+), 86 deletions(-) diff --git a/bindings/cpp/include/fluss.hpp b/bindings/cpp/include/fluss.hpp index f222166d..3d39783f 100644 --- a/bindings/cpp/include/fluss.hpp +++ b/bindings/cpp/include/fluss.hpp @@ -283,8 +283,8 @@ enum class TypeId { class DataType { public: - explicit DataType(TypeId id, int32_t p = 0, int32_t s = 0) - : id_(id), precision_(p), scale_(s) {} + explicit DataType(TypeId id, int32_t p = 0, int32_t s = 0, bool nullable = true) + : id_(id), precision_(p), scale_(s), nullable_(nullable) {} static DataType Boolean() { return DataType(TypeId::Boolean); } static DataType TinyInt() { return DataType(TypeId::TinyInt); } @@ -321,15 +321,24 @@ class DataType { TypeId id() const { return id_; } int32_t precision() const { return precision_; } int32_t scale() const { return scale_; } + bool nullable() const { return nullable_; } /// Returns the element type of an ARRAY. Returns `nullptr` for non-array /// types. The returned pointer is valid as long as this DataType (or a /// copy holding the same shared element) is alive. const DataType* element_type() const { return element_type_.get(); } + /// Returns a copy of this DataType with nullable set to false. + DataType NotNull() const { + DataType dt(id_, precision_, scale_, false); + dt.element_type_ = element_type_; + return dt; + } + private: TypeId id_; int32_t precision_{0}; int32_t scale_{0}; + bool nullable_{true}; std::shared_ptr element_type_; }; diff --git a/bindings/cpp/src/ffi_converter.hpp b/bindings/cpp/src/ffi_converter.hpp index 0ef1487a..8b8375d6 100644 --- a/bindings/cpp/src/ffi_converter.hpp +++ b/bindings/cpp/src/ffi_converter.hpp @@ -33,16 +33,19 @@ namespace utils { /// `nesting` counts the number of ARRAY wrappers stripped to reach the leaf /// element type. `leaf_type`/`leaf_precision`/`leaf_scale` describe that leaf /// scalar. A non-array input produces a zero-initialised value (nesting == 0). +/// `array_nullability` stores each ARRAY wrapper's nullability from outermost +/// to innermost, while `leaf_nullable` stores the scalar leaf nullability. /// /// Using a flat representation — rather than serialising a recursive -/// `DataType` — keeps the cxx bridge contract small (four `i32`s inside -/// `FfiColumn`) while preserving full schema fidelity across the FFI boundary -/// when paired with rebuild_array_type(). +/// `DataType` — keeps the cxx bridge contract small while preserving schema +/// fidelity across the FFI boundary when paired with rebuild_array_type(). struct FlattenedArrayType { int32_t nesting{0}; int32_t leaf_type{0}; int32_t leaf_precision{0}; int32_t leaf_scale{0}; + std::vector array_nullability; + bool leaf_nullable{true}; }; /// Flattens an `ARRAY>>` DataType into a FlattenedArrayType. @@ -53,7 +56,8 @@ struct FlattenedArrayType { /// - If `data_type` is an ARRAY but has a null element_type() chain (which /// should only happen on malformed input), returns a zero-valued result to /// signal the caller to reject the schema. -/// - Otherwise, `nesting >= 1` and leaf_* describe the innermost scalar. +/// - Otherwise, `nesting >= 1`, array_nullability has `nesting` entries, and +/// leaf_* describe the innermost scalar. inline FlattenedArrayType flatten_array_type(const DataType& data_type) { FlattenedArrayType out; if (data_type.id() != TypeId::Array) { @@ -63,6 +67,7 @@ inline FlattenedArrayType flatten_array_type(const DataType& data_type) { const DataType* current = &data_type; while (current && current->id() == TypeId::Array) { out.nesting += 1; + out.array_nullability.push_back(current->nullable() ? 1 : 0); current = current->element_type(); } if (!current) { @@ -72,16 +77,30 @@ inline FlattenedArrayType flatten_array_type(const DataType& data_type) { out.leaf_type = static_cast(current->id()); out.leaf_precision = current->precision(); out.leaf_scale = current->scale(); + out.leaf_nullable = current->nullable(); return out; } /// Inverse of flatten_array_type: rebuilds an `ARRAY>>` type /// from the compact flat form. Requires `flat.nesting >= 1`; callers handle /// the `nesting == 0` case by using a plain scalar DataType directly. -inline DataType rebuild_array_type(const FlattenedArrayType& flat) { - DataType dt(static_cast(flat.leaf_type), flat.leaf_precision, flat.leaf_scale); - for (int32_t i = 0; i < flat.nesting; ++i) { - dt = DataType::Array(std::move(dt)); +inline DataType rebuild_array_type(const FlattenedArrayType& flat, bool outer_nullable = true) { + DataType dt(static_cast(flat.leaf_type), flat.leaf_precision, flat.leaf_scale, + flat.leaf_nullable); + for (int32_t i = flat.nesting - 1; i >= 0; --i) { + bool nullable = true; + if (static_cast(i) < flat.array_nullability.size()) { + nullable = flat.array_nullability[static_cast(i)] != 0; + } else if (i == 0) { + // Backward compatibility for legacy metadata without per-level + // array nullability: preserve the top-level nullable bit. + nullable = outer_nullable; + } + auto arr = DataType::Array(std::move(dt)); + if (!nullable) { + arr = arr.NotNull(); + } + dt = std::move(arr); } return dt; } @@ -150,19 +169,25 @@ inline ffi::FfiColumn to_ffi_column(const Column& col) { ffi::FfiColumn ffi_col; ffi_col.name = rust::String(col.name); ffi_col.data_type = static_cast(col.data_type.id()); + ffi_col.nullable = col.data_type.nullable(); ffi_col.comment = rust::String(col.comment); ffi_col.precision = col.data_type.precision(); ffi_col.scale = col.data_type.scale(); auto flat = flatten_array_type(col.data_type); ffi_col.array_nesting = flat.nesting; + for (auto nullable : flat.array_nullability) { + ffi_col.array_nullability.push_back(nullable); + } if (flat.nesting > 0 && flat.leaf_type != 0) { ffi_col.element_data_type = flat.leaf_type; ffi_col.element_precision = flat.leaf_precision; ffi_col.element_scale = flat.leaf_scale; + ffi_col.element_nullable = flat.leaf_nullable; } else { ffi_col.element_data_type = 0; ffi_col.element_precision = 0; ffi_col.element_scale = 0; + ffi_col.element_nullable = true; } return ffi_col; } @@ -229,7 +254,6 @@ inline ffi::FfiTableDescriptor to_ffi_table_descriptor(const TableDescriptor& de inline Column from_ffi_column(const ffi::FfiColumn& ffi_col) { auto type_id = static_cast(ffi_col.data_type); - DataType dt(type_id, ffi_col.precision, ffi_col.scale); if (type_id == TypeId::Array) { if (ffi_col.element_data_type == 0) { throw std::runtime_error("Malformed ARRAY column '" + std::string(ffi_col.name) + @@ -273,13 +297,23 @@ inline Column from_ffi_column(const ffi::FfiColumn& ffi_col) { } int32_t nesting = ffi_col.array_nesting > 0 ? ffi_col.array_nesting : 1; - dt = rebuild_array_type(FlattenedArrayType{ - nesting, - ffi_col.element_data_type, - ffi_col.element_precision, - ffi_col.element_scale, - }); + std::vector array_nullability; + for (auto nullable : ffi_col.array_nullability) { + array_nullability.push_back(nullable); + } + auto dt = rebuild_array_type( + FlattenedArrayType{ + nesting, + ffi_col.element_data_type, + ffi_col.element_precision, + ffi_col.element_scale, + std::move(array_nullability), + ffi_col.element_nullable, + }, + ffi_col.nullable); + return Column{std::string(ffi_col.name), std::move(dt), std::string(ffi_col.comment)}; } + DataType dt(type_id, ffi_col.precision, ffi_col.scale, ffi_col.nullable); return Column{std::string(ffi_col.name), std::move(dt), std::string(ffi_col.comment)}; } diff --git a/bindings/cpp/src/lib.rs b/bindings/cpp/src/lib.rs index facb0e31..fba9cc10 100644 --- a/bindings/cpp/src/lib.rs +++ b/bindings/cpp/src/lib.rs @@ -83,13 +83,16 @@ mod ffi { struct FfiColumn { name: String, data_type: i32, + nullable: bool, comment: String, precision: i32, scale: i32, array_nesting: i32, + array_nullability: Vec, element_data_type: i32, element_precision: i32, element_scale: i32, + element_nullable: bool, } struct FfiSchema { diff --git a/bindings/cpp/src/types.rs b/bindings/cpp/src/types.rs index c15aadc9..e9e25404 100644 --- a/bindings/cpp/src/types.rs +++ b/bindings/cpp/src/types.rs @@ -41,16 +41,33 @@ pub const DATA_TYPE_CHAR: i32 = 15; pub const DATA_TYPE_BINARY: i32 = 16; pub const DATA_TYPE_ARRAY: i32 = 17; +struct FfiDataTypeSpec { + data_type: i32, + precision: u32, + scale: u32, + element_data_type: i32, + element_precision: u32, + element_scale: u32, + array_nesting: u32, + array_nullability: Vec, + nullable: bool, + element_nullable: bool, +} + fn ffi_column_to_core_data_type(col: &ffi::FfiColumn) -> Result { - ffi_data_type_to_core( - col.data_type, - col.precision as u32, - col.scale as u32, - col.element_data_type, - col.element_precision as u32, - col.element_scale as u32, - col.array_nesting.max(0) as u32, - ) + let dt = ffi_data_type_to_core(FfiDataTypeSpec { + data_type: col.data_type, + precision: col.precision as u32, + scale: col.scale as u32, + element_data_type: col.element_data_type, + element_precision: col.element_precision as u32, + element_scale: col.element_scale as u32, + array_nesting: col.array_nesting.max(0) as u32, + array_nullability: col.array_nullability.clone(), + nullable: col.nullable, + element_nullable: col.element_nullable, + })?; + Ok(dt) } fn type_precision_scale(dt: &fcore::metadata::DataType) -> (i32, i32) { @@ -64,11 +81,22 @@ fn type_precision_scale(dt: &fcore::metadata::DataType) -> (i32, i32) { } } -fn flatten_array_leaf_type(dt: &fcore::metadata::DataType) -> Result<(i32, i32, i32, i32)> { +struct FlattenedLeafType { + nesting: i32, + leaf_type: i32, + leaf_precision: i32, + leaf_scale: i32, + array_nullability: Vec, + leaf_nullable: bool, +} + +fn flatten_array_leaf_type(dt: &fcore::metadata::DataType) -> Result { let mut nesting = 0_i32; let mut leaf = dt; + let mut array_nullability = Vec::new(); while let fcore::metadata::DataType::Array(at) = leaf { nesting += 1; + array_nullability.push(u8::from(leaf.is_nullable())); leaf = at.get_element_type(); } if nesting == 0 { @@ -81,35 +109,47 @@ fn flatten_array_leaf_type(dt: &fcore::metadata::DataType) -> Result<(i32, i32, )); } let (leaf_precision, leaf_scale) = type_precision_scale(leaf); - Ok((nesting, leaf_type, leaf_precision, leaf_scale)) + Ok(FlattenedLeafType { + nesting, + leaf_type, + leaf_precision, + leaf_scale, + array_nullability, + leaf_nullable: leaf.is_nullable(), + }) } -fn build_array_type_from_leaf( - leaf_dt: i32, - leaf_precision: u32, - leaf_scale: u32, - nesting: u32, -) -> Result { - if nesting == 0 { +fn build_array_type_from_leaf(spec: &FfiDataTypeSpec) -> Result { + if spec.array_nesting == 0 { return Err(anyhow!("ARRAY nesting must be >= 1")); } - let mut dt = ffi_data_type_to_core(leaf_dt, leaf_precision, leaf_scale, 0, 0, 0, 0)?; - for _ in 0..nesting { - dt = fcore::metadata::DataTypes::array(dt); + let mut dt = ffi_data_type_to_core(FfiDataTypeSpec { + data_type: spec.element_data_type, + precision: spec.element_precision, + scale: spec.element_scale, + element_data_type: 0, + element_precision: 0, + element_scale: 0, + array_nesting: 0, + array_nullability: Vec::new(), + nullable: spec.element_nullable, + element_nullable: true, + })?; + for i in (0..spec.array_nesting).rev() { + let nullable = spec + .array_nullability + .get(i as usize) + .map(|value| *value != 0) + .unwrap_or_else(|| if i == 0 { spec.nullable } else { true }); + dt = fcore::metadata::DataType::Array(fcore::metadata::ArrayType::with_nullable( + nullable, dt, + )); } Ok(dt) } -fn ffi_data_type_to_core( - dt: i32, - precision: u32, - scale: u32, - element_dt: i32, - element_precision: u32, - element_scale: u32, - array_nesting: u32, -) -> Result { - match dt { +fn ffi_data_type_to_core(spec: FfiDataTypeSpec) -> Result { + let result = match spec.data_type { DATA_TYPE_BOOLEAN => Ok(fcore::metadata::DataTypes::boolean()), DATA_TYPE_TINYINT => Ok(fcore::metadata::DataTypes::tinyint()), DATA_TYPE_SMALLINT => Ok(fcore::metadata::DataTypes::smallint()), @@ -122,43 +162,53 @@ fn ffi_data_type_to_core( DATA_TYPE_DATE => Ok(fcore::metadata::DataTypes::date()), DATA_TYPE_TIME => Ok(fcore::metadata::DataTypes::time()), DATA_TYPE_TIMESTAMP => Ok(fcore::metadata::DataTypes::timestamp_with_precision( - precision, + spec.precision, )), DATA_TYPE_TIMESTAMP_LTZ => Ok(fcore::metadata::DataTypes::timestamp_ltz_with_precision( - precision, + spec.precision, )), DATA_TYPE_DECIMAL => { - let dt = fcore::metadata::DecimalType::new(precision, scale)?; + let dt = fcore::metadata::DecimalType::new(spec.precision, spec.scale)?; Ok(fcore::metadata::DataType::Decimal(dt)) } - DATA_TYPE_CHAR => Ok(fcore::metadata::DataTypes::char(precision)), - DATA_TYPE_BINARY => Ok(fcore::metadata::DataTypes::binary(precision as usize)), + DATA_TYPE_CHAR => Ok(fcore::metadata::DataTypes::char(spec.precision)), + DATA_TYPE_BINARY => Ok(fcore::metadata::DataTypes::binary(spec.precision as usize)), DATA_TYPE_ARRAY => { - if array_nesting > 0 { - build_array_type_from_leaf( - element_dt, - element_precision, - element_scale, - array_nesting, - ) + if spec.array_nesting > 0 { + return build_array_type_from_leaf(&spec); } else { - // Backward compatibility for older one-level metadata. - if element_dt == 0 { + // Legacy path for single-level arrays where array_nesting == 0. + // Modern code always sets array_nesting >= 1 and uses + // build_array_type_from_leaf above; this branch exists for + // backward compatibility with older metadata that only carried + // element_data_type without an explicit nesting count. + if spec.element_data_type == 0 { return Err(anyhow!("ARRAY requires element type metadata")); } - let element_type = ffi_data_type_to_core( - element_dt, - element_precision, - element_scale, - 0, - 0, - 0, - 0, - )?; - Ok(fcore::metadata::DataTypes::array(element_type)) + let element_type = ffi_data_type_to_core(FfiDataTypeSpec { + data_type: spec.element_data_type, + precision: spec.element_precision, + scale: spec.element_scale, + element_data_type: 0, + element_precision: 0, + element_scale: 0, + array_nesting: 0, + array_nullability: Vec::new(), + nullable: spec.element_nullable, + element_nullable: true, + })?; + let arr = fcore::metadata::ArrayType::with_nullable(spec.nullable, element_type); + return Ok(fcore::metadata::DataType::Array(arr)); } } - _ => Err(anyhow!("Unknown data type: {dt}")), + _ => Err(anyhow!("Unknown data type: {}", spec.data_type)), + }; + + let data_type = result?; + if spec.nullable { + Ok(data_type) + } else { + Ok(data_type.as_non_nullable()) } } @@ -188,24 +238,26 @@ pub fn core_data_type_to_ffi(dt: &fcore::metadata::DataType) -> i32 { fn core_column_to_ffi(col: &fcore::metadata::Column) -> ffi::FfiColumn { let (precision, scale) = type_precision_scale(col.data_type()); - let (array_nesting, element_data_type, element_precision, element_scale) = match col.data_type() - { - fcore::metadata::DataType::Array(_) => { - flatten_array_leaf_type(col.data_type()).unwrap_or((0, 0, 0, 0)) - } - _ => (0, 0, 0, 0), + let flat = match col.data_type() { + fcore::metadata::DataType::Array(_) => flatten_array_leaf_type(col.data_type()).ok(), + _ => None, }; ffi::FfiColumn { name: col.name().to_string(), data_type: core_data_type_to_ffi(col.data_type()), + nullable: col.data_type().is_nullable(), comment: col.comment().unwrap_or("").to_string(), precision, scale, - array_nesting, - element_data_type, - element_precision, - element_scale, + array_nesting: flat.as_ref().map_or(0, |f| f.nesting), + array_nullability: flat + .as_ref() + .map_or_else(Vec::new, |f| f.array_nullability.clone()), + element_data_type: flat.as_ref().map_or(0, |f| f.leaf_type), + element_precision: flat.as_ref().map_or(0, |f| f.leaf_precision), + element_scale: flat.as_ref().map_or(0, |f| f.leaf_scale), + element_nullable: flat.is_none_or(|f| f.leaf_nullable), } } @@ -354,9 +406,31 @@ pub fn element_type_from_ffi( array_nesting: u32, ) -> Result { if array_nesting == 0 { - ffi_data_type_to_core(leaf_dt, precision, scale, 0, 0, 0, 0) + ffi_data_type_to_core(FfiDataTypeSpec { + data_type: leaf_dt, + precision, + scale, + element_data_type: 0, + element_precision: 0, + element_scale: 0, + array_nesting: 0, + array_nullability: Vec::new(), + nullable: true, + element_nullable: true, + }) } else { - build_array_type_from_leaf(leaf_dt, precision, scale, array_nesting) + build_array_type_from_leaf(&FfiDataTypeSpec { + data_type: DATA_TYPE_ARRAY, + precision: 0, + scale: 0, + element_data_type: leaf_dt, + element_precision: precision, + element_scale: scale, + array_nesting, + array_nullability: vec![1; array_nesting as usize], + nullable: true, + element_nullable: true, + }) } } diff --git a/bindings/cpp/test/test_ffi_converter.cpp b/bindings/cpp/test/test_ffi_converter.cpp index 4bbe3ebb..6ad75631 100644 --- a/bindings/cpp/test/test_ffi_converter.cpp +++ b/bindings/cpp/test/test_ffi_converter.cpp @@ -24,17 +24,48 @@ namespace { -fluss::ffi::FfiColumn MakeArrayColumn(int32_t nesting, int32_t element_type) { +fluss::ffi::FfiColumn MakeArrayColumn(int32_t nesting, int32_t element_type, + bool nullable = true, bool element_nullable = true, + std::vector per_level_nullability = {}) { fluss::ffi::FfiColumn col; col.name = rust::String("bad_array"); col.data_type = static_cast(fluss::TypeId::Array); + col.nullable = nullable; col.comment = rust::String(""); col.precision = 0; col.scale = 0; col.array_nesting = nesting; + if (!per_level_nullability.empty()) { + for (auto v : per_level_nullability) { + col.array_nullability.push_back(v); + } + } else { + for (int32_t i = 0; i < nesting; ++i) { + col.array_nullability.push_back((i == 0 ? nullable : true) ? 1 : 0); + } + } col.element_data_type = element_type; col.element_precision = 0; col.element_scale = 0; + col.element_nullable = element_nullable; + return col; +} + +fluss::ffi::FfiColumn MakeScalarColumn(const char* name, fluss::TypeId type_id, + bool nullable = true, int32_t precision = 0, + int32_t scale = 0) { + fluss::ffi::FfiColumn col; + col.name = rust::String(name); + col.data_type = static_cast(type_id); + col.nullable = nullable; + col.comment = rust::String(""); + col.precision = precision; + col.scale = scale; + col.array_nesting = 0; + col.element_data_type = 0; + col.element_precision = 0; + col.element_scale = 0; + col.element_nullable = true; return col; } @@ -62,3 +93,146 @@ TEST(FfiConverterTest, SupportsLegacyOneLevelArrayMetadata) { ASSERT_NE(converted.data_type.element_type(), nullptr); EXPECT_EQ(converted.data_type.element_type()->id(), fluss::TypeId::Int); } + +// --- Nullability tests --- + +TEST(DataTypeTest, DefaultNullable) { + auto dt = fluss::DataType::Int(); + EXPECT_TRUE(dt.nullable()); +} + +TEST(DataTypeTest, NotNullMethod) { + auto dt = fluss::DataType::Int().NotNull(); + EXPECT_FALSE(dt.nullable()); + EXPECT_EQ(dt.id(), fluss::TypeId::Int); +} + +TEST(DataTypeTest, NotNullPreservesPrecisionScale) { + auto dt = fluss::DataType::Decimal(10, 2).NotNull(); + EXPECT_FALSE(dt.nullable()); + EXPECT_EQ(dt.precision(), 10); + EXPECT_EQ(dt.scale(), 2); +} + +TEST(DataTypeTest, ArrayElementNullability) { + auto dt = fluss::DataType::Array(fluss::DataType::Int().NotNull()); + EXPECT_TRUE(dt.nullable()); + ASSERT_NE(dt.element_type(), nullptr); + EXPECT_FALSE(dt.element_type()->nullable()); +} + +TEST(DataTypeTest, NotNullArrayNullableElement) { + auto dt = fluss::DataType::Array(fluss::DataType::Int()).NotNull(); + EXPECT_FALSE(dt.nullable()); + ASSERT_NE(dt.element_type(), nullptr); + EXPECT_TRUE(dt.element_type()->nullable()); +} + +TEST(DataTypeTest, NotNullArrayNotNullElement) { + auto dt = fluss::DataType::Array(fluss::DataType::Int().NotNull()).NotNull(); + EXPECT_FALSE(dt.nullable()); + ASSERT_NE(dt.element_type(), nullptr); + EXPECT_FALSE(dt.element_type()->nullable()); +} + +TEST(FfiConverterTest, ScalarNullableRoundTrip) { + fluss::Column col{"id", fluss::DataType::Int(), ""}; + auto ffi_col = fluss::utils::to_ffi_column(col); + EXPECT_TRUE(ffi_col.nullable); + auto back = fluss::utils::from_ffi_column(ffi_col); + EXPECT_TRUE(back.data_type.nullable()); +} + +TEST(FfiConverterTest, ScalarNotNullRoundTrip) { + fluss::Column col{"id", fluss::DataType::Int().NotNull(), ""}; + auto ffi_col = fluss::utils::to_ffi_column(col); + EXPECT_FALSE(ffi_col.nullable); + auto back = fluss::utils::from_ffi_column(ffi_col); + EXPECT_FALSE(back.data_type.nullable()); +} + +TEST(FfiConverterTest, ArrayNotNullElementRoundTrip) { + fluss::Column col{"tags", fluss::DataType::Array(fluss::DataType::String().NotNull()), ""}; + auto ffi_col = fluss::utils::to_ffi_column(col); + EXPECT_TRUE(ffi_col.nullable); + EXPECT_FALSE(ffi_col.element_nullable); + auto back = fluss::utils::from_ffi_column(ffi_col); + EXPECT_TRUE(back.data_type.nullable()); + ASSERT_NE(back.data_type.element_type(), nullptr); + EXPECT_FALSE(back.data_type.element_type()->nullable()); +} + +TEST(FfiConverterTest, NotNullArrayNullableElementRoundTrip) { + fluss::Column col{"ids", fluss::DataType::Array(fluss::DataType::Int()).NotNull(), ""}; + auto ffi_col = fluss::utils::to_ffi_column(col); + EXPECT_FALSE(ffi_col.nullable); + EXPECT_TRUE(ffi_col.element_nullable); + auto back = fluss::utils::from_ffi_column(ffi_col); + EXPECT_FALSE(back.data_type.nullable()); + ASSERT_NE(back.data_type.element_type(), nullptr); + EXPECT_TRUE(back.data_type.element_type()->nullable()); +} + +TEST(FfiConverterTest, NotNullArrayNotNullElementRoundTrip) { + fluss::Column col{ + "strict_ids", + fluss::DataType::Array(fluss::DataType::Int().NotNull()).NotNull(), + "", + }; + auto ffi_col = fluss::utils::to_ffi_column(col); + EXPECT_FALSE(ffi_col.nullable); + EXPECT_FALSE(ffi_col.element_nullable); + auto back = fluss::utils::from_ffi_column(ffi_col); + EXPECT_FALSE(back.data_type.nullable()); + ASSERT_NE(back.data_type.element_type(), nullptr); + EXPECT_FALSE(back.data_type.element_type()->nullable()); +} + +TEST(FfiConverterTest, NestedArrayIntermediateNullabilityRoundTrip) { + fluss::Column col{ + "nested", + fluss::DataType::Array(fluss::DataType::Array(fluss::DataType::Int()).NotNull()), + "", + }; + auto ffi_col = fluss::utils::to_ffi_column(col); + auto back = fluss::utils::from_ffi_column(ffi_col); + + EXPECT_TRUE(back.data_type.nullable()); + ASSERT_NE(back.data_type.element_type(), nullptr); + EXPECT_FALSE(back.data_type.element_type()->nullable()); + ASSERT_NE(back.data_type.element_type()->element_type(), nullptr); + EXPECT_TRUE(back.data_type.element_type()->element_type()->nullable()); +} + +TEST(FfiConverterTest, NestedArrayAllLevelsNullabilityRoundTrip) { + fluss::Column col{ + "strict_nested", + fluss::DataType::Array( + fluss::DataType::Array(fluss::DataType::Int().NotNull()).NotNull()) + .NotNull(), + "", + }; + auto ffi_col = fluss::utils::to_ffi_column(col); + auto back = fluss::utils::from_ffi_column(ffi_col); + + EXPECT_FALSE(back.data_type.nullable()); + ASSERT_NE(back.data_type.element_type(), nullptr); + EXPECT_FALSE(back.data_type.element_type()->nullable()); + ASSERT_NE(back.data_type.element_type()->element_type(), nullptr); + EXPECT_FALSE(back.data_type.element_type()->element_type()->nullable()); +} + +TEST(FfiConverterTest, FfiColumnNonNullableScalarReconstructed) { + auto col = MakeScalarColumn("id", fluss::TypeId::Int, false); + auto converted = fluss::utils::from_ffi_column(col); + EXPECT_FALSE(converted.data_type.nullable()); + EXPECT_EQ(converted.data_type.id(), fluss::TypeId::Int); +} + +TEST(FfiConverterTest, FfiColumnNonNullableArrayReconstructed) { + auto col = MakeArrayColumn(1, static_cast(fluss::TypeId::String), false, false); + auto converted = fluss::utils::from_ffi_column(col); + EXPECT_FALSE(converted.data_type.nullable()); + ASSERT_NE(converted.data_type.element_type(), nullptr); + EXPECT_FALSE(converted.data_type.element_type()->nullable()); +} diff --git a/bindings/cpp/test/test_log_table.cpp b/bindings/cpp/test/test_log_table.cpp index f36c8707..5678e4bb 100644 --- a/bindings/cpp/test/test_log_table.cpp +++ b/bindings/cpp/test/test_log_table.cpp @@ -1458,3 +1458,66 @@ TEST_F(LogTableTest, ArrayWriterOverflowDetection) { EXPECT_NO_THROW(smallint_arr.SetInt32(0, 32767)); } } + +TEST_F(LogTableTest, NullabilityPreservedInTableInfo) { + auto& adm = admin(); + auto& conn = connection(); + + fluss::TablePath table_path("fluss", "test_nullability_table_info_cpp"); + + auto schema = + fluss::Schema::NewBuilder() + .AddColumn("id", fluss::DataType::Int()) + .AddColumn("name", fluss::DataType::String()) + .AddColumn("tags", fluss::DataType::Array(fluss::DataType::String().NotNull())) + .AddColumn("ids", fluss::DataType::Array(fluss::DataType::Int()).NotNull()) + .AddColumn("nested", + fluss::DataType::Array( + fluss::DataType::Array(fluss::DataType::Int()).NotNull())) + .SetPrimaryKeys({"id"}) + .Build(); + + auto table_descriptor = fluss::TableDescriptor::NewBuilder() + .SetSchema(schema) + .SetProperty("table.replication.factor", "1") + .Build(); + + fluss_test::CreateTable(adm, table_path, table_descriptor); + + fluss::Table table; + ASSERT_OK(conn.GetTable(table_path, table)); + auto info = table.GetTableInfo(); + + ASSERT_EQ(info.schema.columns.size(), 5u); + EXPECT_EQ(info.primary_keys, std::vector{"id"}); + + // Primary key columns are forced NOT NULL by schema normalization. + EXPECT_EQ(info.schema.columns[0].data_type.id(), fluss::TypeId::Int); + EXPECT_FALSE(info.schema.columns[0].data_type.nullable()); + + // "name" STRING (nullable) + EXPECT_EQ(info.schema.columns[1].data_type.id(), fluss::TypeId::String); + EXPECT_TRUE(info.schema.columns[1].data_type.nullable()); + + // "tags" ARRAY (outer nullable) + EXPECT_EQ(info.schema.columns[2].data_type.id(), fluss::TypeId::Array); + EXPECT_TRUE(info.schema.columns[2].data_type.nullable()); + ASSERT_NE(info.schema.columns[2].data_type.element_type(), nullptr); + EXPECT_FALSE(info.schema.columns[2].data_type.element_type()->nullable()); + + // "ids" ARRAY NOT NULL (outer not null, element nullable) + EXPECT_EQ(info.schema.columns[3].data_type.id(), fluss::TypeId::Array); + EXPECT_FALSE(info.schema.columns[3].data_type.nullable()); + ASSERT_NE(info.schema.columns[3].data_type.element_type(), nullptr); + EXPECT_TRUE(info.schema.columns[3].data_type.element_type()->nullable()); + + // "nested" ARRAY NOT NULL> (outer nullable, inner array not null) + EXPECT_EQ(info.schema.columns[4].data_type.id(), fluss::TypeId::Array); + EXPECT_TRUE(info.schema.columns[4].data_type.nullable()); + ASSERT_NE(info.schema.columns[4].data_type.element_type(), nullptr); + EXPECT_FALSE(info.schema.columns[4].data_type.element_type()->nullable()); + ASSERT_NE(info.schema.columns[4].data_type.element_type()->element_type(), nullptr); + EXPECT_TRUE(info.schema.columns[4].data_type.element_type()->element_type()->nullable()); + + ASSERT_OK(adm.DropTable(table_path, false)); +} diff --git a/website/docs/user-guide/cpp/api-reference.md b/website/docs/user-guide/cpp/api-reference.md index c50d40cd..ae4e9490 100644 --- a/website/docs/user-guide/cpp/api-reference.md +++ b/website/docs/user-guide/cpp/api-reference.md @@ -492,7 +492,9 @@ Same array getters as [`RowView`](#array-getters-index-based) — `GetArraySize` | `id() -> TypeId` | Get the type ID | | `precision() -> int` | Get precision (for Decimal/Timestamp types) | | `scale() -> int` | Get scale (for Decimal type) | +| `nullable() -> bool` | Returns `true` if this type is nullable (default), `false` if `NOT NULL` | | `element_type() -> const DataType*` | Get element type (for Array type, nullptr otherwise) | +| `NotNull() -> DataType` | Returns a copy of this type with nullable set to `false` | ## `ArrayWriter` diff --git a/website/docs/user-guide/cpp/data-types.md b/website/docs/user-guide/cpp/data-types.md index 400b2ecf..cce40cef 100644 --- a/website/docs/user-guide/cpp/data-types.md +++ b/website/docs/user-guide/cpp/data-types.md @@ -23,6 +23,40 @@ sidebar_position: 3 | `DataType::Decimal(p, s)` | Decimal with precision and scale | | `DataType::Array(element)` | Array of the given element type (supports nesting) | +## Nullability + +All DataTypes are nullable by default. Use `.NotNull()` to create a `NOT NULL` type: + +```cpp +auto schema = fluss::Schema::NewBuilder() + .AddColumn("id", fluss::DataType::Int().NotNull()) + .AddColumn("name", fluss::DataType::String()) // nullable by default + .Build(); +``` + +Primary key columns are automatically forced `NOT NULL` regardless of the `DataType` setting. + +For nested types, nullability is preserved at each array level and at the leaf element: + +```cpp +auto schema = fluss::Schema::NewBuilder() + .AddColumn("tags", fluss::DataType::Array(fluss::DataType::String().NotNull())) + .AddColumn("ids", fluss::DataType::Array(fluss::DataType::Int()).NotNull()) + .AddColumn("nested", fluss::DataType::Array( + fluss::DataType::Array(fluss::DataType::Int()).NotNull())) + .Build(); +// "tags": ARRAY (outer nullable, elements NOT NULL) +// "ids": ARRAY NOT NULL (outer NOT NULL, elements nullable) +// "nested": ARRAY NOT NULL> (outer nullable, inner array NOT NULL) +``` + +You can query nullability at runtime: + +```cpp +auto info = table.GetTableInfo(); +bool is_nullable = info.schema.columns[0].data_type.nullable(); +``` + ## GenericRow Setters `SetInt32` is used for `TinyInt`, `SmallInt`, and `Int` columns. For `TinyInt` and `SmallInt`, the value is validated at write time — an error is returned if it overflows the column's range (e.g., \[-128, 127\] for `TinyInt`, \[-32768, 32767\] for `SmallInt`). From 513fdcb9a7fb9e079dc803d2672a594bc9faf36a Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Sun, 3 May 2026 16:38:05 +0200 Subject: [PATCH 2/6] add comment to explain nullability setup --- bindings/cpp/src/types.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/bindings/cpp/src/types.rs b/bindings/cpp/src/types.rs index e9e25404..9110f455 100644 --- a/bindings/cpp/src/types.rs +++ b/bindings/cpp/src/types.rs @@ -123,6 +123,9 @@ fn build_array_type_from_leaf(spec: &FfiDataTypeSpec) -> Result= 1")); } + // Construct the leaf scalar type. `nullable` is set to `spec.element_nullable` + // to control the leaf's own nullability. `element_nullable` is unused here + // because the leaf is a scalar (not an array), so it defaults to `true`. let mut dt = ffi_data_type_to_core(FfiDataTypeSpec { data_type: spec.element_data_type, precision: spec.element_precision, @@ -185,6 +188,8 @@ fn ffi_data_type_to_core(spec: FfiDataTypeSpec) -> Result ffi::FfiTableInfo { /// Convert element type tag + precision/scale to core DataType. /// Used by ArrayWriterInner construction from C++. +/// +/// Nullability is hardcoded to `true` (the default) because `ArrayWriter` +/// only needs the type for encoding — the binary array format does not +/// vary based on nullability. Nullability is a schema-level constraint +/// enforced elsewhere (column definition, primary key normalization). pub fn element_type_from_ffi( leaf_dt: i32, precision: u32, From 6ad7ad96b68b4c2e558cf4b79c8fdd2f59c69da4 Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Tue, 5 May 2026 20:46:03 +0200 Subject: [PATCH 3/6] split scalar and array --- bindings/cpp/src/ffi_converter.hpp | 36 ++-- bindings/cpp/src/lib.rs | 1 - bindings/cpp/src/types.rs | 236 +++++++++++------------ bindings/cpp/test/test_ffi_converter.cpp | 14 +- 4 files changed, 133 insertions(+), 154 deletions(-) diff --git a/bindings/cpp/src/ffi_converter.hpp b/bindings/cpp/src/ffi_converter.hpp index 8b8375d6..430dd199 100644 --- a/bindings/cpp/src/ffi_converter.hpp +++ b/bindings/cpp/src/ffi_converter.hpp @@ -33,8 +33,8 @@ namespace utils { /// `nesting` counts the number of ARRAY wrappers stripped to reach the leaf /// element type. `leaf_type`/`leaf_precision`/`leaf_scale` describe that leaf /// scalar. A non-array input produces a zero-initialised value (nesting == 0). -/// `array_nullability` stores each ARRAY wrapper's nullability from outermost -/// to innermost, while `leaf_nullable` stores the scalar leaf nullability. +/// `array_nullability` has `nesting + 1` entries: one per ARRAY wrapper +/// (outermost first) plus a trailing entry for the leaf scalar's nullability. /// /// Using a flat representation — rather than serialising a recursive /// `DataType` — keeps the cxx bridge contract small while preserving schema @@ -45,7 +45,6 @@ struct FlattenedArrayType { int32_t leaf_precision{0}; int32_t leaf_scale{0}; std::vector array_nullability; - bool leaf_nullable{true}; }; /// Flattens an `ARRAY>>` DataType into a FlattenedArrayType. @@ -56,8 +55,8 @@ struct FlattenedArrayType { /// - If `data_type` is an ARRAY but has a null element_type() chain (which /// should only happen on malformed input), returns a zero-valued result to /// signal the caller to reject the schema. -/// - Otherwise, `nesting >= 1`, array_nullability has `nesting` entries, and -/// leaf_* describe the innermost scalar. +/// - Otherwise, `nesting >= 1`, array_nullability has `nesting + 1` entries +/// (last = leaf scalar nullability), and leaf_* describe the innermost scalar. inline FlattenedArrayType flatten_array_type(const DataType& data_type) { FlattenedArrayType out; if (data_type.id() != TypeId::Array) { @@ -77,25 +76,24 @@ inline FlattenedArrayType flatten_array_type(const DataType& data_type) { out.leaf_type = static_cast(current->id()); out.leaf_precision = current->precision(); out.leaf_scale = current->scale(); - out.leaf_nullable = current->nullable(); + out.array_nullability.push_back(current->nullable() ? 1 : 0); return out; } /// Inverse of flatten_array_type: rebuilds an `ARRAY>>` type /// from the compact flat form. Requires `flat.nesting >= 1`; callers handle /// the `nesting == 0` case by using a plain scalar DataType directly. -inline DataType rebuild_array_type(const FlattenedArrayType& flat, bool outer_nullable = true) { +/// `array_nullability` must have `nesting + 1` entries (last = leaf). +inline DataType rebuild_array_type(const FlattenedArrayType& flat) { + bool leaf_nullable = (static_cast(flat.nesting) < flat.array_nullability.size()) + ? (flat.array_nullability[static_cast(flat.nesting)] != 0) + : true; DataType dt(static_cast(flat.leaf_type), flat.leaf_precision, flat.leaf_scale, - flat.leaf_nullable); + leaf_nullable); for (int32_t i = flat.nesting - 1; i >= 0; --i) { - bool nullable = true; - if (static_cast(i) < flat.array_nullability.size()) { - nullable = flat.array_nullability[static_cast(i)] != 0; - } else if (i == 0) { - // Backward compatibility for legacy metadata without per-level - // array nullability: preserve the top-level nullable bit. - nullable = outer_nullable; - } + bool nullable = (static_cast(i) < flat.array_nullability.size()) + ? (flat.array_nullability[static_cast(i)] != 0) + : true; auto arr = DataType::Array(std::move(dt)); if (!nullable) { arr = arr.NotNull(); @@ -182,12 +180,10 @@ inline ffi::FfiColumn to_ffi_column(const Column& col) { ffi_col.element_data_type = flat.leaf_type; ffi_col.element_precision = flat.leaf_precision; ffi_col.element_scale = flat.leaf_scale; - ffi_col.element_nullable = flat.leaf_nullable; } else { ffi_col.element_data_type = 0; ffi_col.element_precision = 0; ffi_col.element_scale = 0; - ffi_col.element_nullable = true; } return ffi_col; } @@ -308,9 +304,7 @@ inline Column from_ffi_column(const ffi::FfiColumn& ffi_col) { ffi_col.element_precision, ffi_col.element_scale, std::move(array_nullability), - ffi_col.element_nullable, - }, - ffi_col.nullable); + }); return Column{std::string(ffi_col.name), std::move(dt), std::string(ffi_col.comment)}; } DataType dt(type_id, ffi_col.precision, ffi_col.scale, ffi_col.nullable); diff --git a/bindings/cpp/src/lib.rs b/bindings/cpp/src/lib.rs index 3c3bc2bf..fe8a9484 100644 --- a/bindings/cpp/src/lib.rs +++ b/bindings/cpp/src/lib.rs @@ -94,7 +94,6 @@ mod ffi { element_data_type: i32, element_precision: i32, element_scale: i32, - element_nullable: bool, } struct FfiSchema { diff --git a/bindings/cpp/src/types.rs b/bindings/cpp/src/types.rs index 9110f455..f61711b5 100644 --- a/bindings/cpp/src/types.rs +++ b/bindings/cpp/src/types.rs @@ -41,33 +41,43 @@ pub const DATA_TYPE_CHAR: i32 = 15; pub const DATA_TYPE_BINARY: i32 = 16; pub const DATA_TYPE_ARRAY: i32 = 17; -struct FfiDataTypeSpec { - data_type: i32, - precision: u32, - scale: u32, - element_data_type: i32, - element_precision: u32, - element_scale: u32, - array_nesting: u32, - array_nullability: Vec, - nullable: bool, - element_nullable: bool, +/// Separates scalar and array type specs so each variant only carries +/// the fields it actually needs — no zeroed-out placeholders. +enum FfiDataTypeSpec { + Scalar { + data_type: i32, + precision: u32, + scale: u32, + nullable: bool, + }, + Array { + element_data_type: i32, + element_precision: u32, + element_scale: u32, + array_nesting: u32, + /// `nesting` entries for each ARRAY wrapper (outermost first) plus + /// one trailing entry for the leaf scalar. Length = `nesting + 1`. + array_nullability: Vec, + }, } fn ffi_column_to_core_data_type(col: &ffi::FfiColumn) -> Result { - let dt = ffi_data_type_to_core(FfiDataTypeSpec { - data_type: col.data_type, - precision: col.precision as u32, - scale: col.scale as u32, - element_data_type: col.element_data_type, - element_precision: col.element_precision as u32, - element_scale: col.element_scale as u32, - array_nesting: col.array_nesting.max(0) as u32, - array_nullability: col.array_nullability.clone(), - nullable: col.nullable, - element_nullable: col.element_nullable, - })?; - Ok(dt) + if col.data_type == DATA_TYPE_ARRAY { + ffi_data_type_to_core(FfiDataTypeSpec::Array { + element_data_type: col.element_data_type, + element_precision: col.element_precision as u32, + element_scale: col.element_scale as u32, + array_nesting: col.array_nesting.max(0) as u32, + array_nullability: col.array_nullability.clone(), + }) + } else { + ffi_data_type_to_core(FfiDataTypeSpec::Scalar { + data_type: col.data_type, + precision: col.precision as u32, + scale: col.scale as u32, + nullable: col.nullable, + }) + } } fn type_precision_scale(dt: &fcore::metadata::DataType) -> (i32, i32) { @@ -86,8 +96,9 @@ struct FlattenedLeafType { leaf_type: i32, leaf_precision: i32, leaf_scale: i32, + /// `nesting` entries for ARRAY wrappers (outermost first) plus one + /// trailing entry for the leaf scalar. Length = `nesting + 1`. array_nullability: Vec, - leaf_nullable: bool, } fn flatten_array_leaf_type(dt: &fcore::metadata::DataType) -> Result { @@ -108,6 +119,7 @@ fn flatten_array_leaf_type(dt: &fcore::metadata::DataType) -> Result Result Result { - if spec.array_nesting == 0 { +fn build_array_type_from_leaf( + element_data_type: i32, + element_precision: u32, + element_scale: u32, + array_nesting: u32, + array_nullability: &[u8], +) -> Result { + if array_nesting == 0 { return Err(anyhow!("ARRAY nesting must be >= 1")); } - // Construct the leaf scalar type. `nullable` is set to `spec.element_nullable` - // to control the leaf's own nullability. `element_nullable` is unused here - // because the leaf is a scalar (not an array), so it defaults to `true`. - let mut dt = ffi_data_type_to_core(FfiDataTypeSpec { - data_type: spec.element_data_type, - precision: spec.element_precision, - scale: spec.element_scale, - element_data_type: 0, - element_precision: 0, - element_scale: 0, - array_nesting: 0, - array_nullability: Vec::new(), - nullable: spec.element_nullable, - element_nullable: true, + let leaf_nullable = array_nullability + .get(array_nesting as usize) + .map(|v| *v != 0) + .unwrap_or(true); + let mut dt = ffi_data_type_to_core(FfiDataTypeSpec::Scalar { + data_type: element_data_type, + precision: element_precision, + scale: element_scale, + nullable: leaf_nullable, })?; - for i in (0..spec.array_nesting).rev() { - let nullable = spec - .array_nullability + for i in (0..array_nesting).rev() { + let nullable = array_nullability .get(i as usize) - .map(|value| *value != 0) - .unwrap_or_else(|| if i == 0 { spec.nullable } else { true }); + .map(|v| *v != 0) + .unwrap_or(true); dt = fcore::metadata::DataType::Array(fcore::metadata::ArrayType::with_nullable( nullable, dt, )); @@ -152,68 +163,58 @@ fn build_array_type_from_leaf(spec: &FfiDataTypeSpec) -> Result Result { - let result = match spec.data_type { - DATA_TYPE_BOOLEAN => Ok(fcore::metadata::DataTypes::boolean()), - DATA_TYPE_TINYINT => Ok(fcore::metadata::DataTypes::tinyint()), - DATA_TYPE_SMALLINT => Ok(fcore::metadata::DataTypes::smallint()), - DATA_TYPE_INT => Ok(fcore::metadata::DataTypes::int()), - DATA_TYPE_BIGINT => Ok(fcore::metadata::DataTypes::bigint()), - DATA_TYPE_FLOAT => Ok(fcore::metadata::DataTypes::float()), - DATA_TYPE_DOUBLE => Ok(fcore::metadata::DataTypes::double()), - DATA_TYPE_STRING => Ok(fcore::metadata::DataTypes::string()), - DATA_TYPE_BYTES => Ok(fcore::metadata::DataTypes::bytes()), - DATA_TYPE_DATE => Ok(fcore::metadata::DataTypes::date()), - DATA_TYPE_TIME => Ok(fcore::metadata::DataTypes::time()), - DATA_TYPE_TIMESTAMP => Ok(fcore::metadata::DataTypes::timestamp_with_precision( - spec.precision, - )), - DATA_TYPE_TIMESTAMP_LTZ => Ok(fcore::metadata::DataTypes::timestamp_ltz_with_precision( - spec.precision, - )), - DATA_TYPE_DECIMAL => { - let dt = fcore::metadata::DecimalType::new(spec.precision, spec.scale)?; - Ok(fcore::metadata::DataType::Decimal(dt)) - } - DATA_TYPE_CHAR => Ok(fcore::metadata::DataTypes::char(spec.precision)), - DATA_TYPE_BINARY => Ok(fcore::metadata::DataTypes::binary(spec.precision as usize)), - DATA_TYPE_ARRAY => { - if spec.array_nesting > 0 { - return build_array_type_from_leaf(&spec); - } else { - // Legacy path for single-level arrays where array_nesting == 0. - // Modern code always sets array_nesting >= 1 and uses - // build_array_type_from_leaf above; this branch exists for - // backward compatibility with older metadata that only carried - // element_data_type without an explicit nesting count. - if spec.element_data_type == 0 { - return Err(anyhow!("ARRAY requires element type metadata")); + match spec { + FfiDataTypeSpec::Scalar { + data_type, + precision, + scale, + nullable, + } => { + let dt = match data_type { + DATA_TYPE_BOOLEAN => fcore::metadata::DataTypes::boolean(), + DATA_TYPE_TINYINT => fcore::metadata::DataTypes::tinyint(), + DATA_TYPE_SMALLINT => fcore::metadata::DataTypes::smallint(), + DATA_TYPE_INT => fcore::metadata::DataTypes::int(), + DATA_TYPE_BIGINT => fcore::metadata::DataTypes::bigint(), + DATA_TYPE_FLOAT => fcore::metadata::DataTypes::float(), + DATA_TYPE_DOUBLE => fcore::metadata::DataTypes::double(), + DATA_TYPE_STRING => fcore::metadata::DataTypes::string(), + DATA_TYPE_BYTES => fcore::metadata::DataTypes::bytes(), + DATA_TYPE_DATE => fcore::metadata::DataTypes::date(), + DATA_TYPE_TIME => fcore::metadata::DataTypes::time(), + DATA_TYPE_TIMESTAMP => { + fcore::metadata::DataTypes::timestamp_with_precision(precision) + } + DATA_TYPE_TIMESTAMP_LTZ => { + fcore::metadata::DataTypes::timestamp_ltz_with_precision(precision) } - // Same as build_array_type_from_leaf: construct the element as a - // scalar, so `element_nullable` is unused and defaults to `true`. - let element_type = ffi_data_type_to_core(FfiDataTypeSpec { - data_type: spec.element_data_type, - precision: spec.element_precision, - scale: spec.element_scale, - element_data_type: 0, - element_precision: 0, - element_scale: 0, - array_nesting: 0, - array_nullability: Vec::new(), - nullable: spec.element_nullable, - element_nullable: true, - })?; - let arr = fcore::metadata::ArrayType::with_nullable(spec.nullable, element_type); - return Ok(fcore::metadata::DataType::Array(arr)); + DATA_TYPE_DECIMAL => { + let dt = fcore::metadata::DecimalType::new(precision, scale)?; + fcore::metadata::DataType::Decimal(dt) + } + DATA_TYPE_CHAR => fcore::metadata::DataTypes::char(precision), + DATA_TYPE_BINARY => fcore::metadata::DataTypes::binary(precision as usize), + _ => return Err(anyhow!("Unknown data type: {}", data_type)), + }; + if nullable { + Ok(dt) + } else { + Ok(dt.as_non_nullable()) } } - _ => Err(anyhow!("Unknown data type: {}", spec.data_type)), - }; - - let data_type = result?; - if spec.nullable { - Ok(data_type) - } else { - Ok(data_type.as_non_nullable()) + FfiDataTypeSpec::Array { + element_data_type, + element_precision, + element_scale, + array_nesting, + ref array_nullability, + } => build_array_type_from_leaf( + element_data_type, + element_precision, + element_scale, + array_nesting, + array_nullability, + ), } } @@ -262,7 +263,6 @@ fn core_column_to_ffi(col: &fcore::metadata::Column) -> ffi::FfiColumn { element_data_type: flat.as_ref().map_or(0, |f| f.leaf_type), element_precision: flat.as_ref().map_or(0, |f| f.leaf_precision), element_scale: flat.as_ref().map_or(0, |f| f.leaf_scale), - element_nullable: flat.is_none_or(|f| f.leaf_nullable), } } @@ -416,31 +416,15 @@ pub fn element_type_from_ffi( array_nesting: u32, ) -> Result { if array_nesting == 0 { - ffi_data_type_to_core(FfiDataTypeSpec { + ffi_data_type_to_core(FfiDataTypeSpec::Scalar { data_type: leaf_dt, precision, scale, - element_data_type: 0, - element_precision: 0, - element_scale: 0, - array_nesting: 0, - array_nullability: Vec::new(), nullable: true, - element_nullable: true, }) } else { - build_array_type_from_leaf(&FfiDataTypeSpec { - data_type: DATA_TYPE_ARRAY, - precision: 0, - scale: 0, - element_data_type: leaf_dt, - element_precision: precision, - element_scale: scale, - array_nesting, - array_nullability: vec![1; array_nesting as usize], - nullable: true, - element_nullable: true, - }) + let array_nullability = vec![1u8; (array_nesting + 1) as usize]; + build_array_type_from_leaf(leaf_dt, precision, scale, array_nesting, &array_nullability) } } diff --git a/bindings/cpp/test/test_ffi_converter.cpp b/bindings/cpp/test/test_ffi_converter.cpp index 6ad75631..2078bdab 100644 --- a/bindings/cpp/test/test_ffi_converter.cpp +++ b/bindings/cpp/test/test_ffi_converter.cpp @@ -25,7 +25,7 @@ namespace { fluss::ffi::FfiColumn MakeArrayColumn(int32_t nesting, int32_t element_type, - bool nullable = true, bool element_nullable = true, + bool nullable = true, bool leaf_nullable = true, std::vector per_level_nullability = {}) { fluss::ffi::FfiColumn col; col.name = rust::String("bad_array"); @@ -43,11 +43,11 @@ fluss::ffi::FfiColumn MakeArrayColumn(int32_t nesting, int32_t element_type, for (int32_t i = 0; i < nesting; ++i) { col.array_nullability.push_back((i == 0 ? nullable : true) ? 1 : 0); } + col.array_nullability.push_back(leaf_nullable ? 1 : 0); } col.element_data_type = element_type; col.element_precision = 0; col.element_scale = 0; - col.element_nullable = element_nullable; return col; } @@ -65,7 +65,6 @@ fluss::ffi::FfiColumn MakeScalarColumn(const char* name, fluss::TypeId type_id, col.element_data_type = 0; col.element_precision = 0; col.element_scale = 0; - col.element_nullable = true; return col; } @@ -155,7 +154,8 @@ TEST(FfiConverterTest, ArrayNotNullElementRoundTrip) { fluss::Column col{"tags", fluss::DataType::Array(fluss::DataType::String().NotNull()), ""}; auto ffi_col = fluss::utils::to_ffi_column(col); EXPECT_TRUE(ffi_col.nullable); - EXPECT_FALSE(ffi_col.element_nullable); + ASSERT_EQ(ffi_col.array_nullability.size(), 2u); + EXPECT_EQ(ffi_col.array_nullability[1], 0); auto back = fluss::utils::from_ffi_column(ffi_col); EXPECT_TRUE(back.data_type.nullable()); ASSERT_NE(back.data_type.element_type(), nullptr); @@ -166,7 +166,8 @@ TEST(FfiConverterTest, NotNullArrayNullableElementRoundTrip) { fluss::Column col{"ids", fluss::DataType::Array(fluss::DataType::Int()).NotNull(), ""}; auto ffi_col = fluss::utils::to_ffi_column(col); EXPECT_FALSE(ffi_col.nullable); - EXPECT_TRUE(ffi_col.element_nullable); + ASSERT_EQ(ffi_col.array_nullability.size(), 2u); + EXPECT_EQ(ffi_col.array_nullability[1], 1); auto back = fluss::utils::from_ffi_column(ffi_col); EXPECT_FALSE(back.data_type.nullable()); ASSERT_NE(back.data_type.element_type(), nullptr); @@ -181,7 +182,8 @@ TEST(FfiConverterTest, NotNullArrayNotNullElementRoundTrip) { }; auto ffi_col = fluss::utils::to_ffi_column(col); EXPECT_FALSE(ffi_col.nullable); - EXPECT_FALSE(ffi_col.element_nullable); + ASSERT_EQ(ffi_col.array_nullability.size(), 2u); + EXPECT_EQ(ffi_col.array_nullability[1], 0); auto back = fluss::utils::from_ffi_column(ffi_col); EXPECT_FALSE(back.data_type.nullable()); ASSERT_NE(back.data_type.element_type(), nullptr); From 62832ae90e96a0812dfb47b85acec0b3106ec7ae Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Thu, 7 May 2026 21:09:02 +0200 Subject: [PATCH 4/6] rephrase since behavriour change fater this PR --- bindings/cpp/src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bindings/cpp/src/lib.rs b/bindings/cpp/src/lib.rs index fe8a9484..032d51a9 100644 --- a/bindings/cpp/src/lib.rs +++ b/bindings/cpp/src/lib.rs @@ -3614,8 +3614,9 @@ impl ArrayWriterInner { /// Structural type equivalence that ignores nullability flags but preserves /// variant and precision/scale semantics. Used to compare ArrayWriter element -/// types on the binding boundary, where C++ callers never control nullability -/// explicitly. +/// types on the binding boundary. Nullability is ignored in structural comparison +// because the Rust-side element type is always reconstructed as nullable +// (encoding doesn't depend on it). fn structurally_compatible(a: &fcore::metadata::DataType, b: &fcore::metadata::DataType) -> bool { use fcore::metadata::DataType; match (a, b) { From 09690d3482129b080040f4add719d1410e5b6811 Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Thu, 7 May 2026 21:09:51 +0200 Subject: [PATCH 5/6] rephrase since behavriour change fater this PR --- bindings/cpp/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/cpp/src/lib.rs b/bindings/cpp/src/lib.rs index 032d51a9..0100473b 100644 --- a/bindings/cpp/src/lib.rs +++ b/bindings/cpp/src/lib.rs @@ -3615,8 +3615,8 @@ impl ArrayWriterInner { /// Structural type equivalence that ignores nullability flags but preserves /// variant and precision/scale semantics. Used to compare ArrayWriter element /// types on the binding boundary. Nullability is ignored in structural comparison -// because the Rust-side element type is always reconstructed as nullable -// (encoding doesn't depend on it). +/// because the Rust-side element type is always reconstructed as nullable +/// (encoding doesn't depend on it). fn structurally_compatible(a: &fcore::metadata::DataType, b: &fcore::metadata::DataType) -> bool { use fcore::metadata::DataType; match (a, b) { From 924a50116da0905c726a2a0e8c10a68233ee3e0c Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Thu, 7 May 2026 21:10:25 +0200 Subject: [PATCH 6/6] format --- bindings/cpp/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/cpp/src/lib.rs b/bindings/cpp/src/lib.rs index 0100473b..52df3686 100644 --- a/bindings/cpp/src/lib.rs +++ b/bindings/cpp/src/lib.rs @@ -3615,7 +3615,7 @@ impl ArrayWriterInner { /// Structural type equivalence that ignores nullability flags but preserves /// variant and precision/scale semantics. Used to compare ArrayWriter element /// types on the binding boundary. Nullability is ignored in structural comparison -/// because the Rust-side element type is always reconstructed as nullable +/// because the Rust-side element type is always reconstructed as nullable /// (encoding doesn't depend on it). fn structurally_compatible(a: &fcore::metadata::DataType, b: &fcore::metadata::DataType) -> bool { use fcore::metadata::DataType;