diff --git a/bindings/cpp/include/fluss.hpp b/bindings/cpp/include/fluss.hpp index f222166d..3d39783f 100644 --- a/bindings/cpp/include/fluss.hpp +++ b/bindings/cpp/include/fluss.hpp @@ -283,8 +283,8 @@ enum class TypeId { class DataType { public: - explicit DataType(TypeId id, int32_t p = 0, int32_t s = 0) - : id_(id), precision_(p), scale_(s) {} + explicit DataType(TypeId id, int32_t p = 0, int32_t s = 0, bool nullable = true) + : id_(id), precision_(p), scale_(s), nullable_(nullable) {} static DataType Boolean() { return DataType(TypeId::Boolean); } static DataType TinyInt() { return DataType(TypeId::TinyInt); } @@ -321,15 +321,24 @@ class DataType { TypeId id() const { return id_; } int32_t precision() const { return precision_; } int32_t scale() const { return scale_; } + bool nullable() const { return nullable_; } /// Returns the element type of an ARRAY. Returns `nullptr` for non-array /// types. The returned pointer is valid as long as this DataType (or a /// copy holding the same shared element) is alive. const DataType* element_type() const { return element_type_.get(); } + /// Returns a copy of this DataType with nullable set to false. + DataType NotNull() const { + DataType dt(id_, precision_, scale_, false); + dt.element_type_ = element_type_; + return dt; + } + private: TypeId id_; int32_t precision_{0}; int32_t scale_{0}; + bool nullable_{true}; std::shared_ptr element_type_; }; diff --git a/bindings/cpp/src/ffi_converter.hpp b/bindings/cpp/src/ffi_converter.hpp index 0ef1487a..430dd199 100644 --- a/bindings/cpp/src/ffi_converter.hpp +++ b/bindings/cpp/src/ffi_converter.hpp @@ -33,16 +33,18 @@ namespace utils { /// `nesting` counts the number of ARRAY wrappers stripped to reach the leaf /// element type. `leaf_type`/`leaf_precision`/`leaf_scale` describe that leaf /// scalar. A non-array input produces a zero-initialised value (nesting == 0). +/// `array_nullability` has `nesting + 1` entries: one per ARRAY wrapper +/// (outermost first) plus a trailing entry for the leaf scalar's nullability. /// /// Using a flat representation — rather than serialising a recursive -/// `DataType` — keeps the cxx bridge contract small (four `i32`s inside -/// `FfiColumn`) while preserving full schema fidelity across the FFI boundary -/// when paired with rebuild_array_type(). +/// `DataType` — keeps the cxx bridge contract small while preserving schema +/// fidelity across the FFI boundary when paired with rebuild_array_type(). struct FlattenedArrayType { int32_t nesting{0}; int32_t leaf_type{0}; int32_t leaf_precision{0}; int32_t leaf_scale{0}; + std::vector array_nullability; }; /// Flattens an `ARRAY>>` DataType into a FlattenedArrayType. @@ -53,7 +55,8 @@ struct FlattenedArrayType { /// - If `data_type` is an ARRAY but has a null element_type() chain (which /// should only happen on malformed input), returns a zero-valued result to /// signal the caller to reject the schema. -/// - Otherwise, `nesting >= 1` and leaf_* describe the innermost scalar. +/// - Otherwise, `nesting >= 1`, array_nullability has `nesting + 1` entries +/// (last = leaf scalar nullability), and leaf_* describe the innermost scalar. inline FlattenedArrayType flatten_array_type(const DataType& data_type) { FlattenedArrayType out; if (data_type.id() != TypeId::Array) { @@ -63,6 +66,7 @@ inline FlattenedArrayType flatten_array_type(const DataType& data_type) { const DataType* current = &data_type; while (current && current->id() == TypeId::Array) { out.nesting += 1; + out.array_nullability.push_back(current->nullable() ? 1 : 0); current = current->element_type(); } if (!current) { @@ -72,16 +76,29 @@ inline FlattenedArrayType flatten_array_type(const DataType& data_type) { out.leaf_type = static_cast(current->id()); out.leaf_precision = current->precision(); out.leaf_scale = current->scale(); + out.array_nullability.push_back(current->nullable() ? 1 : 0); return out; } /// Inverse of flatten_array_type: rebuilds an `ARRAY>>` type /// from the compact flat form. Requires `flat.nesting >= 1`; callers handle /// the `nesting == 0` case by using a plain scalar DataType directly. +/// `array_nullability` must have `nesting + 1` entries (last = leaf). inline DataType rebuild_array_type(const FlattenedArrayType& flat) { - DataType dt(static_cast(flat.leaf_type), flat.leaf_precision, flat.leaf_scale); - for (int32_t i = 0; i < flat.nesting; ++i) { - dt = DataType::Array(std::move(dt)); + bool leaf_nullable = (static_cast(flat.nesting) < flat.array_nullability.size()) + ? (flat.array_nullability[static_cast(flat.nesting)] != 0) + : true; + DataType dt(static_cast(flat.leaf_type), flat.leaf_precision, flat.leaf_scale, + leaf_nullable); + for (int32_t i = flat.nesting - 1; i >= 0; --i) { + bool nullable = (static_cast(i) < flat.array_nullability.size()) + ? (flat.array_nullability[static_cast(i)] != 0) + : true; + auto arr = DataType::Array(std::move(dt)); + if (!nullable) { + arr = arr.NotNull(); + } + dt = std::move(arr); } return dt; } @@ -150,11 +167,15 @@ inline ffi::FfiColumn to_ffi_column(const Column& col) { ffi::FfiColumn ffi_col; ffi_col.name = rust::String(col.name); ffi_col.data_type = static_cast(col.data_type.id()); + ffi_col.nullable = col.data_type.nullable(); ffi_col.comment = rust::String(col.comment); ffi_col.precision = col.data_type.precision(); ffi_col.scale = col.data_type.scale(); auto flat = flatten_array_type(col.data_type); ffi_col.array_nesting = flat.nesting; + for (auto nullable : flat.array_nullability) { + ffi_col.array_nullability.push_back(nullable); + } if (flat.nesting > 0 && flat.leaf_type != 0) { ffi_col.element_data_type = flat.leaf_type; ffi_col.element_precision = flat.leaf_precision; @@ -229,7 +250,6 @@ inline ffi::FfiTableDescriptor to_ffi_table_descriptor(const TableDescriptor& de inline Column from_ffi_column(const ffi::FfiColumn& ffi_col) { auto type_id = static_cast(ffi_col.data_type); - DataType dt(type_id, ffi_col.precision, ffi_col.scale); if (type_id == TypeId::Array) { if (ffi_col.element_data_type == 0) { throw std::runtime_error("Malformed ARRAY column '" + std::string(ffi_col.name) + @@ -273,13 +293,21 @@ inline Column from_ffi_column(const ffi::FfiColumn& ffi_col) { } int32_t nesting = ffi_col.array_nesting > 0 ? ffi_col.array_nesting : 1; - dt = rebuild_array_type(FlattenedArrayType{ - nesting, - ffi_col.element_data_type, - ffi_col.element_precision, - ffi_col.element_scale, - }); + std::vector array_nullability; + for (auto nullable : ffi_col.array_nullability) { + array_nullability.push_back(nullable); + } + auto dt = rebuild_array_type( + FlattenedArrayType{ + nesting, + ffi_col.element_data_type, + ffi_col.element_precision, + ffi_col.element_scale, + std::move(array_nullability), + }); + return Column{std::string(ffi_col.name), std::move(dt), std::string(ffi_col.comment)}; } + DataType dt(type_id, ffi_col.precision, ffi_col.scale, ffi_col.nullable); return Column{std::string(ffi_col.name), std::move(dt), std::string(ffi_col.comment)}; } diff --git a/bindings/cpp/src/lib.rs b/bindings/cpp/src/lib.rs index 875373bc..52df3686 100644 --- a/bindings/cpp/src/lib.rs +++ b/bindings/cpp/src/lib.rs @@ -85,10 +85,12 @@ mod ffi { struct FfiColumn { name: String, data_type: i32, + nullable: bool, comment: String, precision: i32, scale: i32, array_nesting: i32, + array_nullability: Vec, element_data_type: i32, element_precision: i32, element_scale: i32, @@ -3612,8 +3614,9 @@ impl ArrayWriterInner { /// Structural type equivalence that ignores nullability flags but preserves /// variant and precision/scale semantics. Used to compare ArrayWriter element -/// types on the binding boundary, where C++ callers never control nullability -/// explicitly. +/// types on the binding boundary. Nullability is ignored in structural comparison +/// because the Rust-side element type is always reconstructed as nullable +/// (encoding doesn't depend on it). fn structurally_compatible(a: &fcore::metadata::DataType, b: &fcore::metadata::DataType) -> bool { use fcore::metadata::DataType; match (a, b) { diff --git a/bindings/cpp/src/types.rs b/bindings/cpp/src/types.rs index c15aadc9..f61711b5 100644 --- a/bindings/cpp/src/types.rs +++ b/bindings/cpp/src/types.rs @@ -41,16 +41,43 @@ pub const DATA_TYPE_CHAR: i32 = 15; pub const DATA_TYPE_BINARY: i32 = 16; pub const DATA_TYPE_ARRAY: i32 = 17; +/// Separates scalar and array type specs so each variant only carries +/// the fields it actually needs — no zeroed-out placeholders. +enum FfiDataTypeSpec { + Scalar { + data_type: i32, + precision: u32, + scale: u32, + nullable: bool, + }, + Array { + element_data_type: i32, + element_precision: u32, + element_scale: u32, + array_nesting: u32, + /// `nesting` entries for each ARRAY wrapper (outermost first) plus + /// one trailing entry for the leaf scalar. Length = `nesting + 1`. + array_nullability: Vec, + }, +} + fn ffi_column_to_core_data_type(col: &ffi::FfiColumn) -> Result { - ffi_data_type_to_core( - col.data_type, - col.precision as u32, - col.scale as u32, - col.element_data_type, - col.element_precision as u32, - col.element_scale as u32, - col.array_nesting.max(0) as u32, - ) + if col.data_type == DATA_TYPE_ARRAY { + ffi_data_type_to_core(FfiDataTypeSpec::Array { + element_data_type: col.element_data_type, + element_precision: col.element_precision as u32, + element_scale: col.element_scale as u32, + array_nesting: col.array_nesting.max(0) as u32, + array_nullability: col.array_nullability.clone(), + }) + } else { + ffi_data_type_to_core(FfiDataTypeSpec::Scalar { + data_type: col.data_type, + precision: col.precision as u32, + scale: col.scale as u32, + nullable: col.nullable, + }) + } } fn type_precision_scale(dt: &fcore::metadata::DataType) -> (i32, i32) { @@ -64,11 +91,23 @@ fn type_precision_scale(dt: &fcore::metadata::DataType) -> (i32, i32) { } } -fn flatten_array_leaf_type(dt: &fcore::metadata::DataType) -> Result<(i32, i32, i32, i32)> { +struct FlattenedLeafType { + nesting: i32, + leaf_type: i32, + leaf_precision: i32, + leaf_scale: i32, + /// `nesting` entries for ARRAY wrappers (outermost first) plus one + /// trailing entry for the leaf scalar. Length = `nesting + 1`. + array_nullability: Vec, +} + +fn flatten_array_leaf_type(dt: &fcore::metadata::DataType) -> Result { let mut nesting = 0_i32; let mut leaf = dt; + let mut array_nullability = Vec::new(); while let fcore::metadata::DataType::Array(at) = leaf { nesting += 1; + array_nullability.push(u8::from(leaf.is_nullable())); leaf = at.get_element_type(); } if nesting == 0 { @@ -80,85 +119,102 @@ fn flatten_array_leaf_type(dt: &fcore::metadata::DataType) -> Result<(i32, i32, "Unsupported ARRAY leaf type for C++ bindings: {leaf}" )); } + array_nullability.push(u8::from(leaf.is_nullable())); let (leaf_precision, leaf_scale) = type_precision_scale(leaf); - Ok((nesting, leaf_type, leaf_precision, leaf_scale)) + Ok(FlattenedLeafType { + nesting, + leaf_type, + leaf_precision, + leaf_scale, + array_nullability, + }) } fn build_array_type_from_leaf( - leaf_dt: i32, - leaf_precision: u32, - leaf_scale: u32, - nesting: u32, + element_data_type: i32, + element_precision: u32, + element_scale: u32, + array_nesting: u32, + array_nullability: &[u8], ) -> Result { - if nesting == 0 { + if array_nesting == 0 { return Err(anyhow!("ARRAY nesting must be >= 1")); } - let mut dt = ffi_data_type_to_core(leaf_dt, leaf_precision, leaf_scale, 0, 0, 0, 0)?; - for _ in 0..nesting { - dt = fcore::metadata::DataTypes::array(dt); + let leaf_nullable = array_nullability + .get(array_nesting as usize) + .map(|v| *v != 0) + .unwrap_or(true); + let mut dt = ffi_data_type_to_core(FfiDataTypeSpec::Scalar { + data_type: element_data_type, + precision: element_precision, + scale: element_scale, + nullable: leaf_nullable, + })?; + for i in (0..array_nesting).rev() { + let nullable = array_nullability + .get(i as usize) + .map(|v| *v != 0) + .unwrap_or(true); + dt = fcore::metadata::DataType::Array(fcore::metadata::ArrayType::with_nullable( + nullable, dt, + )); } Ok(dt) } -fn ffi_data_type_to_core( - dt: i32, - precision: u32, - scale: u32, - element_dt: i32, - element_precision: u32, - element_scale: u32, - array_nesting: u32, -) -> Result { - match dt { - DATA_TYPE_BOOLEAN => Ok(fcore::metadata::DataTypes::boolean()), - DATA_TYPE_TINYINT => Ok(fcore::metadata::DataTypes::tinyint()), - DATA_TYPE_SMALLINT => Ok(fcore::metadata::DataTypes::smallint()), - DATA_TYPE_INT => Ok(fcore::metadata::DataTypes::int()), - DATA_TYPE_BIGINT => Ok(fcore::metadata::DataTypes::bigint()), - DATA_TYPE_FLOAT => Ok(fcore::metadata::DataTypes::float()), - DATA_TYPE_DOUBLE => Ok(fcore::metadata::DataTypes::double()), - DATA_TYPE_STRING => Ok(fcore::metadata::DataTypes::string()), - DATA_TYPE_BYTES => Ok(fcore::metadata::DataTypes::bytes()), - DATA_TYPE_DATE => Ok(fcore::metadata::DataTypes::date()), - DATA_TYPE_TIME => Ok(fcore::metadata::DataTypes::time()), - DATA_TYPE_TIMESTAMP => Ok(fcore::metadata::DataTypes::timestamp_with_precision( - precision, - )), - DATA_TYPE_TIMESTAMP_LTZ => Ok(fcore::metadata::DataTypes::timestamp_ltz_with_precision( +fn ffi_data_type_to_core(spec: FfiDataTypeSpec) -> Result { + match spec { + FfiDataTypeSpec::Scalar { + data_type, precision, - )), - DATA_TYPE_DECIMAL => { - let dt = fcore::metadata::DecimalType::new(precision, scale)?; - Ok(fcore::metadata::DataType::Decimal(dt)) - } - DATA_TYPE_CHAR => Ok(fcore::metadata::DataTypes::char(precision)), - DATA_TYPE_BINARY => Ok(fcore::metadata::DataTypes::binary(precision as usize)), - DATA_TYPE_ARRAY => { - if array_nesting > 0 { - build_array_type_from_leaf( - element_dt, - element_precision, - element_scale, - array_nesting, - ) - } else { - // Backward compatibility for older one-level metadata. - if element_dt == 0 { - return Err(anyhow!("ARRAY requires element type metadata")); + scale, + nullable, + } => { + let dt = match data_type { + DATA_TYPE_BOOLEAN => fcore::metadata::DataTypes::boolean(), + DATA_TYPE_TINYINT => fcore::metadata::DataTypes::tinyint(), + DATA_TYPE_SMALLINT => fcore::metadata::DataTypes::smallint(), + DATA_TYPE_INT => fcore::metadata::DataTypes::int(), + DATA_TYPE_BIGINT => fcore::metadata::DataTypes::bigint(), + DATA_TYPE_FLOAT => fcore::metadata::DataTypes::float(), + DATA_TYPE_DOUBLE => fcore::metadata::DataTypes::double(), + DATA_TYPE_STRING => fcore::metadata::DataTypes::string(), + DATA_TYPE_BYTES => fcore::metadata::DataTypes::bytes(), + DATA_TYPE_DATE => fcore::metadata::DataTypes::date(), + DATA_TYPE_TIME => fcore::metadata::DataTypes::time(), + DATA_TYPE_TIMESTAMP => { + fcore::metadata::DataTypes::timestamp_with_precision(precision) + } + DATA_TYPE_TIMESTAMP_LTZ => { + fcore::metadata::DataTypes::timestamp_ltz_with_precision(precision) + } + DATA_TYPE_DECIMAL => { + let dt = fcore::metadata::DecimalType::new(precision, scale)?; + fcore::metadata::DataType::Decimal(dt) } - let element_type = ffi_data_type_to_core( - element_dt, - element_precision, - element_scale, - 0, - 0, - 0, - 0, - )?; - Ok(fcore::metadata::DataTypes::array(element_type)) + DATA_TYPE_CHAR => fcore::metadata::DataTypes::char(precision), + DATA_TYPE_BINARY => fcore::metadata::DataTypes::binary(precision as usize), + _ => return Err(anyhow!("Unknown data type: {}", data_type)), + }; + if nullable { + Ok(dt) + } else { + Ok(dt.as_non_nullable()) } } - _ => Err(anyhow!("Unknown data type: {dt}")), + FfiDataTypeSpec::Array { + element_data_type, + element_precision, + element_scale, + array_nesting, + ref array_nullability, + } => build_array_type_from_leaf( + element_data_type, + element_precision, + element_scale, + array_nesting, + array_nullability, + ), } } @@ -188,24 +244,25 @@ pub fn core_data_type_to_ffi(dt: &fcore::metadata::DataType) -> i32 { fn core_column_to_ffi(col: &fcore::metadata::Column) -> ffi::FfiColumn { let (precision, scale) = type_precision_scale(col.data_type()); - let (array_nesting, element_data_type, element_precision, element_scale) = match col.data_type() - { - fcore::metadata::DataType::Array(_) => { - flatten_array_leaf_type(col.data_type()).unwrap_or((0, 0, 0, 0)) - } - _ => (0, 0, 0, 0), + let flat = match col.data_type() { + fcore::metadata::DataType::Array(_) => flatten_array_leaf_type(col.data_type()).ok(), + _ => None, }; ffi::FfiColumn { name: col.name().to_string(), data_type: core_data_type_to_ffi(col.data_type()), + nullable: col.data_type().is_nullable(), comment: col.comment().unwrap_or("").to_string(), precision, scale, - array_nesting, - element_data_type, - element_precision, - element_scale, + array_nesting: flat.as_ref().map_or(0, |f| f.nesting), + array_nullability: flat + .as_ref() + .map_or_else(Vec::new, |f| f.array_nullability.clone()), + element_data_type: flat.as_ref().map_or(0, |f| f.leaf_type), + element_precision: flat.as_ref().map_or(0, |f| f.leaf_precision), + element_scale: flat.as_ref().map_or(0, |f| f.leaf_scale), } } @@ -347,6 +404,11 @@ pub fn empty_table_info() -> ffi::FfiTableInfo { /// Convert element type tag + precision/scale to core DataType. /// Used by ArrayWriterInner construction from C++. +/// +/// Nullability is hardcoded to `true` (the default) because `ArrayWriter` +/// only needs the type for encoding — the binary array format does not +/// vary based on nullability. Nullability is a schema-level constraint +/// enforced elsewhere (column definition, primary key normalization). pub fn element_type_from_ffi( leaf_dt: i32, precision: u32, @@ -354,9 +416,15 @@ pub fn element_type_from_ffi( array_nesting: u32, ) -> Result { if array_nesting == 0 { - ffi_data_type_to_core(leaf_dt, precision, scale, 0, 0, 0, 0) + ffi_data_type_to_core(FfiDataTypeSpec::Scalar { + data_type: leaf_dt, + precision, + scale, + nullable: true, + }) } else { - build_array_type_from_leaf(leaf_dt, precision, scale, array_nesting) + let array_nullability = vec![1u8; (array_nesting + 1) as usize]; + build_array_type_from_leaf(leaf_dt, precision, scale, array_nesting, &array_nullability) } } diff --git a/bindings/cpp/test/test_ffi_converter.cpp b/bindings/cpp/test/test_ffi_converter.cpp index 4bbe3ebb..2078bdab 100644 --- a/bindings/cpp/test/test_ffi_converter.cpp +++ b/bindings/cpp/test/test_ffi_converter.cpp @@ -24,20 +24,50 @@ namespace { -fluss::ffi::FfiColumn MakeArrayColumn(int32_t nesting, int32_t element_type) { +fluss::ffi::FfiColumn MakeArrayColumn(int32_t nesting, int32_t element_type, + bool nullable = true, bool leaf_nullable = true, + std::vector per_level_nullability = {}) { fluss::ffi::FfiColumn col; col.name = rust::String("bad_array"); col.data_type = static_cast(fluss::TypeId::Array); + col.nullable = nullable; col.comment = rust::String(""); col.precision = 0; col.scale = 0; col.array_nesting = nesting; + if (!per_level_nullability.empty()) { + for (auto v : per_level_nullability) { + col.array_nullability.push_back(v); + } + } else { + for (int32_t i = 0; i < nesting; ++i) { + col.array_nullability.push_back((i == 0 ? nullable : true) ? 1 : 0); + } + col.array_nullability.push_back(leaf_nullable ? 1 : 0); + } col.element_data_type = element_type; col.element_precision = 0; col.element_scale = 0; return col; } +fluss::ffi::FfiColumn MakeScalarColumn(const char* name, fluss::TypeId type_id, + bool nullable = true, int32_t precision = 0, + int32_t scale = 0) { + fluss::ffi::FfiColumn col; + col.name = rust::String(name); + col.data_type = static_cast(type_id); + col.nullable = nullable; + col.comment = rust::String(""); + col.precision = precision; + col.scale = scale; + col.array_nesting = 0; + col.element_data_type = 0; + col.element_precision = 0; + col.element_scale = 0; + return col; +} + } // namespace TEST(FfiConverterTest, RejectsArrayWithoutElementType) { @@ -62,3 +92,149 @@ TEST(FfiConverterTest, SupportsLegacyOneLevelArrayMetadata) { ASSERT_NE(converted.data_type.element_type(), nullptr); EXPECT_EQ(converted.data_type.element_type()->id(), fluss::TypeId::Int); } + +// --- Nullability tests --- + +TEST(DataTypeTest, DefaultNullable) { + auto dt = fluss::DataType::Int(); + EXPECT_TRUE(dt.nullable()); +} + +TEST(DataTypeTest, NotNullMethod) { + auto dt = fluss::DataType::Int().NotNull(); + EXPECT_FALSE(dt.nullable()); + EXPECT_EQ(dt.id(), fluss::TypeId::Int); +} + +TEST(DataTypeTest, NotNullPreservesPrecisionScale) { + auto dt = fluss::DataType::Decimal(10, 2).NotNull(); + EXPECT_FALSE(dt.nullable()); + EXPECT_EQ(dt.precision(), 10); + EXPECT_EQ(dt.scale(), 2); +} + +TEST(DataTypeTest, ArrayElementNullability) { + auto dt = fluss::DataType::Array(fluss::DataType::Int().NotNull()); + EXPECT_TRUE(dt.nullable()); + ASSERT_NE(dt.element_type(), nullptr); + EXPECT_FALSE(dt.element_type()->nullable()); +} + +TEST(DataTypeTest, NotNullArrayNullableElement) { + auto dt = fluss::DataType::Array(fluss::DataType::Int()).NotNull(); + EXPECT_FALSE(dt.nullable()); + ASSERT_NE(dt.element_type(), nullptr); + EXPECT_TRUE(dt.element_type()->nullable()); +} + +TEST(DataTypeTest, NotNullArrayNotNullElement) { + auto dt = fluss::DataType::Array(fluss::DataType::Int().NotNull()).NotNull(); + EXPECT_FALSE(dt.nullable()); + ASSERT_NE(dt.element_type(), nullptr); + EXPECT_FALSE(dt.element_type()->nullable()); +} + +TEST(FfiConverterTest, ScalarNullableRoundTrip) { + fluss::Column col{"id", fluss::DataType::Int(), ""}; + auto ffi_col = fluss::utils::to_ffi_column(col); + EXPECT_TRUE(ffi_col.nullable); + auto back = fluss::utils::from_ffi_column(ffi_col); + EXPECT_TRUE(back.data_type.nullable()); +} + +TEST(FfiConverterTest, ScalarNotNullRoundTrip) { + fluss::Column col{"id", fluss::DataType::Int().NotNull(), ""}; + auto ffi_col = fluss::utils::to_ffi_column(col); + EXPECT_FALSE(ffi_col.nullable); + auto back = fluss::utils::from_ffi_column(ffi_col); + EXPECT_FALSE(back.data_type.nullable()); +} + +TEST(FfiConverterTest, ArrayNotNullElementRoundTrip) { + fluss::Column col{"tags", fluss::DataType::Array(fluss::DataType::String().NotNull()), ""}; + auto ffi_col = fluss::utils::to_ffi_column(col); + EXPECT_TRUE(ffi_col.nullable); + ASSERT_EQ(ffi_col.array_nullability.size(), 2u); + EXPECT_EQ(ffi_col.array_nullability[1], 0); + auto back = fluss::utils::from_ffi_column(ffi_col); + EXPECT_TRUE(back.data_type.nullable()); + ASSERT_NE(back.data_type.element_type(), nullptr); + EXPECT_FALSE(back.data_type.element_type()->nullable()); +} + +TEST(FfiConverterTest, NotNullArrayNullableElementRoundTrip) { + fluss::Column col{"ids", fluss::DataType::Array(fluss::DataType::Int()).NotNull(), ""}; + auto ffi_col = fluss::utils::to_ffi_column(col); + EXPECT_FALSE(ffi_col.nullable); + ASSERT_EQ(ffi_col.array_nullability.size(), 2u); + EXPECT_EQ(ffi_col.array_nullability[1], 1); + auto back = fluss::utils::from_ffi_column(ffi_col); + EXPECT_FALSE(back.data_type.nullable()); + ASSERT_NE(back.data_type.element_type(), nullptr); + EXPECT_TRUE(back.data_type.element_type()->nullable()); +} + +TEST(FfiConverterTest, NotNullArrayNotNullElementRoundTrip) { + fluss::Column col{ + "strict_ids", + fluss::DataType::Array(fluss::DataType::Int().NotNull()).NotNull(), + "", + }; + auto ffi_col = fluss::utils::to_ffi_column(col); + EXPECT_FALSE(ffi_col.nullable); + ASSERT_EQ(ffi_col.array_nullability.size(), 2u); + EXPECT_EQ(ffi_col.array_nullability[1], 0); + auto back = fluss::utils::from_ffi_column(ffi_col); + EXPECT_FALSE(back.data_type.nullable()); + ASSERT_NE(back.data_type.element_type(), nullptr); + EXPECT_FALSE(back.data_type.element_type()->nullable()); +} + +TEST(FfiConverterTest, NestedArrayIntermediateNullabilityRoundTrip) { + fluss::Column col{ + "nested", + fluss::DataType::Array(fluss::DataType::Array(fluss::DataType::Int()).NotNull()), + "", + }; + auto ffi_col = fluss::utils::to_ffi_column(col); + auto back = fluss::utils::from_ffi_column(ffi_col); + + EXPECT_TRUE(back.data_type.nullable()); + ASSERT_NE(back.data_type.element_type(), nullptr); + EXPECT_FALSE(back.data_type.element_type()->nullable()); + ASSERT_NE(back.data_type.element_type()->element_type(), nullptr); + EXPECT_TRUE(back.data_type.element_type()->element_type()->nullable()); +} + +TEST(FfiConverterTest, NestedArrayAllLevelsNullabilityRoundTrip) { + fluss::Column col{ + "strict_nested", + fluss::DataType::Array( + fluss::DataType::Array(fluss::DataType::Int().NotNull()).NotNull()) + .NotNull(), + "", + }; + auto ffi_col = fluss::utils::to_ffi_column(col); + auto back = fluss::utils::from_ffi_column(ffi_col); + + EXPECT_FALSE(back.data_type.nullable()); + ASSERT_NE(back.data_type.element_type(), nullptr); + EXPECT_FALSE(back.data_type.element_type()->nullable()); + ASSERT_NE(back.data_type.element_type()->element_type(), nullptr); + EXPECT_FALSE(back.data_type.element_type()->element_type()->nullable()); +} + +TEST(FfiConverterTest, FfiColumnNonNullableScalarReconstructed) { + auto col = MakeScalarColumn("id", fluss::TypeId::Int, false); + auto converted = fluss::utils::from_ffi_column(col); + EXPECT_FALSE(converted.data_type.nullable()); + EXPECT_EQ(converted.data_type.id(), fluss::TypeId::Int); +} + +TEST(FfiConverterTest, FfiColumnNonNullableArrayReconstructed) { + auto col = MakeArrayColumn(1, static_cast(fluss::TypeId::String), false, false); + auto converted = fluss::utils::from_ffi_column(col); + EXPECT_FALSE(converted.data_type.nullable()); + ASSERT_NE(converted.data_type.element_type(), nullptr); + EXPECT_FALSE(converted.data_type.element_type()->nullable()); +} diff --git a/bindings/cpp/test/test_log_table.cpp b/bindings/cpp/test/test_log_table.cpp index f36c8707..5678e4bb 100644 --- a/bindings/cpp/test/test_log_table.cpp +++ b/bindings/cpp/test/test_log_table.cpp @@ -1458,3 +1458,66 @@ TEST_F(LogTableTest, ArrayWriterOverflowDetection) { EXPECT_NO_THROW(smallint_arr.SetInt32(0, 32767)); } } + +TEST_F(LogTableTest, NullabilityPreservedInTableInfo) { + auto& adm = admin(); + auto& conn = connection(); + + fluss::TablePath table_path("fluss", "test_nullability_table_info_cpp"); + + auto schema = + fluss::Schema::NewBuilder() + .AddColumn("id", fluss::DataType::Int()) + .AddColumn("name", fluss::DataType::String()) + .AddColumn("tags", fluss::DataType::Array(fluss::DataType::String().NotNull())) + .AddColumn("ids", fluss::DataType::Array(fluss::DataType::Int()).NotNull()) + .AddColumn("nested", + fluss::DataType::Array( + fluss::DataType::Array(fluss::DataType::Int()).NotNull())) + .SetPrimaryKeys({"id"}) + .Build(); + + auto table_descriptor = fluss::TableDescriptor::NewBuilder() + .SetSchema(schema) + .SetProperty("table.replication.factor", "1") + .Build(); + + fluss_test::CreateTable(adm, table_path, table_descriptor); + + fluss::Table table; + ASSERT_OK(conn.GetTable(table_path, table)); + auto info = table.GetTableInfo(); + + ASSERT_EQ(info.schema.columns.size(), 5u); + EXPECT_EQ(info.primary_keys, std::vector{"id"}); + + // Primary key columns are forced NOT NULL by schema normalization. + EXPECT_EQ(info.schema.columns[0].data_type.id(), fluss::TypeId::Int); + EXPECT_FALSE(info.schema.columns[0].data_type.nullable()); + + // "name" STRING (nullable) + EXPECT_EQ(info.schema.columns[1].data_type.id(), fluss::TypeId::String); + EXPECT_TRUE(info.schema.columns[1].data_type.nullable()); + + // "tags" ARRAY (outer nullable) + EXPECT_EQ(info.schema.columns[2].data_type.id(), fluss::TypeId::Array); + EXPECT_TRUE(info.schema.columns[2].data_type.nullable()); + ASSERT_NE(info.schema.columns[2].data_type.element_type(), nullptr); + EXPECT_FALSE(info.schema.columns[2].data_type.element_type()->nullable()); + + // "ids" ARRAY NOT NULL (outer not null, element nullable) + EXPECT_EQ(info.schema.columns[3].data_type.id(), fluss::TypeId::Array); + EXPECT_FALSE(info.schema.columns[3].data_type.nullable()); + ASSERT_NE(info.schema.columns[3].data_type.element_type(), nullptr); + EXPECT_TRUE(info.schema.columns[3].data_type.element_type()->nullable()); + + // "nested" ARRAY NOT NULL> (outer nullable, inner array not null) + EXPECT_EQ(info.schema.columns[4].data_type.id(), fluss::TypeId::Array); + EXPECT_TRUE(info.schema.columns[4].data_type.nullable()); + ASSERT_NE(info.schema.columns[4].data_type.element_type(), nullptr); + EXPECT_FALSE(info.schema.columns[4].data_type.element_type()->nullable()); + ASSERT_NE(info.schema.columns[4].data_type.element_type()->element_type(), nullptr); + EXPECT_TRUE(info.schema.columns[4].data_type.element_type()->element_type()->nullable()); + + ASSERT_OK(adm.DropTable(table_path, false)); +} diff --git a/website/docs/user-guide/cpp/api-reference.md b/website/docs/user-guide/cpp/api-reference.md index c50d40cd..ae4e9490 100644 --- a/website/docs/user-guide/cpp/api-reference.md +++ b/website/docs/user-guide/cpp/api-reference.md @@ -492,7 +492,9 @@ Same array getters as [`RowView`](#array-getters-index-based) — `GetArraySize` | `id() -> TypeId` | Get the type ID | | `precision() -> int` | Get precision (for Decimal/Timestamp types) | | `scale() -> int` | Get scale (for Decimal type) | +| `nullable() -> bool` | Returns `true` if this type is nullable (default), `false` if `NOT NULL` | | `element_type() -> const DataType*` | Get element type (for Array type, nullptr otherwise) | +| `NotNull() -> DataType` | Returns a copy of this type with nullable set to `false` | ## `ArrayWriter` diff --git a/website/docs/user-guide/cpp/data-types.md b/website/docs/user-guide/cpp/data-types.md index 400b2ecf..cce40cef 100644 --- a/website/docs/user-guide/cpp/data-types.md +++ b/website/docs/user-guide/cpp/data-types.md @@ -23,6 +23,40 @@ sidebar_position: 3 | `DataType::Decimal(p, s)` | Decimal with precision and scale | | `DataType::Array(element)` | Array of the given element type (supports nesting) | +## Nullability + +All DataTypes are nullable by default. Use `.NotNull()` to create a `NOT NULL` type: + +```cpp +auto schema = fluss::Schema::NewBuilder() + .AddColumn("id", fluss::DataType::Int().NotNull()) + .AddColumn("name", fluss::DataType::String()) // nullable by default + .Build(); +``` + +Primary key columns are automatically forced `NOT NULL` regardless of the `DataType` setting. + +For nested types, nullability is preserved at each array level and at the leaf element: + +```cpp +auto schema = fluss::Schema::NewBuilder() + .AddColumn("tags", fluss::DataType::Array(fluss::DataType::String().NotNull())) + .AddColumn("ids", fluss::DataType::Array(fluss::DataType::Int()).NotNull()) + .AddColumn("nested", fluss::DataType::Array( + fluss::DataType::Array(fluss::DataType::Int()).NotNull())) + .Build(); +// "tags": ARRAY (outer nullable, elements NOT NULL) +// "ids": ARRAY NOT NULL (outer NOT NULL, elements nullable) +// "nested": ARRAY NOT NULL> (outer nullable, inner array NOT NULL) +``` + +You can query nullability at runtime: + +```cpp +auto info = table.GetTableInfo(); +bool is_nullable = info.schema.columns[0].data_type.nullable(); +``` + ## GenericRow Setters `SetInt32` is used for `TinyInt`, `SmallInt`, and `Int` columns. For `TinyInt` and `SmallInt`, the value is validated at write time — an error is returned if it overflows the column's range (e.g., \[-128, 127\] for `TinyInt`, \[-32768, 32767\] for `SmallInt`).