From 20572498a5f67130cf9ffbcb3bd2b7b87c63de37 Mon Sep 17 00:00:00 2001 From: Kelvin Date: Sat, 20 Dec 2025 21:50:53 +0800 Subject: [PATCH 1/2] chore: Update the InternalRow API: get_binary, get_bytes to return &[u8] --- bindings/cpp/src/types.rs | 4 ++-- crates/fluss/src/row/column.rs | 6 ++---- crates/fluss/src/row/mod.rs | 12 ++++++------ 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/bindings/cpp/src/types.rs b/bindings/cpp/src/types.rs index f9404ac6..8221f22c 100644 --- a/bindings/cpp/src/types.rs +++ b/bindings/cpp/src/types.rs @@ -332,12 +332,12 @@ fn core_row_to_ffi_fields(row: &fcore::row::ColumnarRow) -> Vec { ArrowDataType::Binary => { let mut datum = new_datum(DATUM_TYPE_BYTES); // todo: avoid copy bytes for blob - datum.bytes_val = row.get_bytes(i); + datum.bytes_val = row.get_bytes(i).to_vec(); datum } ArrowDataType::FixedSizeBinary(len) => { let mut datum = new_datum(DATUM_TYPE_BYTES); - datum.bytes_val = row.get_binary(i, *len as usize); + datum.bytes_val = row.get_binary(i, *len as usize).to_vec(); datum } ArrowDataType::LargeBinary => { diff --git a/crates/fluss/src/row/column.rs b/crates/fluss/src/row/column.rs index 6d47836d..24747d65 100644 --- a/crates/fluss/src/row/column.rs +++ b/crates/fluss/src/row/column.rs @@ -156,23 +156,21 @@ impl InternalRow for ColumnarRow { .value(self.row_id) } - fn get_binary(&self, pos: usize, _length: usize) -> Vec { + fn get_binary(&self, pos: usize, _length: usize) -> &[u8] { self.record_batch .column(pos) .as_any() .downcast_ref::() .expect("Expected binary array.") .value(self.row_id) - .to_vec() } - fn get_bytes(&self, pos: usize) -> Vec { + fn get_bytes(&self, pos: usize) -> &[u8]{ self.record_batch .column(pos) .as_any() .downcast_ref::() .expect("Expected bytes array.") .value(self.row_id) - .to_vec() } } diff --git a/crates/fluss/src/row/mod.rs b/crates/fluss/src/row/mod.rs index 909f3b13..8b477127 100644 --- a/crates/fluss/src/row/mod.rs +++ b/crates/fluss/src/row/mod.rs @@ -66,10 +66,10 @@ pub trait InternalRow { // fn get_timestamp_ltz(&self, pos: usize, precision: usize) -> TimestampLtz; /// Returns the binary value at the given position with fixed length - fn get_binary(&self, pos: usize, length: usize) -> Vec; + fn get_binary(&self, pos: usize, length: usize) -> &[u8]; /// Returns the binary value at the given position - fn get_bytes(&self, pos: usize) -> Vec; + fn get_bytes(&self, pos: usize) -> &[u8]; } pub struct GenericRow<'a> { @@ -132,12 +132,12 @@ impl<'a> InternalRow for GenericRow<'a> { self.values.get(pos).unwrap().try_into().unwrap() } - fn get_binary(&self, pos: usize, _length: usize) -> Vec { - self.values.get(pos).unwrap().as_blob().to_vec() + fn get_binary(&self, pos: usize, _length: usize) -> &[u8]{ + self.values.get(pos).unwrap().as_blob() } - fn get_bytes(&self, pos: usize) -> Vec { - self.values.get(pos).unwrap().as_blob().to_vec() + fn get_bytes(&self, pos: usize) -> &[u8] { + self.values.get(pos).unwrap().as_blob() } } From 338d4d70d58645df5bfb9c15a0040203d316110e Mon Sep 17 00:00:00 2001 From: Kelvin Date: Sat, 20 Dec 2025 22:40:40 +0800 Subject: [PATCH 2/2] fix: formatting --- crates/fluss/src/row/column.rs | 2 +- crates/fluss/src/row/mod.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/fluss/src/row/column.rs b/crates/fluss/src/row/column.rs index 24747d65..20d86c02 100644 --- a/crates/fluss/src/row/column.rs +++ b/crates/fluss/src/row/column.rs @@ -165,7 +165,7 @@ impl InternalRow for ColumnarRow { .value(self.row_id) } - fn get_bytes(&self, pos: usize) -> &[u8]{ + fn get_bytes(&self, pos: usize) -> &[u8] { self.record_batch .column(pos) .as_any() diff --git a/crates/fluss/src/row/mod.rs b/crates/fluss/src/row/mod.rs index 8b477127..dd1dedfe 100644 --- a/crates/fluss/src/row/mod.rs +++ b/crates/fluss/src/row/mod.rs @@ -132,11 +132,11 @@ impl<'a> InternalRow for GenericRow<'a> { self.values.get(pos).unwrap().try_into().unwrap() } - fn get_binary(&self, pos: usize, _length: usize) -> &[u8]{ + fn get_binary(&self, pos: usize, _length: usize) -> &[u8] { self.values.get(pos).unwrap().as_blob() } - fn get_bytes(&self, pos: usize) -> &[u8] { + fn get_bytes(&self, pos: usize) -> &[u8] { self.values.get(pos).unwrap().as_blob() } }