Skip to content

Commit 034ff49

Browse files
committed
GH-45948: [C++][Parquet] Variant shredding
1 parent f6b8e66 commit 034ff49

10 files changed

Lines changed: 4592 additions & 4 deletions

cpp/src/arrow/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,7 @@ set(ARROW_SRCS
393393
extension/parquet_variant.cc
394394
extension/variant.cc
395395
extension/variant_builder.cc
396+
extension/variant_shredding.cc
396397
extension/uuid.cc
397398
pretty_print.cc
398399
record_batch.cc

cpp/src/arrow/extension/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
# under the License.
1717

1818
set(CANONICAL_EXTENSION_TESTS bool8_test.cc json_test.cc uuid_test.cc
19-
variant_test.cc variant_builder_test.cc)
19+
variant_test.cc variant_builder_test.cc
20+
variant_shredding_test.cc)
2021

2122
if(ARROW_JSON)
2223
list(APPEND CANONICAL_EXTENSION_TESTS tensor_extension_array_test.cc opaque_test.cc)

cpp/src/arrow/extension/meson.build

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
canonical_extension_tests = ['bool8_test.cc', 'json_test.cc', 'uuid_test.cc', 'variant_test.cc', 'variant_builder_test.cc']
18+
canonical_extension_tests = ['bool8_test.cc', 'json_test.cc', 'uuid_test.cc', 'variant_test.cc', 'variant_builder_test.cc', 'variant_shredding_test.cc']
1919

2020
if needs_json
2121
canonical_extension_tests += [
@@ -41,5 +41,6 @@ install_headers(
4141
'uuid.h',
4242
'variable_shape_tensor.h',
4343
'variant.h',
44+
'variant_shredding.h',
4445
],
4546
)

cpp/src/arrow/extension/parquet_variant.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,10 @@ class ARROW_EXPORT VariantExtensionType : public ExtensionType {
7070
std::shared_ptr<Field> value() const { return value_; }
7171

7272
private:
73-
// TODO GH-45948 added shredded_value
73+
// TODO: Track shredded_value field when integrating with Parquet reader.
74+
// The shredding implementation (variant_shredding.h) operates on raw arrays
75+
// externally; a future PR may extend VariantExtensionType to understand
76+
// shredded storage layouts for seamless Parquet I/O integration.
7477
std::shared_ptr<Field> metadata_;
7578
std::shared_ptr<Field> value_;
7679
};

cpp/src/arrow/extension/variant.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -678,8 +678,15 @@ class ARROW_EXPORT VariantBuilder {
678678
void Reset();
679679
/// @}
680680

681+
/// @name Shredding support (GH-45948)
682+
/// @{
683+
/// \brief Return the bytes currently held in the builder's value buffer.
///
/// The buffer is moved out to the caller and left empty afterwards.
/// NOTE(review): presumably "WithoutMeta" means no metadata/dictionary is
/// emitted alongside the value bytes -- confirm against Build().
///
/// \return the encoded bytes, or Status::Invalid if no value has been written
Result<std::vector<uint8_t>> BuildWithoutMeta();
684+
/// \brief Append `size` raw bytes starting at `data` to the value buffer.
///
/// The bytes are copied verbatim; their content is not inspected.
/// Debug builds assert that `data` is non-null and `size` is positive;
/// otherwise a non-positive `size` makes the call a no-op.
///
/// \param[in] data pointer to the first byte to copy
/// \param[in] size number of bytes to copy
void UnsafeAppendEncoded(const uint8_t* data, int64_t size);
685+
/// \brief Set the builder's allow-duplicates flag.
///
/// NOTE(review): presumably this controls whether duplicate object keys are
/// accepted while building -- confirm where the flag is consumed.
///
/// \param[in] allow new value of the flag
void SetAllowDuplicates(bool allow);
686+
/// @}
687+
681688
/// @name Internal (used by scopes and shredding)
682-
/// @name Internal (used by scopes)
689+
/// @{
683690
void Truncate(int64_t offset);
684691
/// @}
685692

cpp/src/arrow/extension/variant_builder.cc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,4 +632,22 @@ Status ListScope::Finish() {
632632
return Status::OK();
633633
}
634634

635+
// Hands the accumulated value bytes over to the caller and leaves the
// builder's buffer empty. Fails with Status::Invalid when nothing has been
// written to the buffer yet.
Result<std::vector<uint8_t>> VariantBuilder::BuildWithoutMeta() {
  const bool nothing_written = buffer_.empty();
  if (nothing_written) {
    return Status::Invalid("VariantBuilder::BuildWithoutMeta: no value written");
  }

  // Steal the storage rather than copying it, then put the moved-from
  // buffer back into a well-defined empty state.
  std::vector<uint8_t> encoded = std::move(buffer_);
  buffer_.clear();
  return encoded;
}
643+
644+
// Copies `size` bytes starting at `data` onto the end of the value buffer
// without inspecting them. Debug builds assert a non-null pointer and a
// positive size; otherwise a non-positive size appends nothing.
void VariantBuilder::UnsafeAppendEncoded(const uint8_t* data, int64_t size) {
  DCHECK_NE(data, nullptr);
  DCHECK_GT(size, 0);

  if (size > 0) {
    const uint8_t* const last = data + size;
    buffer_.insert(buffer_.end(), data, last);
  }
}
650+
651+
// Stores the caller's choice in the builder's allow-duplicates flag.
void VariantBuilder::SetAllowDuplicates(bool allow) {
  allow_duplicates_ = allow;
}
652+
635653
} // namespace arrow::extension::variant

0 commit comments

Comments
 (0)