From be238076a0e67b21a6df486f8cb838301ae05f7b Mon Sep 17 00:00:00 2001 From: silverweed Date: Tue, 8 Jul 2025 15:05:16 +0200 Subject: [PATCH 1/2] [skip-ci][ntuple] Update BinaryFormatSpecification to 1.0.1.0 Added Attribute Sets --- tree/ntuple/doc/BinaryFormatSpecification.md | 59 +++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/tree/ntuple/doc/BinaryFormatSpecification.md b/tree/ntuple/doc/BinaryFormatSpecification.md index 4e3d564db5d17..b05f510aa6bba 100644 --- a/tree/ntuple/doc/BinaryFormatSpecification.md +++ b/tree/ntuple/doc/BinaryFormatSpecification.md @@ -1,4 +1,4 @@ -# RNTuple Binary Format Specification 1.0.0.2 +# RNTuple Binary Format Specification 1.0.1.0 ## Versioning Notes @@ -627,6 +627,7 @@ The footer envelope has the following structure: - Header checksum (XxHash-3 64bit) - Schema extension record frame - List frame of cluster group record frames +- List frame of linked attribute set record frames The header checksum can be used to cross-check that header and footer belong together. The meaning of the feature flags is the same as for the header. @@ -684,6 +685,21 @@ The entry range allows for finding the right page list for random access request The number of clusters information allows for using consistent cluster IDs even if cluster groups are accessed non-sequentially. +#### Linked Attribute Set Record Frame + +The attribute set record frame references the anchor of a linked attribute set RNTuple and gives information about it. +It has the following contents, followed by a locator to the linked RNTuple anchor and a string with the attribute set name.
+``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| Schema Version Major | Schema Version Minor | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| Attribute Anchor Uncompressed Size | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +``` +The meaning of the Schema Version is described below. + ### Page List Envelope The page list envelope contains cluster summaries and page locations. @@ -799,6 +815,47 @@ In every cluster, every field has exactly one primary column representation. All other representations must be suppressed. Note that the primary column representation can change from cluster to cluster. +## Linked Attribute Sets + +An RNTuple has zero or more linked attribute sets, containing user-defined metadata. +Each attribute set is stored on disk as an RNTuple and the anchor of each RNTuple is linked to by the main +RNTuple's footer. + +An attribute set RNTuple has the following restrictions compared to a regular RNTuple: + +1. it cannot have linked attribute RNTuples itself; +2. the alias columns sections, both in its header and footer, must be empty (i.e. none of the attribute set RNTuple's + fields can be projected fields); +3. none of its fields may have a structural role of 0x04 (i.e. it must not contain a ROOT streamer object). + +All linked attribute sets must have a non-empty, distinct name. + +### Reserved Attribute Set Names + +Attribute set names starting with `__` (two underscores) are reserved for internal use by implementations: compliant +writers should disallow users from creating attribute sets with such names and only use them for internal metadata +(read access to internal attribute sets may be allowed). + +### Attribute Schema Version + +Each attribute set is created with a user-defined schema.
This schema is not used directly by the underlying attribute +set RNTuple, but it is augmented with internal fields used to store additional data that serve to associate each +entry in the attribute set with those in the main RNTuple. + +The Attribute Schema Version describes the internal schema of the linked attribute set RNTuple. +A change in Major version number indicates a breaking, non-forward-compatible change in the schema: readers should +refuse reading an attribute set whose Major Schema Version is unknown. +A change in Minor version number indicates the presence of optional additional fields in the schema: readers should +still be able to read the attribute set as before, ignoring any new field. + +The current Attribute Schema Version is **1.0**. It has the following fields (in the following order): + 1. `_rangeStart` (type `std::uint64_t`): the start of the range that each attribute entry refers to; + 2. `_rangeLen` (type `std::uint64_t`): the length of the range that each attribute entry refers to. + Note that `_rangeLen == 0` is valid and refers to an empty range; + 3. `_userData` (untyped record): a record-type field that serves as the root field to the user-provided schema description + of the attribute set RNTuple. Each user-defined field that was attached to the root field in the user-provided + schema will be attached to this field in the attribute set RNTuple. + ## Mapping of C++ Types to Fields and Columns This section is a comprehensive list of the C++ types with RNTuple I/O support. 
From 3a8b73cc788b17d1d48b48de19e97a0312123973 Mon Sep 17 00:00:00 2001 From: silverweed Date: Tue, 16 Sep 2025 09:48:08 +0200 Subject: [PATCH 2/2] [ntuple] Add de/serialization of RNTupleAttributes --- tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx | 139 +++++++++++++++++++ tree/ntuple/inc/ROOT/RNTupleSerialize.hxx | 13 ++ tree/ntuple/src/RNTupleDescriptor.cxx | 51 +++++++ tree/ntuple/src/RNTupleSerialize.cxx | 154 ++++++++++++++++++--- tree/ntuple/test/ntuple_descriptor.cxx | 33 +++++ tree/ntuple/test/ntuple_serialize.cxx | 118 ++++++++++++++++ 6 files changed, 488 insertions(+), 20 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx b/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx index a24af5fc9342e..9c8e05fea918b 100644 --- a/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx @@ -67,6 +67,50 @@ struct RNTupleClusterBoundaries { std::vector GetClusterBoundaries(const RNTupleDescriptor &desc); } // namespace Internal +namespace Experimental { + +// clang-format off +/** +\class ROOT::Experimental::RNTupleAttrSetDescriptor +\ingroup NTuple +\brief Metadata stored for every Attribute Set linked to an RNTuple. +*/ +// clang-format on +class RNTupleAttrSetDescriptor final { + friend class Experimental::Internal::RNTupleAttrSetDescriptorBuilder; + + std::uint16_t fSchemaVersionMajor = 0; + std::uint16_t fSchemaVersionMinor = 0; + std::uint32_t fAnchorLength = 0; ///< uncompressed size of the linked anchor + // The locator of the AttributeSet anchor. + // In case of kTypeFile, it points to the beginning of the Anchor's payload. + // NOTE: Only kTypeFile is supported at the moment. 
+ RNTupleLocator fAnchorLocator; + std::string fName; + +public: + RNTupleAttrSetDescriptor() = default; + RNTupleAttrSetDescriptor(const RNTupleAttrSetDescriptor &other) = delete; + RNTupleAttrSetDescriptor &operator=(const RNTupleAttrSetDescriptor &other) = delete; + RNTupleAttrSetDescriptor(RNTupleAttrSetDescriptor &&other) = default; + RNTupleAttrSetDescriptor &operator=(RNTupleAttrSetDescriptor &&other) = default; + + bool operator==(const RNTupleAttrSetDescriptor &other) const; + bool operator!=(const RNTupleAttrSetDescriptor &other) const { return !(*this == other); } + + const std::string &GetName() const { return fName; } + std::uint16_t GetSchemaVersionMajor() const { return fSchemaVersionMajor; } + std::uint16_t GetSchemaVersionMinor() const { return fSchemaVersionMinor; } + std::uint32_t GetAnchorLength() const { return fAnchorLength; } + const RNTupleLocator &GetAnchorLocator() const { return fAnchorLocator; } + + RNTupleAttrSetDescriptor Clone() const; +}; + +class RNTupleAttrSetDescriptorIterable; + +} // namespace Experimental + // clang-format off /** \class ROOT::RFieldDescriptor @@ -701,6 +745,8 @@ private: std::vector fSortedClusterGroupIds; /// Potentially a subset of all the available clusters std::unordered_map fClusterDescriptors; + /// List of AttributeSets linked to this RNTuple + std::vector fAttributeSets; // We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const; @@ -718,6 +764,7 @@ public: class RClusterGroupDescriptorIterable; class RClusterDescriptorIterable; class RExtraTypeInfoDescriptorIterable; + friend class Experimental::RNTupleAttrSetDescriptorIterable; /// Modifiers passed to CreateModel() struct RCreateModelOptions { @@ -806,6 +853,8 @@ public: RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const; + ROOT::Experimental::RNTupleAttrSetDescriptorIterable GetAttrSetIterable() const; + const 
std::string &GetName() const { return fName; } const std::string &GetDescription() const { return fDescription; } @@ -816,6 +865,7 @@ public: std::size_t GetNClusters() const { return fNClusters; } std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); } std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); } + std::size_t GetNAttributeSets() const { return fAttributeSets.size(); } /// We know the number of entries from adding the cluster summaries ROOT::NTupleSize_t GetNEntries() const { return fNEntries; } @@ -1145,6 +1195,59 @@ public: RIterator end() { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cend()); } }; +namespace Experimental { +// clang-format off +/** +\class ROOT::Experimental::RNTupleAttrSetDescriptorIterable +\ingroup NTuple +\brief Used to loop over all the Attribute Sets linked to an RNTuple +*/ +// clang-format on +// TODO: move this to RNTupleDescriptor::RNTupleAttrSetDescriptorIterable when it moves out of Experimental. +class RNTupleAttrSetDescriptorIterable final { +private: + /// The associated RNTuple for this range. 
+ const RNTupleDescriptor &fNTuple; + +public: + class RIterator final { + private: + using Iter_t = std::vector::const_iterator; + /// The wrapped vector iterator + Iter_t fIter; + + public: + using iterator_category = std::forward_iterator_tag; + using iterator = RIterator; + using value_type = RNTupleAttrSetDescriptor; + using difference_type = std::ptrdiff_t; + using pointer = const value_type *; + using reference = const value_type &; + + RIterator(Iter_t iter) : fIter(iter) {} + iterator &operator++() /* prefix */ + { + ++fIter; + return *this; + } + iterator operator++(int) /* postfix */ + { + auto old = *this; + operator++(); + return old; + } + reference operator*() const { return *fIter; } + pointer operator->() const { return &*fIter; } + bool operator!=(const iterator &rh) const { return fIter != rh.fIter; } + bool operator==(const iterator &rh) const { return fIter == rh.fIter; } + }; + + RNTupleAttrSetDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {} + RIterator begin() { return RIterator(fNTuple.fAttributeSets.cbegin()); } + RIterator end() { return RIterator(fNTuple.fAttributeSets.cend()); } +}; +} // namespace Experimental + // clang-format off /** \class ROOT::RNTupleDescriptor::RHeaderExtension @@ -1218,6 +1321,39 @@ public: } }; +namespace Experimental::Internal { +class RNTupleAttrSetDescriptorBuilder final { + ROOT::Experimental::RNTupleAttrSetDescriptor fDesc; + +public: + RNTupleAttrSetDescriptorBuilder &Name(std::string_view name) + { + fDesc.fName = name; + return *this; + } + RNTupleAttrSetDescriptorBuilder &SchemaVersion(std::uint16_t major, std::uint16_t minor) + { + fDesc.fSchemaVersionMajor = major; + fDesc.fSchemaVersionMinor = minor; + return *this; + } + RNTupleAttrSetDescriptorBuilder &AnchorLocator(const RNTupleLocator &loc) + { + fDesc.fAnchorLocator = loc; + return *this; + } + RNTupleAttrSetDescriptorBuilder &AnchorLength(std::uint32_t length) + { + fDesc.fAnchorLength = length; + return *this; + } + + 
/// Attempt to make an AttributeSet descriptor. This may fail if the builder + /// was not given enough information to make a proper descriptor. + RResult MoveDescriptor(); +}; +} // namespace Experimental::Internal + namespace Internal { // clang-format off @@ -1601,6 +1737,8 @@ public: RResult AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc); void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc); + RResult AddAttributeSet(Experimental::RNTupleAttrSetDescriptor &&attrSetDesc); + /// Mark the beginning of the header extension; any fields and columns added after a call to this function are /// annotated as begin part of the header extension. void BeginHeaderExtension(); @@ -1634,6 +1772,7 @@ inline RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc) } } // namespace Internal + } // namespace ROOT #endif // ROOT_RNTupleDescriptor diff --git a/tree/ntuple/inc/ROOT/RNTupleSerialize.hxx b/tree/ntuple/inc/ROOT/RNTupleSerialize.hxx index e767cfeda2662..7a387510f73d8 100644 --- a/tree/ntuple/inc/ROOT/RNTupleSerialize.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleSerialize.hxx @@ -36,6 +36,13 @@ class RNTupleDescriptor; class RClusterDescriptor; enum class EExtraTypeInfoIds; +namespace Experimental { +class RNTupleAttrSetDescriptor; +namespace Internal { +class RNTupleAttrSetDescriptorBuilder; +} // namespace Internal +} // namespace Experimental + namespace Internal { class RClusterDescriptorBuilder; @@ -271,6 +278,12 @@ public: static RResult DeserializeSchemaDescription(const void *buffer, std::uint64_t bufSize, ROOT::Internal::RNTupleDescriptorBuilder &descBuilder); + static RResult + SerializeAttributeSet(const Experimental::RNTupleAttrSetDescriptor &attrSetDesc, void *buffer); + static RResult + DeserializeAttributeSet(const void *buffer, std::uint64_t bufSize, + Experimental::Internal::RNTupleAttrSetDescriptorBuilder &attrSetDescBld); + static RResult SerializeHeader(void *buffer, const RNTupleDescriptor &desc); static 
RResult SerializePageList(void *buffer, const RNTupleDescriptor &desc, std::span physClusterIDs, diff --git a/tree/ntuple/src/RNTupleDescriptor.cxx b/tree/ntuple/src/RNTupleDescriptor.cxx index a0976d324a23f..d0413614e3740 100644 --- a/tree/ntuple/src/RNTupleDescriptor.cxx +++ b/tree/ntuple/src/RNTupleDescriptor.cxx @@ -803,6 +803,8 @@ ROOT::RNTupleDescriptor ROOT::RNTupleDescriptor::Clone() const clone.fSortedClusterGroupIds = fSortedClusterGroupIds; for (const auto &d : fClusterDescriptors) clone.fClusterDescriptors.emplace(d.first, d.second.Clone()); + for (const auto &d : fAttributeSets) + clone.fAttributeSets.emplace_back(d.Clone()); return clone; } @@ -1123,6 +1125,19 @@ void ROOT::Internal::RNTupleDescriptorBuilder::SetFeature(unsigned int flag) fDescriptor.fFeatureFlags.insert(flag); } +ROOT::RResult +ROOT::Experimental::Internal::RNTupleAttrSetDescriptorBuilder::MoveDescriptor() +{ + if (fDesc.fName.empty()) + return R__FAIL("attribute set name cannot be empty"); + if (fDesc.fAnchorLength == 0) + return R__FAIL("invalid anchor length"); + if (fDesc.fAnchorLocator.GetType() == RNTupleLocator::kTypeUnknown) + return R__FAIL("invalid locator type"); + + return std::move(fDesc); +} + ROOT::RResult ROOT::Internal::RColumnDescriptorBuilder::MakeDescriptor() const { if (fColumn.GetLogicalId() == ROOT::kInvalidDescriptorId) @@ -1377,6 +1392,19 @@ void ROOT::Internal::RNTupleDescriptorBuilder::ReplaceExtraTypeInfo(RExtraTypeIn fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc)); } +ROOT::RResult +ROOT::Internal::RNTupleDescriptorBuilder::AddAttributeSet(Experimental::RNTupleAttrSetDescriptor &&attrSetDesc) +{ + auto &attrSets = fDescriptor.fAttributeSets; + if (std::find_if(attrSets.begin(), attrSets.end(), [&name = attrSetDesc.GetName()](const auto &desc) { + return desc.GetName() == name; + }) != attrSets.end()) { + return R__FAIL("attribute sets with duplicate names"); + } + attrSets.push_back(std::move(attrSetDesc)); + return 
RResult::Success(); +} + RNTupleSerializer::StreamerInfoMap_t ROOT::Internal::RNTupleDescriptorBuilder::BuildStreamerInfos() const { RNTupleSerializer::StreamerInfoMap_t streamerInfoMap; @@ -1492,3 +1520,26 @@ ROOT::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable ROOT::RNTupleDescripto { return RExtraTypeInfoDescriptorIterable(*this); } + +ROOT::Experimental::RNTupleAttrSetDescriptorIterable ROOT::RNTupleDescriptor::GetAttrSetIterable() const +{ + return Experimental::RNTupleAttrSetDescriptorIterable(*this); +} + +bool ROOT::Experimental::RNTupleAttrSetDescriptor::operator==(const RNTupleAttrSetDescriptor &other) const +{ + return fAnchorLength == other.fAnchorLength && fSchemaVersionMajor == other.fSchemaVersionMajor && + fSchemaVersionMinor == other.fSchemaVersionMinor && fAnchorLocator == other.fAnchorLocator && + fName == other.fName; +}; + +ROOT::Experimental::RNTupleAttrSetDescriptor ROOT::Experimental::RNTupleAttrSetDescriptor::Clone() const +{ + RNTupleAttrSetDescriptor desc; + desc.fAnchorLength = fAnchorLength; + desc.fSchemaVersionMajor = fSchemaVersionMajor; + desc.fSchemaVersionMinor = fSchemaVersionMinor; + desc.fAnchorLocator = fAnchorLocator; + desc.fName = fName; + return desc; +} diff --git a/tree/ntuple/src/RNTupleSerialize.cxx b/tree/ntuple/src/RNTupleSerialize.cxx index f6caa493e9a67..fb67f8fdefe4a 100644 --- a/tree/ntuple/src/RNTupleSerialize.cxx +++ b/tree/ntuple/src/RNTupleSerialize.cxx @@ -1799,6 +1799,27 @@ ROOT::RResult ROOT::Internal::RNTupleSerializer::SerializeFooter( return R__FORWARD_ERROR(res); } + // Attributes + frame = pos; + const auto nAttributeSets = desc.GetNAttributeSets(); + if (nAttributeSets > 0) { + R__LOG_WARNING(NTupleLog()) << "RNTuple Attributes are experimental. 
They are not guaranteed to be readable " + "back in the future (but your main data is)"; + } + pos += SerializeListFramePreamble(nAttributeSets, *where); + for (const auto &attrSet : desc.GetAttrSetIterable()) { + if (auto res = SerializeAttributeSet(attrSet, *where)) { + pos += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + } + if (auto res = SerializeFramePostscript(buffer ? frame : nullptr, pos - frame)) { + pos += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + std::uint32_t size = pos - base; if (auto res = SerializeEnvelopePostscript(base, size)) { size += res.Unwrap(); @@ -1808,6 +1829,33 @@ ROOT::RResult ROOT::Internal::RNTupleSerializer::SerializeFooter( return size; } +ROOT::RResult +ROOT::Internal::RNTupleSerializer::SerializeAttributeSet(const Experimental::RNTupleAttrSetDescriptor &attrDesc, + void *buffer) +{ + auto base = reinterpret_cast(buffer); + auto pos = base; + void **where = (buffer == nullptr) ? &buffer : reinterpret_cast(&pos); + + auto frame = pos; + pos += RNTupleSerializer::SerializeRecordFramePreamble(*where); + pos += SerializeUInt16(attrDesc.GetSchemaVersionMajor(), *where); + pos += SerializeUInt16(attrDesc.GetSchemaVersionMinor(), *where); + pos += SerializeUInt32(attrDesc.GetAnchorLength(), *where); + if (auto res = SerializeLocator(attrDesc.GetAnchorLocator(), *where)) { + pos += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + pos += SerializeString(attrDesc.GetName(), *where); + auto size = pos - frame; + if (auto res = SerializeFramePostscript(buffer ? 
frame : nullptr, size)) { + return size; + } else { + return R__FORWARD_ERROR(res); + } +} + ROOT::RResult ROOT::Internal::RNTupleSerializer::DeserializeHeader(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder) { @@ -1918,36 +1966,102 @@ ROOT::RResult ROOT::Internal::RNTupleSerializer::DeserializeFooter(const v } bytes = frame + frameSize; - std::uint32_t nClusterGroups; - frame = bytes; - if (auto res = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterGroups)) { - bytes += res.Unwrap(); - } else { - return R__FORWARD_ERROR(res); - } - for (std::uint32_t groupId = 0; groupId < nClusterGroups; ++groupId) { - RClusterGroup clusterGroup; - if (auto res = DeserializeClusterGroup(bytes, fnFrameSizeLeft(), clusterGroup)) { + { + std::uint32_t nClusterGroups; + frame = bytes; + if (auto res = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterGroups)) { bytes += res.Unwrap(); } else { return R__FORWARD_ERROR(res); } + for (std::uint32_t groupId = 0; groupId < nClusterGroups; ++groupId) { + RClusterGroup clusterGroup; + if (auto res = DeserializeClusterGroup(bytes, fnFrameSizeLeft(), clusterGroup)) { + bytes += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + + descBuilder.AddToOnDiskFooterSize(clusterGroup.fPageListEnvelopeLink.fLocator.GetNBytesOnStorage()); + RClusterGroupDescriptorBuilder clusterGroupBuilder; + clusterGroupBuilder.ClusterGroupId(groupId) + .PageListLocator(clusterGroup.fPageListEnvelopeLink.fLocator) + .PageListLength(clusterGroup.fPageListEnvelopeLink.fLength) + .MinEntry(clusterGroup.fMinEntry) + .EntrySpan(clusterGroup.fEntrySpan) + .NClusters(clusterGroup.fNClusters); + descBuilder.AddClusterGroup(clusterGroupBuilder.MoveDescriptor().Unwrap()); + } + bytes = frame + frameSize; + } - descBuilder.AddToOnDiskFooterSize(clusterGroup.fPageListEnvelopeLink.fLocator.GetNBytesOnStorage()); - RClusterGroupDescriptorBuilder clusterGroupBuilder; - 
clusterGroupBuilder.ClusterGroupId(groupId) - .PageListLocator(clusterGroup.fPageListEnvelopeLink.fLocator) - .PageListLength(clusterGroup.fPageListEnvelopeLink.fLength) - .MinEntry(clusterGroup.fMinEntry) - .EntrySpan(clusterGroup.fEntrySpan) - .NClusters(clusterGroup.fNClusters); - descBuilder.AddClusterGroup(clusterGroupBuilder.MoveDescriptor().Unwrap()); + // NOTE: Attributes were introduced in v1.0.1.0, so this section may be missing. + // Testing for > 8 because bufSize includes the checksum. + if (fnBufSizeLeft() > 8) { + std::uint32_t nAttributeSets; + frame = bytes; + if (auto res = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nAttributeSets)) { + bytes += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + if (nAttributeSets > 0) { + R__LOG_WARNING(NTupleLog()) << "RNTuple Attributes are experimental. They are not guaranteed to be readable " + "back in the future (but your main data is)"; + } + for (std::uint32_t attrSetId = 0; attrSetId < nAttributeSets; ++attrSetId) { + Experimental::Internal::RNTupleAttrSetDescriptorBuilder attrSetDescBld; + if (auto res = DeserializeAttributeSet(bytes, fnBufSizeLeft(), attrSetDescBld)) { + descBuilder.AddAttributeSet(attrSetDescBld.MoveDescriptor().Unwrap()); + bytes += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + } + bytes = frame + frameSize; } - bytes = frame + frameSize; return RResult::Success(); } +ROOT::RResult ROOT::Internal::RNTupleSerializer::DeserializeAttributeSet( + const void *buffer, std::uint64_t bufSize, Experimental::Internal::RNTupleAttrSetDescriptorBuilder &attrSetDescBld) +{ + auto base = reinterpret_cast(buffer); + auto bytes = base; + auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); }; + + std::uint64_t frameSize; + if (auto res = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize)) { + bytes += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + if (fnBufSizeLeft() < static_cast(sizeof(std::uint64_t))) + return 
R__FAIL("record frame too short"); + std::uint16_t vMajor, vMinor; + bytes += DeserializeUInt16(bytes, vMajor); + bytes += DeserializeUInt16(bytes, vMinor); + std::uint32_t anchorLen; + bytes += DeserializeUInt32(bytes, anchorLen); + RNTupleLocator anchorLoc; + if (auto res = DeserializeLocator(bytes, fnBufSizeLeft(), anchorLoc)) { + bytes += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + std::string name; + if (auto res = DeserializeString(bytes, fnBufSizeLeft(), name)) { + bytes += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + + attrSetDescBld.SchemaVersion(vMajor, vMinor).AnchorLength(anchorLen).AnchorLocator(anchorLoc).Name(name); + + return frameSize; +} + ROOT::RResult> ROOT::Internal::RNTupleSerializer::DeserializePageListRaw(const void *buffer, std::uint64_t bufSize, ROOT::DescriptorId_t clusterGroupId, diff --git a/tree/ntuple/test/ntuple_descriptor.cxx b/tree/ntuple/test/ntuple_descriptor.cxx index b195bf2a04460..b99e794abe7d8 100644 --- a/tree/ntuple/test/ntuple_descriptor.cxx +++ b/tree/ntuple/test/ntuple_descriptor.cxx @@ -372,6 +372,39 @@ TEST(RNTupleDescriptor, GetTypeNameForComparison) } } +TEST(RNTupleDescriptor, AttributeSets) +{ + RNTupleLocator locator; + locator.SetType(ROOT::RNTupleLocator::kTypeFile); + locator.SetPosition(128ul); + ROOT::Experimental::Internal::RNTupleAttrSetDescriptorBuilder builder; + builder.SchemaVersion(1, 0).AnchorLength(1024).AnchorLocator(locator).Name("AttrSetName"); + RNTupleDescriptorBuilder descBuilder; + descBuilder.SetVersion(1, 0, 1, 0); + descBuilder.SetNTuple("ntpl", ""); + descBuilder.AddAttributeSet(builder.MoveDescriptor().Unwrap()); + + locator.SetPosition(555ul); + builder.SchemaVersion(2, 4).AnchorLength(200).AnchorLocator(locator).Name("AttrSetName 2"); + descBuilder.AddAttributeSet(builder.MoveDescriptor().Unwrap()); + + auto desc = descBuilder.MoveDescriptor(); + ASSERT_EQ(desc.GetNAttributeSets(), 2); + auto attrSets = desc.GetAttrSetIterable().begin(); + 
EXPECT_EQ(attrSets->GetName(), "AttrSetName"); + EXPECT_EQ(attrSets->GetAnchorLength(), 1024); + EXPECT_EQ(attrSets->GetSchemaVersionMajor(), 1); + EXPECT_EQ(attrSets->GetSchemaVersionMinor(), 0); + EXPECT_EQ(attrSets->GetAnchorLocator().GetPosition(), 128); + + ++attrSets; + EXPECT_EQ(attrSets->GetName(), "AttrSetName 2"); + EXPECT_EQ(attrSets->GetAnchorLength(), 200); + EXPECT_EQ(attrSets->GetSchemaVersionMajor(), 2); + EXPECT_EQ(attrSets->GetSchemaVersionMinor(), 4); + EXPECT_EQ(attrSets->GetAnchorLocator().GetPosition(), 555); +} + TEST(RFieldDescriptorIterable, IterateOverFieldNames) { auto model = RNTupleModel::Create(); diff --git a/tree/ntuple/test/ntuple_serialize.cxx b/tree/ntuple/test/ntuple_serialize.cxx index ffa0afea6adab..ae23a1bc237a4 100644 --- a/tree/ntuple/test/ntuple_serialize.cxx +++ b/tree/ntuple/test/ntuple_serialize.cxx @@ -2059,3 +2059,121 @@ TEST(RNTuple, SerializeMultiColumnRepresentationDeferredInMainHeader) EXPECT_EQ(expect2_0, columnRange2_0); EXPECT_EQ(expect2_1, columnRange2_1); } + +TEST(RNTuple, SerializeAttrSets) +{ + RNTupleLocator locator; + locator.SetType(ROOT::RNTupleLocator::kTypeFile); + locator.SetPosition(128ul); + ROOT::Experimental::Internal::RNTupleAttrSetDescriptorBuilder builder; + builder.SchemaVersion(1, 0).AnchorLength(1024).AnchorLocator(locator).Name("AttrSetName"); + auto attrSetDesc = builder.MoveDescriptor().Unwrap(); + + auto res = RNTupleSerializer::SerializeAttributeSet(attrSetDesc, nullptr); /* nullptr buffer: first pass, result used to size the buffer */ + ASSERT_TRUE(bool(res)); + auto buf = MakeUninitArray(res.Unwrap()); + res = RNTupleSerializer::SerializeAttributeSet(attrSetDesc, buf.get()); + ASSERT_TRUE(bool(res)); /* check the result of the buffer-writing pass, not the buffer pointer */ + + ROOT::Experimental::Internal::RNTupleAttrSetDescriptorBuilder deserializedDescBld; + RNTupleSerializer::DeserializeAttributeSet(buf.get(), res.Unwrap(), deserializedDescBld); + + ASSERT_EQ(attrSetDesc, deserializedDescBld.MoveDescriptor().Unwrap()); /* round trip must reproduce the descriptor */ +} + +TEST(RNTuple, SerializeDescriptorWithAttrSets) +{ + RNTupleDescriptorBuilder builder; +
builder.SetVersionForWriting(); + builder.SetNTuple("ntpl", ""); + + builder.AddField(RFieldDescriptorBuilder() + .FieldId(0) + .FieldName("") + .Structure(ROOT::ENTupleStructure::kRecord) + .MakeDescriptor() + .Unwrap()); + + builder.AddField(RFieldDescriptorBuilder() + .FieldId(5) + .FieldName("pt") + .TypeName("float") + .Structure(ROOT::ENTupleStructure::kPlain) + .MakeDescriptor() + .Unwrap()); + builder.AddFieldLink(0, 5); + builder.AddColumn(RColumnDescriptorBuilder() + .LogicalColumnId(0) + .PhysicalColumnId(0) + .FieldId(5) + .BitsOnStorage(32) + .Type(ROOT::ENTupleColumnType::kReal32) + .FirstElementIndex(1) + .MakeDescriptor() + .Unwrap()); + builder.AddColumn(RColumnDescriptorBuilder() + .LogicalColumnId(1) + .PhysicalColumnId(1) + .FieldId(5) + .BitsOnStorage(16) + .Type(ROOT::ENTupleColumnType::kReal16) + .RepresentationIndex(1) + .FirstElementIndex(1) + .SetSuppressedDeferred() + .MakeDescriptor() + .Unwrap()); + + RNTupleLocator locator; + locator.SetType(ROOT::RNTupleLocator::kTypeFile); + locator.SetPosition(128ul); + + ROOT::Experimental::Internal::RNTupleAttrSetDescriptorBuilder attrBuilder; + attrBuilder.SchemaVersion(1, 0).AnchorLength(1024).AnchorLocator(locator).Name("AttrSetName"); + + // First cluster + RClusterDescriptorBuilder clusterBuilder; + clusterBuilder.ClusterId(0).FirstEntryIndex(0).NEntries(1); + builder.AddCluster(clusterBuilder.MoveDescriptor().Unwrap()); + + RClusterGroupDescriptorBuilder cgBuilder; + cgBuilder.ClusterGroupId(0).NClusters(1).EntrySpan(3); + cgBuilder.AddSortedClusters({0}); + builder.AddClusterGroup(cgBuilder.MoveDescriptor().Unwrap()); + + builder.AddAttributeSet(attrBuilder.MoveDescriptor().Unwrap()); + auto desc = builder.MoveDescriptor(); + + auto context = RNTupleSerializer::SerializeHeader(nullptr, desc).Unwrap(); + auto bufHeader = MakeUninitArray(context.GetHeaderSize()); + ROOT::TestSupport::CheckDiagsRAII diagsRAII; + diagsRAII.requiredDiag(kWarning, "ROOT.NTuple", "Attributes are experimental", 
false); + context = RNTupleSerializer::SerializeHeader(bufHeader.get(), desc).Unwrap(); + + std::vector physClusterIDs{context.MapClusterId(0)}; + context.MapClusterGroupId(0); + auto sizePageList = RNTupleSerializer::SerializePageList(nullptr, desc, physClusterIDs, context).Unwrap(); + EXPECT_GT(sizePageList, 0); + auto bufPageList = MakeUninitArray(sizePageList); + EXPECT_EQ(sizePageList, + RNTupleSerializer::SerializePageList(bufPageList.get(), desc, physClusterIDs, context).Unwrap()); + + auto sizeFooter = RNTupleSerializer::SerializeFooter(nullptr, desc, context).Unwrap(); + EXPECT_GT(sizeFooter, 0); + auto bufFooter = MakeUninitArray(sizeFooter); + EXPECT_EQ(sizeFooter, RNTupleSerializer::SerializeFooter(bufFooter.get(), desc, context).Unwrap()); + + builder.SetVersionForWriting(); + RNTupleSerializer::DeserializeHeader(bufHeader.get(), context.GetHeaderSize(), builder).ThrowOnError(); + RNTupleSerializer::DeserializeFooter(bufFooter.get(), sizeFooter, builder); + auto deserializedDesc = builder.MoveDescriptor(); + RNTupleSerializer::DeserializePageList(bufPageList.get(), sizePageList, 0, deserializedDesc, + EDeserializeMode::kForReading); + + ASSERT_EQ(desc.GetNAttributeSets(), deserializedDesc.GetNAttributeSets()); + auto attrSets = desc.GetAttrSetIterable().begin(); + auto deserAttrSets = deserializedDesc.GetAttrSetIterable().begin(); + for (auto i = 0u; i < desc.GetNAttributeSets(); ++i) { + EXPECT_EQ(*attrSets, *deserAttrSets); + ++attrSets, ++deserAttrSets; + } +}