diff --git a/tree/ntuple/doc/BinaryFormatSpecification.md b/tree/ntuple/doc/BinaryFormatSpecification.md index 4e3d564db5d17..303db21b2f484 100644 --- a/tree/ntuple/doc/BinaryFormatSpecification.md +++ b/tree/ntuple/doc/BinaryFormatSpecification.md @@ -1,4 +1,4 @@ -# RNTuple Binary Format Specification 1.0.0.2 +# RNTuple Binary Format Specification 1.0.1.0 ## Versioning Notes @@ -627,6 +627,7 @@ The footer envelope has the following structure: - Header checksum (XxHash-3 64bit) - Schema extension record frame - List frame of cluster group record frames +- List frame of linked attribute set record frames The header checksum can be used to cross-check that header and footer belong together. The meaning of the feature flags is the same as for the header. @@ -684,6 +685,40 @@ The entry range allows for finding the right page list for random access request The number of clusters information allows for using consistent cluster IDs even if cluster groups are accessed non-sequentially. +### Linked Attribute Sets + +An RNTuple may have zero or more linked Attribute Sets, containing user-defined metadata. +Each Attribute Set is stored on disk as an RNTuple and the anchor of each such RNTuple is linked to by the main +RNTuple's footer. + +An Attribute Set RNTuple has a number of restrictions compared to a regular RNTuple: + +1. it cannot have linked Attribute Sets itself; +2. the alias column sections, both in its header and footer, must be empty (i.e. none of the Attribute Set RNTuple's + fields can be projected fields); +3. none of its fields may have a structural role of 0x04 (i.e. 
it must not contain a ROOT streamer object). + +An attribute set record frame has the following contents: +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| Schema Version Major | Schema Version Minor | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| Attribute Anchor Uncompressed Size | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +``` + +- The first 32 bits contain the _Attribute Schema Version_. This is split into a _Major_ (the 16 least significant bits) and a + _Minor_ (the 16 most significant bits) version. The Attribute Schema Version is described below; +- a 32-bit unsigned integer follows, containing the uncompressed size of the Attribute Anchor. + +These fields are followed by: + +- a locator pointing to the Attribute Set RNTuple's anchor; +- a string containing the Attribute Set's name. All linked Attribute Sets must have a non-empty, distinct name. + + ### Page List Envelope The page list envelope contains cluster summaries and page locations. @@ -799,6 +834,25 @@ In every cluster, every field has exactly one primary column representation. All other representations must be suppressed. Note that the primary column representation can change from cluster to cluster. +### Attribute Schema Version +Each Attribute Set is created with a user-defined model. This model is not used directly by the underlying Attribute +Set RNTuple, but it is augmented with internal fields used to store additional data that serve to associate each +entry in the Attribute Set with entries in the main RNTuple. + +The Attribute Schema Version describes the internal schema of the linked Attribute Set RNTuple. +A change in Major version number indicates a breaking, non-forward-compatible change in the schema: readers should +refuse to read an Attribute Set whose Major Schema Version is unknown. 
+A change in Minor version number indicates the presence of optional additional fields in the schema: readers should +still be able to read the Attribute Set as before, ignoring any new field. + +The current Attribute Schema Version is **1.0**. It has the following fields (in the following order): + 1. `_rangeStart` (type `std::uint64_t`): the start of the range that each Attribute Entry refers to; + 2. `_rangeLen` (type `std::uint64_t`): the length of the range that each Attribute Entry refers to. + Note that `_rangeLen == 0` is valid and refers to an empty range; + 3. `_userModel` (untyped record): a record-type field that serves as the root field to the user-provided RNTupleModel + used by the Attribute Set RNTuple. Each user-defined field that was attached to the Field Zero in the user-provided + Model will be attached to this field in the Attribute Set RNTuple. + ## Mapping of C++ Types to Fields and Columns This section is a comprehensive list of the C++ types with RNTuple I/O support. diff --git a/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx b/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx index a24af5fc9342e..3bee4d5b0cc7e 100644 --- a/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx @@ -67,6 +67,53 @@ struct RNTupleClusterBoundaries { std::vector GetClusterBoundaries(const RNTupleDescriptor &desc); } // namespace Internal +namespace Experimental { +namespace Internal { +class RNTupleAttrSetDescriptorBuilder; +} + +// clang-format off +/** +\class ROOT::Experimental::RNTupleAttrSetDescriptor +\ingroup NTuple +\brief Metadata stored for every Attribute Set linked to an RNTuple. +*/ +// clang-format on +class RNTupleAttrSetDescriptor final { + friend class Experimental::Internal::RNTupleAttrSetDescriptorBuilder; + + std::uint16_t fSchemaVersionMajor = 0; + std::uint16_t fSchemaVersionMinor = 0; + std::uint32_t fAnchorLength = 0; ///< uncompressed size of the linked anchor + // The locator of the AttributeSet anchor. 
+ // In case of kTypeFile, it points to the beginning of the Anchor's payload. + // NOTE: Only kTypeFile is supported at the moment. + RNTupleLocator fAnchorLocator; + std::string fName; + +public: + RNTupleAttrSetDescriptor() = default; + RNTupleAttrSetDescriptor(const RNTupleAttrSetDescriptor &other) = delete; + RNTupleAttrSetDescriptor &operator=(const RNTupleAttrSetDescriptor &other) = delete; + RNTupleAttrSetDescriptor(RNTupleAttrSetDescriptor &&other) = default; + RNTupleAttrSetDescriptor &operator=(RNTupleAttrSetDescriptor &&other) = default; + + bool operator==(const RNTupleAttrSetDescriptor &other) const; + bool operator!=(const RNTupleAttrSetDescriptor &other) const { return !(*this == other); } + + const std::string &GetName() const { return fName; } + std::uint16_t GetSchemaVersionMajor() const { return fSchemaVersionMajor; } + std::uint16_t GetSchemaVersionMinor() const { return fSchemaVersionMinor; } + std::uint32_t GetAnchorLength() const { return fAnchorLength; } + const RNTupleLocator &GetAnchorLocator() const { return fAnchorLocator; } + + RNTupleAttrSetDescriptor Clone() const; +}; + +class RNTupleAttrSetDescriptorIterable; + +} // namespace Experimental + // clang-format off /** \class ROOT::RFieldDescriptor @@ -701,6 +748,8 @@ private: std::vector fSortedClusterGroupIds; /// Potentially a subset of all the available clusters std::unordered_map fClusterDescriptors; + /// List of AttributeSets linked to this RNTuple + std::vector fAttributeSets; // We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const; @@ -718,6 +767,7 @@ public: class RClusterGroupDescriptorIterable; class RClusterDescriptorIterable; class RExtraTypeInfoDescriptorIterable; + friend class Experimental::RNTupleAttrSetDescriptorIterable; /// Modifiers passed to CreateModel() struct RCreateModelOptions { @@ -806,6 +856,8 @@ public: 
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const; + ROOT::Experimental::RNTupleAttrSetDescriptorIterable GetAttrSetIterable() const; + const std::string &GetName() const { return fName; } const std::string &GetDescription() const { return fDescription; } @@ -816,6 +868,7 @@ public: std::size_t GetNClusters() const { return fNClusters; } std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); } std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); } + std::size_t GetNAttributeSets() const { return fAttributeSets.size(); } /// We know the number of entries from adding the cluster summaries ROOT::NTupleSize_t GetNEntries() const { return fNEntries; } @@ -1145,6 +1198,59 @@ public: RIterator end() { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cend()); } }; +namespace Experimental { +// clang-format off +/** +\class ROOT::Experimental::RNTupleAttrSetDescriptorIterable +\ingroup NTuple +\brief Used to loop over all the Attribute Sets linked to an RNTuple +*/ +// clang-format on +// TODO: move this to RNTupleDescriptor::RNTupleAttrSetDescriptorIterable when it moves out of Experimental. +class RNTupleAttrSetDescriptorIterable final { +private: + /// The associated RNTuple for this range. 
+ const RNTupleDescriptor &fNTuple; + +public: + class RIterator final { + private: + using Iter_t = std::vector::const_iterator; + /// The wrapped vector iterator + Iter_t fIter; + + public: + using iterator_category = std::forward_iterator_tag; + using iterator = RIterator; + using value_type = RNTupleAttrSetDescriptor; + using difference_type = std::ptrdiff_t; + using pointer = const value_type *; + using reference = const value_type &; + + RIterator(Iter_t iter) : fIter(iter) {} + iterator &operator++() /* prefix */ + { + ++fIter; + return *this; + } + iterator operator++(int) /* postfix */ + { + auto old = *this; + operator++(); + return old; + } + reference operator*() const { return *fIter; } + pointer operator->() const { return &*fIter; } + bool operator!=(const iterator &rh) const { return fIter != rh.fIter; } + bool operator==(const iterator &rh) const { return fIter == rh.fIter; } + }; + + RNTupleAttrSetDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {} + RIterator begin() { return RIterator(fNTuple.fAttributeSets.cbegin()); } + RIterator end() { return RIterator(fNTuple.fAttributeSets.cend()); } +}; +} // namespace Experimental + // clang-format off /** \class ROOT::RNTupleDescriptor::RHeaderExtension @@ -1218,6 +1324,39 @@ public: } }; +namespace Experimental::Internal { +class RNTupleAttrSetDescriptorBuilder final { + ROOT::Experimental::RNTupleAttrSetDescriptor fDesc; + +public: + RNTupleAttrSetDescriptorBuilder &Name(std::string_view name) + { + fDesc.fName = name; + return *this; + } + RNTupleAttrSetDescriptorBuilder &SchemaVersion(std::uint16_t major, std::uint16_t minor) + { + fDesc.fSchemaVersionMajor = major; + fDesc.fSchemaVersionMinor = minor; + return *this; + } + RNTupleAttrSetDescriptorBuilder &AnchorLocator(const RNTupleLocator &loc) + { + fDesc.fAnchorLocator = loc; + return *this; + } + RNTupleAttrSetDescriptorBuilder &AnchorLength(std::uint32_t length) + { + fDesc.fAnchorLength = length; + return *this; + } + + 
/// Attempt to make an AttributeSet descriptor. This may fail if the builder + /// was not given enough information to make a proper descriptor. + RResult MoveDescriptor(); +}; +} // namespace Experimental::Internal + namespace Internal { // clang-format off @@ -1601,6 +1740,8 @@ public: RResult AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc); void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc); + RResult AddAttributeSet(Experimental::RNTupleAttrSetDescriptor &&attrSetDesc); + /// Mark the beginning of the header extension; any fields and columns added after a call to this function are /// annotated as begin part of the header extension. void BeginHeaderExtension(); @@ -1634,6 +1775,7 @@ inline RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc) } } // namespace Internal + } // namespace ROOT #endif // ROOT_RNTupleDescriptor diff --git a/tree/ntuple/inc/ROOT/RNTupleSerialize.hxx b/tree/ntuple/inc/ROOT/RNTupleSerialize.hxx index e767cfeda2662..7a387510f73d8 100644 --- a/tree/ntuple/inc/ROOT/RNTupleSerialize.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleSerialize.hxx @@ -36,6 +36,13 @@ class RNTupleDescriptor; class RClusterDescriptor; enum class EExtraTypeInfoIds; +namespace Experimental { +class RNTupleAttrSetDescriptor; +namespace Internal { +class RNTupleAttrSetDescriptorBuilder; +} // namespace Internal +} // namespace Experimental + namespace Internal { class RClusterDescriptorBuilder; @@ -271,6 +278,12 @@ public: static RResult DeserializeSchemaDescription(const void *buffer, std::uint64_t bufSize, ROOT::Internal::RNTupleDescriptorBuilder &descBuilder); + static RResult + SerializeAttributeSet(const Experimental::RNTupleAttrSetDescriptor &attrSetDesc, void *buffer); + static RResult + DeserializeAttributeSet(const void *buffer, std::uint64_t bufSize, + Experimental::Internal::RNTupleAttrSetDescriptorBuilder &attrSetDescBld); + static RResult SerializeHeader(void *buffer, const RNTupleDescriptor &desc); static 
RResult SerializePageList(void *buffer, const RNTupleDescriptor &desc, std::span physClusterIDs, diff --git a/tree/ntuple/src/RNTupleDescriptor.cxx b/tree/ntuple/src/RNTupleDescriptor.cxx index a0976d324a23f..d0413614e3740 100644 --- a/tree/ntuple/src/RNTupleDescriptor.cxx +++ b/tree/ntuple/src/RNTupleDescriptor.cxx @@ -803,6 +803,8 @@ ROOT::RNTupleDescriptor ROOT::RNTupleDescriptor::Clone() const clone.fSortedClusterGroupIds = fSortedClusterGroupIds; for (const auto &d : fClusterDescriptors) clone.fClusterDescriptors.emplace(d.first, d.second.Clone()); + for (const auto &d : fAttributeSets) + clone.fAttributeSets.emplace_back(d.Clone()); return clone; } @@ -1123,6 +1125,19 @@ void ROOT::Internal::RNTupleDescriptorBuilder::SetFeature(unsigned int flag) fDescriptor.fFeatureFlags.insert(flag); } +ROOT::RResult +ROOT::Experimental::Internal::RNTupleAttrSetDescriptorBuilder::MoveDescriptor() +{ + if (fDesc.fName.empty()) + return R__FAIL("attribute set name cannot be empty"); + if (fDesc.fAnchorLength == 0) + return R__FAIL("invalid anchor length"); + if (fDesc.fAnchorLocator.GetType() == RNTupleLocator::kTypeUnknown) + return R__FAIL("invalid locator type"); + + return std::move(fDesc); +} + ROOT::RResult ROOT::Internal::RColumnDescriptorBuilder::MakeDescriptor() const { if (fColumn.GetLogicalId() == ROOT::kInvalidDescriptorId) @@ -1377,6 +1392,19 @@ void ROOT::Internal::RNTupleDescriptorBuilder::ReplaceExtraTypeInfo(RExtraTypeIn fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc)); } +ROOT::RResult +ROOT::Internal::RNTupleDescriptorBuilder::AddAttributeSet(Experimental::RNTupleAttrSetDescriptor &&attrSetDesc) +{ + auto &attrSets = fDescriptor.fAttributeSets; + if (std::find_if(attrSets.begin(), attrSets.end(), [&name = attrSetDesc.GetName()](const auto &desc) { + return desc.GetName() == name; + }) != attrSets.end()) { + return R__FAIL("attribute sets with duplicate names"); + } + attrSets.push_back(std::move(attrSetDesc)); + return 
RResult::Success(); +} + RNTupleSerializer::StreamerInfoMap_t ROOT::Internal::RNTupleDescriptorBuilder::BuildStreamerInfos() const { RNTupleSerializer::StreamerInfoMap_t streamerInfoMap; @@ -1492,3 +1520,26 @@ ROOT::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable ROOT::RNTupleDescripto { return RExtraTypeInfoDescriptorIterable(*this); } + +ROOT::Experimental::RNTupleAttrSetDescriptorIterable ROOT::RNTupleDescriptor::GetAttrSetIterable() const +{ + return Experimental::RNTupleAttrSetDescriptorIterable(*this); +} + +bool ROOT::Experimental::RNTupleAttrSetDescriptor::operator==(const RNTupleAttrSetDescriptor &other) const +{ + return fAnchorLength == other.fAnchorLength && fSchemaVersionMajor == other.fSchemaVersionMajor && + fSchemaVersionMinor == other.fSchemaVersionMinor && fAnchorLocator == other.fAnchorLocator && + fName == other.fName; +} + +ROOT::Experimental::RNTupleAttrSetDescriptor ROOT::Experimental::RNTupleAttrSetDescriptor::Clone() const +{ + RNTupleAttrSetDescriptor desc; + desc.fAnchorLength = fAnchorLength; + desc.fSchemaVersionMajor = fSchemaVersionMajor; + desc.fSchemaVersionMinor = fSchemaVersionMinor; + desc.fAnchorLocator = fAnchorLocator; + desc.fName = fName; + return desc; +} diff --git a/tree/ntuple/src/RNTupleSerialize.cxx b/tree/ntuple/src/RNTupleSerialize.cxx index f6caa493e9a67..fb67f8fdefe4a 100644 --- a/tree/ntuple/src/RNTupleSerialize.cxx +++ b/tree/ntuple/src/RNTupleSerialize.cxx @@ -1799,6 +1799,27 @@ ROOT::RResult ROOT::Internal::RNTupleSerializer::SerializeFooter( return R__FORWARD_ERROR(res); } + // Attributes + frame = pos; + const auto nAttributeSets = desc.GetNAttributeSets(); + if (nAttributeSets > 0) { + R__LOG_WARNING(NTupleLog()) << "RNTuple Attributes are experimental. 
They are not guaranteed to be readable " + "back in the future (but your main data is)"; + } + pos += SerializeListFramePreamble(nAttributeSets, *where); + for (const auto &attrSet : desc.GetAttrSetIterable()) { + if (auto res = SerializeAttributeSet(attrSet, *where)) { + pos += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + } + if (auto res = SerializeFramePostscript(buffer ? frame : nullptr, pos - frame)) { + pos += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + std::uint32_t size = pos - base; if (auto res = SerializeEnvelopePostscript(base, size)) { size += res.Unwrap(); @@ -1808,6 +1829,33 @@ ROOT::RResult ROOT::Internal::RNTupleSerializer::SerializeFooter( return size; } +ROOT::RResult +ROOT::Internal::RNTupleSerializer::SerializeAttributeSet(const Experimental::RNTupleAttrSetDescriptor &attrDesc, + void *buffer) +{ + auto base = reinterpret_cast(buffer); + auto pos = base; + void **where = (buffer == nullptr) ? &buffer : reinterpret_cast(&pos); + + auto frame = pos; + pos += RNTupleSerializer::SerializeRecordFramePreamble(*where); + pos += SerializeUInt16(attrDesc.GetSchemaVersionMajor(), *where); + pos += SerializeUInt16(attrDesc.GetSchemaVersionMinor(), *where); + pos += SerializeUInt32(attrDesc.GetAnchorLength(), *where); + if (auto res = SerializeLocator(attrDesc.GetAnchorLocator(), *where)) { + pos += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + pos += SerializeString(attrDesc.GetName(), *where); + auto size = pos - frame; + if (auto res = SerializeFramePostscript(buffer ? 
frame : nullptr, size)) { + return size; + } else { + return R__FORWARD_ERROR(res); + } +} + ROOT::RResult ROOT::Internal::RNTupleSerializer::DeserializeHeader(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder) { @@ -1918,36 +1966,102 @@ ROOT::RResult ROOT::Internal::RNTupleSerializer::DeserializeFooter(const v } bytes = frame + frameSize; - std::uint32_t nClusterGroups; - frame = bytes; - if (auto res = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterGroups)) { - bytes += res.Unwrap(); - } else { - return R__FORWARD_ERROR(res); - } - for (std::uint32_t groupId = 0; groupId < nClusterGroups; ++groupId) { - RClusterGroup clusterGroup; - if (auto res = DeserializeClusterGroup(bytes, fnFrameSizeLeft(), clusterGroup)) { + { + std::uint32_t nClusterGroups; + frame = bytes; + if (auto res = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterGroups)) { bytes += res.Unwrap(); } else { return R__FORWARD_ERROR(res); } + for (std::uint32_t groupId = 0; groupId < nClusterGroups; ++groupId) { + RClusterGroup clusterGroup; + if (auto res = DeserializeClusterGroup(bytes, fnFrameSizeLeft(), clusterGroup)) { + bytes += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + + descBuilder.AddToOnDiskFooterSize(clusterGroup.fPageListEnvelopeLink.fLocator.GetNBytesOnStorage()); + RClusterGroupDescriptorBuilder clusterGroupBuilder; + clusterGroupBuilder.ClusterGroupId(groupId) + .PageListLocator(clusterGroup.fPageListEnvelopeLink.fLocator) + .PageListLength(clusterGroup.fPageListEnvelopeLink.fLength) + .MinEntry(clusterGroup.fMinEntry) + .EntrySpan(clusterGroup.fEntrySpan) + .NClusters(clusterGroup.fNClusters); + descBuilder.AddClusterGroup(clusterGroupBuilder.MoveDescriptor().Unwrap()); + } + bytes = frame + frameSize; + } - descBuilder.AddToOnDiskFooterSize(clusterGroup.fPageListEnvelopeLink.fLocator.GetNBytesOnStorage()); - RClusterGroupDescriptorBuilder clusterGroupBuilder; - 
clusterGroupBuilder.ClusterGroupId(groupId) - .PageListLocator(clusterGroup.fPageListEnvelopeLink.fLocator) - .PageListLength(clusterGroup.fPageListEnvelopeLink.fLength) - .MinEntry(clusterGroup.fMinEntry) - .EntrySpan(clusterGroup.fEntrySpan) - .NClusters(clusterGroup.fNClusters); - descBuilder.AddClusterGroup(clusterGroupBuilder.MoveDescriptor().Unwrap()); + // NOTE: Attributes were introduced in v1.0.1.0, so this section may be missing. + // Testing for > 8 because bufSize includes the checksum. + if (fnBufSizeLeft() > 8) { + std::uint32_t nAttributeSets; + frame = bytes; + if (auto res = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nAttributeSets)) { + bytes += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + if (nAttributeSets > 0) { + R__LOG_WARNING(NTupleLog()) << "RNTuple Attributes are experimental. They are not guaranteed to be readable " + "back in the future (but your main data is)"; + } + for (std::uint32_t attrSetId = 0; attrSetId < nAttributeSets; ++attrSetId) { + Experimental::Internal::RNTupleAttrSetDescriptorBuilder attrSetDescBld; + if (auto res = DeserializeAttributeSet(bytes, fnFrameSizeLeft(), attrSetDescBld)) { + descBuilder.AddAttributeSet(attrSetDescBld.MoveDescriptor().Unwrap()); + bytes += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + } + bytes = frame + frameSize; } - bytes = frame + frameSize; return RResult::Success(); } +ROOT::RResult ROOT::Internal::RNTupleSerializer::DeserializeAttributeSet( + const void *buffer, std::uint64_t bufSize, Experimental::Internal::RNTupleAttrSetDescriptorBuilder &attrSetDescBld) +{ + auto base = reinterpret_cast(buffer); + auto bytes = base; + auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); }; + + std::uint64_t frameSize; + if (auto res = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize)) { + bytes += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + if (fnBufSizeLeft() < static_cast(sizeof(std::uint64_t))) + return 
R__FAIL("record frame too short"); + std::uint16_t vMajor, vMinor; + bytes += DeserializeUInt16(bytes, vMajor); + bytes += DeserializeUInt16(bytes, vMinor); + std::uint32_t anchorLen; + bytes += DeserializeUInt32(bytes, anchorLen); + RNTupleLocator anchorLoc; + if (auto res = DeserializeLocator(bytes, fnBufSizeLeft(), anchorLoc)) { + bytes += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + std::string name; + if (auto res = DeserializeString(bytes, fnBufSizeLeft(), name)) { + bytes += res.Unwrap(); + } else { + return R__FORWARD_ERROR(res); + } + + attrSetDescBld.SchemaVersion(vMajor, vMinor).AnchorLength(anchorLen).AnchorLocator(anchorLoc).Name(name); + + return frameSize; +} + ROOT::RResult> ROOT::Internal::RNTupleSerializer::DeserializePageListRaw(const void *buffer, std::uint64_t bufSize, ROOT::DescriptorId_t clusterGroupId, diff --git a/tree/ntuple/test/ntuple_descriptor.cxx b/tree/ntuple/test/ntuple_descriptor.cxx index b195bf2a04460..b99e794abe7d8 100644 --- a/tree/ntuple/test/ntuple_descriptor.cxx +++ b/tree/ntuple/test/ntuple_descriptor.cxx @@ -372,6 +372,39 @@ TEST(RNTupleDescriptor, GetTypeNameForComparison) } } +TEST(RNTupleDescriptor, AttributeSets) +{ + RNTupleLocator locator; + locator.SetType(ROOT::RNTupleLocator::kTypeFile); + locator.SetPosition(128ul); + ROOT::Experimental::Internal::RNTupleAttrSetDescriptorBuilder builder; + builder.SchemaVersion(1, 0).AnchorLength(1024).AnchorLocator(locator).Name("AttrSetName"); + RNTupleDescriptorBuilder descBuilder; + descBuilder.SetVersion(1, 0, 1, 0); + descBuilder.SetNTuple("ntpl", ""); + descBuilder.AddAttributeSet(builder.MoveDescriptor().Unwrap()); + + locator.SetPosition(555ul); + builder.SchemaVersion(2, 4).AnchorLength(200).AnchorLocator(locator).Name("AttrSetName 2"); + descBuilder.AddAttributeSet(builder.MoveDescriptor().Unwrap()); + + auto desc = descBuilder.MoveDescriptor(); + ASSERT_EQ(desc.GetNAttributeSets(), 2); + auto attrSets = desc.GetAttrSetIterable().begin(); + 
EXPECT_EQ(attrSets->GetName(), "AttrSetName"); + EXPECT_EQ(attrSets->GetAnchorLength(), 1024); + EXPECT_EQ(attrSets->GetSchemaVersionMajor(), 1); + EXPECT_EQ(attrSets->GetSchemaVersionMinor(), 0); + EXPECT_EQ(attrSets->GetAnchorLocator().GetPosition(), 128); + + ++attrSets; + EXPECT_EQ(attrSets->GetName(), "AttrSetName 2"); + EXPECT_EQ(attrSets->GetAnchorLength(), 200); + EXPECT_EQ(attrSets->GetSchemaVersionMajor(), 2); + EXPECT_EQ(attrSets->GetSchemaVersionMinor(), 4); + EXPECT_EQ(attrSets->GetAnchorLocator().GetPosition(), 555); +} + TEST(RFieldDescriptorIterable, IterateOverFieldNames) { auto model = RNTupleModel::Create(); diff --git a/tree/ntuple/test/ntuple_serialize.cxx b/tree/ntuple/test/ntuple_serialize.cxx index ffa0afea6adab..ae23a1bc237a4 100644 --- a/tree/ntuple/test/ntuple_serialize.cxx +++ b/tree/ntuple/test/ntuple_serialize.cxx @@ -2059,3 +2059,121 @@ TEST(RNTuple, SerializeMultiColumnRepresentationDeferredInMainHeader) EXPECT_EQ(expect2_0, columnRange2_0); EXPECT_EQ(expect2_1, columnRange2_1); } + +TEST(RNTuple, SerializeAttrSets) +{ + RNTupleLocator locator; + locator.SetType(ROOT::RNTupleLocator::kTypeFile); + locator.SetPosition(128ul); + ROOT::Experimental::Internal::RNTupleAttrSetDescriptorBuilder builder; + builder.SchemaVersion(1, 0).AnchorLength(1024).AnchorLocator(locator).Name("AttrSetName"); + auto attrSetDesc = builder.MoveDescriptor().Unwrap(); + + auto res = RNTupleSerializer::SerializeAttributeSet(attrSetDesc, nullptr); + ASSERT_TRUE(bool(res)); + auto buf = MakeUninitArray(res.Unwrap()); + res = RNTupleSerializer::SerializeAttributeSet(attrSetDesc, buf.get()); + ASSERT_TRUE(bool(res)); + + ROOT::Experimental::Internal::RNTupleAttrSetDescriptorBuilder deserializedDescBld; + RNTupleSerializer::DeserializeAttributeSet(buf.get(), res.Unwrap(), deserializedDescBld); + + ASSERT_EQ(attrSetDesc, deserializedDescBld.MoveDescriptor().Unwrap()); +} + +TEST(RNTuple, SerializeDescriptorWithAttrSets) +{ + RNTupleDescriptorBuilder builder; + 
builder.SetVersionForWriting(); + builder.SetNTuple("ntpl", ""); + + builder.AddField(RFieldDescriptorBuilder() + .FieldId(0) + .FieldName("") + .Structure(ROOT::ENTupleStructure::kRecord) + .MakeDescriptor() + .Unwrap()); + + builder.AddField(RFieldDescriptorBuilder() + .FieldId(5) + .FieldName("pt") + .TypeName("float") + .Structure(ROOT::ENTupleStructure::kPlain) + .MakeDescriptor() + .Unwrap()); + builder.AddFieldLink(0, 5); + builder.AddColumn(RColumnDescriptorBuilder() + .LogicalColumnId(0) + .PhysicalColumnId(0) + .FieldId(5) + .BitsOnStorage(32) + .Type(ROOT::ENTupleColumnType::kReal32) + .FirstElementIndex(1) + .MakeDescriptor() + .Unwrap()); + builder.AddColumn(RColumnDescriptorBuilder() + .LogicalColumnId(1) + .PhysicalColumnId(1) + .FieldId(5) + .BitsOnStorage(16) + .Type(ROOT::ENTupleColumnType::kReal16) + .RepresentationIndex(1) + .FirstElementIndex(1) + .SetSuppressedDeferred() + .MakeDescriptor() + .Unwrap()); + + RNTupleLocator locator; + locator.SetType(ROOT::RNTupleLocator::kTypeFile); + locator.SetPosition(128ul); + + ROOT::Experimental::Internal::RNTupleAttrSetDescriptorBuilder attrBuilder; + attrBuilder.SchemaVersion(1, 0).AnchorLength(1024).AnchorLocator(locator).Name("AttrSetName"); + + // First cluster + RClusterDescriptorBuilder clusterBuilder; + clusterBuilder.ClusterId(0).FirstEntryIndex(0).NEntries(1); + builder.AddCluster(clusterBuilder.MoveDescriptor().Unwrap()); + + RClusterGroupDescriptorBuilder cgBuilder; + cgBuilder.ClusterGroupId(0).NClusters(1).EntrySpan(3); + cgBuilder.AddSortedClusters({0}); + builder.AddClusterGroup(cgBuilder.MoveDescriptor().Unwrap()); + + builder.AddAttributeSet(attrBuilder.MoveDescriptor().Unwrap()); + auto desc = builder.MoveDescriptor(); + + auto context = RNTupleSerializer::SerializeHeader(nullptr, desc).Unwrap(); + auto bufHeader = MakeUninitArray(context.GetHeaderSize()); + ROOT::TestSupport::CheckDiagsRAII diagsRAII; + diagsRAII.requiredDiag(kWarning, "ROOT.NTuple", "Attributes are experimental", 
false); + context = RNTupleSerializer::SerializeHeader(bufHeader.get(), desc).Unwrap(); + + std::vector physClusterIDs{context.MapClusterId(0)}; + context.MapClusterGroupId(0); + auto sizePageList = RNTupleSerializer::SerializePageList(nullptr, desc, physClusterIDs, context).Unwrap(); + EXPECT_GT(sizePageList, 0); + auto bufPageList = MakeUninitArray(sizePageList); + EXPECT_EQ(sizePageList, + RNTupleSerializer::SerializePageList(bufPageList.get(), desc, physClusterIDs, context).Unwrap()); + + auto sizeFooter = RNTupleSerializer::SerializeFooter(nullptr, desc, context).Unwrap(); + EXPECT_GT(sizeFooter, 0); + auto bufFooter = MakeUninitArray(sizeFooter); + EXPECT_EQ(sizeFooter, RNTupleSerializer::SerializeFooter(bufFooter.get(), desc, context).Unwrap()); + + builder.SetVersionForWriting(); + RNTupleSerializer::DeserializeHeader(bufHeader.get(), context.GetHeaderSize(), builder).ThrowOnError(); + RNTupleSerializer::DeserializeFooter(bufFooter.get(), sizeFooter, builder); + auto deserializedDesc = builder.MoveDescriptor(); + RNTupleSerializer::DeserializePageList(bufPageList.get(), sizePageList, 0, deserializedDesc, + EDeserializeMode::kForReading); + + ASSERT_EQ(desc.GetNAttributeSets(), deserializedDesc.GetNAttributeSets()); + auto attrSets = desc.GetAttrSetIterable().begin(); + auto deserAttrSets = deserializedDesc.GetAttrSetIterable().begin(); + for (auto i = 0u; i < desc.GetNAttributeSets(); ++i) { + EXPECT_EQ(*attrSets, *deserAttrSets); + ++attrSets, ++deserAttrSets; + } +}