From 738aa549fd4003ca3d8dd3055082052587b9543d Mon Sep 17 00:00:00 2001 From: silverweed Date: Tue, 16 Sep 2025 17:02:14 +0200 Subject: [PATCH 1/3] [ntuple] clang-format some files --- tree/ntuple/inc/ROOT/RPageStorageDaos.hxx | 3 ++- tree/ntuple/src/RPageStorageFile.cxx | 23 +++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx b/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx index c6dac79c4206e..4d6200fb5f590 100644 --- a/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx +++ b/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx @@ -77,7 +77,8 @@ struct RDaosNTupleAnchor { /// The object class for user data OIDs, e.g. `SX` std::string fObjClass{}; - bool operator ==(const RDaosNTupleAnchor &other) const { + bool operator==(const RDaosNTupleAnchor &other) const + { return fVersionAnchor == other.fVersionAnchor && fVersionEpoch == other.fVersionEpoch && fVersionMajor == other.fVersionMajor && fVersionMinor == other.fVersionMinor && fVersionPatch == other.fVersionPatch && fNBytesHeader == other.fNBytesHeader && diff --git a/tree/ntuple/src/RPageStorageFile.cxx b/tree/ntuple/src/RPageStorageFile.cxx index 0222d717d8ce8..2889e8a7630bd 100644 --- a/tree/ntuple/src/RPageStorageFile.cxx +++ b/tree/ntuple/src/RPageStorageFile.cxx @@ -509,18 +509,17 @@ ROOT::Internal::RPageSourceFile::PrepareSingleCluster(const RCluster::RKey &clus std::vector onDiskPages; auto activeSize = 0; auto pageZeroMap = std::make_unique(); - PrepareLoadCluster(clusterKey, *pageZeroMap, - [&](ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t pageNo, - const ROOT::RClusterDescriptor::RPageInfo &pageInfo) { - const auto &pageLocator = pageInfo.GetLocator(); - if (pageLocator.GetType() == RNTupleLocator::kTypeUnknown) - throw RException(R__FAIL("tried to read a page with an unknown locator")); - const auto nBytes = - pageLocator.GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum; - activeSize += nBytes; - onDiskPages.push_back( - {physicalColumnId, pageNo, pageLocator.GetPosition(), nBytes, 0}); - }); + PrepareLoadCluster( + clusterKey, *pageZeroMap, + [&](ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t pageNo, + const ROOT::RClusterDescriptor::RPageInfo &pageInfo) { + const auto &pageLocator = pageInfo.GetLocator(); + if (pageLocator.GetType() == RNTupleLocator::kTypeUnknown) + throw RException(R__FAIL("tried to read a page with an unknown locator")); + const auto nBytes = pageLocator.GetNBytesOnStorage() + pageInfo.HasChecksum() * kNBytesPageChecksum; + activeSize += nBytes; + onDiskPages.push_back({physicalColumnId, pageNo, pageLocator.GetPosition(), nBytes, 0}); + }); // Linearize the page requests by file offset std::sort(onDiskPages.begin(), onDiskPages.end(), From 60e4a453ccd2e7a64e2e100aeeb377ff935ddb63 Mon Sep 17 00:00:00 2001 From: silverweed Date: Tue, 16 Sep 2025 16:57:06 +0200 Subject: [PATCH 2/3] [ntuple] Add some internal utilities to the storage layer Will be used by the RNTupleAttributes. --- tree/ntuple/inc/ROOT/RMiniFile.hxx | 17 ++++--- tree/ntuple/inc/ROOT/RPageNullSink.hxx | 1 + tree/ntuple/inc/ROOT/RPageSinkBuf.hxx | 5 ++ tree/ntuple/inc/ROOT/RPageStorage.hxx | 10 +++- tree/ntuple/inc/ROOT/RPageStorageDaos.hxx | 2 + tree/ntuple/inc/ROOT/RPageStorageFile.hxx | 11 ++++ tree/ntuple/src/RMiniFile.cxx | 8 +++ tree/ntuple/src/RNTupleParallelWriter.cxx | 4 ++ tree/ntuple/src/RPageStorageDaos.cxx | 6 +++ tree/ntuple/src/RPageStorageFile.cxx | 21 ++++++++ tree/ntuple/test/ntuple_endian.cxx | 5 ++ tree/ntuple/test/ntuple_storage.cxx | 62 +++++++++++++++++++++++ 12 files changed, 143 insertions(+), 9 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RMiniFile.hxx b/tree/ntuple/inc/ROOT/RMiniFile.hxx index 45e6ec305572a..6a70610b00f90 100644 --- a/tree/ntuple/inc/ROOT/RMiniFile.hxx +++ b/tree/ntuple/inc/ROOT/RMiniFile.hxx @@ -32,13 +32,14 @@ class TVirtualStreamerInfo; namespace ROOT { +class RNTupleWriteOptions; + namespace Internal { + class RRawFile; -} -class RNTupleWriteOptions; +TDirectory *GetUnderlyingDirectory(ROOT::Internal::RNTupleFileWriter &writer); -namespace Internal { /// Holds status information of an open ROOT file during writing struct RTFileControlBlock; @@ -68,9 +69,6 @@ private: /// Used when the file turns out to be a TFile container. The ntuplePath variable is either the ntuple name /// or an ntuple name preceded by a directory (`myNtuple` or `foo/bar/myNtuple` or `/foo/bar/myNtuple`) RResult GetNTupleProper(std::string_view ntuplePath); - /// Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary). - RResult - GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen); /// Searches for a key with the given name and type in the key index of the directory starting at offsetDir. /// The offset points to the start of the TDirectory DATA section, without the key and without the name and title @@ -84,6 +82,9 @@ public: explicit RMiniFileReader(ROOT::Internal::RRawFile *rawFile); /// Extracts header and footer location for the RNTuple identified by ntupleName RResult GetNTuple(std::string_view ntupleName); + /// Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary). + RResult + GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen); /// Reads a given byte range from the file into the provided memory buffer. /// If `nbytes > fMaxKeySize` it will perform chunked read from multiple blobs, /// whose addresses are listed at the end of the first chunk. @@ -109,6 +110,8 @@ A stand-alone version of RNTuple can remove the TFile based writer. */ // clang-format on class RNTupleFileWriter { + friend TDirectory *ROOT::Internal::GetUnderlyingDirectory(ROOT::Internal::RNTupleFileWriter &writer); + public: /// The key length of a blob. It is always a big key (version > 1000) with class name RBlob. static constexpr std::size_t kBlobKeyLen = 42; @@ -254,7 +257,7 @@ public: void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset); /// Ensures that the streamer info records passed as argument are written to the file void UpdateStreamerInfos(const ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t &streamerInfos); - /// Writes the RNTuple key to the file so that the header and footer keys can be found + /// Writes the RNTuple key to the file so that the header and footer keys can be found. void Commit(int compression = RCompressionSetting::EDefaults::kUseGeneralPurpose); }; diff --git a/tree/ntuple/inc/ROOT/RPageNullSink.hxx b/tree/ntuple/inc/ROOT/RPageNullSink.hxx index fe15f631ca329..1462f82da112e 100644 --- a/tree/ntuple/inc/ROOT/RPageNullSink.hxx +++ b/tree/ntuple/inc/ROOT/RPageNullSink.hxx @@ -106,6 +106,7 @@ public: void CommitStagedClusters(std::span) final {} void CommitClusterGroup() final {} void CommitDatasetImpl() final {} + std::unique_ptr CreateNewWithNewRNTuple(std::string_view) const final { return nullptr; } }; } // namespace Internal diff --git a/tree/ntuple/inc/ROOT/RPageSinkBuf.hxx b/tree/ntuple/inc/ROOT/RPageSinkBuf.hxx index 19a5d47b3528c..4de28ce970642 100644 --- a/tree/ntuple/inc/ROOT/RPageSinkBuf.hxx +++ b/tree/ntuple/inc/ROOT/RPageSinkBuf.hxx @@ -148,6 +148,11 @@ public: void CommitDatasetImpl() final; RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final; + + std::unique_ptr CreateNewWithNewRNTuple(std::string_view newName) const final + { + return std::make_unique(fInnerSink->CreateNewWithNewRNTuple(newName)); + } }; // RPageSinkBuf } // namespace Internal diff --git a/tree/ntuple/inc/ROOT/RPageStorage.hxx b/tree/ntuple/inc/ROOT/RPageStorage.hxx index d1f4e4f99d870..88244b33b296a 100644 --- a/tree/ntuple/inc/ROOT/RPageStorage.hxx +++ b/tree/ntuple/inc/ROOT/RPageStorage.hxx @@ -47,9 +47,9 @@ namespace ROOT { class RNTupleModel; namespace Internal { - -class RPageAllocator; class RColumn; +class RMiniFileReader; +class RPageAllocator; struct RNTupleModelChangeset; enum class EPageStorageType { @@ -313,6 +313,10 @@ public: virtual ROOT::NTupleSize_t GetNEntries() const = 0; + /// Creates a new RPageSink linked to the same underlying storage as this, writing to a new RNTuple called `newName`. + /// The existing sink will stay valid. The existing sink and the new one must not write concurrently. + virtual std::unique_ptr CreateNewWithNewRNTuple(std::string_view newName) const = 0; + /// Physically creates the storage container to hold the ntuple (e.g., a keys a TFile or an S3 bucket) /// Init() associates column handles to the columns referenced by the model void Init(RNTupleModel &model) @@ -808,6 +812,8 @@ public: virtual std::vector> LoadClusters(std::span clusterKeys) = 0; + virtual RMiniFileReader *GetUnderlyingReader() { return nullptr; } + /// Parallel decompression and unpacking of the pages in the given cluster. The unzipped pages are supposed /// to be preloaded in a page pool attached to the source. The method is triggered by the cluster pool's /// unzip thread. It is an optional optimization, the method can safely do nothing. In particular, the diff --git a/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx b/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx index 4d6200fb5f590..6295fc161d6ef 100644 --- a/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx +++ b/tree/ntuple/inc/ROOT/RPageStorageDaos.hxx @@ -141,6 +141,8 @@ protected: public: RPageSinkDaos(std::string_view ntupleName, std::string_view uri, const ROOT::RNTupleWriteOptions &options); ~RPageSinkDaos() override; + + std::unique_ptr CreateNewWithNewRNTuple(std::string_view) const final; }; // class RPageSinkDaos // clang-format off diff --git a/tree/ntuple/inc/ROOT/RPageStorageFile.hxx b/tree/ntuple/inc/ROOT/RPageStorageFile.hxx index 81adeed6b339b..dd71482075421 100644 --- a/tree/ntuple/inc/ROOT/RPageStorageFile.hxx +++ b/tree/ntuple/inc/ROOT/RPageStorageFile.hxx @@ -98,6 +98,10 @@ public: RPageSinkFile(RPageSinkFile &&) = default; RPageSinkFile &operator=(RPageSinkFile &&) = default; ~RPageSinkFile() override; + + std::unique_ptr CreateNewWithNewRNTuple(std::string_view) const final; + + ROOT::Internal::RNTupleFileWriter *GetUnderlyingWriter() const { return fWriter.get(); } }; // class RPageSinkFile // clang-format off @@ -149,6 +153,8 @@ private: std::unique_ptr PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector &readRequests); + RMiniFileReader *GetUnderlyingReader() final { return &fReader; } + protected: void LoadStructureImpl() final; ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final; @@ -173,6 +179,11 @@ public: RPageSourceFile &operator=(RPageSourceFile &&) = delete; ~RPageSourceFile() override; + /// Creates a new PageSourceFile using the same underlying file as this but referring to a different RNTuple, + /// represented by `anchor`. + std::unique_ptr + OpenWithDifferentAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions()); + void LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final; diff --git a/tree/ntuple/src/RMiniFile.cxx b/tree/ntuple/src/RMiniFile.cxx index 438dc516c239f..5e52fb805dcb6 100644 --- a/tree/ntuple/src/RMiniFile.cxx +++ b/tree/ntuple/src/RMiniFile.cxx @@ -1608,3 +1608,11 @@ void ROOT::Internal::RNTupleFileWriter::WriteTFileSkeleton(int defaultCompressio fileSimple.Write(&padding, sizeof(padding)); fileSimple.fKeyOffset = fileSimple.fFilePos; } + +TDirectory *ROOT::Internal::GetUnderlyingDirectory(ROOT::Internal::RNTupleFileWriter &writer) +{ + if (auto *proper = std::get_if(&writer.fFile)) { + return proper->fDirectory; + } + return nullptr; +} diff --git a/tree/ntuple/src/RNTupleParallelWriter.cxx b/tree/ntuple/src/RNTupleParallelWriter.cxx index 70cc6156756a0..3febb23f73407 100644 --- a/tree/ntuple/src/RNTupleParallelWriter.cxx +++ b/tree/ntuple/src/RNTupleParallelWriter.cxx @@ -111,6 +111,10 @@ class RPageSynchronizingSink : public RPageSink { { throw ROOT::RException(R__FAIL("should never commit dataset via RPageSynchronizingSink")); } + std::unique_ptr CreateNewWithNewRNTuple(std::string_view) const final + { + throw ROOT::RException(R__FAIL("CreateNewWithNewRNTuple unavailable for RPageSynchronizingSink")); + } RSinkGuard GetSinkGuard() final { return RSinkGuard(fMutex); } }; diff --git a/tree/ntuple/src/RPageStorageDaos.cxx b/tree/ntuple/src/RPageStorageDaos.cxx index 1de8981b54746..d723b890e5d1a 100644 --- a/tree/ntuple/src/RPageStorageDaos.cxx +++ b/tree/ntuple/src/RPageStorageDaos.cxx @@ -483,6 +483,12 @@ void ROOT::Experimental::Internal::RPageSinkDaos::WriteNTupleAnchor() kDistributionKeyDefault, kAttributeKeyAnchor, kCidMetadata); } +std::unique_ptr +ROOT::Experimental::Internal::RPageSinkDaos::CreateNewWithNewRNTuple(std::string_view) const +{ + throw ROOT::RException(R__FAIL("this method is not available for the DAOS backend")); +} + //////////////////////////////////////////////////////////////////////////////// ROOT::Experimental::Internal::RPageSourceDaos::RPageSourceDaos(std::string_view ntupleName, std::string_view uri, diff --git a/tree/ntuple/src/RPageStorageFile.cxx b/tree/ntuple/src/RPageStorageFile.cxx index 2889e8a7630bd..04ba3aad0e931 100644 --- a/tree/ntuple/src/RPageStorageFile.cxx +++ b/tree/ntuple/src/RPageStorageFile.cxx @@ -252,6 +252,18 @@ void ROOT::Internal::RPageSinkFile::CommitDatasetImpl(unsigned char *serializedF fWriter->Commit(GetWriteOptions().GetCompression()); } +std::unique_ptr +ROOT::Internal::RPageSinkFile::CreateNewWithNewRNTuple(std::string_view newName) const +{ + if (auto *dir = Internal::GetUnderlyingDirectory(*fWriter)) { + auto opts = ROOT::RNTupleWriteOptions(); + opts.SetCompression(GetWriteOptions().GetCompression()); + return std::make_unique(newName, *dir, opts); + } + // TODO: support this method also for non-TFile-based writers + throw ROOT::RException(R__FAIL("cannot CreateNewWithNewRNTuple a non-TFile-based Sink.")); +} + //////////////////////////////////////////////////////////////////////////////// ROOT::Internal::RPageSourceFile::RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &opts) @@ -305,6 +317,15 @@ ROOT::Internal::RPageSourceFile::CreateFromAnchor(const RNTuple &anchor, const R return pageSource; } +std::unique_ptr +ROOT::Internal::RPageSourceFile::OpenWithDifferentAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options) +{ + auto pageSource = std::make_unique("", fFile->Clone(), options); + pageSource->fAnchor = anchor; + // NOTE: fNTupleName gets set only upon Attach(). + return pageSource; +} + ROOT::Internal::RPageSourceFile::~RPageSourceFile() = default; void ROOT::Internal::RPageSourceFile::LoadStructureImpl() diff --git a/tree/ntuple/test/ntuple_endian.cxx b/tree/ntuple/test/ntuple_endian.cxx index 44e0be2d75a7e..cd72650349809 100644 --- a/tree/ntuple/test/ntuple_endian.cxx +++ b/tree/ntuple/test/ntuple_endian.cxx @@ -59,6 +59,11 @@ class RPageSinkMock : public RPageSink { void CommitStagedClusters(std::span) final {} void CommitClusterGroup() final {} void CommitDatasetImpl() final {} + std::unique_ptr CreateNewWithNewRNTuple(std::string_view) const final + { + R__ASSERT(false); + return nullptr; + } public: RPageSinkMock(const RColumnElementBase &elt) : RPageSink("test", ROOT::RNTupleWriteOptions()), fElement(elt) diff --git a/tree/ntuple/test/ntuple_storage.cxx b/tree/ntuple/test/ntuple_storage.cxx index 443dbcfc7e327..5a00d847c974b 100644 --- a/tree/ntuple/test/ntuple_storage.cxx +++ b/tree/ntuple/test/ntuple_storage.cxx @@ -54,6 +54,11 @@ class RPageSinkMock : public RPageSink { void CommitStagedClusters(std::span) final {} void CommitClusterGroup() final {} void CommitDatasetImpl() final {} + std::unique_ptr CreateNewWithNewRNTuple(std::string_view) const final + { + R__ASSERT(false); + return nullptr; + } public: RPageSinkMock(const ROOT::RNTupleWriteOptions &options) : RPageSink("test", options) {} @@ -1145,3 +1150,60 @@ TEST(RPageSourceFile, NameFromAnchor) source->Attach(); EXPECT_EQ(source->GetNTupleName(), "ntpl"); } + +TEST(RPageSourceFile, OpenDifferentAnchor) +{ + FileRaii fileGuard("test_ntuple_open_diff_anchor.root"); + + auto model = RNTupleModel::Create(); + auto pF = model->MakeField("f"); + auto file = std::unique_ptr(TFile::Open(fileGuard.GetPath().c_str(), "RECREATE")); + { + auto writer = RNTupleWriter::Append(std::move(model), "ntpl1", *file); + for (auto i = 0; i < 100; ++i) { + *pF = i; + writer->Fill(); + } + } + { + model = RNTupleModel::Create(); + auto pI = model->MakeField("i"); + auto pC = model->MakeField("c"); + + auto writer = RNTupleWriter::Append(std::move(model), "ntpl2", *file); + for (auto i = 0; i < 20; ++i) { + *pI = i; + *pC = i; + writer->Fill(); + } + } + + auto source = std::make_unique("ntpl1", fileGuard.GetPath(), RNTupleReadOptions()); + source->Attach(); + EXPECT_EQ(source->GetNEntries(), 100); + auto desc = source->GetSharedDescriptorGuard(); + EXPECT_NE(desc->FindFieldId("f"), ROOT::kInvalidDescriptorId); + + auto anchor2 = file->Get("ntpl2"); + ASSERT_NE(anchor2, nullptr); + auto source2 = source->OpenWithDifferentAnchor(*anchor2); + source2->Attach(); + EXPECT_EQ(source2->GetNTupleName(), "ntpl2"); + EXPECT_EQ(source2->GetNEntries(), 20); + { + auto desc2 = source2->GetSharedDescriptorGuard(); + EXPECT_EQ(desc2->FindFieldId("f"), ROOT::kInvalidDescriptorId); + EXPECT_NE(desc2->FindFieldId("i"), ROOT::kInvalidDescriptorId); + EXPECT_NE(desc2->FindFieldId("c"), ROOT::kInvalidDescriptorId); + } + + source.reset(); + // source2 should still be valid after dropping the first source. + EXPECT_EQ(source2->GetNEntries(), 20); + { + auto desc2 = source2->GetSharedDescriptorGuard(); + EXPECT_EQ(desc2->FindFieldId("f"), ROOT::kInvalidDescriptorId); + EXPECT_NE(desc2->FindFieldId("i"), ROOT::kInvalidDescriptorId); + EXPECT_NE(desc2->FindFieldId("c"), ROOT::kInvalidDescriptorId); + } +} From 79698ad53d6568bebbe289b1f615f8c4a99ec7f3 Mon Sep 17 00:00:00 2001 From: silverweed Date: Mon, 29 Sep 2025 09:19:54 +0200 Subject: [PATCH 3/3] [ntuple] Restrict the scope of descriptor guard in ntuple_storage. Also update the outdated comment about GetNEntries() --- tree/ntuple/inc/ROOT/RPageStorage.hxx | 2 +- tree/ntuple/test/ntuple_storage.cxx | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RPageStorage.hxx b/tree/ntuple/inc/ROOT/RPageStorage.hxx index 88244b33b296a..5a2b3d66e87bd 100644 --- a/tree/ntuple/inc/ROOT/RPageStorage.hxx +++ b/tree/ntuple/inc/ROOT/RPageStorage.hxx @@ -761,7 +761,7 @@ public: /// The underlying `std::shared_mutex`, however, is neither read nor write recursive: /// within one thread, only one lock (shared or exclusive) must be acquired at the same time. This requires special /// care in sections protected by `GetSharedDescriptorGuard()` and `GetExclDescriptorGuard()` especially to avoid - /// that the locks are acquired indirectly (e.g. by a call to `GetNEntries()`). As a general guideline, no other + /// that the locks are acquired indirectly. As a general guideline, no other /// method of the page source should be called (directly or indirectly) in a guarded section. const RSharedDescriptorGuard GetSharedDescriptorGuard() const { diff --git a/tree/ntuple/test/ntuple_storage.cxx b/tree/ntuple/test/ntuple_storage.cxx index 5a00d847c974b..c76c7c0660f79 100644 --- a/tree/ntuple/test/ntuple_storage.cxx +++ b/tree/ntuple/test/ntuple_storage.cxx @@ -1181,8 +1181,10 @@ TEST(RPageSourceFile, OpenDifferentAnchor) auto source = std::make_unique("ntpl1", fileGuard.GetPath(), RNTupleReadOptions()); source->Attach(); EXPECT_EQ(source->GetNEntries(), 100); - auto desc = source->GetSharedDescriptorGuard(); - EXPECT_NE(desc->FindFieldId("f"), ROOT::kInvalidDescriptorId); + { + auto desc = source->GetSharedDescriptorGuard(); + EXPECT_NE(desc->FindFieldId("f"), ROOT::kInvalidDescriptorId); + } auto anchor2 = file->Get("ntpl2"); ASSERT_NE(anchor2, nullptr);