Skip to content

Commit 3675b97

Browse files
committed
[ntuple] Add some internal utilities to the storage layer
Will be used by the RNTupleAttributes.
1 parent 875009e commit 3675b97

14 files changed

+154
-13
lines changed

tree/ntuple/inc/ROOT/RMiniFile.hxx

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,15 @@ class TVirtualStreamerInfo;
3232

3333
namespace ROOT {
3434

35+
class RNTupleWriteOptions;
36+
3537
namespace Internal {
38+
39+
class RPageSource;
3640
class RRawFile;
37-
}
3841

39-
class RNTupleWriteOptions;
42+
TDirectory *GetUnderlyingDirectory(ROOT::Internal::RNTupleFileWriter &writer);
4043

41-
namespace Internal {
4244
/// Holds status information of an open ROOT file during writing
4345
struct RTFileControlBlock;
4446

@@ -53,6 +55,8 @@ RNTuple data keys.
5355
*/
5456
// clang-format on
5557
class RMiniFileReader {
58+
friend ROOT::Internal::RPageSource;
59+
5660
private:
5761
/// The raw file used to read byte ranges
5862
ROOT::Internal::RRawFile *fRawFile = nullptr;
@@ -68,9 +72,6 @@ private:
6872
/// Used when the file turns out to be a TFile container. The ntuplePath variable is either the ntuple name
6973
/// or an ntuple name preceded by a directory (`myNtuple` or `foo/bar/myNtuple` or `/foo/bar/myNtuple`)
7074
RResult<RNTuple> GetNTupleProper(std::string_view ntuplePath);
71-
/// Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary).
72-
RResult<RNTuple>
73-
GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen);
7475

7576
/// Searches for a key with the given name and type in the key index of the directory starting at offsetDir.
7677
/// The offset points to the start of the TDirectory DATA section, without the key and without the name and title
@@ -84,6 +85,9 @@ public:
8485
explicit RMiniFileReader(ROOT::Internal::RRawFile *rawFile);
8586
/// Extracts header and footer location for the RNTuple identified by ntupleName
8687
RResult<RNTuple> GetNTuple(std::string_view ntupleName);
88+
/// Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary).
89+
RResult<RNTuple>
90+
GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen);
8791
/// Reads a given byte range from the file into the provided memory buffer.
8892
/// If `nbytes > fMaxKeySize` it will perform chunked read from multiple blobs,
8993
/// whose addresses are listed at the end of the first chunk.
@@ -109,6 +113,8 @@ A stand-alone version of RNTuple can remove the TFile based writer.
109113
*/
110114
// clang-format on
111115
class RNTupleFileWriter {
116+
friend TDirectory *ROOT::Internal::GetUnderlyingDirectory(ROOT::Internal::RNTupleFileWriter &writer);
117+
112118
public:
113119
/// The key length of a blob. It is always a big key (version > 1000) with class name RBlob.
114120
static constexpr std::size_t kBlobKeyLen = 42;
@@ -254,7 +260,7 @@ public:
254260
void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset);
255261
/// Ensures that the streamer info records passed as argument are written to the file
256262
void UpdateStreamerInfos(const ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t &streamerInfos);
257-
/// Writes the RNTuple key to the file so that the header and footer keys can be found
263+
/// Writes the RNTuple key to the file so that the header and footer keys can be found.
258264
void Commit(int compression = RCompressionSetting::EDefaults::kUseGeneralPurpose);
259265
};
260266

tree/ntuple/inc/ROOT/RNTupleFillContext.hxx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ private:
103103
std::size_t FillImpl(Entry &entry)
104104
{
105105
ROOT::RNTupleFillStatus status;
106-
FillNoFlush(entry, status);
106+
FillNoFlushImpl(entry, status);
107107
if (status.ShouldFlushCluster())
108108
FlushCluster();
109109
return status.GetLastEntrySize();
@@ -114,6 +114,9 @@ private:
114114
RNTupleFillContext &operator=(const RNTupleFillContext &) = delete;
115115

116116
public:
117+
RNTupleFillContext(RNTupleFillContext &&) = default;
118+
RNTupleFillContext &operator=(RNTupleFillContext &&) = default;
119+
117120
~RNTupleFillContext();
118121

119122
/// Fill an entry into this context, but don't commit the cluster. The calling code must pass an RNTupleFillStatus

tree/ntuple/inc/ROOT/RPageNullSink.hxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ public:
106106
void CommitStagedClusters(std::span<RStagedCluster>) final {}
107107
void CommitClusterGroup() final {}
108108
void CommitDatasetImpl() final {}
109+
std::unique_ptr<RPageSink> CloneWithNewRNTuple(std::string_view) const final { return nullptr; }
109110
};
110111

111112
} // namespace Internal

tree/ntuple/inc/ROOT/RPageSinkBuf.hxx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ public:
148148
void CommitDatasetImpl() final;
149149

150150
RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
151+
152+
std::unique_ptr<RPageSink> CloneWithNewRNTuple(std::string_view newName) const final
153+
{
154+
return fInnerSink->CloneWithNewRNTuple(newName);
155+
}
151156
}; // RPageSinkBuf
152157

153158
} // namespace Internal

tree/ntuple/inc/ROOT/RPageStorage.hxx

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@ namespace ROOT {
4747
class RNTupleModel;
4848

4949
namespace Internal {
50-
51-
class RPageAllocator;
5250
class RColumn;
51+
class RMiniFileReader;
52+
class RPageAllocator;
5353
struct RNTupleModelChangeset;
5454

5555
enum class EPageStorageType {
@@ -313,6 +313,10 @@ public:
313313

314314
virtual ROOT::NTupleSize_t GetNEntries() const = 0;
315315

316+
/// Creates a new RPageSink linked to the same underlying storage as this, writing to a new RNTuple called `newName`.
317+
/// The existing sink will stay valid. The existing sink and the new one must not write concurrently.
318+
virtual std::unique_ptr<RPageSink> CloneWithNewRNTuple(std::string_view newName) const = 0;
319+
316320
/// Physically creates the storage container to hold the ntuple (e.g., a keys a TFile or an S3 bucket)
317321
/// Init() associates column handles to the columns referenced by the model
318322
void Init(RNTupleModel &model)
@@ -808,6 +812,8 @@ public:
808812
virtual std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
809813
LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) = 0;
810814

815+
virtual RMiniFileReader *GetUnderlyingReader() { return nullptr; }
816+
811817
/// Parallel decompression and unpacking of the pages in the given cluster. The unzipped pages are supposed
812818
/// to be preloaded in a page pool attached to the source. The method is triggered by the cluster pool's
813819
/// unzip thread. It is an optional optimization, the method can safely do nothing. In particular, the

tree/ntuple/inc/ROOT/RPageStorageDaos.hxx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ struct RDaosNTupleAnchor {
7777
/// The object class for user data OIDs, e.g. `SX`
7878
std::string fObjClass{};
7979

80-
bool operator ==(const RDaosNTupleAnchor &other) const {
80+
bool operator==(const RDaosNTupleAnchor &other) const
81+
{
8182
return fVersionAnchor == other.fVersionAnchor && fVersionEpoch == other.fVersionEpoch &&
8283
fVersionMajor == other.fVersionMajor && fVersionMinor == other.fVersionMinor &&
8384
fVersionPatch == other.fVersionPatch && fNBytesHeader == other.fNBytesHeader &&
@@ -140,6 +141,8 @@ protected:
140141
public:
141142
RPageSinkDaos(std::string_view ntupleName, std::string_view uri, const ROOT::RNTupleWriteOptions &options);
142143
~RPageSinkDaos() override;
144+
145+
std::unique_ptr<RPageSink> CloneWithNewRNTuple(std::string_view) const final;
143146
}; // class RPageSinkDaos
144147

145148
// clang-format off

tree/ntuple/inc/ROOT/RPageStorageFile.hxx

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ public:
9898
RPageSinkFile(RPageSinkFile &&) = default;
9999
RPageSinkFile &operator=(RPageSinkFile &&) = default;
100100
~RPageSinkFile() override;
101+
102+
std::unique_ptr<RPageSink> CloneWithNewRNTuple(std::string_view) const final;
103+
104+
ROOT::Internal::RNTupleFileWriter *GetUnderlyingWriter() const { return fWriter.get(); }
101105
}; // class RPageSinkFile
102106

103107
// clang-format off
@@ -149,6 +153,8 @@ private:
149153
std::unique_ptr<ROOT::Internal::RCluster>
150154
PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector<RRawFile::RIOVec> &readRequests);
151155

156+
RMiniFileReader *GetUnderlyingReader() final { return &fReader; }
157+
152158
protected:
153159
void LoadStructureImpl() final;
154160
ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
@@ -173,6 +179,11 @@ public:
173179
RPageSourceFile &operator=(RPageSourceFile &&) = delete;
174180
~RPageSourceFile() override;
175181

182+
/// Creates a new PageSourceFile using the same underlying file as this but referring to a different RNTuple,
183+
/// represented by `anchor`.
184+
std::unique_ptr<RPageSourceFile>
185+
OpenWithDifferentAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
186+
176187
void
177188
LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;
178189

tree/ntuple/src/RMiniFile.cxx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,3 +1608,11 @@ void ROOT::Internal::RNTupleFileWriter::WriteTFileSkeleton(int defaultCompressio
16081608
fileSimple.Write(&padding, sizeof(padding));
16091609
fileSimple.fKeyOffset = fileSimple.fFilePos;
16101610
}
1611+
1612+
TDirectory *ROOT::Internal::GetUnderlyingDirectory(ROOT::Internal::RNTupleFileWriter &writer)
1613+
{
1614+
if (auto *proper = std::get_if<ROOT::Internal::RNTupleFileWriter::RFileProper>(&writer.fFile)) {
1615+
return proper->fDirectory;
1616+
}
1617+
return nullptr;
1618+
}

tree/ntuple/src/RNTupleParallelWriter.cxx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ class RPageSynchronizingSink : public RPageSink {
113113
{
114114
throw ROOT::RException(R__FAIL("should never commit dataset via RPageSynchronizingSink"));
115115
}
116+
std::unique_ptr<RPageSink> CloneWithNewRNTuple(std::string_view) const final
117+
{
118+
throw ROOT::RException(R__FAIL("CloneWithNewRNTuple unavailable for RPageSynchronizingSink"));
119+
}
116120

117121
RSinkGuard GetSinkGuard() final { return RSinkGuard(fMutex); }
118122
};

tree/ntuple/src/RPageStorage.cxx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <ROOT/RPageAllocator.hxx>
2525
#include <ROOT/RPageSinkBuf.hxx>
2626
#include <ROOT/RPageStorageFile.hxx>
27+
#include <ROOT/RNTupleReader.hxx>
2728
#ifdef R__ENABLE_DAOS
2829
#include <ROOT/RPageStorageDaos.hxx>
2930
#endif
@@ -1198,8 +1199,8 @@ void ROOT::Internal::RPagePersistentSink::CommitStagedClusters(std::span<RStaged
11981199
if (!columnInfo.fIsSuppressed)
11991200
continue;
12001201
const auto colId = columnInfo.fPageRange.GetPhysicalColumnId();
1201-
// For suppressed columns, we need to reset the first element index to the first element of the next (upcoming)
1202-
// cluster. This information has been determined for the committed cluster descriptor through
1202+
// For suppressed columns, we need to reset the first element index to the first element of the next
1203+
// (upcoming) cluster. This information has been determined for the committed cluster descriptor through
12031204
// CommitSuppressedColumnRanges(), so we can use the information from the descriptor.
12041205
const auto &columnRangeFromDesc = clusterBuilder.GetColumnRange(colId);
12051206
fOpenColumnRanges[colId].SetFirstElementIndex(columnRangeFromDesc.GetFirstElementIndex() +

0 commit comments

Comments
 (0)