Skip to content

[DF] Allow snapshotting from TTree to RNTuple #19364

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions tree/dataframe/inc/ROOT/RDF/RInterface.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1371,11 +1371,6 @@ public:
};

if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) {
if (RDFInternal::GetDataSourceLabel(*this) == "TTreeDS") {
throw std::runtime_error("Snapshotting from TTree to RNTuple is not yet supported. The current recommended "
"way to convert TTrees to RNTuple is through the RNTupleImporter.");
}

// The data source of the RNTuple resulting from the Snapshot action does not exist yet here, so we create one
// without a data source for now, and set it once the actual data source can be created (i.e., after
// writing the RNTuple).
Expand Down
1 change: 1 addition & 0 deletions tree/dataframe/test/NTupleStruct.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
/// Minimal electron record used by the dataframe tests.
/// Kept as a plain aggregate (single public member, no constructors) so that
/// brace-initialisation such as `Electron{137.f}` continues to work.
struct Electron {
   float pt; ///< Sole data member; both comparison operators below inspect only this value.

   /// Equality: two electrons compare equal when their `pt` values are exactly equal
   /// (plain `==` on float, no tolerance applied).
   friend bool operator==(const Electron &lhs, const Electron &rhs)
   {
      return lhs.pt == rhs.pt;
   }

   /// Ordering: an electron sorts before another when its `pt` is strictly smaller.
   friend bool operator<(const Electron &lhs, const Electron &rhs)
   {
      return lhs.pt < rhs.pt;
   }
};

Expand Down
1 change: 0 additions & 1 deletion tree/dataframe/test/TwoFloats.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,3 @@ class TwoFloats {
}
ClassDef(TwoFloats, 2)
};

197 changes: 98 additions & 99 deletions tree/dataframe/test/dataframe_snapshot_ntuple.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -35,41 +35,29 @@ class FileRAII {
std::string GetPath() const { return fPath; }
};

TEST(RDFSnapshotRNTuple, FromScratchTemplated)
template <typename T>
void expect_vec_eq(const ROOT::RVec<T> &v1, const ROOT::RVec<T> &v2)
{
FileRAII fileGuard{"RDFSnapshotRNTuple_from_scratch_templated.root"};
const std::vector<std::string> columns = {"x"};

auto df = ROOT::RDataFrame(25ull).Define("x", [] { return 10; });

RSnapshotOptions opts;
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;

auto sdf = df.Snapshot("ntuple", fileGuard.GetPath(), columns, opts);

EXPECT_EQ(columns, sdf->GetColumnNames());

// Verify we actually snapshotted to an RNTuple.
auto ntuple = RNTupleReader::Open("ntuple", fileGuard.GetPath());
EXPECT_EQ(25ull, ntuple->GetNEntries());

auto x = ntuple->GetView<int>("x");
for (const auto i : ntuple->GetEntryRange()) {
EXPECT_EQ(10, x(i));
ASSERT_EQ(v1.size(), v2.size()) << "Vectors 'v1' and 'v2' are of unequal length";
for (std::size_t i = 0ull; i < v1.size(); ++i) {
if constexpr (std::is_floating_point_v<T>)
EXPECT_FLOAT_EQ(v1[i], v2[i]) << "Vectors 'v1' and 'v2' differ at index " << i;
else
EXPECT_EQ(v1[i], v2[i]) << "Vectors 'v1' and 'v2' differ at index " << i;
}
}

TEST(RDFSnapshotRNTuple, FromScratchJITted)
TEST(RDFSnapshotRNTuple, FromScratch)
{
FileRAII fileGuard{"RDFSnapshotRNTuple_from_scratch_jitted.root"};
FileRAII fileGuard{"RDFSnapshotRNTuple_from_scratch.root"};
const std::vector<std::string> columns = {"x"};

auto df = ROOT::RDataFrame(25ull).Define("x", [] { return 10; });

RSnapshotOptions opts;
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;

auto sdf = df.Snapshot("ntuple", fileGuard.GetPath(), "x", opts);
auto sdf = df.Snapshot("ntuple", fileGuard.GetPath(), columns, opts);

EXPECT_EQ(columns, sdf->GetColumnNames());

Expand Down Expand Up @@ -172,9 +160,9 @@ TEST_F(RDFSnapshotRNTupleTest, DefaultFormatWarning)
"in RSnapshotOptions. Note that this current default behaviour might change in the future.");
}

TEST_F(RDFSnapshotRNTupleTest, DefaultToRNTupleTemplated)
TEST_F(RDFSnapshotRNTupleTest, DefaultToRNTuple)
{
FileRAII fileGuard{"RDFSnapshotRNTuple_snap_templated.root"};
FileRAII fileGuard{"RDFSnapshotRNTuple_snap.root"};

auto df = ROOT::RDataFrame(fNtplName, fFileName);
auto sdf = df.Define("x", [] { return 10; }).Snapshot("ntuple", fileGuard.GetPath(), {"pt", "x"}, fSnapshotOpts);
Expand All @@ -189,52 +177,9 @@ TEST_F(RDFSnapshotRNTupleTest, DefaultToRNTupleTemplated)
EXPECT_EQ(10, x(0));
}

TEST_F(RDFSnapshotRNTupleTest, DefaultToRNTupleJITted)
{
FileRAII fileGuard{"RDFSnapshotRNTuple_snap_jitted.root"};

auto df = ROOT::RDataFrame(fNtplName, fFileName);
auto sdf = df.Define("x", [] { return 10; }).Snapshot("ntuple", fileGuard.GetPath(), {"pt", "x"}, fSnapshotOpts);

auto ntuple = RNTupleReader::Open("ntuple", fileGuard.GetPath());
EXPECT_EQ(1ull, ntuple->GetNEntries());

auto pt = ntuple->GetView<float>("pt");
auto x = ntuple->GetView<int>("x");

EXPECT_FLOAT_EQ(42.0, pt(0));
EXPECT_EQ(10, x(0));
}

TEST_F(RDFSnapshotRNTupleTest, ToTTreeTemplated)
{
FileRAII fileGuard{"RDFSnapshotRNTuple_to_ttree_templated.root"};

auto df = ROOT::RDataFrame(fNtplName, fFileName);

fSnapshotOpts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kTTree;

auto sdf = df.Define("x", [] { return 10; }).Snapshot("tree", fileGuard.GetPath(), {"pt", "x"}, fSnapshotOpts);

TFile file(fileGuard.GetPath().c_str());
auto tree = file.Get<TTree>("tree");
EXPECT_EQ(1ull, tree->GetEntries());

float pt;
int x;

tree->SetBranchAddress("pt", &pt);
tree->SetBranchAddress("x", &x);

tree->GetEntry(0);

EXPECT_FLOAT_EQ(42.0, pt);
EXPECT_EQ(10, x);
}

TEST_F(RDFSnapshotRNTupleTest, ToTTreeJITted)
TEST_F(RDFSnapshotRNTupleTest, ToTTree)
{
FileRAII fileGuard{"RDFSnapshotRNTuple_to_ttree_jitted.root"};
FileRAII fileGuard{"RDFSnapshotRNTuple_to_ttree.root"};

auto df = ROOT::RDataFrame(fNtplName, fFileName);

Expand Down Expand Up @@ -511,19 +456,39 @@ void WriteTestTree(const std::string &tname, const std::string &fname)
{
TFile file(fname.c_str(), "RECREATE");
TTree t(tname.c_str(), tname.c_str());
float pt;

float pt = 42.f;
std::vector<float> photons{1.f, 2.f, 3.f};
Electron electron{137.f};
Jet jets;
jets.electrons.emplace_back(Electron{122.f});
jets.electrons.emplace_back(Electron{125.f});
jets.electrons.emplace_back(Electron{129.f});

Int_t nmuons = 1;
float muon_pt[3] = {10.f, 20.f, 30.f};

struct {
Int_t x = 1;
Int_t y = 2;
} point;

t.Branch("pt", &pt);
t.Branch("photons", &photons);
t.Branch("electron", &electron);
t.Branch("jets", &jets);
t.Branch("nmuons", &nmuons);
t.Branch("muon_pt", muon_pt, "muon_pt[nmuons]");
t.Branch("point", &point, "x/I:y/I");

pt = 42.0;
t.Fill();

t.Write();
}

TEST(RDFSnapshotRNTuple, DisallowFromTTreeTemplated)
TEST(RDFSnapshotRNTuple, FromTTree)
{
const auto treename = "tree";
FileRAII fileGuard{"RDFSnapshotRNTuple_disallow_from_ttree_templated.root"};
FileRAII fileGuard{"RDFSnapshotRNTuple_disallow_from_ttree.root"};

WriteTestTree(treename, fileGuard.GetPath());

Expand All @@ -532,34 +497,68 @@ TEST(RDFSnapshotRNTuple, DisallowFromTTreeTemplated)
RSnapshotOptions opts;
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;

try {
auto sdf = df.Define("x", [] { return 10; }).Snapshot("ntuple", fileGuard.GetPath(), {"pt", "x"}, opts);
FAIL() << "snapshotting from RNTuple to TTree is not (yet) possible";
} catch (const std::runtime_error &err) {
EXPECT_STREQ(err.what(), "Snapshotting from TTree to RNTuple is not yet supported. The current recommended way "
"to convert TTrees to RNTuple is through the RNTupleImporter.");
{
// FIXME(fdegeus): snapshotting leaflist branches as-is (i.e. without explicitly providing their leafs) is not
// supported, because we have no way of reconstructing the memory layout of the branch itself from only the
// TTree's on-disk information without JITting. For RNTuple, we would be able to do this using anonymous record
// fields, however. Once this is implemented, this test should be changed to check the result of snapshotting
// "point" fully.
auto sdf = df.Define("x", [] { return 10; })
.Snapshot("ntuple", fileGuard.GetPath(),
{"x", "pt", "photons", "electron", "jets", "muon_pt", "point.x", "point.y"}, opts);

auto x = sdf->Take<int>("x");
auto pt = sdf->Take<float>("pt");
auto photons = sdf->Take<ROOT::RVec<float>>("photons");
auto electron = sdf->Take<Electron>("electron");
auto jet_electrons = sdf->Take<ROOT::RVec<Electron>>("jets.electrons");
auto nMuons = sdf->Take<int>("nmuons");
auto muonPt = sdf->Take<ROOT::RVec<float>>("muon_pt");
auto pointX = sdf->Take<int>("point_x");
auto pointY = sdf->Take<int>("point_y");

ASSERT_EQ(1UL, x->size());
ASSERT_EQ(1UL, pt->size());
ASSERT_EQ(1UL, photons->size());
ASSERT_EQ(1UL, electron->size());
ASSERT_EQ(1UL, jet_electrons->size());
ASSERT_EQ(1UL, nMuons->size());
ASSERT_EQ(1UL, muonPt->size());
ASSERT_EQ(1UL, pointX->size());
ASSERT_EQ(1UL, pointY->size());

EXPECT_EQ(10, x->front());
EXPECT_EQ(42.f, pt->front());
expect_vec_eq<float>({1.f, 2.f, 3.f}, photons->front());
EXPECT_EQ(Electron{137.f}, electron->front());
expect_vec_eq({Electron{122.f}, Electron{125.f}, Electron{129.f}}, jet_electrons->front());
EXPECT_EQ(1, nMuons->front());
expect_vec_eq({10.f}, muonPt->front());
EXPECT_EQ(1, pointX->front());
EXPECT_EQ(2, pointY->front());
}
}

TEST(RDFSnapshotRNTuple, DisallowFromTTreeJITted)
{
const auto treename = "tree";
FileRAII fileGuard{"RDFSnapshotRNTuple_disallow_from_ttree_jitted.root"};

WriteTestTree(treename, fileGuard.GetPath());

auto df = ROOT::RDataFrame(treename, fileGuard.GetPath());
auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());

RSnapshotOptions opts;
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
auto x = reader->GetView<int>("x");
auto pt = reader->GetView<float>("pt");
auto photons = reader->GetView<ROOT::RVec<float>>("photons");
auto electron = reader->GetView<Electron>("electron");
auto jet_electrons = reader->GetView<ROOT::RVec<Electron>>("jets.electrons");
auto nMuons = reader->GetView<int>("nmuons");
auto muonPt = reader->GetView<ROOT::RVec<float>>("muon_pt");
auto pointX = reader->GetView<int>("point_x");
auto pointY = reader->GetView<int>("point_y");

try {
auto sdf = df.Define("x", [] { return 10; }).Snapshot("ntuple", fileGuard.GetPath(), {"pt", "x"}, opts);
FAIL() << "snapshotting from RNTuple to TTree is not (yet) possible";
} catch (const std::runtime_error &err) {
EXPECT_STREQ(err.what(), "Snapshotting from TTree to RNTuple is not yet supported. The current recommended way "
"to convert TTrees to RNTuple is through the RNTupleImporter.");
}
EXPECT_EQ(10, x(0));
EXPECT_EQ(42.f, pt(0));
expect_vec_eq<float>({1.f, 2.f, 3.f}, photons(0));
EXPECT_EQ(Electron{137.f}, electron(0));
expect_vec_eq({Electron{122.f}, Electron{125.f}, Electron{129.f}}, jet_electrons(0));
EXPECT_EQ(1, nMuons(0));
expect_vec_eq({10.f}, muonPt(0));
EXPECT_EQ(1, pointX(0));
EXPECT_EQ(2, pointY(0));
}

#ifdef R__USE_IMT
Expand Down
Loading