Skip to content

Commit 3645459

Browse files
committed
[df] Add tests for RDatasetSpec support in RNTuple
1 parent 7b81d2a commit 3645459

File tree

3 files changed

+259
-0
lines changed

3 files changed

+259
-0
lines changed

tree/dataframe/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ configure_file(RCsvDS_test_win.csv . COPYONLY)
9797
configure_file(RCsvDS_test_NaNs.csv . COPYONLY)
9898
configure_file(RCsvDS_test_parsing.csv . COPYONLY)
9999
configure_file(spec.json . COPYONLY)
100+
configure_file(spec_rntuple.json . COPYONLY)
100101
configure_file(pyspec.json . COPYONLY)
101102
configure_file(spec_ordering_samples_withFriends.json . COPYONLY)
102103
ROOT_ADD_GTEST(datasource_csv datasource_csv.cxx LIBRARIES ROOTDataFrame)

tree/dataframe/test/dataframe_datasetspec.cxx

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
#include <ROOT/RDF/RMetaData.hxx>
1010
#include <TSystem.h>
1111

12+
#include <ROOT/RNTupleModel.hxx>
13+
#include <ROOT/RNTupleReader.hxx>
14+
#include <ROOT/RNTupleWriter.hxx>
15+
16+
1217
#include <thread> // std::thread::hardware_concurrency
1318

1419
#include <TFile.h>
@@ -832,6 +837,222 @@ TEST_P(RDatasetSpecTest, TreeInSubdir)
832837
EXPECT_EQ(sample_ids->at(0), fraii.GetPath() + std::string("/subdir/T"));
833838
}
834839

840+
841+
// Tests with RNTuple
842+
struct InputRNTuplesRAII {
843+
unsigned int fNFiles = 0;
844+
std::string fPrefix;
845+
846+
InputRNTuplesRAII(unsigned int nFiles, std::string prefix) : fNFiles(nFiles), fPrefix(std::move(prefix))
847+
{
848+
unsigned int fNEntries{5};
849+
for (auto i = 0u; i < fNFiles; ++i) {
850+
auto model = ROOT::RNTupleModel::Create();
851+
auto fldX = model->MakeField<int>("x");
852+
auto fn = fPrefix + std::to_string(i) + ".root";
853+
auto ntpl = ROOT::RNTupleWriter::Recreate(std::move(model), "ntuple", fn);
854+
for (ULong64_t entry = 0; entry < fNEntries; entry++) {
855+
*fldX = entry;
856+
ntpl->Fill();
857+
}
858+
}
859+
}
860+
~InputRNTuplesRAII()
861+
{
862+
for (auto i = 0u; i < fNFiles; ++i)
863+
std::remove((fPrefix + std::to_string(i) + ".root").c_str());
864+
}
865+
};
866+
867+
struct InputRNTuplesRAIIRanges {
868+
unsigned int fNFiles = 0;
869+
std::string fPrefix;
870+
InputRNTuplesRAIIRanges(unsigned int nFiles, std::string prefix) : fNFiles(nFiles), fPrefix(std::move(prefix))
871+
{
872+
unsigned int fNEntries{5};
873+
for (auto i = 0u; i < fNFiles; ++i) {
874+
auto model = ROOT::RNTupleModel::Create();
875+
auto fldX = model->MakeField<int>("x");
876+
auto fn = fPrefix + std::to_string(i) + ".root";
877+
auto ntpl = ROOT::RNTupleWriter::Recreate(std::move(model), "ntuple", fn);
878+
for (ULong64_t entry = 0; entry < fNEntries; entry++) {
879+
*fldX = i * entry;
880+
ntpl->Fill();
881+
}
882+
}
883+
}
884+
~InputRNTuplesRAIIRanges()
885+
{
886+
for (auto i = 0u; i < fNFiles; ++i)
887+
std::remove((fPrefix + std::to_string(i) + ".root").c_str());
888+
}
889+
};
890+
891+
// A dataset spec made of one sample that points to a single RNTuple file.
TEST_P(RDatasetSpecTest, RNTupleSingle)
{
   const std::string prefix = "rdatasetspec_rntuple";
   InputRNTuplesRAII file(1u, prefix);

   RDatasetSpec spec;
   spec.AddSample(ROOT::RDF::Experimental::RSample("mysample", "ntuple", prefix + "0.root"));

   ROOT::RDataFrame df(spec);
   auto nSelected = df.Filter("x > 3").Count();

   // The file holds x = 0..4, so only x == 4 passes the cut.
   EXPECT_EQ(*nSelected, 1);
}
902+
903+
// A dataset spec made of three samples (one of them spanning two files), each with its own metadata.
TEST_P(RDatasetSpecTest, RNTupleMultiple)
{
   using ROOT::RDF::Experimental::RMetaData;
   using ROOT::RDF::Experimental::RSample;

   const std::string prefix = "rdatasetspec_rntuple";
   InputRNTuplesRAII file(4u, prefix);

   RMetaData lum10;
   lum10.Add("lum", 10.0);
   RMetaData lum20;
   lum20.Add("lum", 20.0);
   RMetaData lum30;
   lum30.Add("lum", 30.0);

   const std::vector<std::string> twoFiles{prefix + "0.root", prefix + "3.root"};

   RDatasetSpec spec;
   spec.AddSample(RSample("mysample", "ntuple", twoFiles, lum10));
   spec.AddSample(RSample("mysample1", "ntuple", prefix + "1.root", lum20));
   spec.AddSample(RSample("mysample2", "ntuple", prefix + "2.root", lum30));

   ROOT::RDataFrame df(spec);

   auto nAboveThree = df.Filter("x > 3").Count();

   auto withLum = df.DefinePerSample(
      "lum", [](unsigned int, const ROOT::RDF::RSampleInfo &sampleInfo) { return sampleInfo.GetD("lum"); });
   auto nLumTen = withLum.Filter("lum == 10.").Count();

   // Every one of the four files contributes exactly one entry with x > 3.
   EXPECT_EQ(*nAboveThree, 4);
   // "mysample" (lum == 10) spans two files of five entries each.
   EXPECT_EQ(*nLumTen, 10);
}
928+
929+
// A dataset spec with several RNTuple samples, processed both in full and restricted to global entry ranges.
//
// File i stores x = i * e for e = 0..4 and the samples are added in file order 1, 4, 2, 3, hence the global
// entries are: [0,5) -> file 1, [5,10) -> file 4, [10,15) -> file 2, [15,20) -> file 3.
TEST_P(RDatasetSpecTest, RNTupleWithGlobalRanges)
{
   const std::string prefix = "rdatasetspec_ranges_rntuple";
   InputRNTuplesRAIIRanges file(5u, prefix);
   ROOT::RDF::Experimental::RMetaData meta, meta1, meta2;
   meta.Add("lum", 10.0);
   meta1.Add("lum", 20.0);
   meta2.Add("lum", 30.0);
   auto samp = ROOT::RDF::Experimental::RSample(
      "mysample", "ntuple", std::vector<std::string>{prefix + "1.root", prefix + "4.root"}, meta);
   auto samp1 = ROOT::RDF::Experimental::RSample("mysample1", "ntuple", prefix + "2.root", meta1);
   auto samp2 = ROOT::RDF::Experimental::RSample("mysample2", "ntuple", prefix + "3.root", meta2);
   RDatasetSpec spec;
   spec.AddSample(samp);
   spec.AddSample(samp1);
   spec.AddSample(samp2);

   // Data frame over the full dataset; it is built before any global range is applied to the spec.
   auto df1 = ROOT::RDataFrame(spec);
   auto df_final = df1.Filter("x > 3").Count();
   auto definepersamp =
      df1.DefinePerSample("lum", [](unsigned int, const ROOT::RDF::RSampleInfo &id) { return id.GetD("lum"); });
   auto df_filtered = definepersamp.Filter("lum == 10.").Count();

   // For each global range: the entry to pick inside that range and the value of "x" expected there.
   struct RangeCheck {
      RDatasetSpec::REntryRange fRange;
      ULong64_t fEntry;
      ULong64_t fExpectedX;
   };
   const std::vector<RangeCheck> checks = {
      {{1, 4}, 2, 2},     // file 1, local entry 2 -> 1 * 2
      {{2, 7}, 3, 3},     // file 1, local entry 3 -> 1 * 3
      {{6, 19}, 8, 12},   // file 4, local entry 3 -> 4 * 3
      {{16, 20}, 19, 12}, // file 3, local entry 4 -> 3 * 4
   };

   for (const auto &check : checks) {
      auto dfRange = RDataFrame(spec.WithGlobalRange(check.fRange));
      auto selected = dfRange.Filter("rdfentry_ == " + std::to_string(check.fEntry));
      auto result = selected.Take<ULong64_t>("x");
      const auto values = result.GetValue();
      // Guard the element access below: an empty selection must fail the test instead of reading out of bounds.
      ASSERT_FALSE(values.empty()) << "no entry selected for rdfentry_ == " << check.fEntry;
      EXPECT_EQ(values[0], check.fExpectedX);
   }

   // Entries with x > 3: one in file 1, four in file 4, three each in files 2 and 3.
   EXPECT_EQ(df_final.GetValue(), 11);
   // "mysample" (lum == 10) spans two files of five entries each.
   EXPECT_EQ(df_filtered.GetValue(), 10);
}
980+
981+
// Build the data frame from the JSON specification "spec_rntuple.json", which lists the files
// rdatasetspec_rntuple{0,1,2}.root as three samples with lum = 10, 20 and 30 respectively.
TEST_P(RDatasetSpecTest, FromSpecRNTuple)
{
   const std::string prefix = "rdatasetspec_rntuple";
   InputRNTuplesRAII file(3u, prefix);

   auto df = ROOT::RDF::Experimental::FromSpec("spec_rntuple.json");

   auto nAboveThree = df.Filter("x > 3").Count();

   auto withLum = df.DefinePerSample(
      "lum", [](unsigned int, const ROOT::RDF::RSampleInfo &sampleInfo) { return sampleInfo.GetD("lum"); });
   auto nLumTwenty = withLum.Filter("lum == 20.").Count();

   // One entry with x > 3 per file, three files.
   EXPECT_EQ(*nAboveThree, 3);
   // Only one file (five entries) carries lum == 20.
   EXPECT_EQ(*nLumTwenty, 5);
}
993+
994+
// Samples that refer to RNTuples with different names must be rejected when the data frame is built.
TEST_P(RDatasetSpecTest, RNTupleWrong)
{
   const std::string prefix = "rdatasetspec_rntuple";
   InputRNTuplesRAII file(1u, prefix);

   // Second input file, containing an RNTuple with a different name ("mytuple").
   const std::string wrongFile = "rntuple_wrong.root";

   // Remove the extra file when the test ends, whatever the outcome.
   struct FileRemover {
      std::string fPath;
      ~FileRemover() { std::remove(fPath.c_str()); }
   } cleanup{wrongFile};

   {
      // Keep the writer in its own scope so that it is destroyed before the file is handed to RDataFrame.
      auto model = ROOT::RNTupleModel::Create();
      auto fldX = model->MakeField<int>("x");
      auto ntpl = ROOT::RNTupleWriter::Recreate(std::move(model), "mytuple", wrongFile);
      *fldX = 2;
      ntpl->Fill();
   }

   EXPECT_THROW(
      try {
         auto samp = ROOT::RDF::Experimental::RSample("mysample", "ntuple", prefix + "0.root");
         auto samp1 = ROOT::RDF::Experimental::RSample("mysample1", "mytuple", wrongFile);

         RDatasetSpec spec;
         spec.AddSample(samp);
         spec.AddSample(samp1);
         auto df_error = ROOT::RDataFrame(spec);
      } catch (const std::runtime_error &err) {
         EXPECT_EQ(
            std::string(err.what()),
            "More than one RNTuple name was found, please make sure to use RNTuples with the same RnTuple name.");
         // Rethrow so that EXPECT_THROW can verify the exception type as well.
         throw;
      },
      std::runtime_error);
}
1030+
1031+
// The mean of "x" computed through an RDatasetSpec-based data frame must agree with the mean of a
// histogram filled by reading the same file directly with RNTupleReader.
TEST_P(RDatasetSpecTest, CompareWithRNTupleReader)
{
   const std::string prefix = "rdatasetspec_rntuple";
   InputRNTuplesRAII file(1u, prefix);
   const std::string fileName = prefix + "0.root";

   // Reference: read the field "x" entry by entry and fill a histogram.
   auto model = ROOT::RNTupleModel::Create();
   std::shared_ptr<int> x = model->MakeField<int>("x");
   auto reader = ROOT::RNTupleReader::Open(std::move(model), "ntuple", fileName);

   TH1I h("h", "x", 5, 0, 5);
   // Iterate over all entries of the ntuple rather than relying on a hard-coded entry count.
   for (auto entryId : *reader) {
      reader->LoadEntry(entryId);
      h.Fill(*x);
   }

   // Same file, this time accessed through a dataset specification.
   auto samp = ROOT::RDF::Experimental::RSample("mysample", "ntuple", fileName);
   RDatasetSpec spec;
   spec.AddSample(samp);
   auto df1 = ROOT::RDataFrame(spec);

   auto mean = df1.Mean("x");

   // Both values are doubles: compare with a floating-point aware assertion instead of operator==.
   EXPECT_DOUBLE_EQ(h.GetMean(), mean.GetValue());
}
1055+
8351056
// instantiate single-thread tests
8361057
INSTANTIATE_TEST_SUITE_P(Seq, RDatasetSpecTest, ::testing::Values(false));
8371058

tree/dataframe/test/spec_rntuple.json

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"samples": {
3+
"sample_ntuple1": {
4+
"trees": [
5+
"ntuple"
6+
],
7+
"files": [
8+
"rdatasetspec_rntuple0.root"
9+
],
10+
"metadata": {
11+
"lum": 10.0
12+
}
13+
},
14+
"sample_ntuple2": {
15+
"trees": [
16+
"ntuple"
17+
],
18+
"files": [
19+
"rdatasetspec_rntuple1.root"
20+
],
21+
"metadata": {
22+
"lum": 20.0
23+
}
24+
},
25+
"sample_ntuple3": {
26+
"trees": [
27+
"ntuple"
28+
],
29+
"files": [
30+
"rdatasetspec_rntuple2.root"
31+
],
32+
"metadata": {
33+
"lum": 30.0
34+
}
35+
}
36+
}
37+
}

0 commit comments

Comments
 (0)