Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix reading of multiple files with ROOTFrameReader #417

Merged
merged 5 commits into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions include/podio/ROOTFrameReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,27 @@ class ROOTFrameReader {
ROOTFrameReader(const ROOTFrameReader&) = delete;
ROOTFrameReader& operator=(const ROOTFrameReader&) = delete;

/**
* Open a single file for reading.
*
* @param filename The name of the input file
*/
void openFile(const std::string& filename);

/**
* Open multiple files for reading and then treat them as if they are one file
*
* NOTE: All of the files are assumed to have the same structure. Specifically
* this means:
* - The same categories are available from all files
* - The collections that are contained in the individual categories are the
* same across all files
*
* This usually boils down to "the files have been written with the same
* settings", e.g. they are outputs of a batched process.
*
* @param filenames The filenames of all input files that should be read
*/
void openFiles(const std::vector<std::string>& filenames);

/**
Expand Down Expand Up @@ -125,7 +144,10 @@ class ROOTFrameReader {
*/
CategoryInfo& getCategoryInfo(const std::string& name);

GenericParameters readEventMetaData(CategoryInfo& catInfo);
/**
* Read the parameters for the entry specified in the passed CategoryInfo
*/
GenericParameters readEntryParameters(CategoryInfo& catInfo, bool reloadBranches, unsigned int localEntry);

/**
* Read the data entry specified in the passed CategoryInfo, and increase the
Expand All @@ -137,7 +159,8 @@ class ROOTFrameReader {
/**
* Get / read the buffers at index iColl in the passed category information
*/
podio::CollectionReadBuffers getCollectionBuffers(CategoryInfo& catInfo, size_t iColl);
podio::CollectionReadBuffers getCollectionBuffers(CategoryInfo& catInfo, size_t iColl, bool reloadBranches,
unsigned int localEntry);

std::unique_ptr<TChain> m_metaChain{nullptr}; ///< The metadata tree
std::unordered_map<std::string, CategoryInfo> m_categories{}; ///< All categories
Expand Down
36 changes: 24 additions & 12 deletions src/ROOTFrameReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,22 @@ std::tuple<std::vector<root_utils::CollectionBranches>, std::vector<std::pair<st
createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable,
const std::vector<root_utils::CollectionInfoT>& collInfo);

GenericParameters ROOTFrameReader::readEventMetaData(ROOTFrameReader::CategoryInfo& catInfo) {
GenericParameters ROOTFrameReader::readEntryParameters(ROOTFrameReader::CategoryInfo& catInfo, bool reloadBranches,
unsigned int localEntry) {
// Parameter branch is always the last one
auto& paramBranches = catInfo.branches.back();

// Make sure to have a valid branch pointer after switching trees in the chain
// as well as on the first event
if (reloadBranches) {
paramBranches.data = root_utils::getBranch(catInfo.chain.get(), root_utils::paramBranchName);
}
auto* branch = paramBranches.data;

GenericParameters params;
auto* emd = &params;
branch->SetAddress(&emd);
branch->GetEntry(catInfo.entry);
branch->GetEntry(localEntry);
return params;
}

Expand All @@ -53,19 +60,29 @@ std::unique_ptr<ROOTFrameData> ROOTFrameReader::readEntry(ROOTFrameReader::Categ
return nullptr;
}

// After switching trees in the chain, branch pointers get invalidated so
// they need to be reassigned.
// NOTE: root 6.22/06 requires that we get completely new branches here,
// with 6.20/04 we could just re-set them
const auto preTreeNo = catInfo.chain->GetTreeNumber();
const auto localEntry = catInfo.chain->LoadTree(catInfo.entry);
const auto treeChange = catInfo.chain->GetTreeNumber() != preTreeNo;
// Also need to make sure to handle the first event
const auto reloadBranches = treeChange || localEntry == 0;

ROOTFrameData::BufferMap buffers;
for (size_t i = 0; i < catInfo.storedClasses.size(); ++i) {
buffers.emplace(catInfo.storedClasses[i].first, getCollectionBuffers(catInfo, i));
buffers.emplace(catInfo.storedClasses[i].first, getCollectionBuffers(catInfo, i, reloadBranches, localEntry));
}

auto parameters = readEventMetaData(catInfo);
auto parameters = readEntryParameters(catInfo, reloadBranches, localEntry);

catInfo.entry++;
return std::make_unique<ROOTFrameData>(std::move(buffers), catInfo.table, std::move(parameters));
}

podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameReader::CategoryInfo& catInfo,
size_t iColl) {
podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameReader::CategoryInfo& catInfo, size_t iColl,
bool reloadBranches, unsigned int localEntry) {
const auto& name = catInfo.storedClasses[iColl].first;
const auto& [collType, isSubsetColl, schemaVersion, index] = catInfo.storedClasses[iColl].second;
auto& branches = catInfo.branches[index];
Expand All @@ -76,12 +93,7 @@ podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameRead
// TODO: Error handling of empty optional
auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{});

const auto localEntry = catInfo.chain->LoadTree(catInfo.entry);
// After switching trees in the chain, branch pointers get invalidated so
// they need to be reassigned.
// NOTE: root 6.22/06 requires that we get completely new branches here,
// with 6.20/04 we could just re-set them
if (localEntry == 0) {
if (reloadBranches) {
branches.data = root_utils::getBranch(catInfo.chain.get(), name.c_str());

// reference collections
Expand Down
5 changes: 4 additions & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ set(root_dependent_tests
read_timed.cpp
read_frame.cpp
write_frame_root.cpp
read_frame_legacy_root.cpp)
read_frame_legacy_root.cpp
read_frame_root_multiple.cpp
)
set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioRootIO)
foreach( sourcefile ${root_dependent_tests} )
CREATE_PODIO_TEST(${sourcefile} "${root_libs}")
Expand Down Expand Up @@ -168,6 +170,7 @@ set_property(TEST read_and_write PROPERTY DEPENDS write)
set_property(TEST read_frame_legacy_root PROPERTY DEPENDS write)
set_property(TEST read_timed PROPERTY DEPENDS write_timed)
set_property(TEST read_frame PROPERTY DEPENDS write_frame_root)
set_property(TEST read_frame_root_multiple PROPERTY DEPENDS write_frame_root)

add_executable(check_benchmark_outputs check_benchmark_outputs.cpp)
target_link_libraries(check_benchmark_outputs PRIVATE ROOT::Tree)
Expand Down
1 change: 1 addition & 0 deletions tests/CTestCustom.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ
read-multiple
read-legacy-files
read_frame_legacy_root
read_frame_root_multiple

write_frame_root
read_frame
Expand Down
89 changes: 89 additions & 0 deletions tests/read_frame_root_multiple.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#include "read_frame.h"

#include "podio/ROOTFrameReader.h"

int read_frames(podio::ROOTFrameReader& reader) {
if (reader.currentFileVersion() != podio::version::build_version) {
std::cerr << "The podio build version could not be read back correctly. "
<< "(expected:" << podio::version::build_version << ", actual: " << reader.currentFileVersion() << ")"
<< std::endl;
return 1;
}

if (reader.getEntries("events") != 20) {
std::cerr << "Could not read back the number of events correctly. "
<< "(expected:" << 20 << ", actual: " << reader.getEntries("events") << ")" << std::endl;
return 1;
}

if (reader.getEntries("events") != reader.getEntries("other_events")) {
std::cerr << "Could not read back the number of events correctly. "
<< "(expected:" << 20 << ", actual: " << reader.getEntries("other_events") << ")" << std::endl;
return 1;
}

// Read the frames in a different order than when writing them here to make
// sure that the writing/reading order does not impose any usage requirements
for (size_t i = 0; i < reader.getEntries("events"); ++i) {
auto frame = podio::Frame(reader.readNextEntry("events"));
if (frame.get("emptySubsetColl") == nullptr) {
std::cerr << "Could not retrieve an empty subset collection" << std::endl;
return 1;
}
if (frame.get("emptyCollection") == nullptr) {
std::cerr << "Could not retrieve an empty collection" << std::endl;
return 1;
}

processEvent(frame, (i % 10), reader.currentFileVersion());

auto otherFrame = podio::Frame(reader.readNextEntry("other_events"));
processEvent(otherFrame, (i % 10) + 100, reader.currentFileVersion());
// The other_events category also holds external collections
processExtensions(otherFrame, (i % 10) + 100, reader.currentFileVersion());
}

if (reader.readNextEntry("events")) {
std::cerr << "Trying to read more frame data than is present should return a nullptr" << std::endl;
return 1;
}

std::cout << "========================================================\n" << std::endl;
if (reader.readNextEntry("not_present")) {
std::cerr << "Trying to read non-existant frame data should return a nullptr" << std::endl;
return 1;
}

// Reading specific (jumping to) entry
{
auto frame = podio::Frame(reader.readEntry("events", 4));
processEvent(frame, 4, reader.currentFileVersion());
// Reading the next entry after jump, continues from after the jump
auto nextFrame = podio::Frame(reader.readNextEntry("events"));
processEvent(nextFrame, 5, reader.currentFileVersion());

// Jump over a file boundary and make sure that works
auto otherFrame = podio::Frame(reader.readEntry("other_events", 14));
processEvent(otherFrame, 4 + 100, reader.currentFileVersion());
processExtensions(otherFrame, 4 + 100, reader.currentFileVersion());

// Jumping back also works
auto previousFrame = podio::Frame(reader.readEntry("other_events", 2));
processEvent(previousFrame, 2 + 100, reader.currentFileVersion());
processExtensions(previousFrame, 2 + 100, reader.currentFileVersion());
}

// Trying to read a Frame that is not present returns a nullptr
if (reader.readEntry("events", 30)) {
std::cerr << "Trying to read a specific entry that does not exist should return a nullptr" << std::endl;
return 1;
}

return 0;
}

int main() {
auto reader = podio::ROOTFrameReader();
reader.openFiles({"example_frame.root", "example_frame.root"});
return read_frames(reader);
}