Skip to content

Commit

Permalink
Fix reading of multiple files with ROOTFrameReader (#417)
Browse files Browse the repository at this point in the history
* Add documentation for opening of files API

* Fix seg fault when reading from multiple files

* Add tests and properly fix file switching

* Ignore new test in sanitizer runs
  • Loading branch information
tmadlener authored May 23, 2023
1 parent 7596120 commit fec8609
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 15 deletions.
27 changes: 25 additions & 2 deletions include/podio/ROOTFrameReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,27 @@ class ROOTFrameReader {
ROOTFrameReader(const ROOTFrameReader&) = delete;
ROOTFrameReader& operator=(const ROOTFrameReader&) = delete;

/**
* Open a single file for reading.
*
* @param filename The name of the input file
*/
void openFile(const std::string& filename);

/**
* Open multiple files for reading and then treat them as if they are one file
*
* NOTE: All of the files are assumed to have the same structure. Specifically
* this means:
* - The same categories are available from all files
* - The collections that are contained in the individual categories are the
* same across all files
*
* This usually boils down to "the files have been written with the same
* settings", e.g. they are outputs of a batched process.
*
* @param filenames The filenames of all input files that should be read
*/
void openFiles(const std::vector<std::string>& filenames);

/**
Expand Down Expand Up @@ -125,7 +144,10 @@ class ROOTFrameReader {
*/
CategoryInfo& getCategoryInfo(const std::string& name);

GenericParameters readEventMetaData(CategoryInfo& catInfo);
/**
* Read the parameters for the entry specified in the passed CategoryInfo
*/
GenericParameters readEntryParameters(CategoryInfo& catInfo, bool reloadBranches, unsigned int localEntry);

/**
* Read the data entry specified in the passed CategoryInfo, and increase the
Expand All @@ -137,7 +159,8 @@ class ROOTFrameReader {
/**
* Get / read the buffers at index iColl in the passed category information
*/
podio::CollectionReadBuffers getCollectionBuffers(CategoryInfo& catInfo, size_t iColl);
podio::CollectionReadBuffers getCollectionBuffers(CategoryInfo& catInfo, size_t iColl, bool reloadBranches,
unsigned int localEntry);

std::unique_ptr<TChain> m_metaChain{nullptr}; ///< The metadata tree
std::unordered_map<std::string, CategoryInfo> m_categories{}; ///< All categories
Expand Down
36 changes: 24 additions & 12 deletions src/ROOTFrameReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,22 @@ std::tuple<std::vector<root_utils::CollectionBranches>, std::vector<std::pair<st
createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable,
const std::vector<root_utils::CollectionInfoT>& collInfo);

GenericParameters ROOTFrameReader::readEventMetaData(ROOTFrameReader::CategoryInfo& catInfo) {
GenericParameters ROOTFrameReader::readEntryParameters(ROOTFrameReader::CategoryInfo& catInfo, bool reloadBranches,
unsigned int localEntry) {
// Parameter branch is always the last one
auto& paramBranches = catInfo.branches.back();

// Make sure to have a valid branch pointer after switching trees in the chain
// as well as on the first event
if (reloadBranches) {
paramBranches.data = root_utils::getBranch(catInfo.chain.get(), root_utils::paramBranchName);
}
auto* branch = paramBranches.data;

GenericParameters params;
auto* emd = &params;
branch->SetAddress(&emd);
branch->GetEntry(catInfo.entry);
branch->GetEntry(localEntry);
return params;
}

Expand All @@ -53,19 +60,29 @@ std::unique_ptr<ROOTFrameData> ROOTFrameReader::readEntry(ROOTFrameReader::Categ
return nullptr;
}

// After switching trees in the chain, branch pointers get invalidated so
// they need to be reassigned.
// NOTE: root 6.22/06 requires that we get completely new branches here,
// with 6.20/04 we could just re-set them
const auto preTreeNo = catInfo.chain->GetTreeNumber();
const auto localEntry = catInfo.chain->LoadTree(catInfo.entry);
const auto treeChange = catInfo.chain->GetTreeNumber() != preTreeNo;
// Also need to make sure to handle the first event
const auto reloadBranches = treeChange || localEntry == 0;

ROOTFrameData::BufferMap buffers;
for (size_t i = 0; i < catInfo.storedClasses.size(); ++i) {
buffers.emplace(catInfo.storedClasses[i].first, getCollectionBuffers(catInfo, i));
buffers.emplace(catInfo.storedClasses[i].first, getCollectionBuffers(catInfo, i, reloadBranches, localEntry));
}

auto parameters = readEventMetaData(catInfo);
auto parameters = readEntryParameters(catInfo, reloadBranches, localEntry);

catInfo.entry++;
return std::make_unique<ROOTFrameData>(std::move(buffers), catInfo.table, std::move(parameters));
}

podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameReader::CategoryInfo& catInfo,
size_t iColl) {
podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameReader::CategoryInfo& catInfo, size_t iColl,
bool reloadBranches, unsigned int localEntry) {
const auto& name = catInfo.storedClasses[iColl].first;
const auto& [collType, isSubsetColl, schemaVersion, index] = catInfo.storedClasses[iColl].second;
auto& branches = catInfo.branches[index];
Expand All @@ -76,12 +93,7 @@ podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameRead
// TODO: Error handling of empty optional
auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{});

const auto localEntry = catInfo.chain->LoadTree(catInfo.entry);
// After switching trees in the chain, branch pointers get invalidated so
// they need to be reassigned.
// NOTE: root 6.22/06 requires that we get completely new branches here,
// with 6.20/04 we could just re-set them
if (localEntry == 0) {
if (reloadBranches) {
branches.data = root_utils::getBranch(catInfo.chain.get(), name.c_str());

// reference collections
Expand Down
5 changes: 4 additions & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ set(root_dependent_tests
read_timed.cpp
read_frame.cpp
write_frame_root.cpp
read_frame_legacy_root.cpp)
read_frame_legacy_root.cpp
read_frame_root_multiple.cpp
)
set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioRootIO)
foreach( sourcefile ${root_dependent_tests} )
CREATE_PODIO_TEST(${sourcefile} "${root_libs}")
Expand Down Expand Up @@ -168,6 +170,7 @@ set_property(TEST read_and_write PROPERTY DEPENDS write)
set_property(TEST read_frame_legacy_root PROPERTY DEPENDS write)
set_property(TEST read_timed PROPERTY DEPENDS write_timed)
set_property(TEST read_frame PROPERTY DEPENDS write_frame_root)
set_property(TEST read_frame_root_multiple PROPERTY DEPENDS write_frame_root)

add_executable(check_benchmark_outputs check_benchmark_outputs.cpp)
target_link_libraries(check_benchmark_outputs PRIVATE ROOT::Tree)
Expand Down
1 change: 1 addition & 0 deletions tests/CTestCustom.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ
read-multiple
read-legacy-files
read_frame_legacy_root
read_frame_root_multiple

write_frame_root
read_frame
Expand Down
89 changes: 89 additions & 0 deletions tests/read_frame_root_multiple.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#include "read_frame.h"

#include "podio/ROOTFrameReader.h"

int read_frames(podio::ROOTFrameReader& reader) {
if (reader.currentFileVersion() != podio::version::build_version) {
std::cerr << "The podio build version could not be read back correctly. "
<< "(expected:" << podio::version::build_version << ", actual: " << reader.currentFileVersion() << ")"
<< std::endl;
return 1;
}

if (reader.getEntries("events") != 20) {
std::cerr << "Could not read back the number of events correctly. "
<< "(expected:" << 20 << ", actual: " << reader.getEntries("events") << ")" << std::endl;
return 1;
}

if (reader.getEntries("events") != reader.getEntries("other_events")) {
std::cerr << "Could not read back the number of events correctly. "
<< "(expected:" << 20 << ", actual: " << reader.getEntries("other_events") << ")" << std::endl;
return 1;
}

// Read the frames in a different order than when writing them here to make
// sure that the writing/reading order does not impose any usage requirements
for (size_t i = 0; i < reader.getEntries("events"); ++i) {
auto frame = podio::Frame(reader.readNextEntry("events"));
if (frame.get("emptySubsetColl") == nullptr) {
std::cerr << "Could not retrieve an empty subset collection" << std::endl;
return 1;
}
if (frame.get("emptyCollection") == nullptr) {
std::cerr << "Could not retrieve an empty collection" << std::endl;
return 1;
}

processEvent(frame, (i % 10), reader.currentFileVersion());

auto otherFrame = podio::Frame(reader.readNextEntry("other_events"));
processEvent(otherFrame, (i % 10) + 100, reader.currentFileVersion());
// The other_events category also holds external collections
processExtensions(otherFrame, (i % 10) + 100, reader.currentFileVersion());
}

if (reader.readNextEntry("events")) {
std::cerr << "Trying to read more frame data than is present should return a nullptr" << std::endl;
return 1;
}

std::cout << "========================================================\n" << std::endl;
if (reader.readNextEntry("not_present")) {
std::cerr << "Trying to read non-existant frame data should return a nullptr" << std::endl;
return 1;
}

// Reading specific (jumping to) entry
{
auto frame = podio::Frame(reader.readEntry("events", 4));
processEvent(frame, 4, reader.currentFileVersion());
// Reading the next entry after jump, continues from after the jump
auto nextFrame = podio::Frame(reader.readNextEntry("events"));
processEvent(nextFrame, 5, reader.currentFileVersion());

// Jump over a file boundary and make sure that works
auto otherFrame = podio::Frame(reader.readEntry("other_events", 14));
processEvent(otherFrame, 4 + 100, reader.currentFileVersion());
processExtensions(otherFrame, 4 + 100, reader.currentFileVersion());

// Jumping back also works
auto previousFrame = podio::Frame(reader.readEntry("other_events", 2));
processEvent(previousFrame, 2 + 100, reader.currentFileVersion());
processExtensions(previousFrame, 2 + 100, reader.currentFileVersion());
}

// Trying to read a Frame that is not present returns a nullptr
if (reader.readEntry("events", 30)) {
std::cerr << "Trying to read a specific entry that does not exist should return a nullptr" << std::endl;
return 1;
}

return 0;
}

int main() {
auto reader = podio::ROOTFrameReader();
reader.openFiles({"example_frame.root", "example_frame.root"});
return read_frames(reader);
}

0 comments on commit fec8609

Please sign in to comment.