Skip to content

Commit

Permalink
Make it possible to write different categories with ROOT
Browse files Browse the repository at this point in the history
  • Loading branch information
tmadlener committed May 30, 2022
1 parent 0ccd019 commit acb2e41
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 80 deletions.
59 changes: 34 additions & 25 deletions include/podio/ROOTFrameWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>

// forward declarations
Expand All @@ -27,42 +28,50 @@ class ROOTFrameWriter {
ROOTFrameWriter(const ROOTFrameWriter&) = delete;
ROOTFrameWriter& operator=(const ROOTFrameWriter&) = delete;

void writeFrame(const podio::Frame& frame);

/// Register a collection to be written (without check if it is actually present!)
void registerForWrite(const std::string& name);
/** Store the given Frame with the given category. Store only the
* collections that are passed.
*
* NOTE: The contents of the first Frame that is written in this way
* determine the contents that will be written for all subsequent Frames.
*/
void writeFrame(const podio::Frame& frame, const std::string& category, const std::vector<std::string>& collsToWrite);

/** Write the current file, including all the necessary metadata to read it again.
*/
void finish();

private:
using StoreCollection = std::pair<const std::string&, podio::CollectionBase*>;

std::tuple<TTree*, std::vector<root_utils::CollectionBranches>>
initTree(const std::vector<StoreCollection>& collections, /*const*/ podio::GenericParameters& parameters,
podio::CollectionIDTable&& idTable, const std::string& category);
// collectionID, collectionType, subsetCollection
// NOTE: same as in rootUtils.h private header!
using CollectionInfoT = std::tuple<int, std::string, bool>;

/**
 * Bundles all the state that is needed to write / process one category.
 * It is set up the first time the category is written.
 */
struct CategoryInfo {
/// The TTree to which this category is written
TTree* tree = nullptr;
/// The branches for this category
std::vector<root_utils::CollectionBranches> branches{};
/// Collection info for this category
std::vector<CollectionInfoT> collInfo{};
/// The collection id table for this category
podio::CollectionIDTable idTable{};
/// The collections to write for this category
std::vector<std::string> collsToWrite{};
};

/// Initialize the branches for this category
void initBranches(CategoryInfo& catInfo, const std::vector<StoreCollection>& collections,
/*const*/ podio::GenericParameters& parameters);

/// Get the (potentially uninitialized) category information for this category
CategoryInfo& getCategoryInfo(const std::string& category);

static void resetBranches(std::vector<root_utils::CollectionBranches>& branches,
const std::vector<ROOTFrameWriter::StoreCollection>& collections,
/*const*/ podio::GenericParameters* parameters);

std::unique_ptr<TFile> m_file{nullptr}; ///< The storage file
TTree* m_dataTree{nullptr}; ///< Collection data and parameters for each frame

/// The tree to store all the necessary metadata to read this file again
TTree* m_metaTree{nullptr};

std::vector<std::string> m_collsToWrite{}; ///< The collections that should be written

/// Cached branches for writing collections
std::vector<root_utils::CollectionBranches> m_collectionBranches{};

podio::CollectionIDTable m_idTable{}; ///< The collection id table

// collectionID, collectionType, subsetCollection
// NOTE: same as in rootUtils.h private header!
using CollectionInfoT = std::tuple<int, std::string, bool>;

std::vector<CollectionInfoT> m_collectionInfo{};
std::unique_ptr<TFile> m_file{nullptr}; ///< The storage file
std::unordered_map<std::string, CategoryInfo> m_categories{}; ///< All categories
};

} // namespace podio
Expand Down
95 changes: 49 additions & 46 deletions src/ROOTFrameWriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#include "podio/CollectionBase.h"
#include "podio/Frame.h"
#include "podio/GenericParameters.h"

#include "podio/podioVersion.h"

#include "rootUtils.h"

#include "TTree.h"
Expand All @@ -12,52 +12,52 @@ namespace podio {

ROOTFrameWriter::ROOTFrameWriter(const std::string& filename) {
m_file = std::make_unique<TFile>(filename.c_str(), "recreate");

m_metaTree = new TTree(root_utils::metaTreeName, "metadata tree for podio I/O functionality");
m_metaTree->SetDirectory(m_file.get());
}

void ROOTFrameWriter::writeFrame(const podio::Frame& frame) {
std::vector<StoreCollection> collections;
collections.reserve(m_collsToWrite.size());
void ROOTFrameWriter::writeFrame(const podio::Frame& frame, const std::string& category,
const std::vector<std::string>& collsToWrite) {
auto& catInfo = getCategoryInfo(category);
// Use the TTree as proxy here to decide whether this category has already
// been initialized
if (catInfo.tree == nullptr) {
catInfo.idTable = std::move(frame.getCollectionIDTableForWrite());
catInfo.collsToWrite = collsToWrite;
catInfo.tree = new TTree(category.c_str(), (category + " data tree").c_str());
catInfo.tree->SetDirectory(m_file.get());
}

for (const auto& name : m_collsToWrite) {
std::vector<StoreCollection> collections;
collections.reserve(catInfo.collsToWrite.size());
for (const auto& name : catInfo.collsToWrite) {
auto* coll = frame.getCollectionForWrite(name);
collections.emplace_back(name, const_cast<podio::CollectionBase*>(coll));
}

if (!m_dataTree) {
// If we do not have a data tree yet, initialize it and create all the necessary branches in the process
std::tie(m_dataTree, m_collectionBranches) =
initTree(collections, const_cast<podio::GenericParameters&>(frame.getGenericParametersForWrite()),
frame.getCollectionIDTableForWrite(), "events");
// We will at least have a parameters branch, even if there are no
// collections
if (catInfo.branches.empty()) {
initBranches(catInfo, collections, const_cast<podio::GenericParameters&>(frame.getGenericParametersForWrite()));

} else {
// TODO: Can this be done without the const_cast? (also above)
resetBranches(m_collectionBranches, collections,
resetBranches(catInfo.branches, collections,
&const_cast<podio::GenericParameters&>(frame.getGenericParametersForWrite()));
}

m_dataTree->Fill();
catInfo.tree->Fill();
}

std::tuple<TTree*, std::vector<root_utils::CollectionBranches>>
ROOTFrameWriter::initTree(const std::vector<StoreCollection>& collections,
/*const*/ podio::GenericParameters& parameters, podio::CollectionIDTable&& idTable,
const std::string& category) {
auto* tree = new TTree(category.c_str(), (category + " data tree").c_str());
tree->SetDirectory(m_file.get());

// create the necessary branches for storing the metadata and connect them to
// the book-keeping members
m_idTable = std::move(idTable);
m_metaTree->Branch(root_utils::idTableName(category).c_str(), &m_idTable);
ROOTFrameWriter::CategoryInfo& ROOTFrameWriter::getCategoryInfo(const std::string& category) {
if (auto it = m_categories.find(category); it != m_categories.end()) {
return it->second;
}

m_collectionInfo.reserve(collections.size());
m_metaTree->Branch(root_utils::collInfoName(category).c_str(), &m_collectionInfo);
auto [it, _] = m_categories.try_emplace(category, CategoryInfo{});
return it->second;
}

auto allBranches = std::vector<root_utils::CollectionBranches>{};
allBranches.reserve(collections.size() + 1); // collections + parameters
void ROOTFrameWriter::initBranches(CategoryInfo& catInfo, const std::vector<StoreCollection>& collections,
/*const*/ podio::GenericParameters& parameters) {
catInfo.branches.reserve(collections.size() + 1); // collections + parameters

// First collections
for (auto& [name, coll] : collections) {
Expand All @@ -67,15 +67,15 @@ ROOTFrameWriter::initTree(const std::vector<StoreCollection>& collections,
// data buffer branch, only for non-subset collections
if (buffers.data) {
auto bufferDataType = "vector<" + coll->getDataTypeName() + ">";
branches.data = tree->Branch(name.c_str(), bufferDataType.c_str(), buffers.data);
branches.data = catInfo.tree->Branch(name.c_str(), bufferDataType.c_str(), buffers.data);
}

// reference collections
if (auto refColls = buffers.references) {
int i = 0;
for (auto& c : (*refColls)) {
const auto brName = root_utils::refBranch(name, i++);
branches.refs.push_back(tree->Branch(brName.c_str(), c.get()));
branches.refs.push_back(catInfo.tree->Branch(brName.c_str(), c.get()));
}
}

Expand All @@ -85,20 +85,18 @@ ROOTFrameWriter::initTree(const std::vector<StoreCollection>& collections,
for (auto& [type, vec] : (*vmInfo)) {
const auto typeName = "vector<" + type + ">";
const auto brName = root_utils::vecBranch(name, i++);
branches.vecs.push_back(tree->Branch(brName.c_str(), typeName.c_str(), vec));
branches.vecs.push_back(catInfo.tree->Branch(brName.c_str(), typeName.c_str(), vec));
}
}

allBranches.push_back(branches);
m_collectionInfo.emplace_back(m_idTable.collectionID(name), coll->getTypeName(), coll->isSubsetCollection());
catInfo.branches.push_back(branches);
catInfo.collInfo.emplace_back(catInfo.idTable.collectionID(name), coll->getTypeName(), coll->isSubsetCollection());
}

// Also make branches for the parameters
root_utils::CollectionBranches branches;
branches.data = tree->Branch(root_utils::paramBranchName, &parameters);
allBranches.push_back(branches);

return {tree, allBranches};
branches.data = catInfo.tree->Branch(root_utils::paramBranchName, &parameters);
catInfo.branches.push_back(branches);
}

void ROOTFrameWriter::resetBranches(std::vector<root_utils::CollectionBranches>& branches,
Expand All @@ -114,16 +112,21 @@ void ROOTFrameWriter::resetBranches(std::vector<root_utils::CollectionBranches>&
branches.back().data->SetAddress(&parameters);
}

void ROOTFrameWriter::registerForWrite(const std::string& name) {
m_collsToWrite.push_back(name);
}

void ROOTFrameWriter::finish() {
auto* metaTree = new TTree(root_utils::metaTreeName, "metadata tree for podio I/O functionality");
metaTree->SetDirectory(m_file.get());

// Store the collection id table and collection info for reading in the meta tree
for (/*const*/ auto& [category, info] : m_categories) {
metaTree->Branch(root_utils::idTableName(category).c_str(), &info.idTable);
metaTree->Branch(root_utils::collInfoName(category).c_str(), &info.collInfo);
}

// Store the current podio build version into the meta data tree
auto podioVersion = podio::version::build_version;
m_metaTree->Branch(root_utils::versionBranchName, &podioVersion);
metaTree->Branch(root_utils::versionBranchName, &podioVersion);

m_metaTree->Fill();
metaTree->Fill();

m_file->Write();
m_file->Close();
Expand Down
5 changes: 5 additions & 0 deletions tests/read_frame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,10 @@ int main() {
processEvent(frame, i, reader.currentFileVersion());
}

for (size_t i = 0; i < reader.getEntries("other_events"); ++i) {
auto frame = podio::Frame(reader.readNextEvent("other_events"));
processEvent(frame, i + 100, reader.currentFileVersion());
}

return 0;
}
17 changes: 8 additions & 9 deletions tests/write_frame_root.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,25 @@

#include "podio/ROOTFrameWriter.h"

#include <array>
#include <string_view>
#include <string>
#include <vector>

static constexpr std::array<std::string_view, 20> collsToWrite = {
static const std::vector<std::string> collsToWrite = {
"mcparticles", "moreMCs", "arrays", "mcParticleRefs", "strings", "hits",
"hitRefs", "refs", "refs2", "clusters", "OneRelation", "info",
"WithVectorMember", "fixedWidthInts", "userInts", "userDoubles"};

int main(int, char**) {
auto writer = podio::ROOTFrameWriter("example_frame.root");

for (const auto n : collsToWrite) {
if (!n.empty()) {
writer.registerForWrite(std::string(n));
}
for (int i = 0; i < 10; ++i) {
auto frame = makeFrame(i);
writer.writeFrame(frame, "events", collsToWrite);
}

for (int i = 0; i < 10; ++i) {
for (int i = 100; i < 111; ++i) {
auto frame = makeFrame(i);
writer.writeFrame(frame);
writer.writeFrame(frame, "other_events", collsToWrite);
}

writer.finish();
Expand Down

0 comments on commit acb2e41

Please sign in to comment.