From 230e83aa6f887b91a4f9371067a5c70647236a2e Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 31 Jul 2024 11:50:40 +0200 Subject: [PATCH 01/18] Add the possibility to specify a version header and variable --- include/podio/DatamodelRegistry.h | 10 +++++++++ python/podio_gen/cpp_generator.py | 1 + python/podio_gen/generator_utils.py | 2 ++ python/podio_gen/podio_config_reader.py | 21 ++++++++++++++++++- python/templates/DatamodelDefinition.h.jinja2 | 14 ++++++++++++- src/DatamodelRegistry.cc | 15 +++++++++++++ tests/datalayout.yaml | 4 ++++ tests/datamodel/version.h | 20 ++++++++++++++++++ 8 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 tests/datamodel/version.h diff --git a/include/podio/DatamodelRegistry.h b/include/podio/DatamodelRegistry.h index cbfa294cd..760adff19 100644 --- a/include/podio/DatamodelRegistry.h +++ b/include/podio/DatamodelRegistry.h @@ -1,6 +1,9 @@ #ifndef PODIO_DATAMODELREGISTRY_H #define PODIO_DATAMODELREGISTRY_H +#include "podio/podioVersion.h" + +#include #include #include #include @@ -97,6 +100,8 @@ class DatamodelRegistry { /// @returns The name of the datamodel const std::string& getDatamodelName(size_t index) const; + std::optional getDatamodelVersion(const std::string& name) const; + /// Register a datamodel and return its index in the registry. /// /// This is the hook that is called during dynamic loading of an EDM to @@ -114,6 +119,9 @@ class DatamodelRegistry { size_t registerDatamodel(std::string name, std::string_view definition, const podio::RelationNameMapping& relationNames); + size_t registerDatamodel(std::string name, std::string_view definition, + const podio::RelationNameMapping& relationNames, podio::version::Version version); + /// Get the names of the relations and vector members of a datatype RelationNames getRelationNames(std::string_view typeName) const; @@ -123,6 +131,8 @@ class DatamodelRegistry { std::vector> m_definitions{}; std::unordered_map m_relations{}; + + std::unordered_map m_datamodelVersions{}; }; } // namespace podio diff --git a/python/podio_gen/cpp_generator.py b/python/podio_gen/cpp_generator.py index bfc16e50e..b1fb9ceaa 100644 --- a/python/podio_gen/cpp_generator.py +++ b/python/podio_gen/cpp_generator.py @@ -511,6 +511,7 @@ def _write_edm_def_file(self): "incfolder": self.incfolder, "schema_version": self.datamodel.schema_version, "datatypes": self.datamodel.datatypes, + "version_info": self.datamodel.version_info, } def quoted_sv(string): diff --git a/python/podio_gen/generator_utils.py b/python/podio_gen/generator_utils.py index b3dd2ceb0..cdfbe9067 100644 --- a/python/podio_gen/generator_utils.py +++ b/python/podio_gen/generator_utils.py @@ -304,6 +304,7 @@ def __init__( interfaces=None, options=None, schema_version=None, + version_info=None, ): self.options = options or { # should getters / setters be prefixed with get / set? @@ -315,6 +316,7 @@ def __init__( "includeSubfolder": False, } self.schema_version = schema_version + self.version_info = version_info self.components = components or {} self.datatypes = datatypes or {} self.interfaces = interfaces or {} diff --git a/python/podio_gen/podio_config_reader.py b/python/podio_gen/podio_config_reader.py index 247ec3214..281a98075 100644 --- a/python/podio_gen/podio_config_reader.py +++ b/python/podio_gen/podio_config_reader.py @@ -531,6 +531,23 @@ def parse_model(cls, model_dict, package_name, upstream_edm=None, parent_path=No f"schema_version has to be convertible to int (is {model_dict['schema_version']})" ) + try: + version_info = model_dict["version_info"] + try: + for key in ("variable", "header"): + if not isinstance(version_info[key], str): + raise DefinitionError( + f"'version_info:{key} needs to be a single string, but is {version_info[key]}" + ) + except KeyError: + raise DefinitionError( + "'version_info' needs to define a 'header' and a 'variable' to be valid" + f" but it defines {model_dict['version_info']}" + ) + except KeyError: + version_info = None + pass + components = {} if "components" in model_dict: for klassname, value in model_dict["components"].items(): @@ -559,7 +576,9 @@ def parse_model(cls, model_dict, package_name, upstream_edm=None, parent_path=No # If this doesn't raise an exception everything should in principle work out validator = ClassDefinitionValidator() - datamodel = DataModel(datatypes, components, interfaces, options, schema_version) + datamodel = DataModel( + datatypes, components, interfaces, options, schema_version, version_info + ) validator.validate(datamodel, upstream_edm) return datamodel diff --git a/python/templates/DatamodelDefinition.h.jinja2 b/python/templates/DatamodelDefinition.h.jinja2 index ce4cefb41..de5622e2b 100644 --- a/python/templates/DatamodelDefinition.h.jinja2 +++ b/python/templates/DatamodelDefinition.h.jinja2 @@ -3,6 +3,10 @@ #include "podio/DatamodelRegistry.h" #include "podio/SchemaEvolution.h" +{% if version_info %} +#include "{{ version_info.header }}" +{% endif %} + namespace {{ package_name }}::meta { /** * The complete definition of the datamodel at generation time in JSON format. @@ -41,7 +45,15 @@ class DatamodelRegistryIndex { public: static size_t value() { static const auto relationNames = {{ package_name }}__getRelationNames(); - static auto index = DatamodelRegistryIndex(podio::DatamodelRegistry::mutInstance().registerDatamodel("{{ package_name }}", {{ package_name }}__JSONDefinition, relationNames)); + static auto index = + DatamodelRegistryIndex(podio::DatamodelRegistry::mutInstance().registerDatamodel( + "{{ package_name }}", + {{ package_name }}__JSONDefinition, + relationNames +{% if version_info %} + , podio::version::Version({{ version_info.variable }}) +{% endif %} + )); return index.m_value; } private: diff --git a/src/DatamodelRegistry.cc b/src/DatamodelRegistry.cc index 16e387132..c15194d7d 100644 --- a/src/DatamodelRegistry.cc +++ b/src/DatamodelRegistry.cc @@ -35,6 +35,14 @@ size_t DatamodelRegistry::registerDatamodel(std::string name, std::string_view d return std::distance(m_definitions.cbegin(), it); } +size_t DatamodelRegistry::registerDatamodel(std::string name, std::string_view definition, + const podio::RelationNameMapping& relationNames, + podio::version::Version version) { + auto index = registerDatamodel(name, definition, relationNames); + m_datamodelVersions.emplace(name, version); + return index; +} + const std::string_view DatamodelRegistry::getDatamodelDefinition(std::string_view name) const { const auto it = std::find_if(m_definitions.cbegin(), m_definitions.cend(), [&name](const auto& kvPair) { return kvPair.first == name; }); @@ -84,4 +92,11 @@ RelationNames DatamodelRegistry::getRelationNames(std::string_view typeName) con return {emptyVec, emptyVec}; } +std::optional DatamodelRegistry::getDatamodelVersion(const std::string& name) const { + if (const auto it = m_datamodelVersions.find(name); it != m_datamodelVersions.end()) { + return it->second; + } + return std::nullopt; +} + } // namespace podio diff --git a/tests/datalayout.yaml b/tests/datalayout.yaml index 06cd5ebee..ef2a8e06e 100644 --- a/tests/datalayout.yaml +++ b/tests/datalayout.yaml @@ -1,6 +1,10 @@ --- schema_version : 2 +version_info: + header: "datamodel/version.h" + variable: "datamodel::ver::version" + options : # should getters / setters be prefixed with get / set? getSyntax: False diff --git a/tests/datamodel/version.h b/tests/datamodel/version.h new file mode 100644 index 000000000..020da77db --- /dev/null +++ b/tests/datamodel/version.h @@ -0,0 +1,20 @@ +#ifndef DATAMODEL_VERSION_H +#define DATAMODEL_VERSION_H + +#include "podio/podioVersion.h" + +namespace datamodel::ver { +struct V { + int major{0}; + int minor{0}; + int patch{0}; + + explicit constexpr operator podio::version::Version() const noexcept { + return {static_cast(major), static_cast(minor), static_cast(patch)}; + } +}; + +constexpr static auto version = V{42, 123, 255}; + +} // namespace datamodel::ver +#endif From 5fc147592a35bc2e39c13741d9159849eb336b0e Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 31 Jul 2024 14:52:03 +0200 Subject: [PATCH 02/18] Add reading back version to test --- tests/read_frame.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/read_frame.h b/tests/read_frame.h index bde5e2d28..f6bb1d3f3 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -3,6 +3,7 @@ #include "datamodel/ExampleWithInterfaceRelationCollection.h" #include "datamodel/ExampleWithVectorMemberCollection.h" +#include "datamodel/version.h" #include "read_test.h" #include "extension_model/ContainedTypeCollection.h" @@ -114,6 +115,14 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { return 1; } + const auto datamodelVersion = reader.currentFileVersion("datamodel").value_or(podio::version::Version{}); + if (assertBuildVersion && datamodelVersion != podio::version::Version(datamodel::ver::version)) { + std::cerr << "The (build) version of the datamodel could not be read back correctly. " + << "(expected: " << podio::version::Version(datamodel::ver::version) << ", actual: " << datamodelVersion + << ")" << std::endl; + return 1; + } + if (reader.getEntries(podio::Category::Event) != 10) { std::cerr << "Could not read back the number of events correctly. " << "(expected:" << 10 << ", actual: " << reader.getEntries(podio::Category::Event) << ")" << std::endl; From 545148131c38a895b4d0607b2e6822a2b6a3b147 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 31 Jul 2024 17:40:42 +0200 Subject: [PATCH 03/18] Make the readers and writers handle datamodel versions --- include/podio/RNTupleReader.h | 4 ++ include/podio/ROOTReader.h | 4 ++ include/podio/Reader.h | 9 +++ include/podio/SIOBlock.h | 58 ++++++++++++++++++- include/podio/SIOReader.h | 4 ++ .../utilities/DatamodelRegistryIOHelpers.h | 11 +++- src/DatamodelRegistryIOHelpers.cc | 10 ++++ src/RNTupleReader.cc | 11 +++- src/RNTupleWriter.cc | 8 +++ src/ROOTReader.cc | 14 ++++- src/ROOTWriter.cc | 13 +++++ src/SIOReader.cc | 7 ++- src/SIOWriter.cc | 16 ++++- src/rootUtils.h | 6 ++ tests/read_frame.h | 6 ++ 15 files changed, 171 insertions(+), 10 deletions(-) diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index 373e7c45f..d9c4e8597 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -96,6 +96,10 @@ class RNTupleReader { return m_fileVersion; } + std::optional currentFileVersion(const std::string& name) const { + return m_datamodelHolder.getDatamodelVersion(name); + } + /// Get the datamodel definition for the given name /// /// @param name The name of the datamodel diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index e6ecbd0e0..1baf0c1d6 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -104,6 +104,10 @@ class ROOTReader { return m_fileVersion; } + std::optional currentFileVersion(const std::string& name) const { + return m_datamodelHolder.getDatamodelVersion(name); + } + /// Get the names of all the available Frame categories in the current file(s). /// /// @returns The names of the available categories from the file diff --git a/include/podio/Reader.h b/include/podio/Reader.h index ee8eaa682..9f13faab3 100644 --- a/include/podio/Reader.h +++ b/include/podio/Reader.h @@ -26,6 +26,7 @@ class Reader { virtual podio::Frame readFrame(const std::string& name, size_t index) = 0; virtual size_t getEntries(const std::string& name) const = 0; virtual podio::version::Version currentFileVersion() const = 0; + virtual std::optional currentFileVersion(const std::string& name) const = 0; virtual std::vector getAvailableCategories() const = 0; virtual const std::string_view getDatamodelDefinition(const std::string& name) const = 0; virtual std::vector getAvailableDatamodels() const = 0; @@ -66,6 +67,10 @@ class Reader { return m_reader->currentFileVersion(); } + std::optional currentFileVersion(const std::string& name) const override { + return m_reader->currentFileVersion(name); + } + std::vector getAvailableCategories() const override { return m_reader->getAvailableCategories(); } @@ -166,6 +171,10 @@ class Reader { return m_self->currentFileVersion(); } + std::optional currentFileVersion(const std::string& name) const { + return m_self->currentFileVersion(name); + } + /// Get the names of all the available Frame categories in the current file(s). /// /// @returns The names of the available categories from the file diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index 3ae41651e..494db96b4 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -16,14 +16,15 @@ #include #include #include +#include #include namespace podio { template -void handlePODDataSIO(devT& device, PODData* data, size_t size) { +void handlePODDataSIO(devT& device, const PODData* data, size_t size) { unsigned count = size * sizeof(PODData); - char* dataPtr = reinterpret_cast(data); + auto* dataPtr = reinterpret_cast(const_cast(data)); device.data(dataPtr, count); } @@ -33,7 +34,12 @@ void writeMapLike(sio::write_device& device, const MapLikeT& map) { device.data((int)map.size()); for (const auto& [key, value] : map) { device.data(key); - device.data(value); + using MappedType = detail::GetMappedType; + if constexpr (std::is_trivial_v) { + handlePODDataSIO(device, &value, 1); + } else { + device.data(value); + } } } @@ -189,6 +195,52 @@ struct SIOMapBlock : public sio::block { std::vector> mapData{}; }; +namespace detail { + inline std::string sioMapBlockNameImpl(std::string keyTName, std::string valueTName) { + std::replace(keyTName.begin(), keyTName.end(), ':', '_'); + std::replace(valueTName.begin(), valueTName.end(), ':', '_'); + return "SIOMapBlockV2_KK_" + keyTName + "_VV_" + valueTName; + } + + template + inline std::string sioMapBlockName(); + +#define SIOMAPBLOCK_NAME(key_type, value_type) \ + template <> \ + inline std::string sioMapBlockName() { \ + return sioMapBlockNameImpl(#key_type, #value_type); \ + } + + SIOMAPBLOCK_NAME(std::string, std::string); + SIOMAPBLOCK_NAME(std::string, podio::version::Version); +#undef SIOMAPBLOCK_NAME +} // namespace detail + +/// A block to serialize anything that behaves similar in iterating as a +/// map, e.g. vector>, which is what is used +/// internally to represent the data to be written. +template +struct SIOMapBlockV2 : public sio::block { + SIOMapBlockV2() : sio::block(detail::sioMapBlockName(), sio::version::encode_version(0, 1)) { + } + SIOMapBlockV2(std::vector>&& data) : + sio::block(detail::sioMapBlockName(), sio::version::encode_version(0, 1)), + mapData(std::move(data)) { + } + + SIOMapBlockV2(const SIOMapBlockV2&) = delete; + SIOMapBlockV2& operator=(const SIOMapBlockV2&) = delete; + + void read(sio::read_device& device, sio::version_type) override { + readMapLike(device, mapData); + } + void write(sio::write_device& device) override { + writeMapLike(device, mapData); + } + + std::vector> mapData{}; +}; + /// A block for handling the run and collection meta data class SIONumberedMetaDataBlock : public sio::block { public: diff --git a/include/podio/SIOReader.h b/include/podio/SIOReader.h index 80970abbb..a1c64ce2b 100644 --- a/include/podio/SIOReader.h +++ b/include/podio/SIOReader.h @@ -73,6 +73,10 @@ class SIOReader { return m_fileVersion; } + std::optional currentFileVersion(const std::string& name) const { + return m_datamodelHolder.getDatamodelVersion(name); + } + /// Get the names of all the available Frame categories in the current file. /// /// @returns The names of the available categores from the file diff --git a/include/podio/utilities/DatamodelRegistryIOHelpers.h b/include/podio/utilities/DatamodelRegistryIOHelpers.h index 77416a839..de847ce42 100644 --- a/include/podio/utilities/DatamodelRegistryIOHelpers.h +++ b/include/podio/utilities/DatamodelRegistryIOHelpers.h @@ -35,8 +35,12 @@ class DatamodelDefinitionHolder { public: /// The "map" type that is used internally using MapType = std::vector>; - /// Constructor from an existing collection of names and datamodel definitions - DatamodelDefinitionHolder(MapType&& definitions) : m_availEDMDefs(std::move(definitions)) { + /// The "map" mapping names and datamodel versions (where available) + using VersionList = std::vector>; + + /// Constructor from an existing collection of names and datamodel definitions and versions + DatamodelDefinitionHolder(MapType&& definitions, VersionList&& versions) : + m_availEDMDefs(std::move(definitions)), m_edmVersions(std::move(versions)) { } DatamodelDefinitionHolder() = default; @@ -57,8 +61,11 @@ class DatamodelDefinitionHolder { /// Get all names of the datamodels that have been read from file std::vector getAvailableDatamodels() const; + std::optional getDatamodelVersion(const std::string& name) const; + protected: MapType m_availEDMDefs{}; + VersionList m_edmVersions{}; }; } // namespace podio diff --git a/src/DatamodelRegistryIOHelpers.cc b/src/DatamodelRegistryIOHelpers.cc index 1e7573a1f..8c0dde068 100644 --- a/src/DatamodelRegistryIOHelpers.cc +++ b/src/DatamodelRegistryIOHelpers.cc @@ -47,4 +47,14 @@ std::vector DatamodelDefinitionHolder::getAvailableDatamodels() con return defs; } +std::optional DatamodelDefinitionHolder::getDatamodelVersion(const std::string& name) const { + const auto it = std::find_if(m_edmVersions.begin(), m_edmVersions.end(), + [&name](const auto& entry) { return std::get<0>(entry) == name; }); + if (it != m_edmVersions.end()) { + return std::get<1>(*it); + } + + return std::nullopt; +} + } // namespace podio diff --git a/src/RNTupleReader.cc b/src/RNTupleReader.cc index d391e578a..6e0afe446 100644 --- a/src/RNTupleReader.cc +++ b/src/RNTupleReader.cc @@ -84,7 +84,16 @@ void RNTupleReader::openFiles(const std::vector& filenames) { auto edmView = m_metadata->GetView>>(root_utils::edmDefBranchName); auto edm = edmView(0); - m_datamodelHolder = DatamodelDefinitionHolder(std::move(edm)); + DatamodelDefinitionHolder::VersionList edmVersions{}; + for (const auto& [name, _] : edm) { + try { + auto edmVersionView = m_metadata->GetView>(root_utils::edmVersionBranchName(name)); + auto edmVersion = edmVersionView(0); + edmVersions.emplace_back(name, podio::version::Version{edmVersion[0], edmVersion[1], edmVersion[2]}); + } catch (const ROOT::Experimental::RException&) { + } + } + m_datamodelHolder = DatamodelDefinitionHolder(std::move(edm), std::move(edmVersions)); auto availableCategoriesField = m_metadata->GetView>(root_utils::availableCategories); m_availableCategories = availableCategoriesField(0); diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index 0af6b1bd9..73c5db22d 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -263,6 +263,14 @@ void RNTupleWriter::finish() { *versionField = {podioVersion.major, podioVersion.minor, podioVersion.patch}; auto edmDefinitions = m_datamodelCollector.getDatamodelDefinitionsToWrite(); + for (const auto& [name, _] : edmDefinitions) { + auto edmVersion = DatamodelRegistry::instance().getDatamodelVersion(name); + if (edmVersion) { + auto edmVersionField = metadata->MakeField>(root_utils::edmVersionBranchName(name).c_str()); + *edmVersionField = {edmVersion->major, edmVersion->minor, edmVersion->patch}; + } + } + auto edmField = metadata->MakeField>>(root_utils::edmDefBranchName); *edmField = std::move(edmDefinitions); diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 1b4971601..4d541e0f0 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -262,7 +262,19 @@ void ROOTReader::openFiles(const std::vector& filenames) { auto* datamodelDefs = new DatamodelDefinitionHolder::MapType{}; edmDefBranch->SetAddress(&datamodelDefs); edmDefBranch->GetEntry(0); - m_datamodelHolder = DatamodelDefinitionHolder(std::move(*datamodelDefs)); + + DatamodelDefinitionHolder::VersionList edmVersions{}; + for (const auto& [name, _] : *datamodelDefs) { + if (auto* edmVersionBranch = root_utils::getBranch(m_metaChain.get(), root_utils::edmVersionBranchName(name))) { + auto* edmVersion = new podio::version::Version{}; + edmVersionBranch->SetAddress(&edmVersion); + edmVersionBranch->GetEntry(0); + edmVersions.emplace_back(name, *edmVersion); + delete edmVersion; + } + } + + m_datamodelHolder = DatamodelDefinitionHolder(std::move(*datamodelDefs), std::move(edmVersions)); delete datamodelDefs; } diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index 33f02741e..21e2998db 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -4,6 +4,7 @@ #include "podio/GenericParameters.h" #include "podio/podioVersion.h" +#include "podio/utilities/DatamodelRegistryIOHelpers.h" #include "rootUtils.h" #include "TTree.h" @@ -185,6 +186,18 @@ void ROOTWriter::finish() { auto edmDefinitions = m_datamodelCollector.getDatamodelDefinitionsToWrite(); metaTree->Branch(root_utils::edmDefBranchName, &edmDefinitions); + // Collect the (build) versions of the generated datamodels where available + DatamodelDefinitionHolder::VersionList edmVersions; + for (const auto& [name, _] : edmDefinitions) { + auto edmVersion = podio::DatamodelRegistry::instance().getDatamodelVersion(name); + if (edmVersion) { + edmVersions.emplace_back(name, edmVersion.value()); + } + } + for (auto& [name, version] : edmVersions) { + metaTree->Branch(root_utils::edmVersionBranchName(name).c_str(), &version); + } + metaTree->Fill(); m_file->Write(); diff --git a/src/SIOReader.cc b/src/SIOReader.cc index 2609d611f..151890f0f 100644 --- a/src/SIOReader.cc +++ b/src/SIOReader.cc @@ -119,11 +119,14 @@ void SIOReader::readEDMDefinitions() { const auto& [buffer, _] = sio_utils::readRecord(m_stream); sio::block_list blocks; - blocks.emplace_back(std::make_shared>()); + blocks.emplace_back(std::make_shared>()); + blocks.emplace_back(std::make_shared>()); + sio::api::read_blocks(buffer.span(), blocks); auto datamodelDefs = static_cast*>(blocks[0].get()); - m_datamodelHolder = DatamodelDefinitionHolder(std::move(datamodelDefs->mapData)); + auto edmVersions = static_cast*>(blocks[1].get()); + m_datamodelHolder = DatamodelDefinitionHolder(std::move(datamodelDefs->mapData), std::move(edmVersions->mapData)); } } // namespace podio diff --git a/src/SIOWriter.cc b/src/SIOWriter.cc index aef6e52e6..d65b05996 100644 --- a/src/SIOWriter.cc +++ b/src/SIOWriter.cc @@ -2,6 +2,7 @@ #include "podio/Frame.h" #include "podio/SIOBlock.h" +#include "podio/utilities/DatamodelRegistryIOHelpers.h" #include "sioUtils.h" #include @@ -54,11 +55,24 @@ void SIOWriter::writeFrame(const podio::Frame& frame, const std::string& categor } void SIOWriter::finish() { - auto edmDefMap = std::make_shared>( + auto edmDefMap = std::make_shared>( m_datamodelCollector.getDatamodelDefinitionsToWrite()); sio::block_list blocks; blocks.push_back(edmDefMap); + + DatamodelDefinitionHolder::VersionList edmVersions; + for (const auto& [name, _] : edmDefMap->mapData) { + auto edmVersion = podio::DatamodelRegistry::instance().getDatamodelVersion(name); + if (edmVersion) { + edmVersions.emplace_back(name, edmVersion.value()); + } + } + + auto edmVersionMap = + std::make_shared>(std::move(edmVersions)); + blocks.push_back(edmVersionMap); + m_tocRecord.addRecord(sio_helpers::SIOEDMDefinitionName, sio_utils::writeRecord(blocks, "EDMDefinitions", m_stream)); blocks.clear(); diff --git a/src/rootUtils.h b/src/rootUtils.h index a473c6f0c..03d6a386e 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -140,6 +140,12 @@ constexpr static auto versionBranchName = "PodioBuildVersion"; */ constexpr static auto edmDefBranchName = "EDMDefinitions"; +/// The name of the branch used for storing the version of a generated datamodel +/// (if available) +inline std::string edmVersionBranchName(const std::string& edmname) { + return edmname + "___Version"; +} + /** * Name of the branch for storing the idTable for a given category in the meta * data tree diff --git a/tests/read_frame.h b/tests/read_frame.h index f6bb1d3f3..ac48b7761 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -123,6 +123,12 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { return 1; } + const auto extensionModelVersion = reader.currentFileVersion("extension_model"); + if (extensionModelVersion) { + std::cerr << "The (build) version of the extension model was available althought it shouldn't be. Its value is " + << extensionModelVersion.value() << std::endl; + } + if (reader.getEntries(podio::Category::Event) != 10) { std::cerr << "Could not read back the number of events correctly. " << "(expected:" << 10 << ", actual: " << reader.getEntries(podio::Category::Event) << ")" << std::endl; From 0ca3aa846ac49722e87bed57e610903b95c4dbc5 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 31 Jul 2024 17:43:54 +0200 Subject: [PATCH 04/18] Reuse existing type but give it new block name --- include/podio/SIOBlock.h | 36 ++++++------------------------------ src/SIOReader.cc | 4 ++-- src/SIOWriter.cc | 4 ++-- 3 files changed, 10 insertions(+), 34 deletions(-) diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index 494db96b4..047cb13c6 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -171,30 +171,6 @@ class SIOEventMetaDataBlock : public sio::block { podio::GenericParameters* metadata{nullptr}; }; -/// A block to serialize anything that behaves similar in iterating as a -/// map, e.g. vector>, which is what is used -/// internally to represent the data to be written. -template -struct SIOMapBlock : public sio::block { - SIOMapBlock() : sio::block("SIOMapBlock", sio::version::encode_version(0, 1)) { - } - SIOMapBlock(std::vector>&& data) : - sio::block("SIOMapBlock", sio::version::encode_version(0, 1)), mapData(std::move(data)) { - } - - SIOMapBlock(const SIOMapBlock&) = delete; - SIOMapBlock& operator=(const SIOMapBlock&) = delete; - - void read(sio::read_device& device, sio::version_type) override { - readMapLike(device, mapData); - } - void write(sio::write_device& device) override { - writeMapLike(device, mapData); - } - - std::vector> mapData{}; -}; - namespace detail { inline std::string sioMapBlockNameImpl(std::string keyTName, std::string valueTName) { std::replace(keyTName.begin(), keyTName.end(), ':', '_'); @@ -220,16 +196,16 @@ namespace detail { /// map, e.g. vector>, which is what is used /// internally to represent the data to be written. template -struct SIOMapBlockV2 : public sio::block { - SIOMapBlockV2() : sio::block(detail::sioMapBlockName(), sio::version::encode_version(0, 1)) { +struct SIOMapBlock : public sio::block { + SIOMapBlock() : sio::block(detail::sioMapBlockName(), sio::version::encode_version(0, 2)) { } - SIOMapBlockV2(std::vector>&& data) : - sio::block(detail::sioMapBlockName(), sio::version::encode_version(0, 1)), + SIOMapBlock(std::vector>&& data) : + sio::block(detail::sioMapBlockName(), sio::version::encode_version(0, 2)), mapData(std::move(data)) { } - SIOMapBlockV2(const SIOMapBlockV2&) = delete; - SIOMapBlockV2& operator=(const SIOMapBlockV2&) = delete; + SIOMapBlock(const SIOMapBlock&) = delete; + SIOMapBlock& operator=(const SIOMapBlock&) = delete; void read(sio::read_device& device, sio::version_type) override { readMapLike(device, mapData); diff --git a/src/SIOReader.cc b/src/SIOReader.cc index 151890f0f..a54e5670b 100644 --- a/src/SIOReader.cc +++ b/src/SIOReader.cc @@ -119,8 +119,8 @@ void SIOReader::readEDMDefinitions() { const auto& [buffer, _] = sio_utils::readRecord(m_stream); sio::block_list blocks; - blocks.emplace_back(std::make_shared>()); - blocks.emplace_back(std::make_shared>()); + blocks.emplace_back(std::make_shared>()); + blocks.emplace_back(std::make_shared>()); sio::api::read_blocks(buffer.span(), blocks); diff --git a/src/SIOWriter.cc b/src/SIOWriter.cc index d65b05996..ebc7edbab 100644 --- a/src/SIOWriter.cc +++ b/src/SIOWriter.cc @@ -55,7 +55,7 @@ void SIOWriter::writeFrame(const podio::Frame& frame, const std::string& categor } void SIOWriter::finish() { - auto edmDefMap = std::make_shared>( + auto edmDefMap = std::make_shared>( m_datamodelCollector.getDatamodelDefinitionsToWrite()); sio::block_list blocks; @@ -70,7 +70,7 @@ void SIOWriter::finish() { } auto edmVersionMap = - std::make_shared>(std::move(edmVersions)); + std::make_shared>(std::move(edmVersions)); blocks.push_back(edmVersionMap); m_tocRecord.addRecord(sio_helpers::SIOEDMDefinitionName, sio_utils::writeRecord(blocks, "EDMDefinitions", m_stream)); From a95dbeb53cf0b15e54d402b2074ae97a438b1ae5 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 31 Jul 2024 17:54:32 +0200 Subject: [PATCH 05/18] Make sure to not pollute the available categories --- src/ROOTReader.cc | 2 +- tests/read_frame.h | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 4d541e0f0..7b0373ba4 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -222,7 +222,7 @@ std::vector getAvailableCategories(TChain* metaChain) { for (int i = 0; i < branches->GetEntries(); ++i) { const std::string name = branches->At(i)->GetName(); - const auto fUnder = name.find("___"); + const auto fUnder = name.find(root_utils::idTableName("")); if (fUnder != std::string::npos) { brNames.emplace_back(name.substr(0, fUnder)); } diff --git a/tests/read_frame.h b/tests/read_frame.h index ac48b7761..a0899580c 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -129,6 +129,19 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { << extensionModelVersion.value() << std::endl; } + const auto availableCategories = reader.getAvailableCategories(); + if (availableCategories.size() != 2) { + std::cerr << "More categories than expected!" << std::endl; + return 1; + } + if (std::find(availableCategories.begin(), availableCategories.end(), "events") == availableCategories.end() || + std::find(availableCategories.begin(), availableCategories.end(), "other_events") == availableCategories.end()) { + std::cerr << "Could not read back the available categories as expected! (expected: ['events', 'other_events']), " + "actual: ['" + << availableCategories[0] << "', '" << availableCategories[1] << "']" << std::endl; + return 1; + } + if (reader.getEntries(podio::Category::Event) != 10) { std::cerr << "Could not read back the number of events correctly. " << "(expected:" << 10 << ", actual: " << reader.getEntries(podio::Category::Event) << ")" << std::endl; From f694bffd0ea6c0f6c895cb1697dc34866fc78957 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 31 Jul 2024 17:56:40 +0200 Subject: [PATCH 06/18] Add docstrings --- include/podio/RNTupleReader.h | 7 +++++++ include/podio/ROOTReader.h | 7 +++++++ include/podio/Reader.h | 7 +++++++ include/podio/SIOReader.h | 7 +++++++ 4 files changed, 28 insertions(+) diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index d9c4e8597..36248c5ca 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -96,6 +96,13 @@ class RNTupleReader { return m_fileVersion; } + /// Get the (build) version of a datamodel that has been used to write the + /// current file + /// + /// @param name The name of the datamodel + /// + /// @returns The (build) version of the datamodel if available or an empty + /// optional std::optional currentFileVersion(const std::string& name) const { return m_datamodelHolder.getDatamodelVersion(name); } diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index 1baf0c1d6..7167411ec 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -104,6 +104,13 @@ class ROOTReader { return m_fileVersion; } + /// Get the (build) version of a datamodel that has been used to write the + /// current file + /// + /// @param name The name of the datamodel + /// + /// @returns The (build) version of the datamodel if available or an empty + /// optional std::optional currentFileVersion(const std::string& name) const { return m_datamodelHolder.getDatamodelVersion(name); } diff --git a/include/podio/Reader.h b/include/podio/Reader.h index 9f13faab3..09b00161d 100644 --- a/include/podio/Reader.h +++ b/include/podio/Reader.h @@ -171,6 +171,13 @@ class Reader { return m_self->currentFileVersion(); } + /// Get the (build) version of a datamodel that has been used to write the + /// current file + /// + /// @param name The name of the datamodel + /// + /// @returns The (build) version of the datamodel if available or an empty + /// optional std::optional currentFileVersion(const std::string& name) const { return m_self->currentFileVersion(name); } diff --git a/include/podio/SIOReader.h b/include/podio/SIOReader.h index a1c64ce2b..f9f5ba02a 100644 --- a/include/podio/SIOReader.h +++ b/include/podio/SIOReader.h @@ -73,6 +73,13 @@ class SIOReader { return m_fileVersion; } + /// Get the (build) version of a datamodel that has been used to write the + /// current file + /// + /// @param name The name of the datamodel + /// + /// @returns The (build) version of the datamodel if available or an empty + /// optional std::optional currentFileVersion(const std::string& name) const { return m_datamodelHolder.getDatamodelVersion(name); } From a8b0c903308257fdc6b02c0920d34a5117e79d21 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 31 Jul 2024 18:05:46 +0200 Subject: [PATCH 07/18] Add documentation for new version_info section --- doc/datamodel_syntax.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/doc/datamodel_syntax.md b/doc/datamodel_syntax.md index d16be965d..4a754acb4 100644 --- a/doc/datamodel_syntax.md +++ b/doc/datamodel_syntax.md @@ -206,6 +206,32 @@ Some customization of the generated code is possible through flags. These flags - `getSyntax`: steers the naming of get and set methods. If set to true, methods are prefixed with `get` and `set` following the capitalized member name, otherwise the member name is used for both. - `exposePODMembers`: whether get and set methods are also generated for members of a member-component. In the example corresponding methods would be generated to directly set / get `x` through `ExampleType`. +## Embedding a datamodel version +Each datamodel definition needs a schema version. However, in the case of podio +this schema version is a single integer. This makes it rather hard to use in +typical versioning, where one might differentiate between *major*, *minor* (and +*patch*) versions. Hence, the versioning of a datamodel and its schema version +are coupled but do not necessarily have to be the same. podio offers hooks to +store this important meta information into the produce files. In order to do +this you can use the `version_info` section: + +```yaml +version_info: + header: "datamodel/version.h" + variable: "datamodel::version::build_version" +``` + +It takes two parameters +- `header`: This is the header file where the version is stored +- `variable`: This is the variable (inside the `header` from header file) that + defines the (build) version. It has to be convertible to a + `podio::version::Version` + +If this part is found in the YAML file this information will be injected into +the podio internals and will be stored in the output files. They can be +retrieved via the `currentFileVersion(const std::string&)` methods of the +various readers. + ## Extending a datamodel / using types from an upstream datamodel It is possible to extend another datamodel with your own types, resp. use some datatypes or components from an upstream datamodel in your own datamodel. From b90bc7f0918bf57d025660521ab11f06484f9b79 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 31 Jul 2024 18:16:17 +0200 Subject: [PATCH 08/18] Remove superfluous semicolons --- include/podio/SIOBlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index 047cb13c6..051d06f9e 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -187,8 +187,8 @@ namespace detail { return sioMapBlockNameImpl(#key_type, #value_type); \ } - SIOMAPBLOCK_NAME(std::string, std::string); - SIOMAPBLOCK_NAME(std::string, podio::version::Version); + SIOMAPBLOCK_NAME(std::string, std::string) + SIOMAPBLOCK_NAME(std::string, podio::version::Version) #undef SIOMAPBLOCK_NAME } // namespace detail From 73bf1fadd30b885ee8a1a12829f29d537fe48879 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 31 Jul 2024 19:15:47 +0200 Subject: [PATCH 09/18] Make sure to only encode version_info in json if available --- python/podio_gen/generator_utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/podio_gen/generator_utils.py b/python/podio_gen/generator_utils.py index cdfbe9067..6bd6e0f48 100644 --- a/python/podio_gen/generator_utils.py +++ b/python/podio_gen/generator_utils.py @@ -5,6 +5,7 @@ import re import json +from copy import deepcopy def _get_namespace_class(full_type): @@ -324,7 +325,11 @@ def __init__( def _to_json(self): """Return the dictionary, so that we can easily hook this into the pythons JSON ecosystem""" - return self.__dict__ + definition = deepcopy(self.__dict__) + # Only dump the version information if it's populated + if definition["version_info"] is None: + del definition["version_info"] + return definition class DataModelJSONEncoder(json.JSONEncoder): From 74f1b5ce03b7ce0b7a786cad5634e8bb4cf86864 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 28 Aug 2024 11:25:57 +0200 Subject: [PATCH 10/18] Make datamodel version a command line argument to the generator --- cmake/podioMacros.cmake | 10 ++++++-- python/podio_class_generator.py | 23 ++++++++++++++++++- python/podio_gen/cpp_generator.py | 14 +++++++++-- python/podio_gen/generator_base.py | 12 +++++++++- python/podio_gen/generator_utils.py | 8 +------ python/podio_gen/podio_config_reader.py | 21 +---------------- python/templates/DatamodelDefinition.h.jinja2 | 8 ++----- tests/CMakeLists.txt | 2 +- tests/datalayout.yaml | 4 ---- tests/datamodel/version.h | 20 ---------------- tests/read_frame.h | 6 ++--- 11 files changed, 60 insertions(+), 68 deletions(-) delete mode 100644 tests/datamodel/version.h diff --git a/cmake/podioMacros.cmake b/cmake/podioMacros.cmake index b9a892fc3..d816bb406 100644 --- a/cmake/podioMacros.cmake +++ b/cmake/podioMacros.cmake @@ -131,13 +131,14 @@ set_property(CACHE PODIO_USE_CLANG_FORMAT PROPERTY STRINGS AUTO ON OFF) # LANG OPTIONAL: The programming language choice # Default is cpp # DEPENDS OPTIONAL: List of files to be added as configure dependencies of the datamodel +# VERSION OPTIONAL: The version of the datamodel (which does not have to be the schema version!) # ) # # Note that the create_${datamodel} target will always be called, but if the YAML_FILE has not changed # this is essentially a no-op, and should not cause re-compilation. #--------------------------------------------------------------------------------------------------- function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOURCES) - CMAKE_PARSE_ARGUMENTS(ARG "" "OLD_DESCRIPTION;OUTPUT_FOLDER;UPSTREAM_EDM;SCHEMA_EVOLUTION" "IO_BACKEND_HANDLERS;LANG;DEPENDS" ${ARGN}) + CMAKE_PARSE_ARGUMENTS(ARG "" "OLD_DESCRIPTION;OUTPUT_FOLDER;UPSTREAM_EDM;SCHEMA_EVOLUTION" "IO_BACKEND_HANDLERS;LANG;DEPENDS;VERSION" ${ARGN}) IF(NOT ARG_OUTPUT_FOLDER) SET(ARG_OUTPUT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR}) ENDIF() @@ -196,6 +197,11 @@ function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOUR endif() endif() + set(VERSION_ARG "") + if (ARG_VERSION) + set(VERSION_ARG "--datamodel-version=${ARG_VERSION}") + endif() + # Make sure that we re run the generation process every time either the # templates or the yaml file changes. include(${podio_PYTHON_DIR}/templates/CMakeLists.txt) @@ -215,7 +221,7 @@ function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOUR message(STATUS "Creating '${datamodel}' datamodel") # we need to bootstrap the data model, so this has to be executed in the cmake run execute_process( - COMMAND ${Python_EXECUTABLE} ${podio_PYTHON_DIR}/podio_class_generator.py ${CLANG_FORMAT_ARG} ${OLD_DESCRIPTION_ARG} ${SCHEMA_EVOLUTION_ARG} ${UPSTREAM_EDM_ARG} ${YAML_FILE} ${ARG_OUTPUT_FOLDER} ${datamodel} ${ARG_IO_BACKEND_HANDLERS} ${LANGUAGE_ARG} + COMMAND ${Python_EXECUTABLE} ${podio_PYTHON_DIR}/podio_class_generator.py ${CLANG_FORMAT_ARG} ${OLD_DESCRIPTION_ARG} ${SCHEMA_EVOLUTION_ARG} ${UPSTREAM_EDM_ARG} ${YAML_FILE} ${ARG_OUTPUT_FOLDER} ${datamodel} ${ARG_IO_BACKEND_HANDLERS} ${LANGUAGE_ARG} ${VERSION_ARG} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} RESULT_VARIABLE podio_generate_command_retval ) diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 9254ec155..2d44a4154 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -4,7 +4,7 @@ import os import subprocess - +import re from podio_gen.podio_config_reader import PodioConfigReader from podio_gen.generator_utils import DefinitionError @@ -72,6 +72,20 @@ def read_upstream_edm(name_path): ) from err +def parse_version(version_str): + """Parse the version into a tuple of (major, minor, patch) from the passed + version string. + """ + if version_str is None: + return None + + if re.match(r"v?(\d+)(\.|-)(\d+)((\.|-)(\d+))?$", version_str): + ver = version_str.replace("-", ".").replace("v", "").split(".") + return tuple(int(v) for v in ver) + + raise argparse.ArgumentTypeError(f"{version_str} cannot be parsed as a valid version") + + if __name__ == "__main__": import argparse @@ -147,6 +161,12 @@ def read_upstream_edm(name_path): default=None, action="store", ) + parser.add_argument( + "--datamodel-version", + help="The version string of the generated datamodel", + default=None, + type=parse_version, + ) args = parser.parse_args() @@ -176,6 +196,7 @@ def read_upstream_edm(name_path): verbose=args.verbose, dryrun=args.dryrun, upstream_edm=args.upstream_edm, + datamodel_version=args.datamodel_version, old_description=args.old_description, evolution_file=args.evolution_file, ) diff --git a/python/podio_gen/cpp_generator.py b/python/podio_gen/cpp_generator.py index b1fb9ceaa..47837bd29 100644 --- a/python/podio_gen/cpp_generator.py +++ b/python/podio_gen/cpp_generator.py @@ -55,8 +55,17 @@ def __init__( upstream_edm, old_description, evolution_file, + datamodel_version=None, ): - super().__init__(yamlfile, install_dir, package_name, verbose, dryrun, upstream_edm) + super().__init__( + yamlfile, + install_dir, + package_name, + verbose, + dryrun, + upstream_edm, + datamodel_version=datamodel_version, + ) self.io_handlers = io_handlers # schema evolution specific code @@ -505,13 +514,14 @@ def _write_all_collections_header(self): def _write_edm_def_file(self): """Write the edm definition to a compile time string""" model_encoder = DataModelJSONEncoder() + print(f"{self.datamodel_version=}") data = { "package_name": self.package_name, "edm_definition": model_encoder.encode(self.datamodel), "incfolder": self.incfolder, "schema_version": self.datamodel.schema_version, "datatypes": self.datamodel.datatypes, - "version_info": self.datamodel.version_info, + "datamodel_version": self.datamodel_version, } def quoted_sv(string): diff --git a/python/podio_gen/generator_base.py b/python/podio_gen/generator_base.py index e772d1c98..36f55cb32 100644 --- a/python/podio_gen/generator_base.py +++ b/python/podio_gen/generator_base.py @@ -107,13 +107,23 @@ class ClassGeneratorBaseMixin: """ - def __init__(self, yamlfile, install_dir, package_name, verbose, dryrun, upstream_edm): + def __init__( + self, + yamlfile, + install_dir, + package_name, + verbose, + dryrun, + upstream_edm, + datamodel_version=None, + ): self.yamlfile = yamlfile self.install_dir = install_dir self.package_name = package_name self.verbose = verbose self.dryrun = dryrun self.upstream_edm = upstream_edm + self.datamodel_version = datamodel_version try: self.datamodel = PodioConfigReader.read(yamlfile, package_name, upstream_edm) diff --git a/python/podio_gen/generator_utils.py b/python/podio_gen/generator_utils.py index 6bd6e0f48..abad8488e 100644 --- a/python/podio_gen/generator_utils.py +++ b/python/podio_gen/generator_utils.py @@ -305,7 +305,6 @@ def __init__( interfaces=None, options=None, schema_version=None, - version_info=None, ): self.options = options or { # should getters / setters be prefixed with get / set? @@ -317,7 +316,6 @@ def __init__( "includeSubfolder": False, } self.schema_version = schema_version - self.version_info = version_info self.components = components or {} self.datatypes = datatypes or {} self.interfaces = interfaces or {} @@ -325,11 +323,7 @@ def __init__( def _to_json(self): """Return the dictionary, so that we can easily hook this into the pythons JSON ecosystem""" - definition = deepcopy(self.__dict__) - # Only dump the version information if it's populated - if definition["version_info"] is None: - del definition["version_info"] - return definition + return self.__dict__ class DataModelJSONEncoder(json.JSONEncoder): diff --git a/python/podio_gen/podio_config_reader.py b/python/podio_gen/podio_config_reader.py index 281a98075..247ec3214 100644 --- a/python/podio_gen/podio_config_reader.py +++ b/python/podio_gen/podio_config_reader.py @@ -531,23 +531,6 @@ def parse_model(cls, model_dict, package_name, upstream_edm=None, parent_path=No f"schema_version has to be convertible to int (is {model_dict['schema_version']})" ) - try: - version_info = model_dict["version_info"] - try: - for key in ("variable", "header"): - if not isinstance(version_info[key], str): - raise DefinitionError( - f"'version_info:{key} needs to be a single string, but is {version_info[key]}" - ) - except KeyError: - raise DefinitionError( - "'version_info' needs to define a 'header' and a 'variable' to be valid" - f" but it defines {model_dict['version_info']}" - ) - except KeyError: - version_info = None - pass - components = {} if "components" in model_dict: for klassname, value in model_dict["components"].items(): @@ -576,9 +559,7 @@ def parse_model(cls, model_dict, package_name, upstream_edm=None, parent_path=No # If this doesn't raise an exception everything should in principle work out validator = ClassDefinitionValidator() - datamodel = DataModel( - datatypes, components, interfaces, options, schema_version, version_info - ) + datamodel = DataModel(datatypes, components, interfaces, options, schema_version) validator.validate(datamodel, upstream_edm) return datamodel diff --git a/python/templates/DatamodelDefinition.h.jinja2 b/python/templates/DatamodelDefinition.h.jinja2 index de5622e2b..e56410faf 100644 --- a/python/templates/DatamodelDefinition.h.jinja2 +++ b/python/templates/DatamodelDefinition.h.jinja2 @@ -3,10 +3,6 @@ #include "podio/DatamodelRegistry.h" #include "podio/SchemaEvolution.h" -{% if version_info %} -#include "{{ version_info.header }}" -{% endif %} - namespace {{ package_name }}::meta { /** * The complete definition of the datamodel at generation time in JSON format. @@ -50,8 +46,8 @@ public: "{{ package_name }}", {{ package_name }}__JSONDefinition, relationNames -{% if version_info %} - , podio::version::Version({{ version_info.variable }}) +{% if datamodel_version %} + , podio::version::Version{ {{ datamodel_version | join(", ") }} } {% endif %} )); return index.m_value; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index fb5739805..1c0345447 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -14,7 +14,7 @@ set(extra_code extra_code/component_declarations.cc ) PODIO_GENERATE_DATAMODEL(datamodel datalayout.yaml headers sources - IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} DEPENDS ${extra_code} + IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} DEPENDS ${extra_code} VERSION ${${PROJECT_NAME}_VERSION} ) # Use the cmake building blocks to add the different parts (conditionally) diff --git a/tests/datalayout.yaml b/tests/datalayout.yaml index ef2a8e06e..06cd5ebee 100644 --- a/tests/datalayout.yaml +++ b/tests/datalayout.yaml @@ -1,10 +1,6 @@ --- schema_version : 2 -version_info: - header: "datamodel/version.h" - variable: "datamodel::ver::version" - options : # should getters / setters be prefixed with get / set? getSyntax: False diff --git a/tests/datamodel/version.h b/tests/datamodel/version.h deleted file mode 100644 index 020da77db..000000000 --- a/tests/datamodel/version.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef DATAMODEL_VERSION_H -#define DATAMODEL_VERSION_H - -#include "podio/podioVersion.h" - -namespace datamodel::ver { -struct V { - int major{0}; - int minor{0}; - int patch{0}; - - explicit constexpr operator podio::version::Version() const noexcept { - return {static_cast(major), static_cast(minor), static_cast(patch)}; - } -}; - -constexpr static auto version = V{42, 123, 255}; - -} // namespace datamodel::ver -#endif diff --git a/tests/read_frame.h b/tests/read_frame.h index a0899580c..6fc835c71 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -3,7 +3,6 @@ #include "datamodel/ExampleWithInterfaceRelationCollection.h" #include "datamodel/ExampleWithVectorMemberCollection.h" -#include "datamodel/version.h" #include "read_test.h" #include "extension_model/ContainedTypeCollection.h" @@ -116,10 +115,9 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { } const auto datamodelVersion = reader.currentFileVersion("datamodel").value_or(podio::version::Version{}); - if (assertBuildVersion && datamodelVersion != podio::version::Version(datamodel::ver::version)) { + if (assertBuildVersion && datamodelVersion != podio::version::build_version) { std::cerr << "The (build) version of the datamodel could not be read back correctly. " - << "(expected: " << podio::version::Version(datamodel::ver::version) << ", actual: " << datamodelVersion - << ")" << std::endl; + << "(expected: " << podio::version::build_version << ", actual: " << datamodelVersion << ")" << std::endl; return 1; } From 161d9e5bce51ae84d62ef40fe0d04d9f99fa0d8b Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 28 Aug 2024 11:31:01 +0200 Subject: [PATCH 11/18] Update documentation --- doc/datamodel_syntax.md | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/doc/datamodel_syntax.md b/doc/datamodel_syntax.md index 4a754acb4..cbda858ce 100644 --- a/doc/datamodel_syntax.md +++ b/doc/datamodel_syntax.md @@ -212,25 +212,16 @@ this schema version is a single integer. This makes it rather hard to use in typical versioning, where one might differentiate between *major*, *minor* (and *patch*) versions. Hence, the versioning of a datamodel and its schema version are coupled but do not necessarily have to be the same. podio offers hooks to -store this important meta information into the produce files. In order to do -this you can use the `version_info` section: - -```yaml -version_info: - header: "datamodel/version.h" - variable: "datamodel::version::build_version" -``` - -It takes two parameters -- `header`: This is the header file where the version is stored -- `variable`: This is the variable (inside the `header` from header file) that - defines the (build) version. It has to be convertible to a - `podio::version::Version` - -If this part is found in the YAML file this information will be injected into -the podio internals and will be stored in the output files. They can be -retrieved via the `currentFileVersion(const std::string&)` methods of the -various readers. +store this important meta information into the produce files. In order to do you +can pass the version of the datamodel to the generator via the +`--datamodel-version` argument. It expects the version to conform to this +regular expression: `"v?\d+(\.|-)\d+((\.|-)\d+)?$"`, i.e. that the major and +minor version are present, separated by either a dot or comma with an optional +patch version and an optional `v` prefix. + +If this this information is passed to the generator will be injected into the +podio internals and will be stored in the output files. They can be retrieved +via the `currentFileVersion(const std::string&)` methods of the various readers. ## Extending a datamodel / using types from an upstream datamodel From 4b8b00b300db460f7a18e96022e997121e5eccfe Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 28 Aug 2024 11:45:19 +0200 Subject: [PATCH 12/18] Make pylint and flake8 happy --- python/podio_gen/cpp_generator.py | 2 +- python/podio_gen/generator_utils.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/podio_gen/cpp_generator.py b/python/podio_gen/cpp_generator.py index 47837bd29..f09898fc4 100644 --- a/python/podio_gen/cpp_generator.py +++ b/python/podio_gen/cpp_generator.py @@ -44,7 +44,7 @@ class IncludeFrom(IntEnum): class CPPClassGenerator(ClassGeneratorBaseMixin): """The c++ class / code generator for podio""" - def __init__( + def __init__( # pylint: disable=too-many-arguments self, yamlfile, install_dir, diff --git a/python/podio_gen/generator_utils.py b/python/podio_gen/generator_utils.py index abad8488e..b3dd2ceb0 100644 --- a/python/podio_gen/generator_utils.py +++ b/python/podio_gen/generator_utils.py @@ -5,7 +5,6 @@ import re import json -from copy import deepcopy def _get_namespace_class(full_type): From cdedde21a4163ba99c2595c8416020b8fd3b705c Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 28 Aug 2024 16:05:31 +0200 Subject: [PATCH 13/18] Make sure roundtrip models also have version info --- tests/dumpmodel/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/dumpmodel/CMakeLists.txt b/tests/dumpmodel/CMakeLists.txt index 6e9dfef1e..4ae61a66a 100644 --- a/tests/dumpmodel/CMakeLists.txt +++ b/tests/dumpmodel/CMakeLists.txt @@ -7,6 +7,7 @@ add_test(NAME datamodel_def_store_roundtrip_root COMMAND ${PROJECT_SOURCE_DIR}/t ${PROJECT_BINARY_DIR}/tests/root_io/example_frame.root datamodel ${PROJECT_SOURCE_DIR}/tests + --datamodel-version=${podio_VERSION} ) PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_root) @@ -34,6 +35,7 @@ if (ENABLE_SIO) ${PROJECT_BINARY_DIR}/tests/sio_io/example_frame.sio datamodel ${PROJECT_SOURCE_DIR}/tests + --datamodel-version=${podio_VERSION} ) PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_sio) # The extension model needs to know about the upstream model for generation @@ -64,6 +66,7 @@ if (ENABLE_RNTUPLE) ${PROJECT_BINARY_DIR}/tests/root_io/example_rntuple.root datamodel ${PROJECT_SOURCE_DIR}/tests + --datamodel-version=${podio_VERSION} ) PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_rntuple) From 4574370b5963cf0d4cbece6ca7dc7d22617061f1 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 28 Aug 2024 16:41:33 +0200 Subject: [PATCH 14/18] Make datamodel versions accessible from python --- python/podio/base_reader.py | 20 ++++++++++++++++++++ python/podio/test_Reader.py | 19 +++++++++++++++++++ tools/podio-dump | 12 ++++++++---- 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/python/podio/base_reader.py b/python/podio/base_reader.py index 7e1ddf2a0..a2c98a692 100644 --- a/python/podio/base_reader.py +++ b/python/podio/base_reader.py @@ -89,3 +89,23 @@ def current_file_version(self): write this file """ return self._reader.currentFileVersion() + + def get_datamodel_version(self, edm_name): + """Get the datamodel version + + Args: + edm_name (str): The package name of the datamodel + + Returns: + podio.version.Version: The version of the datamodel if available + + Raises: + KeyError: If the datamodel does not have a version stored + RuntimeError: If the reader is a legacy reader + """ + if self._is_legacy: + raise RuntimeError("Legacy readers do not store any version info") + maybe_version = self._reader.currentFileVersion(edm_name) + if maybe_version.has_value(): + return maybe_version.value() + raise KeyError(f"No version information available for '{edm_name}'") diff --git a/python/podio/test_Reader.py b/python/podio/test_Reader.py index 1208feace..8324deab1 100644 --- a/python/podio/test_Reader.py +++ b/python/podio/test_Reader.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 """Unit tests for podio readers""" +from podio.version import build_version + class ReaderTestCaseMixin: """Common unittests for readers. @@ -70,6 +72,23 @@ def test_frame_iterator_invalid_category(self): i += 1 self.assertEqual(i, 0) + def test_available_datamodels(self): + """Make sure that the datamodel information can be retrieved""" + datamodels = self.reader.datamodel_definitions + self.assertEqual(len(datamodels), 2) + for model in datamodels: + self.assertTrue(model in ("datamodel", "extension_model")) + + self.assertEqual(self.reader.get_datamodel_version("datamodel"), build_version) + + def test_invalid_datamodel_version(self): + """Make sure that the necessary exceptions are raised""" + with self.assertRaises(KeyError): + self.reader.get_datamodel_version("extension_model") + + with self.assertRaises(KeyError): + self.reader.get_datamodel_version("non-existant-model") + class LegacyReaderTestCaseMixin: """Common test cases for the legacy readers python bindings. diff --git a/tools/podio-dump b/tools/podio-dump index 8379cf102..a896cf3ca 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -26,10 +26,14 @@ def print_general_info(reader, filename): " (written with podio version: " f"{version_as_str(reader.current_file_version())})\n" ) - print( - "datamodel model definitions stored in this file: " - f'{", ".join(reader.datamodel_definitions)}' - ) + + print("datamodel model definitions stored in this file: ") + for edm_name in reader.datamodel_definitions: + try: + edm_version = reader.get_datamodel_version(edm_name) + print(f" - {edm_name} ({version_as_str(edm_version)})") + except KeyError: + print(f" - {edm_name}") print() print("Frame categories in this file:") From 559fa787567d17af4bf18057665bff9ad99f846f Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 5 Sep 2024 20:44:57 +0200 Subject: [PATCH 15/18] Make python API closer in name to c++ API --- python/podio/base_reader.py | 26 +++++++++++++------------- python/podio/test_Reader.py | 6 +++--- tools/podio-dump | 3 ++- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/python/podio/base_reader.py b/python/podio/base_reader.py index a2c98a692..e078aa4ad 100644 --- a/python/podio/base_reader.py +++ b/python/podio/base_reader.py @@ -81,28 +81,28 @@ def get_datamodel_definition(self, edm_name): return "" return self._reader.getDatamodelDefinition(edm_name).data() - def current_file_version(self): - """Get the podio (build) version that was used to write this file + def current_file_version(self, edm_name=None): + """Get the (build) version that was used to write this file - Returns: - podio.version.Version: The build version of podio that was use to - write this file - """ - return self._reader.currentFileVersion() - - def get_datamodel_version(self, edm_name): - """Get the datamodel version + If called without argument or None, the podio build version is returned + otherwise the build version of the datamodel Args: - edm_name (str): The package name of the datamodel + edm_name (str, optional): The package name of the datamodel Returns: - podio.version.Version: The version of the datamodel if available + podio.version.Version: The build version of podio or the build + version of the datamodel (if available) that was used to write + this file Raises: KeyError: If the datamodel does not have a version stored - RuntimeError: If the reader is a legacy reader + RuntimeError: If the reader is a legacy reader and a datamodel + version is requested """ + if edm_name is None: + return self._reader.currentFileVersion() + if self._is_legacy: raise RuntimeError("Legacy readers do not store any version info") maybe_version = self._reader.currentFileVersion(edm_name) diff --git a/python/podio/test_Reader.py b/python/podio/test_Reader.py index 8324deab1..6be55467d 100644 --- a/python/podio/test_Reader.py +++ b/python/podio/test_Reader.py @@ -79,15 +79,15 @@ def test_available_datamodels(self): for model in datamodels: self.assertTrue(model in ("datamodel", "extension_model")) - self.assertEqual(self.reader.get_datamodel_version("datamodel"), build_version) + self.assertEqual(self.reader.current_file_version("datamodel"), build_version) def test_invalid_datamodel_version(self): """Make sure that the necessary exceptions are raised""" with self.assertRaises(KeyError): - self.reader.get_datamodel_version("extension_model") + self.reader.current_file_version("extension_model") with self.assertRaises(KeyError): - self.reader.get_datamodel_version("non-existant-model") + self.reader.current_file_version("non-existant-model") class LegacyReaderTestCaseMixin: diff --git a/tools/podio-dump b/tools/podio-dump index a896cf3ca..f85387711 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -19,6 +19,7 @@ def print_general_info(reader, filename): Args: reader (root_io.Reader, sio_io.Reader): An initialized reader + filename (str): The name of the input file """ legacy_text = " (this is a legacy file!)" if reader.is_legacy else "" print( @@ -30,7 +31,7 @@ def print_general_info(reader, filename): print("datamodel model definitions stored in this file: ") for edm_name in reader.datamodel_definitions: try: - edm_version = reader.get_datamodel_version(edm_name) + edm_version = reader.current_file_version(edm_name) print(f" - {edm_name} ({version_as_str(edm_version)})") except KeyError: print(f" - {edm_name}") From 5ff698f3d40ec42601391b2587163592e2e63a8c Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 5 Sep 2024 20:50:14 +0200 Subject: [PATCH 16/18] Simplify regex for version matching --- doc/datamodel_syntax.md | 2 +- python/podio_class_generator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/datamodel_syntax.md b/doc/datamodel_syntax.md index cbda858ce..e57b9a96b 100644 --- a/doc/datamodel_syntax.md +++ b/doc/datamodel_syntax.md @@ -215,7 +215,7 @@ are coupled but do not necessarily have to be the same. podio offers hooks to store this important meta information into the produce files. In order to do you can pass the version of the datamodel to the generator via the `--datamodel-version` argument. It expects the version to conform to this -regular expression: `"v?\d+(\.|-)\d+((\.|-)\d+)?$"`, i.e. that the major and +regular expression: `"v?\d+[\.|-]\d+([\.|-]\d+)?$"`, i.e. that the major and minor version are present, separated by either a dot or comma with an optional patch version and an optional `v` prefix. diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 2d44a4154..d3d6c62b6 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -79,7 +79,7 @@ def parse_version(version_str): if version_str is None: return None - if re.match(r"v?(\d+)(\.|-)(\d+)((\.|-)(\d+))?$", version_str): + if re.match(r"v?\d+[\.|-]\d+([\.|-]\d+)?$", version_str): ver = version_str.replace("-", ".").replace("v", "").split(".") return tuple(int(v) for v in ver) From 709ca85f63116e2bcdf523651b6dfe65b81a3919 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 5 Sep 2024 21:11:09 +0200 Subject: [PATCH 17/18] Move strings once we are done with them --- src/DatamodelRegistry.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DatamodelRegistry.cc b/src/DatamodelRegistry.cc index c15194d7d..ffd89f1a2 100644 --- a/src/DatamodelRegistry.cc +++ b/src/DatamodelRegistry.cc @@ -22,7 +22,7 @@ size_t DatamodelRegistry::registerDatamodel(std::string name, std::string_view d if (it == m_definitions.cend()) { int index = m_definitions.size(); - m_definitions.emplace_back(name, definition); + m_definitions.emplace_back(std::move(name), definition); for (const auto& [typeName, relations, vectorMembers] : relationNames) { m_relations.emplace(typeName, RelationNames{relations, vectorMembers}); @@ -39,7 +39,7 @@ size_t DatamodelRegistry::registerDatamodel(std::string name, std::string_view d const podio::RelationNameMapping& relationNames, podio::version::Version version) { auto index = registerDatamodel(name, definition, relationNames); - m_datamodelVersions.emplace(name, version); + m_datamodelVersions.emplace(std::move(name), version); return index; } From 9c4ec4cd10b3cd8deabcdf6b32a3f69331b29ddd Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 10 Sep 2024 15:28:07 +0200 Subject: [PATCH 18/18] Fix typo in documentation --- doc/datamodel_syntax.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/datamodel_syntax.md b/doc/datamodel_syntax.md index e57b9a96b..ca44db876 100644 --- a/doc/datamodel_syntax.md +++ b/doc/datamodel_syntax.md @@ -219,7 +219,7 @@ regular expression: `"v?\d+[\.|-]\d+([\.|-]\d+)?$"`, i.e. that the major and minor version are present, separated by either a dot or comma with an optional patch version and an optional `v` prefix. -If this this information is passed to the generator will be injected into the +If this this information is passed to the generator it will be injected into the podio internals and will be stored in the output files. They can be retrieved via the `currentFileVersion(const std::string&)` methods of the various readers.