Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a hook to inject datamodel version information into podio internals #651

Merged
merged 18 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions cmake/podioMacros.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,14 @@ set_property(CACHE PODIO_USE_CLANG_FORMAT PROPERTY STRINGS AUTO ON OFF)
# LANG OPTIONAL: The programming language choice
# Default is cpp
# DEPENDS OPTIONAL: List of files to be added as configure dependencies of the datamodel
# VERSION OPTIONAL: The version of the datamodel (which does not have to be the schema version!)
# )
#
# Note that the create_${datamodel} target will always be called, but if the YAML_FILE has not changed
# this is essentially a no-op, and should not cause re-compilation.
#---------------------------------------------------------------------------------------------------
function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOURCES)
CMAKE_PARSE_ARGUMENTS(ARG "" "OLD_DESCRIPTION;OUTPUT_FOLDER;UPSTREAM_EDM;SCHEMA_EVOLUTION" "IO_BACKEND_HANDLERS;LANG;DEPENDS" ${ARGN})
CMAKE_PARSE_ARGUMENTS(ARG "" "OLD_DESCRIPTION;OUTPUT_FOLDER;UPSTREAM_EDM;SCHEMA_EVOLUTION" "IO_BACKEND_HANDLERS;LANG;DEPENDS;VERSION" ${ARGN})
IF(NOT ARG_OUTPUT_FOLDER)
SET(ARG_OUTPUT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR})
ENDIF()
Expand Down Expand Up @@ -196,6 +197,11 @@ function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOUR
endif()
endif()

# Translate the optional VERSION argument into the corresponding command line
# flag for the class generator. If no VERSION has been passed VERSION_ARG stays
# empty.
set(VERSION_ARG "")
if (ARG_VERSION)
set(VERSION_ARG "--datamodel-version=${ARG_VERSION}")
endif()

# Make sure that we re run the generation process every time either the
# templates or the yaml file changes.
include(${podio_PYTHON_DIR}/templates/CMakeLists.txt)
Expand All @@ -215,7 +221,7 @@ function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOUR
message(STATUS "Creating '${datamodel}' datamodel")
# we need to bootstrap the data model, so this has to be executed in the cmake run
execute_process(
COMMAND ${Python_EXECUTABLE} ${podio_PYTHON_DIR}/podio_class_generator.py ${CLANG_FORMAT_ARG} ${OLD_DESCRIPTION_ARG} ${SCHEMA_EVOLUTION_ARG} ${UPSTREAM_EDM_ARG} ${YAML_FILE} ${ARG_OUTPUT_FOLDER} ${datamodel} ${ARG_IO_BACKEND_HANDLERS} ${LANGUAGE_ARG}
COMMAND ${Python_EXECUTABLE} ${podio_PYTHON_DIR}/podio_class_generator.py ${CLANG_FORMAT_ARG} ${OLD_DESCRIPTION_ARG} ${SCHEMA_EVOLUTION_ARG} ${UPSTREAM_EDM_ARG} ${YAML_FILE} ${ARG_OUTPUT_FOLDER} ${datamodel} ${ARG_IO_BACKEND_HANDLERS} ${LANGUAGE_ARG} ${VERSION_ARG}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE podio_generate_command_retval
)
Expand Down
17 changes: 17 additions & 0 deletions doc/datamodel_syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,23 @@ Some customization of the generated code is possible through flags. These flags
- `getSyntax`: steers the naming of get and set methods. If set to true, methods are prefixed with `get` and `set` following the capitalized member name, otherwise the member name is used for both.
- `exposePODMembers`: whether get and set methods are also generated for members of a member-component. In the example corresponding methods would be generated to directly set / get `x` through `ExampleType`.

## Embedding a datamodel version
Each datamodel definition needs a schema version. However, in the case of podio
this schema version is a single integer. This makes it rather hard to use in
typical versioning, where one might differentiate between *major*, *minor* (and
*patch*) versions. Hence, the versioning of a datamodel and its schema version
are coupled but do not necessarily have to be the same. podio offers hooks to
store this important meta information in the produced files. In order to do so you
can pass the version of the datamodel to the generator via the
`--datamodel-version` argument. It expects the version to conform to this
regular expression: `"v?\d+[\.|-]\d+([\.|-]\d+)?$"`, i.e. that the major and
minor version are present, separated by either a dot or a dash, with an optional
patch version and an optional `v` prefix.

If this information is passed to the generator it will be injected into the
podio internals and will be stored in the output files. It can be retrieved
via the `currentFileVersion(const std::string&)` methods of the various readers.


## Extending a datamodel / using types from an upstream datamodel
It is possible to extend another datamodel with your own types, resp. use some datatypes or components from an upstream datamodel in your own datamodel.
Expand Down
10 changes: 10 additions & 0 deletions include/podio/DatamodelRegistry.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#ifndef PODIO_DATAMODELREGISTRY_H
#define PODIO_DATAMODELREGISTRY_H

#include "podio/podioVersion.h"

#include <optional>
#include <string>
#include <string_view>
#include <tuple>
Expand Down Expand Up @@ -97,6 +100,8 @@ class DatamodelRegistry {
/// @returns The name of the datamodel
const std::string& getDatamodelName(size_t index) const;

/// Get the version of the datamodel with the given name
///
/// @param name The name of the datamodel
///
/// @returns The version of the datamodel or an empty optional.
///          NOTE(review): presumably the optional is empty if no version has
///          been registered for this name - confirm against the implementation
std::optional<podio::version::Version> getDatamodelVersion(const std::string& name) const;

/// Register a datamodel and return its index in the registry.
///
/// This is the hook that is called during dynamic loading of an EDM to
Expand All @@ -114,6 +119,9 @@ class DatamodelRegistry {
size_t registerDatamodel(std::string name, std::string_view definition,
const podio::RelationNameMapping& relationNames);

/// Overload of registerDatamodel that additionally takes the version of the
/// datamodel.
///
/// NOTE(review): presumably this returns the index in the registry like the
/// overload without a version and makes the version available via
/// getDatamodelVersion - confirm against the implementation
///
/// @param version The version of the datamodel that is registered
size_t registerDatamodel(std::string name, std::string_view definition,
const podio::RelationNameMapping& relationNames, podio::version::Version version);

/// Get the names of the relations and vector members of a datatype
RelationNames getRelationNames(std::string_view typeName) const;

Expand All @@ -123,6 +131,8 @@ class DatamodelRegistry {
std::vector<std::pair<std::string, std::string_view>> m_definitions{};

std::unordered_map<std::string_view, RelationNames> m_relations{};

std::unordered_map<std::string, podio::version::Version> m_datamodelVersions{};
};
} // namespace podio

Expand Down
11 changes: 11 additions & 0 deletions include/podio/RNTupleReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,17 @@ class RNTupleReader {
return m_fileVersion;
}

/// Get the (build) version of a datamodel that has been used to write the
/// current file
///
/// @param name The name of the datamodel
///
/// @returns The (build) version of the datamodel if available or an empty
/// optional
std::optional<podio::version::Version> currentFileVersion(const std::string& name) const {
return m_datamodelHolder.getDatamodelVersion(name);
}

/// Get the datamodel definition for the given name
///
/// @param name The name of the datamodel
Expand Down
11 changes: 11 additions & 0 deletions include/podio/ROOTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,17 @@ class ROOTReader {
return m_fileVersion;
}

/// Get the (build) version of a datamodel that has been used to write the
/// current file
///
/// @param name The name of the datamodel
///
/// @returns The (build) version of the datamodel if available or an empty
/// optional
std::optional<podio::version::Version> currentFileVersion(const std::string& name) const {
return m_datamodelHolder.getDatamodelVersion(name);
}

/// Get the names of all the available Frame categories in the current file(s).
///
/// @returns The names of the available categories from the file
Expand Down
16 changes: 16 additions & 0 deletions include/podio/Reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class Reader {
virtual podio::Frame readFrame(const std::string& name, size_t index) = 0;
virtual size_t getEntries(const std::string& name) const = 0;
virtual podio::version::Version currentFileVersion() const = 0;
virtual std::optional<podio::version::Version> currentFileVersion(const std::string& name) const = 0;
virtual std::vector<std::string_view> getAvailableCategories() const = 0;
virtual const std::string_view getDatamodelDefinition(const std::string& name) const = 0;
virtual std::vector<std::string> getAvailableDatamodels() const = 0;
Expand Down Expand Up @@ -66,6 +67,10 @@ class Reader {
return m_reader->currentFileVersion();
}

/// Forward the request for the version of the datamodel with the given name
/// to the wrapped reader and return its result unchanged
std::optional<podio::version::Version> currentFileVersion(const std::string& name) const override {
return m_reader->currentFileVersion(name);
}

std::vector<std::string_view> getAvailableCategories() const override {
return m_reader->getAvailableCategories();
}
Expand Down Expand Up @@ -166,6 +171,17 @@ class Reader {
return m_self->currentFileVersion();
}

/// Get the (build) version of a datamodel that has been used to write the
/// current file
///
/// @param name The name of the datamodel
///
/// @returns The (build) version of the datamodel if available or an empty
/// optional
std::optional<podio::version::Version> currentFileVersion(const std::string& name) const {
return m_self->currentFileVersion(name);
}

/// Get the names of all the available Frame categories in the current file(s).
///
/// @returns The names of the available categories from the file
Expand Down
38 changes: 33 additions & 5 deletions include/podio/SIOBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <vector>

namespace podio {

template <typename devT, typename PODData>
void handlePODDataSIO(devT& device, PODData* data, size_t size) {
void handlePODDataSIO(devT& device, const PODData* data, size_t size) {
unsigned count = size * sizeof(PODData);
char* dataPtr = reinterpret_cast<char*>(data);
auto* dataPtr = reinterpret_cast<char*>(const_cast<PODData*>(data));
device.data(dataPtr, count);
}

Expand All @@ -33,7 +34,12 @@ void writeMapLike(sio::write_device& device, const MapLikeT& map) {
device.data((int)map.size());
for (const auto& [key, value] : map) {
device.data(key);
device.data(value);
using MappedType = detail::GetMappedType<MapLikeT>;
if constexpr (std::is_trivial_v<MappedType>) {
handlePODDataSIO(device, &value, 1);
} else {
device.data(value);
}
}
}

Expand Down Expand Up @@ -165,15 +171,37 @@ class SIOEventMetaDataBlock : public sio::block {
podio::GenericParameters* metadata{nullptr};
};

namespace detail {
/// Assemble the name of a map block from the spelled-out key and value type
/// names.
///
/// Every ':' in either type name is replaced by '_' before the names are
/// embedded into the block name.
///
/// @param keyTName The name of the key type
/// @param valueTName The name of the value type
///
/// @returns "SIOMapBlockV2_KK_<key>_VV_<value>" with the sanitized type names
inline std::string sioMapBlockNameImpl(std::string keyTName, std::string valueTName) {
  // Both arguments are taken by value, so they can be sanitized in place
  const auto replaceColons = [](std::string& typeName) {
    for (auto& character : typeName) {
      if (character == ':') {
        character = '_';
      }
    }
  };
  replaceColons(keyTName);
  replaceColons(valueTName);

  std::string blockName = "SIOMapBlockV2_KK_";
  blockName += keyTName;
  blockName += "_VV_";
  blockName += valueTName;
  return blockName;
}

/// Get the name of the map block for the given key and value type.
///
/// Only the declaration of the primary template is given here; the
/// specializations defined via SIOMAPBLOCK_NAME below provide the definitions
/// for the listed key / value type combinations.
template <typename KeyT, typename ValueT>
inline std::string sioMapBlockName();

// Define the specialization of sioMapBlockName for the given key and value
// type. The type names are stringified exactly as they are spelled in the macro
// invocation and passed on to sioMapBlockNameImpl.
#define SIOMAPBLOCK_NAME(key_type, value_type) \
template <> \
inline std::string sioMapBlockName<key_type, value_type>() { \
return sioMapBlockNameImpl(#key_type, #value_type); \
}

// The key / value type combinations for which a block name is defined
SIOMAPBLOCK_NAME(std::string, std::string)
SIOMAPBLOCK_NAME(std::string, podio::version::Version)
// The macro is only a local helper and is not meant to be used outside of this header
#undef SIOMAPBLOCK_NAME
} // namespace detail

/// A block to serialize anything that behaves similar in iterating as a
/// map<KeyT, ValueT>, e.g. vector<tuple<KeyT, ValueT>>, which is what is used
/// internally to represent the data to be written.
template <typename KeyT, typename ValueT>
struct SIOMapBlock : public sio::block {
SIOMapBlock() : sio::block("SIOMapBlock", sio::version::encode_version(0, 1)) {
SIOMapBlock() : sio::block(detail::sioMapBlockName<KeyT, ValueT>(), sio::version::encode_version(0, 2)) {
}
SIOMapBlock(std::vector<std::tuple<KeyT, ValueT>>&& data) :
sio::block("SIOMapBlock", sio::version::encode_version(0, 1)), mapData(std::move(data)) {
sio::block(detail::sioMapBlockName<KeyT, ValueT>(), sio::version::encode_version(0, 2)),
mapData(std::move(data)) {
}

SIOMapBlock(const SIOMapBlock&) = delete;
Expand Down
11 changes: 11 additions & 0 deletions include/podio/SIOReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,17 @@ class SIOReader {
return m_fileVersion;
}

/// Get the (build) version of a datamodel that has been used to write the
/// current file
///
/// @param name The name of the datamodel
///
/// @returns The (build) version of the datamodel if available or an empty
/// optional
std::optional<podio::version::Version> currentFileVersion(const std::string& name) const {
return m_datamodelHolder.getDatamodelVersion(name);
}

/// Get the names of all the available Frame categories in the current file.
///
/// @returns The names of the available categores from the file
Expand Down
11 changes: 9 additions & 2 deletions include/podio/utilities/DatamodelRegistryIOHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,12 @@ class DatamodelDefinitionHolder {
public:
/// The "map" type that is used internally
using MapType = std::vector<std::tuple<std::string, std::string>>;
/// Constructor from an existing collection of names and datamodel definitions
DatamodelDefinitionHolder(MapType&& definitions) : m_availEDMDefs(std::move(definitions)) {
/// The "map" mapping names and datamodel versions (where available)
using VersionList = std::vector<std::tuple<std::string, podio::version::Version>>;

/// Constructor from an existing collection of names and datamodel definitions and versions
DatamodelDefinitionHolder(MapType&& definitions, VersionList&& versions) :
m_availEDMDefs(std::move(definitions)), m_edmVersions(std::move(versions)) {
}

DatamodelDefinitionHolder() = default;
Expand All @@ -57,8 +61,11 @@ class DatamodelDefinitionHolder {
/// Get all names of the datamodels that have been read from file
std::vector<std::string> getAvailableDatamodels() const;

/// Get the version of the datamodel with the given name
///
/// @param name The name of the datamodel
///
/// @returns The version of the datamodel or an empty optional.
///          NOTE(review): presumably the optional is empty if no version is
///          stored for this name - confirm against the implementation
std::optional<podio::version::Version> getDatamodelVersion(const std::string& name) const;

protected:
MapType m_availEDMDefs{};
VersionList m_edmVersions{};
};

} // namespace podio
Expand Down
30 changes: 25 additions & 5 deletions python/podio/base_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,31 @@ def get_datamodel_definition(self, edm_name):
return ""
return self._reader.getDatamodelDefinition(edm_name).data()

def current_file_version(self):
"""Get the podio (build) version that was used to write this file
def current_file_version(self, edm_name=None):
"""Get the (build) version that was used to write this file

If called without argument or None, the podio build version is returned
otherwise the build version of the datamodel

Args:
edm_name (str, optional): The package name of the datamodel

Returns:
podio.version.Version: The build version of podio that was use to
write this file
podio.version.Version: The build version of podio or the build
version of the datamodel (if available) that was used to write
this file

Raises:
KeyError: If the datamodel does not have a version stored
RuntimeError: If the reader is a legacy reader and a datamodel
version is requested
"""
return self._reader.currentFileVersion()
if edm_name is None:
return self._reader.currentFileVersion()

if self._is_legacy:
raise RuntimeError("Legacy readers do not store any version info")
maybe_version = self._reader.currentFileVersion(edm_name)
if maybe_version.has_value():
return maybe_version.value()
raise KeyError(f"No version information available for '{edm_name}'")
19 changes: 19 additions & 0 deletions python/podio/test_Reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/usr/bin/env python3
"""Unit tests for podio readers"""

from podio.version import build_version


class ReaderTestCaseMixin:
"""Common unittests for readers.
Expand Down Expand Up @@ -70,6 +72,23 @@ def test_frame_iterator_invalid_category(self):
i += 1
self.assertEqual(i, 0)

def test_available_datamodels(self):
"""Make sure that the datamodel information can be retrieved"""
datamodels = self.reader.datamodel_definitions
self.assertEqual(len(datamodels), 2)
for model in datamodels:
self.assertTrue(model in ("datamodel", "extension_model"))

self.assertEqual(self.reader.current_file_version("datamodel"), build_version)

def test_invalid_datamodel_version(self):
"""Make sure that the necessary exceptions are raised"""
with self.assertRaises(KeyError):
self.reader.current_file_version("extension_model")

with self.assertRaises(KeyError):
self.reader.current_file_version("non-existant-model")


class LegacyReaderTestCaseMixin:
"""Common test cases for the legacy readers python bindings.
Expand Down
Loading