diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..4484bf0e7 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,31 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.11" + # You can also specify other tool versions: + # nodejs: "19" + # rust: "1.64" + # golang: "1.19" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +# python: +# install: +# - requirements: docs/requirements.txt diff --git a/README.rst b/README.rst new file mode 100644 index 000000000..07eac7fd1 --- /dev/null +++ b/README.rst @@ -0,0 +1,9 @@ +Template for the Read the Docs tutorial +======================================= + +This GitHub template includes fictional Python library +with some basic Sphinx docs. 
+ +Read the tutorial here: + +https://docs.readthedocs.io/en/stable/tutorial/ diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml new file mode 100644 index 000000000..1e7c9988a --- /dev/null +++ b/docs/.readthedocs.yaml @@ -0,0 +1,41 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.11" + # You can also specify other tool versions: + # nodejs: "19" + # rust: "1.64" + # golang: "1.19" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html + +# python: +# install: + # - requirements: docs/requirements.txt + # - method: pip + # path: . + # extra_requirements: + # - docs + # - method: pip + # path: another/package +# system_packages: true + +# pip install myst-parser diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000..d0c3cbf10 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 000000000..32069b490 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,8 @@ +API +=== + +.. autosummary:: + :toctree: generated + +.. image:: https://e0.pxfuel.com/wallpapers/598/557/desktop-wallpaper-animated-smiley-faces-cartoon-smiley-face.jpg + :width: 100% diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 000000000..c8c3d8d07 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,202 @@ + +# +# The following code was added during an automated build on readthedocs.org +# It is auto created and injected for every build. The result is based on the +# conf.py.tmpl file found in the readthedocs.org codebase: +# https://github.com/rtfd/readthedocs.org/blob/main/readthedocs/doc_builder/templates/doc_builder/conf.py.tmpl +# +# Note: this file shouldn't rely on extra dependencies. + +import importlib +import sys +import os.path + +# Borrowed from six. +PY3 = sys.version_info[0] == 3 +string_types = str if PY3 else basestring + +from sphinx import version_info + +# Get suffix for proper linking to GitHub +# This is deprecated in Sphinx 1.3+, +# as each page can have its own suffix +if globals().get('source_suffix', False): + if isinstance(source_suffix, string_types): + SUFFIX = source_suffix + elif isinstance(source_suffix, (list, tuple)): + # Sphinx >= 1.3 supports list/tuple to define multiple suffixes + SUFFIX = source_suffix[0] + elif isinstance(source_suffix, dict): + # Sphinx >= 1.8 supports a mapping dictionary for multiple suffixes + SUFFIX = list(source_suffix.keys())[0] # make a ``list()`` for py2/py3 compatibility + else: + # default to .rst + SUFFIX = '.rst' +else: + SUFFIX = '.rst' + +# Add RTD Static Path. Add to the end because it overwrites previous files. 
+if not 'html_static_path' in globals(): + html_static_path = [] +if os.path.exists('_static'): + html_static_path.append('_static') + +# Add RTD Theme only if they aren't overriding it already +using_rtd_theme = ( + ( + 'html_theme' in globals() and + html_theme in ['default'] and + # Allow people to bail with a hack of having an html_style + 'html_style' not in globals() + ) or 'html_theme' not in globals() +) +if using_rtd_theme: + html_theme = 'sphinx_rtd_theme' + html_style = None + html_theme_options = {} + + +# This following legacy behavior will gradually be sliced out until its deprecated and removed. +# Skipped for Sphinx 6+ +# Skipped by internal Feature flag SKIP_SPHINX_HTML_THEME_PATH +# Skipped by all new projects since SKIP_SPHINX_HTML_THEME_PATH's introduction (jan 2023) +if ( + using_rtd_theme + and version_info < (6,0) + and not False + ): + theme = importlib.import_module('sphinx_rtd_theme') + if 'html_theme_path' in globals(): + html_theme_path.append(theme.get_html_theme_path()) + else: + html_theme_path = [theme.get_html_theme_path()] + +# Define websupport2_base_url and websupport2_static_url +if globals().get('websupport2_base_url', False): + websupport2_base_url = 'https://readthedocs.org/websupport' + websupport2_static_url = 'https://assets.readthedocs.org/static/' + + +#Add project information to the template context. 
+context = { + 'using_theme': using_rtd_theme, + 'html_theme': html_theme, + 'current_version': "latest", + 'version_slug': "latest", + 'MEDIA_URL': "https://media.readthedocs.org/", + 'STATIC_URL': "https://assets.readthedocs.org/static/", + 'PRODUCTION_DOMAIN': "readthedocs.org", + 'proxied_static_path': "/_/static/", + 'versions': [ + ("latest", "/en/latest/"), + ("stable", "/en/stable/"), + ], + 'downloads': [ + ("pdf", "//jana.readthedocs.io/_/downloads/en/latest/pdf/"), + ("html", "//jana.readthedocs.io/_/downloads/en/latest/htmlzip/"), + ("epub", "//jana.readthedocs.io/_/downloads/en/latest/epub/"), + ], + 'subprojects': [ + ], + 'slug': 'jana', + 'name': u'JANA', + 'rtd_language': u'en', + 'programming_language': u'cpp', + 'canonical_url': '', + 'analytics_code': 'None', + 'single_version': False, + 'conf_py_path': '/docs/', + 'api_host': 'https://readthedocs.org', + 'github_user': 'JeffersonLab', + 'proxied_api_host': '/_', + 'github_repo': 'JANA2', + 'github_version': 'master', + 'display_github': True, + 'bitbucket_user': 'None', + 'bitbucket_repo': 'None', + 'bitbucket_version': 'master', + 'display_bitbucket': False, + 'gitlab_user': 'None', + 'gitlab_repo': 'None', + 'gitlab_version': 'master', + 'display_gitlab': False, + 'READTHEDOCS': True, + 'using_theme': (html_theme == "default"), + 'new_theme': (html_theme == "sphinx_rtd_theme"), + 'source_suffix': SUFFIX, + 'ad_free': False, + 'docsearch_disabled': False, + 'user_analytics_code': '', + 'global_analytics_code': 'UA-17997319-1', + 'commit': '71a06c23', +} + +# For sphinx >=1.8 we can use html_baseurl to set the canonical URL. 
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-html_baseurl +if version_info >= (1, 8): + if not globals().get('html_baseurl'): + html_baseurl = context['canonical_url'] + context['canonical_url'] = None + + + + + +if 'html_context' in globals(): + for key in context: + if key not in html_context: + html_context[key] = context[key] +else: + html_context = context + +# Add custom RTD extension +if 'extensions' in globals(): + # Insert at the beginning because it can interfere + # with other extensions. + # See https://github.com/rtfd/readthedocs.org/pull/4054 + extensions.insert(0, "readthedocs_ext.readthedocs") +else: + extensions = ["readthedocs_ext.readthedocs"] +# extensions.append("myst_parser") +# Add External version warning banner to the external version documentation +if 'branch' == 'external': + extensions.insert(1, "readthedocs_ext.external_version_warning") + readthedocs_vcs_url = 'None' + readthedocs_build_url = 'https://readthedocs.org/projects/jana/builds/21148681/' + +project_language = 'en' + +# User's Sphinx configurations +language_user = globals().get('language', None) +latex_engine_user = globals().get('latex_engine', None) +latex_elements_user = globals().get('latex_elements', None) + +# Remove this once xindy gets installed in Docker image and XINDYOPS +# env variable is supported +# https://github.com/rtfd/readthedocs-docker-images/pull/98 +latex_use_xindy = False + +chinese = any([ + language_user in ('zh_CN', 'zh_TW'), + project_language in ('zh_CN', 'zh_TW'), +]) + +japanese = any([ + language_user == 'ja', + project_language == 'ja', +]) + +if chinese: + latex_engine = latex_engine_user or 'xelatex' + + latex_elements_rtd = { + 'preamble': '\\usepackage[UTF8]{ctex}\n', + } + latex_elements = latex_elements_user or latex_elements_rtd +elif japanese: + latex_engine = latex_engine_user or 'platex' + +# Make sure our build directory is always excluded +exclude_patterns = globals().get('exclude_patterns', []) 
+exclude_patterns.extend(['_build']) + diff --git a/docs/how-to guides.rst b/docs/how-to guides.rst new file mode 100644 index 000000000..b22fdb5b5 --- /dev/null +++ b/docs/how-to guides.rst @@ -0,0 +1,478 @@ +How-to guides +============= +This section walks the user through specific steps for solving a real-world problem. + +Table of contents +----------------- +1. `Download `_ and `install `_ JANA +2. `Use the JANA command-line program `_ +3. `Configure JANA `_ +4. `Benchmark a JANA program `_ +5. `Generate code skeletons `_ for projects, plugins, components, etc +6. `Run the JANA Status/Control/Debugger GUI `_ +7. `Work with factory metadata `_ for collecting statistics, etc +8. Create a service which can be shared between different plugins +9. Handle both real and simulated data +10. Handle EPICS data +11. `Detect when a group of events has finished `_ +12. Use JANA with ROOT +13. Persist the entire DST using ROOT +14. Checkpoint the entire DST using ROOT +15. `Stream data to and from JANA `_ +16. Build and filter events (“L1 and L2 triggers”) +17. Process subevents +18. Migrate from JANA1 to JANA2 + +Building JANA +~~~~~~~~~~~~~~ +First, set your ``$JANA_HOME`` environment variable. This is where the executables, libraries, headers, and plugins get installed. (It is also where we will clone the source). CMake will install to ``$JANA_HOME`` if it is set (it will install to ``${CMAKE_BINARY_DIR}/install`` if not). Be aware that although CMake usually defaults ``CMAKE_INSTALL_PREFIX`` to ``/usr/local``, we have disabled this because we rarely want this in practice, and we don’t want the build system picking up outdated headers and libraries we installed to ``/usr/local`` by accident. If you want to set ``JANA_HOME=/usr/local``, you are free to do so, but you must do so deliberately. + +Next, set your build directory. This is where CMake’s caches, logs, intermediate build artifacts, etc go. 
The convention is to name it ``build`` and put it in the project’s root directory. If you are using CLion, it will automatically create a ``cmake-build-debug`` directory which works just fine. + +Finally, you can cd into your build directory and build and install everything the usual CMake way. + +.. code-block:: console + + export JANA_VERSION=v2.0.5 # Convenient to set this once for specific release + export JANA_HOME=${PWD}/JANA${JANA_VERSION} # Set full path to install dir + + git clone https://github.com/JeffersonLab/JANA2 --branch ${JANA_VERSION} ${JANA_HOME} # Get JANA2 + + mkdir build # Set build dir + cd build + cmake3 ${JANA_HOME} # Generate makefiles + make -j8 install # Build (using 8 threads) and install + + source ${JANA_HOME}/bin/jana-this.sh # Set PATH (and other envars) + jana -Pplugins=JTest # Run JTest plugin to verify successful install + +Note: If you want to use a compiler other than the default one on your system, it is not enough to modify your $PATH, as CMake ignores this by design. You either need to set the ``CXX`` environment variable or the ``CMAKE_CXX_COMPILER`` CMake variable. + +By default, JANA will look for plugins under ``$JANA_HOME/plugins``. For your plugins to propagate here, you have to ``install`` them. If you don’t want to do that, you can also set the environment variable ``$JANA_PLUGIN_PATH`` to point to the build directory of your project. JANA will report where exactly it went looking for your plugins and what happened when it tried to load them if you set the JANA config ``jana:debug_plugin_loading=1``. + +.. code-block:: console + + jana -Pplugins=JTest -Pjana:debug_plugin_loading=1 + + +Using JANA in a CMake project +_______________________________ + +To use JANA in a CMake project, simply add ``$JANA_HOME/lib/cmake/JANA`` to your ``CMAKE_PREFIX_PATH``, or alternatively, set the CMake variable ``JANA_DIR=$JANA_HOME/lib/cmake/JANA``. 
+ + +Using JANA in a non-CMake project +___________________________________ + +To use JANA in a non-CMake project: + +1. Source ``$JANA_HOME/bin/jana-this.sh`` to set the environment variables needed for JANA’s dependencies +2. Use ``$JANA_HOME/bin/jana-config --cflags`` to obtain JANA’s compiler flags +3. Use ``$JANA_HOME/bin/jana-config --libs`` to obtain JANA’s linker flags + + +How to benchmark JANA +~~~~~~~~~~~~~~~~~~~~~~~ +JANA includes a built-in facility for benchmarking programs and plugins. It produces a scalability curve by repeatedly pausing execution, adding additional worker threads, resuming execution, and measuring the resulting throughput over fixed time intervals. There is an additional option to measure the scalability curves for a matrix of different affinity and locality strategies. This is useful when your hardware architecture has nonuniform memory access. + +In case you don’t have JANA code ready to benchmark yet, JANA provides a plugin called ``JTest`` which can simulate different workloads. ``JTest`` runs a dummy algorithm on randomly generated data, using a user-specified event size and number of FLOPs (floating point operations) per event. This gives a rough estimate of your code’s performance. If you don’t know the number of FLOPs per event, you can still compare the performance of JANA on different hardware architectures just by using the default settings. + +Here is how you do benchmarking with ``JTest``: + +.. code-block:: console + + # Obtain and build JANA, if you haven't already + git clone http://github.com/JeffersonLab/JANA2 + cd JANA2 + mkdir build + mkdir install + export JANA_HOME=`pwd`/install + cmake -S . -B build + cmake --build build -j 10 --target install + cd install/bin + + # Run the benchmarking + ./jana -b -Pplugins=JTest + # -b enables benchmarking + # -Pplugins=JTest pulls in the JTest plugin + # Additional configuration options are listed below + + + # Benchmarking may take a while. 
You can terminate any time without + # losing data by pressing Ctrl-C _once or twice_. If you press it three + # times or more, it will hard-exit and won't write the results file. + + + cd JANA_Test_Results + # Raw data CSV files are in `samples.dat` + # Average and RMS rates are in `rates.dat` + + # Show the scalability curve in a matplotlib window + ./jana-plot-scaletest.py + +If you already have a JANA project you would like to benchmark, all you have to do is build and install it the way you usually would, and then run + +.. code-block:: console + + jana -b -Pplugins=$MY_PLUGIN + # Or + my_jana_app -b + + cd JANA_Test_Results + # Raw data CSV files are in `samples.dat` + # Average and RMS rates are in `rates.dat` + + # Show the scalability curve in a matplotlib window + ./jana-plot-scaletest.py + +These are the relevant configuration parameters for ``JTest``: + +.. list-table:: + :widths: 25 15 25 50 + :header-rows: 1 + + * - Name + - Units + - Default + - Description + * - benchmark:nsamples + - int + - 15 + - Number of measurements made for each thread count + * - benchmark:minthreads + - int + - 1 + - Minimum thread count + * - benchmark:maxthreads + - int + - ncores + - Maximum thread count + * - benchmark:threadstep + - int + - 1 + - Thread count increment + * - benchmark:resultsdir + - string + - JANA_Test_Results + - Directory name for benchmark test results + + +Detect when a group of events has finished +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sometimes it is necessary to organize events into groups, process the events the usual way, but then notify some component whenever a group has completely finished. The original motivating example for this was EPICS data, which was maintained as a bundle of shared state. Whenever updates arrived, JANA1 would emit a ‘barrier event’ which would stop the data flow until all in-flight events completed, so that preceding events could only read the old state and subsequent events could only read the new state. 
We now recommend EPICS data be handled differently. Nevertheless this pattern still occasionally comes into play. + +One example is a JEventProcessor which writes statistics for the previous run every time the run number changes. This is trickier than it first appears because events may arrive out of order. The JEventProcessor can easily maintain a set of run numbers it has already seen, but it won’t know when it has seen all of the events for a given run number. For that it needs an additional piece of information: the number of events emitted with that run number. Complicating things further, this information needs to be read and modified by both the JEventSource and the JEventProcessor. + +Our current recommendation is a ``JService`` called ``JEventGroupManager``. This is designed to be used as follows: + +1. A JEventSource should keep a pointer to the current JEventGroup, which it obtains through the JEventGroupManager. Groups are given a unique id, which + +2. Whenever the JEventSource emits a new event, it should insert the JEventGroup into the JEvent. The event is now tagged as belonging to that group. + +3. When the JEventSource moves on to the next group, e.g. if the run number changed, it should close out the old group by calling JEventGroup::CloseGroup(). The group needs to be closed before it will report itself as finished, even if there are no events still in-flight. + +4. A JEventProcessor should retrieve the JEventGroup object by calling JEvent::Get. It should report that an event is finished by calling JEventGroup::FinishEvent. Please only call this once; although we could make JEventGroup robust against repeated calls, it would add some overhead. + +5. A JEventSource or JEventProcessor (or technically anything whose lifespan is enclosed by the lifespan of JServices) may then test whether this is the last event in its group by calling JEventGroup::IsGroupFinished(). A blocking version, JEventGroup::WaitUntilGroupFinished(), is also provided. 
This mechanism allows relatively arbitrary hooks into the event stream. + + +Stream data to and from JANA +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. The first question to ask is: What is the relationship between messages and events? Remember, a message is just a packet of data sent over the wire, whereas an event is JANA’s main unit of independent computation, corresponding to all data associated with one physics interaction. The answer will depend on: + +* What systems already exist upstream, and how difficult they are to change +* The expected size of each event +* Whether event building is handled upstream or within JANA + +If events are large enough (>0.5MB), the cleanest thing to do is to establish a one-to-one relationship between messages and events. JANA provides JStreamingEventSource to make this convenient. + +If events are very small, you probably want many events in one message. A corresponding helper class does not exist yet, but would be a straightforward adaptation of the above. + +If upstream doesn’t do any event building (e.g. it is reading out ADC samples over a fixed time window) you probably want to have JANA determine physically meaningful event boundaries, maybe even incorporating a software L2 trigger. This is considerably more complicated, and is discussed in `the event building how-to `_ instead. + +For the remainder of this how-to we assume that messages and events are one-to-one. + +2. The second question to ask is: What transport should be used? + +JANA makes it so that the message format and transport can be varied independently. The transport wrapper need only implement the JTransport interface, which is essentially just: + +.. 
code-block:: console + enum class Result {SUCCESS, TRYAGAIN}; + + virtual void initialize(); + virtual Result send(const JMessage& src_msg); + virtual Result receive(JMessage& dest_msg); + +The key detail is that both ``send`` and ``receive`` should block until data has finished transferring to/from the ``JMessage`` buffer so that the buffer may be accessed by the caller with no additional synchronization. If there are no pending messages, ``receive`` should return ``TRYAGAIN`` immediately so as not to block the event source. In contrast, ``send`` must block until it succeeds, as otherwise there will be data loss. + +An implementation already exists for ZeroMQ. See ``examples/JExample7/ZmqTransport.h`` + +The final and most important question to ask is: What is the message format? + +Message formats each get their own class, which must inherit from the JMessage and JEventMessage interfaces. + + +Using the JANA CLI +------------------- + +JANA is typically run like this: + +.. code-block:: console + + $JANA_HOME/bin/jana -Pplugins=JTest -Pnthreads=8 ~/data/inputfile.txt + +Note that the JANA executable won’t do anything until you provide plugins. A simple plugin is provided called JTest, which verifies that everything is working and optionally does a quick performance benchmark. Additional simple plugins are provided in ``src/examples``. Instructions on how to write your own are given in the Tutorial section. + +Along with specifying plugins, you need to specify the input files containing the events you wish to process. Note that JTest ignores these and crunches randomly generated data instead. + +The command-line flags are: + +.. 
list-table:: + :widths: 10 25 50 + :header-rows: 1 + + * - Short + - Long + - Meaning + * - -h + - ``--help`` + - Display help message + * - -v + - ``--version`` + - Display version information + * - -c + - ``--configs`` + - Display configuration parameters + * - -l + - ``--loadconfigs`` + - Load configuration parameters from file + * - -d + - ``--dumpconfigs`` + - Dump configuration parameters to file + * - -b + - ``--benchmark`` + - Run JANA in benchmark mode + * - -P + - + - Specify a configuration parameter (see below) + + +Configuring JANA +----------------- +JANA provides a parameter manager so that configuration options may be controlled via code, command-line args, and config files in a consistent and self-documenting way. Plugins are free to request any existing parameters or register their own. + +The following configuration options are used most commonly: + +.. list-table:: + :widths: 25 25 50 + :header-rows: 1 + + * - Name + - Type + - Description + * - nthreads + - int + - Size of thread team (Defaults to the number of cores on your machine) + * - plugins + - string + - Comma-separated list of plugin filenames. JANA will look for these on the ``$JANA_PLUGIN_PATH`` + * - plugins_to_ignore + - string + - This removes plugins which had been specified in ``plugins``. + * - event_source_type + - string + - Manually override JANA’s decision about which JEventSource to use + * - jana:nevents + - int + - Limit the number of events each source may emit + * - jana:nskip + - int + - Skip processing the first n events from each event source + * - jana:extended_report + - bool + - The amount of status information to show while running + * - jana:status_fname + - string + - Named pipe for retrieving status information remotely + +JANA has its own logger. You can control the verbosity of different components using the parameters ``log:off``, ``log:fatal``, ``log:error``, ``log:warn``, ``log:info``, ``log:debug``, and ``log:trace``. 
The following example shows how you would increase the verbosity of JPluginLoader and JComponentManager: + +.. code-block:: console + + jana -Pplugins=JTest -Plog:debug=JPluginLoader,JComponentManager + +The following parameters are used for benchmarking: + +.. list-table:: + :widths: 25 10 25 50 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - benchmark:nsamples + - int + - 15 + - Number of measurements made for each thread count + * - benchmark:minthreads + - int + - 1 + - Minimum thread count + * - benchmark:maxthreads + - int + - ncores + - Maximum thread count + * - benchmark:threadstep + - int + - 1 + - Thread count increment + * - benchmark:resultsdir + - string + - JANA_Test_Results + - Directory name for benchmark test results + +The following parameters may come in handy when doing performance tuning: + +.. list-table:: + :widths: 25 10 25 50 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - jana:engine + - int + - 0 + - Which parallelism engine to use. 0: JArrowProcessingController. 1: JDebugProcessingController. + * - jana:event_pool_size + - int + - nthreads + - The number of events which may be in-flight at once + * - jana:limit_total_events_in_flight + - bool + - 1 + - Whether the number of in-flight events should be limited + * - jana:affinity + - int + - 0 + - Thread pinning strategy. 0: None. 1: Minimize number of memory localities. 2: Minimize number of hyperthreads. + * - jana:locality + - int + - 0 + - Memory locality strategy. 0: Global. 1: Socket-local. 2: Numa-domain-local. 3: Core-local. 4: Cpu-local. + * - jana:enable_stealing + - bool + - 0 + - Allow threads to pick up work from a different memory location if their local mailbox is empty. 
+ * - jana:event_queue_threshold + - int + - 80 + - Mailbox buffer size + * - jana:event_source_chunksize + - int + - 40 + - Reduce mailbox contention by chunking work assignments + * - jana:event_processor_chunksize + - int + - 1 + - Reduce mailbox contention by chunking work assignments + +Creating code skeletons +------------------------ +JANA provides a script, ``$JANA_HOME/bin/jana-generate.py``, which generates code skeletons not only for different kinds of JANA components, but also for entire project structures. These are intended to compile and run with zero or minimal modification, to provide all of the boilerplate needed, and to include comments explaining what each piece of boilerplate does and what the user is expected to add. The aim is to demonstrate idiomatic usage of the JANA framework and reduce the learning curve as much as possible. + + +Complete projects +~~~~~~~~~~~~~~~~~~~ + +The ‘project’ skeleton lays out the recommended structure for a complex experiment with multiple plugins, a domain model which is shared between plugins, and a custom executable. In general, each experiment is expected to have one project. + +``jana-generate.py project ProjectName`` + + +Project plugins +~~~~~~~~~~~~~~~~~~~ + +Project plugins are used to modularize some functionality within the context of an existing project. Not only does this help separate concerns, so that many members of a collaboration can work together without interfering with one another, but it also helps manage the complexity arising from build dependencies. Some scientific software stubbornly refuses to build on certain platforms, and plugins are a much cleaner solution than the traditional mix of environment variables, build system variables, and preprocessor macros. Project plugins include one JEventProcessor by default. + +``jana-generate.py ProjectPlugin PluginNameInCamelCase`` + + +Mini plugins +~~~~~~~~~~~~~~~~~~~ + +Mini plugins are project plugins which have been stripped down to a single cc file. 
They are useful when someone wants to do a quick analysis and doesn’t need or want the additional boilerplate. They include one JEventProcessor with support for ROOT histograms. There are two options: + +.. code-block:: console + + jana-generate.py MiniStandalonePlugin PluginNameInCamelCase + jana-generate.py MiniProjectPlugin PluginNameInCamelCase + + +Standalone plugins +~~~~~~~~~~~~~~~~~~~ + +Standalone plugins are useful for getting started quickly. They are also effective when someone wishes to integrate with an existing project, but wants their analyses to live in a separate repository. + +``jana-generate.py StandalonePlugin PluginNameInCamelCase`` + + +Executables +~~~~~~~~~~~~~~~~~~~ +Executables are useful when using the provided ``$JANA_HOME/bin/jana`` is inconvenient. This may be because the project is sufficiently simple that multiple plugins aren’t even needed, or because the project is sufficiently complex that specialized configuration is needed before loading any other plugins. + +``jana-generate.py Executable ExecutableNameInCamelCase`` + + +JEventSources +~~~~~~~~~~~~~~~~~~~ + +``jana-generate.py JEventSource NameInCamelCase`` + + +JEventProcessors +~~~~~~~~~~~~~~~~~~~ + +``jana-generate.py JEventProcessor NameInCamelCase`` + + +JEventProcessors which output to ROOT +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This JEventProcessor includes the boilerplate for creating a ROOT histogram in a specific virtual subdirectory of a TFile. If this TFile is shared among different ``JEventProcessors``, it should be encapsulated in a JService. Otherwise, it can be specified as a simple parameter. We recommend naming the subdirectory after the plugin name. E.g. a ``trk_eff`` plugin contains a ``TrackingEfficiencyProcessor`` which writes all of its results to the ``trk_eff`` subdirectory of the TFile. 
+ +``jana-generate.py RootEventProcessor ProcessorNameInCamelCase`` +``directory_name_in_snake_case`` + +Note that this script, like the others, does not update your ``CMakeLists.txt``. Not only will you need to add the file to ``PluginName_PLUGIN_SOURCES``, but you may need to add ROOT as a dependency if your project hasn’t yet: + +.. code-block:: console + + find_package(ROOT) + include_directories(${ROOT_INCLUDE_DIRS}) + link_directories(${ROOT_LIBRARY_DIR}) + target_link_libraries(${PLUGIN_NAME} ${ROOT_LIBRARIES}) + + +JFactories +~~~~~~~~~~~ +Because JFactories are templates parameterized by the type of JObjects they produce, we need two arguments to generate them. The naming convention is left up to the user, but the following is recommended. If the JObject name is ‘RecoTrack’, and the factory uses Genfit under the hood, the factory name should be ‘RecoTrackFactory_Genfit’. + +``jana-generate.py JFactory JFactoryNameInCamelCase JObjectNameInCamelCase`` + +Run the Status Control Debugger GUI +------------------------------------- +The JANA Status/Control/Debugger GUI can be a useful tool for probing a running process. Details can be found on the dedicated page for the GUI + +Using factory metadata +---------------------- +The ``JFactoryT`` interface abstracts the creation logic for a vector of n objects of type ``T``. However, often we also care about single pieces of data associated with the same computation. For instance, a track fitting factory might want to return statistics about how many fits succeeded and failed. + +A naive solution is to put member variables on the factory and then access them from a ``JEventProcessor`` by obtaining the ``JFactoryT`` via ``GetFactory<>`` and performing a dynamic cast to the underlying factory type. Although this works, it means that that factory can no longer be swapped with an alternate version without modifying the calling code. 
This degrades the whole project’s ability to take advantage of the plugin architecture and hurts its overall code quality. + +Instead, we recommend using the ``JMetadata`` template trait. Each ``JFactoryT`` not only produces a vector of ``T``, but also a singular ``JMetadata`` struct whose contents can be completely arbitrary, but cannot be redefined for a particular T. All ``JFactoryT`` for some ``T`` will use it. + +An example project demonstrating usage of JMetadata can be found under ``examples/MetadataExample``. diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 000000000..6185b9afa --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,28 @@ +Welcome to JANA! +================= +JANA is a C++ framework for multi-threaded HENP (High Energy and Nuclear Physics) event reconstruction. It is very efficient at multi-threading with a design that makes it easy for less experienced programmers to contribute pieces to the larger reconstruction project. The same JANA program can be used to easily do partial or full reconstruction, fully maximizing the available cores for the current job. + +Its design strives to be extremely easy to set up when first getting started, yet have a depth of customization options that allow for more complicated reconstruction as your project grows. The intent is to make it easy to run on a laptop for local code development, but to also be highly efficient when deploying to large computing sites like `NERSC <https://www.nersc.gov/>`_. + +JANA has undergone a large rewrite with the newer version dubbed JANA2. The code is now available for use and you are free to browse around. The project is hosted on `GitHub <https://github.com/JeffersonLab/JANA2>`_. + +.. code-block:: console + + auto tracks = jevent->Get<DTrack>(); + + for(auto t : tracks){ + // ... do something with a track + } + +Contents +-------- + +.. 
toctree:: + + tutorial + how-to guides + principles + reference + how-to instructions + + diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 000000000..6247f7e23 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/principles.rst b/docs/principles.rst new file mode 100644 index 000000000..11ab7ebb0 --- /dev/null +++ b/docs/principles.rst @@ -0,0 +1,83 @@ +Principles +========== + +This section provides higher-level background and context for JANA, and discusses JANA’s design philosophy and the associated tradeoffs. + +.. autosummary:: + :toctree: generated + +.. JANA concepts: + +JANA concepts +-------------- +* JObjects are data containers for specific results, e.g. clusters or tracks. They may be plain-old structs or they may optionally inherit from (e.g.) ROOT or NumPy datatypes. + +* JEventSources take a file or messaging producer which provides raw event data, and expose it to JANA as a stream. + +* JFactories calculate a specific result on an event-by-event basis. Their inputs may come from an EventSource or may be computed via other JFactories. All results are evaluated lazily and cached until the entire event is finished processing. 
in order to do so. Importantly, JFactories are decoupled from one another via the JEvent interface. It should make no difference to the JFactory where its input data came from, as long as it has the correct type and tag. While the `Factory Pattern `_ usually abstracts away the subtype of the class being created, in our case it abstracts away the number of instances created instead. For instance, a ClusterFactory may take m Hit objects and produce n Cluster objects, where m and n vary per event and won’t be known until that event gets processed. + +* JEventProcessors run desired JFactories over the event stream and write the results to an output file or messaging consumer. JFactories form a lazy directed acyclic graph, whereas JEventProcessors trigger their actual evaluation. + +Object lifecycles +------------------ +JObjects are data containers for specific results, e.g. clusters or tracks. They may be plain-old structs or they may optionally inherit from (e.g.) ROOT or NumPy datatypes. + +By default, a JFactory owns all of the JObjects that it created during ``Process()``. Once all event processors have finished processing a ``JEvent``, all ``JFactories`` associated with that ``JEvent`` will clear and delete their ``JObjects``. However, you can change this behavior by setting one of the factory flags: + +* ``PERSISTENT``: Objects are neither cleared nor deleted. This is usually used for calibrations and translation tables. Note that if an object is persistent, ``JFactory::Process`` will not be re-run on the next ``JEvent``. The user may still update the objects manually, via ``JFactory::BeginRun``, and must delete the objects manually via ``JFactory::EndRun`` or ``JFactory::Finish``. + +* ``NOT_OBJECT_OWNER``: Objects are cleared from the ``JFactory`` but not deleted. This is useful for “proxy” factories (which reorganize objects that are owned by a different factory) and for ``JEventGroups``. ``JFactory::Process`` *will* be re-run for each ``JEvent``. 
As long as the objects are owned by a different ``JFactory``, the user doesn’t have to do any cleanup. + +The lifetime of a ``JFactory`` spans the time that a ``JEvent`` is in-flight. No other guarantees are made: ``JFactories`` might be re-used for multiple ``JEvents`` for the sake of efficiency, but the implementation is free to not do so. In particular, the user must never assume that one ``JFactory`` will see the entire ``JEvent`` stream. + +The lifetime of a ``JEventSource`` spans the time that all of its emitted ``JEvents`` are in-flight. + +The lifetime of a ``JEventProcessor`` spans the time that any ``JEventSources`` are active. + +The lifetime of a ``JService`` not only spans the time that any ``JEventProcessors`` are active, but also the lifetime of ``JApplication`` itself. Furthermore, because JServices use ``shared_ptr``, they are allowed to live even longer than ``JApplication``, which is helpful for things like writing test cases. + +Design philosophy +----------------- +JANA’s design philosophy can be boiled down to five values, ordered by importance: + +Simple to use +______________ +JANA chooses its battles carefully. First and foremost, JANA is about parallelizing computations over data organized into events. From a 30000-foot view, it should look more like OpenMP or Thread Building Blocks or RaftLib than like ROOT. Unlike the aforementioned, JANA’s vocabulary of abstractions is designed around the needs of physicists rather than general programmers. However, JANA does not attempt to meet *all* of the needs of physicists. + +JANA recognizes when coinciding concerns ought to be handled orthogonally. A good example is persistence. JANA does not seek to provide its own persistence layer, nor does it require the user to commit to a specific dependency such as ROOT or Numpy or Apache Arrow. Instead, JANA tries to make it feasible for the user to choose their persistence layer independently. 
This way, if a collaboration decides they wish to (for instance) migrate from ROOT to Arrow, they have a well-defined migration path which keeps the core analysis code largely intact. + +In particular, this means minimizing the complexity of the build system and minimizing orchestration. Building code against JANA should require nothing more than implementing certain key interfaces, adding a single path to includes, and linking against a single library. + +Well-organized +______________ +While JANA’s primary goal is running code in parallel, its secondary goal is imposing an organizing principle on the users’ codebase. This can be invaluable in a large collaboration where members vary in programming skill. Specifically, JANA organizes processing logic into decoupled units. JFactories are agnostic of how and when their prerequisites are computed, are only run when actually needed, and cache their results for reuse. Different analyses can coexist in separate JEventProcessors. Components can be compiled into independent plugins, to be mixed and matched at runtime. All together, JANA enforces an organizing principle that enables groups to develop and test their code with both freedom and discipline. + +Safe +____ +JANA recognizes that not all of its users are proficient parallel programmers, and it steers users towards patterns which mitigate some of the pitfalls. Specifically, it provides: + +* **Modern C++ features** such as smart pointers and judicious templating, to discourage common classes of bugs. JANA seeks to make its memory ownership semantics explicit in the type system as much as possible. + +* **Internally managed locks** to reduce the learning curve and discourage tricky parallelism bugs. + +* **A stable API** with an effort towards backwards-compatibility, so that everybody can benefit from new features and performance/stability improvements. + +Fast +_____ +JANA uses low-level optimizations wherever it can in order to boost performance. 
+ +Flexible +_________ +* Disentangling: Input data is bundled into blocks (each containing an array of entangled events) and we want to parse each block in order to emit a stream of events (*flatmap*) + +* Software triggers: With streaming data readout, we may want to accept a stream of raw hit data and let JANA determine the event boundaries. Arbitrary triggers can be created using existing JFactories. (*windowed join*) + +* Subevent-level parallelism: This is necessary if individual events are very large. It may also play a role in effectively utilizing a GPU, particularly as machine learning is adopted in reconstruction (*flatmap+merge*) + +JANA is also flexible enough to be compiled and run in different ways. Users may compile their code into a standalone executable, into one or more plugins which can be run by a generic executable, or run from a Jupyter notebook. + +Comparison to other frameworks +-------------------------------- +Many different event reconstruction frameworks exist. The following are frequently compared and contrasted with JANA: + +* `Clara `_: While JANA specializes in thread-level parallelism, Clara uses node-level parallelism via a message-passing interface. This higher level of abstraction comes with some performance overhead and significant orchestration requirements. On the other hand, it can scale to larger problem sizes and support more general stream topologies. JANA is to OpenMP as Clara is to MPI. diff --git a/docs/reference.rst b/docs/reference.rst new file mode 100644 index 000000000..bba479a64 --- /dev/null +++ b/docs/reference.rst @@ -0,0 +1,42 @@ +Reference +========= + +This section describes the underlying machinery of JANA. This is essentially a launchpad into Doxygen. + +Main API +----------- + +* `JApplication `_: The main entry point into the program +* `JObject `_: Data containers for specific results +* `JEventSource `_ : From a file or messaging producer, expose a stream of events. 
Each event is an aggregate of JObjects +* `JFactory `_ : Given an event, calculate a specific result, potentially calling other JFactories recursively to obtain any prereqs +* `JEventProcessor `_: Run desired JFactories over the event stream, writing results to an output file or messaging consumer + +Streaming Extensions +---------------------- + +* `JStreamingEventSource `_ : A starting point for structured, composable streaming +* `JTransport `_ : An interface for a generic messaging transport +* `JMessage `_: An interface for a stream buffer + +User-facing utilities +----------------------- + +* `JParameter `_ : Configure the behavior of components at runtime +* `JLogger `_ : Publish debugging information to standard out in a structured, convenient, threadsafe way +* `JService `_ : Share external (stateful) services such as calibration constants and magnetic field maps +* `JCsvWriter `_ : Conveniently debug a JFactory by writing its generated JObjects to CSV + +Internal services +------------------------ + +* `JLoggingService `_ : Furnish the user with a logger already configured for that particular component +* `JParameterManager `_ : Furnish the user with parameters extracted from command line flags and configuration files + +Parallelism engine +---------------------- + +* `JProcessingController `_ : The interface which any parallelism engine must adhere to +* `JArrowProcessingController `_ : The entry point into the “Arrow” engine +* `JWorker `_ : Contains the loop for each worker thread, along with startup/shutdown logic and encapsulated worker state. 
+* `JScheduler `_ : Contains the logic for giving a worker a new assignment diff --git a/docs/tutorial.rst b/docs/tutorial.rst new file mode 100644 index 000000000..8043df6eb --- /dev/null +++ b/docs/tutorial.rst @@ -0,0 +1,515 @@ +Tutorial +======== + +Introduction +------------ + +Before we begin, we need to make sure that: + +* The JANA library is installed +* The ``JANA_HOME`` environment variable points to the installation directory +* Your ``$PATH`` contains ``$JANA_HOME/bin``. + +The installation process is described here. We can quickly test that our install was successful by running a built-in benchmarking/scaling test: + +.. code-block:: console + + jana -Pplugins=JTest -b # (cancel with Ctrl-C) + +We can understand this command as follows: + +* ``jana`` is the default command-line tool for launching JANA. If you would rather create your own executable which uses JANA internally, you are free to do so. + + +* The ``-P`` flag specifies a configuration parameter, e.g. ``-Pjana:debug_plugin_loading=1`` tells JANA to log detailed information about where the plugin loader went looking and what it found. + + +* ``plugins`` is the parameter specifying the names of plugins to load, as a comma-separated list (without spaces). By default JANA searches for these in ``$JANA_HOME/plugins``, although you can also specify full paths. + + +* ``-b`` tells JANA to run everything in benchmark mode, i.e. it slowly increases the number of threads while measuring the overall throughput. You can cancel processing at any time by pressing Ctrl-C. + + +Creating a JANA plugin +----------------------- + +With JANA working, we can now create our own plugin. JANA provides a script which generates code skeletons to help us get started. We shall generate a skeleton for a plugin named “QuickTutorial” as follows: + +.. code-block:: console + + jana-generate.py Plugin QuickTutorial + +This creates the following directory tree. 
By default, a minimal skeleton is created in a single file: ``QuickTutorial.cc``. This provides a JEventProcessor class as well as the plugin entry point. The generated files include lots of comments providing helpful hints on their use. +The resulting directory tree looks like this: + +.. code-block:: console + + QuickTutorial/ + ├── CMakeLists.txt + └── QuickTutorial.cc + +The ``jana-generate.py Plugin ...`` command provides some option flags as well that can be given at the end of the command line. Run ``jana-generate.py --help`` to see what they are. + +Integrating into an existing project +-------------------------------------- + +If you are working with an existing project such as eJANA or GlueX, then you don’t need the CMake project. All you need are the source files (e.g. QuickTutorial.cc): + +.. code-block:: console + + cp -r QuickTutorial $PATH_TO_PROJECT_SOURCE/src/plugins/QuickTutorial + +Be aware that you will have to manually tell the parent CMakeLists.txt to ``add_subdirectory(QuickTutorial)``. + +The rest of the tutorial assumes that we are using a standalone plugin. + +Building the plugin +-------------------- + +We build and run the plugin with the following: + +.. code-block:: console + + cd QuickTutorial + mkdir build + cd build + cmake3 .. + make install + jana -Pplugins=QuickTutorial + + +Adding an event source +------------------------ + +When we run this, we observe that JANA loads the plugin, opens our QuickTutorialProcessor, closes it again without processing any events, and exits. This is because we haven’t specified any event sources, so there is nothing to do. If we are running in the context of an existing project, we can pull in event sources from other plugins and observe our processor dutifully print out the event number. For now, however, we assume that we don’t have access to an event source, so we’ll create one ourselves. 
Our first event source will emit an infinite stream of random data, so we’ll name it RandomSource. + +.. code-block:: console + + cd .. + jana-generate.py JEventSource RandomSource + +This creates two files, RandomSource.cc and RandomSource.h, in the current directory. We’ll need to add them to ``CMakeLists.txt`` ourselves. Note that we retain complete control over our directory structure. In this tutorial, for simplicity, we’ll keep all .h and .cc files in the topmost directory. For larger projects, ``jana-generate.py Project MyProjectName`` creates a much more complex code skeleton. + +To use our new RandomSource as-is, we need to do the following: + +* Add ``RandomSource.cc`` and ``RandomSource.h`` to the ``add_library(...)`` line in ``CMakeLists.txt``. +* Register our ``RandomSource`` with JANA inside ``QuickTutorial.cc``. +* Rebuild the cmake project, rebuild the plugin target, and install. +* For reference, the modified line in ``CMakeLists.txt`` should look like: + +.. code-block:: console + + add_library(QuickTutorial_plugin SHARED QuickTutorial.cc RandomSource.cc RandomSource.h) + +The modified ``QuickTutorial.cc`` file needs to have the new ``RandomSource.h`` header included so it can instantiate an object and pass it over to the JApplication in the ``InitPlugin()`` routine. The bottom of the file should look like this: + +.. code-block:: console + + #include "RandomSource.h" // <- ADD THIS LINE (probably better to put this at top of file) + + extern "C" { + void InitPlugin(JApplication *app) { + InitJANAPlugin(app); + app->Add(new QuickTutorialProcessor); + app->Add(new RandomSource("random", app)); // <- ADD THIS LINE + } + } + +And finally, rebuild … + +.. code-block:: console + + cd build + make install + +When we run the QuickTutorial plugin now, we observe that ``QuickTutorialProcessor::Process`` is being called on every event. Note that ``Process`` is ‘seeing’ events slightly out-of-order. 
This is because there are multiple threads running ``Process``, which means that we have to be careful about how we organize the work we do inside there. This will be discussed in depth later. + +Configuring an event source +---------------------------- + +Because neither the source nor the processor is doing any ‘real work’, the events are being processed very quickly. To throttle the rate at which events are emitted to whatever frequency we like, we can add a delay inside ``GetEvent``. Perhaps we’d even like to set the emit frequency at runtime. First, we declare a member variable on ``RandomSource``, initializing it to our preferred default value: + +.. code-block:: console + + class RandomSource : public JEventSource { + int m_max_emit_freq_hz = 100; // <- ADD THIS LINE + + public: + RandomSource(std::string resource_name, JApplication* app); + virtual ~RandomSource() = default; + void Open() override; + void GetEvent(std::shared_ptr<JEvent>) override; + static std::string GetDescription(); + }; + +Next we sync the variable with the parameter manager inside ``Open``. We do this by calling ``JApplication::SetDefaultParameter``, which tells JANA to look among its configuration parameters for one called “random_source:max_emit_freq_hz”. If it finds one, it sets ``m_max_emit_freq_hz`` to the value it found. Otherwise, it leaves the variable alone. JANA remembers all such ‘default parameters’ along with their default values so that it can report them and generate config files. Note that we conventionally prefix our parameter names with the name of the requesting component or plugin. This helps prevent namespace collisions. + +.. 
code-block:: console + + void RandomSource::Open() { + JApplication* app = GetApplication(); // <- ADD THIS LINE + app->SetDefaultParameter("random_source:max_emit_freq_hz", // <- ADD THIS LINE + m_max_emit_freq_hz, // <- ADD THIS LINE + "Maximum event rate [Hz] for RandomSource"); // <- ADD THIS LINE + } + +We can now use the value of ``m_max_emit_freq_hz``, confident that it is consistent with the current runtime configuration: + +.. code-block:: console + + void RandomSource::GetEvent(std::shared_ptr<JEvent> event) { + + /// Configure event and run numbers + static size_t current_event_number = 1; + event->SetEventNumber(current_event_number++); + event->SetRunNumber(22); + + /// Slow down event source // <- ADD THIS LINE + auto delay_ms = std::chrono::milliseconds(1000/m_max_emit_freq_hz); // <- ADD THIS LINE + std::this_thread::sleep_for(delay_ms); // <- ADD THIS LINE + } + +Finally, we can set this parameter on the command line and observe the throughput change accordingly: + +.. code-block:: console + + jana -Pplugins=QuickTutorial -Prandom_source:max_emit_freq_hz=10 + + +Creating JObjects +------------------ + +So far ``RandomSource`` has been emitting events with no data attached. Now we’d like to have it emit randomly generated ‘Hit’ objects which simulate the readout from a detector. First, we need to set up our data model. Although we can insert pointers of any kind into our ``JEvent``, we strongly recommend using ``JObjects`` for reasons we will discuss later. + +.. code-block:: console + + cd src + jana-generate.py JObject Hit + + +JObjects are meant to be plain-old data. For this tutorial we pretend that our detector consists of a 3D grid of sensors, each of which measures some energy at some time. Note that we are declaring ``Hit`` to be a ``struct`` instead of a ``class``. This is because ``JObjects`` should be lightweight containers with no creation logic and no invariants which need to be encapsulated. 
JObjects are free to contain pointers to arbitrary data types and nested STL containers, but the recommended approach is to maintain a flat structure of primitives whenever possible. A JObject should conceptually resemble a row in a database table. + +.. code-block:: console + + struct Hit : public JObject { + int x; // Pixel coordinates + int y; // Pixel coordinates + double E; // Energy loss in GeV + double t; // Time in us + + // Make it possible to construct a Hit as a one-liner + Hit(int x, int y, double E, double t) : x(x), y(y), E(E), t(t) {}; + ... + +The only additional thing we need to fill out is the ``Summarize`` method, which aids in debugging and introspection. Basically, it tells JANA how to convert this JObject into a (structured) string. Inside ``Summarize``, we add each of our primitive member variables to the provided ``JObjectSummary``, along with the variable name, a C-style format specifier, and a description of what that variable means. JANA provides a ``NAME_OF`` macro so that if we rename a member variable using automatic refactoring tools, it will automatically update the string representation of the variable name as well. + + .. code-block:: console + + ... + void Summarize(JObjectSummary& summary) const override { + summary.add(x, NAME_OF(x), "%d", "Pixel coordinates centered around 0,0"); + summary.add(y, NAME_OF(y), "%d", "Pixel coordinates centered around 0,0"); + summary.add(E, NAME_OF(E), "%f", "Energy loss in GeV"); + summary.add(t, NAME_OF(t), "%f", "Time in us"); + } + } + + +Inserting JObjects into a JEvent +--------------------------------- + +Now it is time to have our ``RandomSource`` emit events which contain ``Hit`` objects. For the sake of brevity, we shall keep our hit generation logic as simple as possible: four hits which are constant. We can make our detector simulation arbitrarily complex, but be aware that ``JEventSources`` only run on a single thread by default, so complex simulations can reduce the event rate. 
Synchronizing ``GetEvent`` makes our job easier, however, because we can manipulate non-thread-local state such as file pointers or cursors or message buffers without having to worry about race conditions and deadlocks. + +The pattern we use for inserting data into the event is simple: For data of type ``T``, create a ``std::vector<T*>``, fill it, and pass it to ``JEvent::Insert``, which will move its contents directly into the ``JEvent`` object. If we want, when we insert we can also specify a tag, which is just a string. The purpose of a tag is to provide an extra level of granularity. For instance, if we have two detectors which both use the ``Hit`` datatype but have separate processing logic, we want to be able to access them independently. + +.. code-block:: console + + #include "Hit.h" + // ... + + void RandomSource::GetEvent(std::shared_ptr<JEvent> event) { + // ... + + /// Insert simulated data into event // ADD ME + + std::vector<Hit*> hits; // ADD ME + hits.push_back(new Hit(0, 0, 1.0, 0)); // ADD ME + hits.push_back(new Hit(0, 1, 1.0, 0)); // ADD ME + hits.push_back(new Hit(1, 0, 1.0, 0)); // ADD ME + hits.push_back(new Hit(1, 1, 1.0, 0)); // ADD ME + event->Insert(hits); // ADD ME + //event->Insert(hits, "fcal"); // If we used a tag + } + +We now have ``Hits`` in our event stream. The next section will cover how the ``QuickTutorialProcessor`` should access them. However, we don’t need to create a custom JEventProcessor to examine our event stream. JANA provides a small utility called ``JCsvWriter`` which creates a CSV file containing all ``JObjects`` of a certain type and tag. It can figure out how to do this thanks to ``JObject::Summarize``. You can examine the full code for ``JCsvWriter`` if you look under ``$JANA_HOME/include/JANA/JCsvWriter.h``. Be aware that ``JCsvWriter`` is very inefficient and should be used for debugging, not for production. + +To use ``JCsvWriter``, we merely register it with our ``JApplication``. 
If we run JANA now, a file ‘Hit.csv’ should appear in the current working directory. Note that the CSV file will be closed correctly even when we terminate JANA using Ctrl-C. + +.. code-block:: console + + #include <JANA/JCsvWriter.h> // ADD ME + #include "Hit.h" // ADD ME + // ... + + extern "C" { + void InitPlugin(JApplication* app) { + + InitJANAPlugin(app); + + app->Add(new QuickTutorialProcessor); + app->Add(new RandomSource("random", app)); + app->Add(new JCsvWriter<Hit>); // ADD ME + //app->Add(new JCsvWriter<Hit>("fcal")); // If we used a tag + } + } + +Writing our own JEventProcessor +-------------------------------- + +A JEventProcessor does two things: It calculates a bunch of intermediate results for each event (this part is done in parallel), and then it aggregates those results into a single output (this part is done sequentially). The canonical example is to calculate clusters, track candidates, and tracks separately for each event, and then produce a histogram using all of the tracks of all of the events. + +In this section, we are going to modify the automatically generated QuickTutorialProcessor to produce a heatmap that only uses hit data. We discuss how to structure more complicated calculations later. First, we add a quick-and-dirty heatmap member variable: + +.. code-block:: console + + class QuickTutorialProcessor : public JEventProcessor { + double m_heatmap[100][100]; // ADD ME + std::mutex m_mutex; + + public: + // ... + +The heatmap itself is a piece of shared state. We have to be careful because if multiple threads try to read and write to this shared state, they will conflict with each other and corrupt it. This means we have to protect who can access it and when. Only QuickTutorialProcessor should be able to access it, so we make it a private member. However, this is not enough. Only one thread running ``QuickTutorialProcessor::Process`` must be allowed to access it at a time, which we enforce using ``m_mutex``. Let’s look at how this is used: + +.. 
code-block:: console + + #include "Hit.h" // ADD ME + + void QuickTutorialProcessor::Process(const std::shared_ptr<const JEvent> &event) { + + /// Do everything we can in parallel + /// Warning: We are only allowed to use local variables and `event` here + auto hits = event->Get<Hit>(); // ADD ME + + /// Lock mutex + std::lock_guard<std::mutex> lock(m_mutex); + + /// Do the rest sequentially + /// Now we are free to access shared state such as m_heatmap + for (const Hit* hit : hits) { // ADD ME + m_heatmap[hit->x][hit->y] += hit->E; // ADD ME + } + } + +As you can see, we do everything we can in parallel, before we lock our mutex. All we are doing for now is retrieving the ``Hit`` objects we ``Inserted`` earlier; however, as we will later see, virtually all of our per-event computations will be called from here. Remember that we should only access local variables and data retrieved from a ``JEvent`` at first, whereas after we lock the mutex, we are free to access our private member variables as well. + +We proceed to define our ``Init`` and ``Finish`` methods. The former zeroes out each bucket and the latter prints the heatmap to standard out as ASCII art. Note that if we want to output our results to a file all at once, we should do so in ``Finish``. ``Finish`` will be called even if we forcibly terminate JANA with Ctrl-C. On the other hand, if we wanted to write to a file incrementally like we do with JCsvWriter, we can open it in ``Init``, access it in ``Process`` inside the lock, and close it in ``Finish``. + +.. 
code-block:: console + + void QuickTutorialProcessor::Init() { + LOG << "QuickTutorialProcessor::Init: Initializing heatmap" << LOG_END; + + for (int i=0; i<100; ++i) { + for (int j=0; j<100; ++j) { + m_heatmap[i][j] = 0.0; + } + } + } + + void QuickTutorialProcessor::Finish() { + LOG << "QuickTutorialProcessor::Finish: Displaying heatmap" << LOG_END; + + double min_value = m_heatmap[0][0]; + double max_value = m_heatmap[0][0]; + + for (int i=0; i<100; ++i) { + for (int j=0; j<100; ++j) { + double value = m_heatmap[i][j]; + if (min_value > value) min_value = value; + if (max_value < value) max_value = value; + } + } + if (min_value != max_value) { + char ramp[] = " .:-=+*#%@"; + for (int i=0; i<100; ++i) { + for (int j=0; j<100; ++j) { + int shade = int((m_heatmap[i][j] - min_value)/(max_value - min_value) * 9); + std::cout << ramp[shade]; + } + std::cout << std::endl; + } + } + } + + +Organizing computations using JFactories +----------------------------------------- + +Just as JANA uses JObjects to organize experiment data, it uses JFactories to organize the algorithms for processing said data. + +JFactories are slightly different from the ‘Factory’ design patterns: rather than abstracting away the subclass of the object being constructed, JFactories abstract away the multiplicity instead. This is a good match for nuclear and high-energy physics, where m inputs produce n outputs and n isn’t always known until after the algorithm has finished. JFactories confer other benefits as well: + +* Algorithms can be swapped at runtime +* Results are calculated only if they are needed (‘lazy’) +* Results are only calculated once and then reused as needed (‘memoized’) +* JFactories are agnostic as to whether their inputs were calculated by another JFactory or inserted by a JEventSource +* Different paths for deriving a result may come into play depending on the source data +* For this example, we create a simple algorithm computing clusters, given hit data. 
We start by generating a cluster JObject: + +``jana-generate.py JObject Cluster`` + +We fill out the ``Cluster.h`` skeleton, defining a cluster to be the coordinates of its center along with the total energy and time interval. Note that using JObjects helps keep our domain model malleable, so we can evolve it over time as we learn more. + +.. code-block:: console + + struct Cluster : public JObject { + double x_center; // Pixel coordinates centered around 0,0 + double y_center; // Pixel coordinates centered around 0,0 + double E_tot; // Energy loss in GeV + double t_begin; // Time in us + double t_end; // Time in us + + Cluster(double x_center, double y_center, double E_tot, double t_begin, double t_end) + : x_center(x_center), y_center(y_center), E_tot(E_tot), t_begin(t_begin), t_end(t_end) {}; + + void Summarize(JObjectSummary& summary) const override { + summary.add(x_center, NAME_OF(x_center), "%f", "Pixel coords <- [0,80)"); + summary.add(y_center, NAME_OF(y_center), "%f", "Pixel coords <- [0,24)"); + summary.add(E_tot, NAME_OF(E_tot), "%f", "Energy loss in GeV"); + summary.add(t_begin, NAME_OF(t_begin), "%f", "Earliest observed time in us"); + summary.add(t_end, NAME_OF(t_end), "%f", "Latest observed time in us"); + } + ... + } + +Now we generate a JFactory which will compute n Clusters given m Hits. Note that we need to provide both the classname of our factory and the classname of the JObject it produces. + +``jana-generate.py JFactory SimpleClusterFactory Cluster`` + +The heart of a JFactory is the function ``Process``, where we take an event, extract whatever inputs we need by calling ``JEvent::Get`` or one of its variants, produce some number of outputs, and publish them by calling ``JFactory::Set``. These outputs will stay cached as long as the current event is in flight and get cleared afterwards. To keep things really simple, our example shall assume there is only one cluster and all of the hits associated with this event belong to it. + +.. 
code-block:: console + + #include "Hit.h" + // ... + + void SimpleClusterFactory::Process(const std::shared_ptr &event) { + + auto hits = event->Get(); + + auto cluster = new Cluster(0,0,0,0,0); + for (auto hit : hits) { + cluster->x_center += hit->x; + cluster->y_center += hit->y; + cluster->E_tot += hit->E; + if (cluster->t_begin > hit->t) cluster->t_begin = hit->t; + if (cluster->t_end < hit->t) cluster->t_end = hit->t; + } + cluster->x_center /= hits.size(); + cluster->y_center /= hits.size(); + + std::vector results; + results.push_back(cluster); + Set(results); + } + +For our tutorial, we don’t need to do anything inside ``Init`` or ``ChangeRun``. Usually, these are useful for collecting statistics, or when the algorithm depends on calibration constants which we want to cache. We are free to access member variables without locking a mutex because a JFactory is assigned to at most one thread at a time. + +Although JFactories are relatively simple, there are several important details. First, because each instance is assigned to at most one thread, it won’t see the entire event stream. Second, there will be at least as many instances of each JFactory in existence as threads, and possibly more depending on how JANA is configured, so ``Init`` and ``ChangeRun`` should be fast. Third, although it is tempting to use static variables to share state between different instances of the same JFactory, this practice is discouraged. That state should live in a JService instead. + +Next, we register our ``SimpleClusterFactory`` with our JApplication. Because JANA will need arbitrarily many instances of these, we pass in a ``JFactoryGenerator`` which knows how to create a ``SimpleClusterFactory``. As long as our JFactory has a zero-argument constructor, this is easy: + +.. code-block:: console + + #include // ADD ME + #include "SimpleClusterFactory.h" // ADD ME + // ... 
+ + extern "C" { + void InitPlugin(JApplication* app) { + + InitJANAPlugin(app); + + app->Add(new QuickTutorialProcessor); + app->Add(new RandomSource("random", app)); + app->Add(new JCsvWriter()); + app->Add(new JFactoryGeneratorT); // ADD ME + } + } + +We are now free to modify ``QuickTutorialProcessor`` (or create a new ``JEventProcessor``) which histograms clusters instead of hits. Crucially, ``JEvent::Get`` doesn’t care whether the ``JObjects`` were Inserted by an event source or whether they were ``Set`` by a ``JFactory``. The interface for retrieving them is the same either way. + +Reading files using a JEventSource +----------------------------------- + +Earlier we created a ``JEventSource`` which we added directly to the ``JApplication``. This works well for simple cases but becomes cumbersome due to the amount of configuration needed: First we’d have to tell the plugin which ``JEventSource`` to register, then tell that source which files to open, and we’d have to do this for each ``JEventSource`` separately. Instead, JANA gives us a cleaner option tailored to our workflow: we specify a set of input URIs (a.k.a. file paths or sockets) and let JANA decide which JEventSource to instantiate for each. Thus we prefer to call JANA like this: + +.. code-block:: console + + jana -PQuickTutorial,CsvSourcePlugin,RootSourcePlugin path/to/file1.csv path/to/file2.root + +In order to make this happen, we need to define a ``JEventSourceGenerator``. This is conceptually similar to the ``JFactoryGenerator`` we mentioned earlier, with one important addition: a method which reports back the likelihood that the underlying event source can make sense of that resource. Let’s remove the line where we added the ``RandomSource`` instance directly to the JApplication, and replace it with a corresponding ``JEventSourceGenerator``: + +.. 
code-block:: console + + #include + #include + #include // ADD ME + #include + + #include "Hit.h" + #include "RandomSource.h" + #include "QuickTutorialProcessor.h" + #include "SimpleClusterFactory.h" + + extern "C" { + void InitPlugin(JApplication* app) { + + InitJANAPlugin(app); + + app->Add(new QuickTutorialProcessor); + // app->Add(new RandomSource("random", app)); // REMOVE ME + app->Add(new JEventSourceGeneratorT); // ADD ME + app->Add(new JCsvWriter()); + app->Add(new JFactoryGeneratorT); + } + } + +By default, ``JEventSourceGeneratorT`` will report a confidence of 0.1 that it can open any resource it is given. Let’s make this more realistic: suppose we want to use this event source if and only if the resource name is “random”. In ``RandomSource.h``, observe that ``jana-generate.py`` already declared the following for us: + +.. code-block:: console + + template <> + double JEventSourceGeneratorT::CheckOpenable(std::string); + + +We fill out the definition in ``RandomSource.cc``: + +.. code-block:: console + + template <> + double JEventSourceGeneratorT::CheckOpenable(std::string resource_name) { + return (resource_name == "random") ? 1.0 : 0.0; + } + +Note that ``JEventSourceGenerator`` puts some constraints on our ``JEventSource``. Specifically, we need to note that: + +* Our ``JEventSource`` needs a two-argument constructor which accepts a string containing the resource name, and a ``JApplication`` pointer. + +* Our ``JEventSource`` needs a static method ``GetDescription``, to help JANA report to the user which sources are available and which ended up being chosen. + +* In case we need to override JANA’s preferred JEventSource for some resource, we can specify the typename of the event source we’d rather use instead via the configuration parameter ``event_source_type``. + +* When we implement ``Open`` for an event source that reads a file, we get the filename from ``JEventSource::GetResourceName()``. 
+ +Exercises for the reader +------------------------- + +* Create a new ``JEventProcessor`` which generates a heatmap of ``Clusters`` instead of ``Hits``. + +* Create a ``BetterClusterFactory`` which handles multiple clusters per event. Bonus points if it is a lightweight wrapper around an industrial-strength clustering algorithm. Inside ``InitPlugin``, use a configuration parameter to decide which ``JFactoryT`` gets registered with the ``JApplication``. + +* Use tags to register both ``ClusterFactories`` with the ``JApplication``. Create a ``JEventProcessor`` which asks for the results from both algorithms and compares their results. + +* Create a ``CsvFileSource`` which reads the CSV file generated from the ``JCsvWriter``. For ``CheckOpenable``, read the first line of the file and check whether the column headers match what we’d expect for a table of ``Hits``. Verify that we get the same histograms whether we use the ``RandomSource`` or the ``CsvFileSource``. diff --git a/docs/usage.rst b/docs/usage.rst new file mode 100644 index 000000000..d007b84d5 --- /dev/null +++ b/docs/usage.rst @@ -0,0 +1,40 @@ +Usage +===== + +.. autosummary:: + :toctree: generated + +.. _installation: + +Installation +------------ + +To use Lumache, first install it using pip: + +.. code-block:: console + + (.venv) $ pip install lumache + +Creating recipes +---------------- + +To retrieve a list of random ingredients, +you can use the ``lumache.get_random_ingredients()`` function: + +.. autofunction:: lumache.get_random_ingredients + +The ``kind`` parameter should be either ``"meat"``, ``"fish"``, +or ``"veggies"``. Otherwise, :py:func:`lumache.get_random_ingredients` +will raise an exception. + +.. 
autoexception:: lumache.InvalidKindError + +For example: + +>>> import lumache +>>> lumache.get_random_ingredients() +['shells', 'gorgonzola', 'parsley'] + +`Test `_ + + diff --git a/lumache.py b/lumache.py new file mode 100644 index 000000000..3ea7ce95c --- /dev/null +++ b/lumache.py @@ -0,0 +1,23 @@ +""" +Lumache - Python library for cooks and food lovers. +""" + +__version__ = "0.1.0" + + +class InvalidKindError(Exception): + """Raised if the kind is invalid.""" + pass + + +def get_random_ingredients(kind=None): + """ + Return a list of random ingredients as strings. + + :param kind: Optional "kind" of ingredients. + :type kind: str or None + :raise lumache.InvalidKindError: If the kind is invalid. + :return: The ingredients list. + :rtype: list[str] + """ + return ["shells", "gorgonzola", "parsley"] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..14a2dda5d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "lumache" +authors = [{name = "Graziella", email = "graziella@lumache"}] +dynamic = ["version", "description"]