From d95276304b3609936d6e77f2f641b783d9e611ef Mon Sep 17 00:00:00 2001
From: Jover <joverlee521@gmail.com>
Date: Mon, 11 Jul 2022 12:44:24 -0700
Subject: [PATCH 01/11] Start the shared `argparse_` module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This `argparse_` module is expected to house custom helpers for
interacting with and extending the `argparse` standard library.

Move `add_default_command()` to a shared `argparse_` module so that
it can be used by commands without having circular import issues.

I named the module `argparse_` (with an underscore) to avoid overwriting
the `argparse` standard library within `augur/__init__.py`. @tsibley
found the relevant docs¹ to explain this behavior.

¹ https://docs.python.org/3/reference/import.html#submodules
---
 augur/__init__.py  | 13 +------------
 augur/argparse_.py | 16 ++++++++++++++++
 augur/titers.py    |  3 +--
 3 files changed, 18 insertions(+), 14 deletions(-)
 create mode 100644 augur/argparse_.py

diff --git a/augur/__init__.py b/augur/__init__.py
index 31ee68261..f7fa538d9 100644
--- a/augur/__init__.py
+++ b/augur/__init__.py
@@ -14,6 +14,7 @@
 from .errors import AugurError
 from .io import print_err
 from .utils import first_line
+from .argparse_ import add_default_command
 
 recursion_limit = os.environ.get("AUGUR_RECURSION_LIMIT")
 if recursion_limit:
@@ -99,18 +100,6 @@ def run(argv):
         sys.exit(2)
 
 
-def add_default_command(parser):
-    """
-    Sets the default command to run when none is provided.
-    """
-    class default_command():
-        def run(args):
-            parser.print_help()
-            return 2
-
-    parser.set_defaults(__command__ = default_command)
-
-
 def add_version_alias(parser):
     """
     Add --version as a (hidden) alias for the version command.
diff --git a/augur/argparse_.py b/augur/argparse_.py
new file mode 100644
index 000000000..b742067ef
--- /dev/null
+++ b/augur/argparse_.py
@@ -0,0 +1,16 @@
+"""
+Custom helpers for the argparse standard library.
+"""
+
+
+def add_default_command(parser):
+    """
+    Sets the default command to run when none is provided.
+    """
+    class default_command():
+        def run(args):
+            parser.print_help()
+            return 2
+
+    parser.set_defaults(__command__ = default_command)
+
diff --git a/augur/titers.py b/augur/titers.py
index a2b093889..5ec762dee 100644
--- a/augur/titers.py
+++ b/augur/titers.py
@@ -10,11 +10,10 @@
 from .reconstruct_sequences import load_alignments
 from .titer_model import InsufficientDataException
 from .utils import read_node_data, write_json
+from .argparse_ import add_default_command
 
 
 def register_arguments(parser):
-    from . import add_default_command
-
     subparsers = parser.add_subparsers()
     add_default_command(parser)
 

From 7e4b216b4df60a1a1b9b7225419140ec3752b7e6 Mon Sep 17 00:00:00 2001
From: Jover <joverlee521@gmail.com>
Date: Mon, 11 Jul 2022 16:47:27 -0700
Subject: [PATCH 02/11] Allow commands to add their own subparser
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of just having each command add its own arguments, allow
it to add its own subparser. This gives each command complete control
of its own parser and its subparsers to customize them as needed.
This allows each command to define its own command name instead
of parsing the command name from the module name, which is important for
the following commit to rename the `import` module.

This commit also introduces new behavior: a command that does not have
its own `run` function prints its help when called.

Note that for a majority of the commands, `register_arguments` was
updated to `register_parser`. However, for `align`, `filter` and `mask`,
I kept the `register_arguments` functions since they are used in
unit testing.

This change came from discussion of how to extract the boilerplate for
registering subparsers so that commands can use the same function to
register their own subcommands.¹

¹ https://github.com/nextstrain/augur/pull/1002#discussion_r917222854
---
 augur/__init__.py              | 37 +++-------------------------------
 augur/align.py                 | 14 ++++++++++++-
 augur/ancestral.py             |  6 ++++--
 augur/argparse_.py             | 34 +++++++++++++++++++++++++++++++
 augur/clades.py                |  6 ++++--
 augur/distance.py              |  6 ++++--
 augur/export.py                |  5 ++++-
 augur/filter.py                | 14 +++++++++++--
 augur/frequencies.py           |  6 ++++--
 augur/import.py                |  5 ++++-
 augur/index.py                 |  6 ++++--
 augur/lbi.py                   |  6 ++++--
 augur/mask.py                  | 14 +++++++++++--
 augur/measurements.py          |  6 ++++--
 augur/parse.py                 |  5 ++++-
 augur/reconstruct_sequences.py |  6 ++++--
 augur/refine.py                |  7 +++++--
 augur/sequence_traits.py       |  6 ++++--
 augur/titers.py                |  6 ++++--
 augur/traits.py                |  7 +++++--
 augur/translate.py             |  6 ++++--
 augur/tree.py                  |  6 ++++--
 augur/validate.py              |  6 +++++-
 augur/version.py               |  6 +++---
 24 files changed, 152 insertions(+), 74 deletions(-)

diff --git a/augur/__init__.py b/augur/__init__.py
index f7fa538d9..cafeed1eb 100644
--- a/augur/__init__.py
+++ b/augur/__init__.py
@@ -3,7 +3,6 @@
 """
 
 import argparse
-import re
 import os
 import sys
 import importlib
@@ -13,8 +12,7 @@
 
 from .errors import AugurError
 from .io import print_err
-from .utils import first_line
-from .argparse_ import add_default_command
+from .argparse_ import add_command_subparsers, add_default_command
 
 recursion_limit = os.environ.get("AUGUR_RECURSION_LIMIT")
 if recursion_limit:
@@ -52,26 +50,11 @@ def make_parser():
         prog        = "augur",
         description = "Augur: A bioinformatics toolkit for phylogenetic analysis.")
 
-    subparsers = parser.add_subparsers()
-
     add_default_command(parser)
     add_version_alias(parser)
 
-    for command in COMMANDS:
-        # Add a subparser for each command.
-        subparser = subparsers.add_parser(
-            command_name(command),
-            help        = first_line(command.__doc__),
-            description = command.__doc__)
-
-        subparser.set_defaults(__command__ = command)
-
-        # Let the command register arguments on its subparser.
-        command.register_arguments(subparser)
-
-        # Use the same formatting class for every command for consistency.
-        # Set here to avoid repeating it in every command's register_parser().
-        subparser.formatter_class = argparse.ArgumentDefaultsHelpFormatter
+    subparsers = parser.add_subparsers()
+    add_command_subparsers(subparsers, COMMANDS)
 
     return parser
 
@@ -118,17 +101,3 @@ def __call__(self, *args, **kwargs):
         nargs  = 0,
         help   = argparse.SUPPRESS,
         action = run_version_command)
-
-
-def command_name(command):
-    """
-    Returns a short name for a command module.
-    """
-
-    def remove_prefix(prefix, string):
-        return re.sub('^' + re.escape(prefix), '', string)
-
-    package     = command.__package__
-    module_name = command.__name__
-
-    return remove_prefix(package, module_name).lstrip(".").replace("_", "-")
diff --git a/augur/align.py b/augur/align.py
index 9a27c92f4..96f8734a5 100644
--- a/augur/align.py
+++ b/augur/align.py
@@ -7,7 +7,7 @@
 import numpy as np
 from Bio import AlignIO, SeqIO, Seq, Align
 from .io import run_shell_command, shquote
-from .utils import nthreads_value
+from .utils import first_line, nthreads_value
 from collections import defaultdict
 
 class AlignmentError(Exception):
@@ -17,6 +17,11 @@ class AlignmentError(Exception):
     pass
 
 def register_arguments(parser):
+    """
+    Add arguments to parser.
+    Kept as a separate function than `register_parser` to continue to support
+    unit tests that use this function to create argparser.
+    """
     parser.add_argument('--sequences', '-s', required=True, nargs="+", metavar="FASTA", help="sequences to align")
     parser.add_argument('--output', '-o', default="alignment.fasta", help="output file (default: %(default)s)")
     parser.add_argument('--nthreads', type=nthreads_value, default=1,
@@ -29,6 +34,13 @@ def register_arguments(parser):
     parser.add_argument('--existing-alignment', metavar="FASTA", default=False, help="An existing alignment to which the sequences will be added. The ouput alignment will be the same length as this existing alignment.")
     parser.add_argument('--debug', action="store_true", default=False, help="Produce extra files (e.g. pre- and post-aligner files) which can help with debugging poor alignments.")
 
+
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("align", help=first_line(__doc__))
+    register_arguments(parser)
+    return parser
+
+
 def prepare(sequences, existing_aln_fname, output, ref_name, ref_seq_fname):
     """Prepare the sequences, existing alignment, and reference sequence for alignment.
 
diff --git a/augur/ancestral.py b/augur/ancestral.py
index 74d6945d7..a95d07156 100644
--- a/augur/ancestral.py
+++ b/augur/ancestral.py
@@ -7,7 +7,7 @@
 from Bio import Phylo, SeqIO
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord
-from .utils import read_tree, InvalidTreeError, write_json, get_json_name
+from .utils import first_line, read_tree, InvalidTreeError, write_json, get_json_name
 from treetime.vcf_utils import read_vcf, write_vcf
 from collections import defaultdict
 
@@ -117,7 +117,8 @@ def collect_mutations_and_sequences(tt, infer_tips=False, full_sequences=False,
     return {"nodes": data, "mask": mask}
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("ancestral", help=first_line(__doc__))
     parser.add_argument('--tree', '-t', required=True, help="prebuilt Newick")
     parser.add_argument('--alignment', '-a', help="alignment in fasta or VCF format")
     parser.add_argument('--output-node-data', type=str, help='name of JSON file to save mutations and ancestral sequences to')
@@ -133,6 +134,7 @@ def register_arguments(parser):
                                 help='infer nucleotides at ambiguous (N,W,R,..) sites on tip sequences and replace with most likely state.')
     parser.add_argument('--keep-overhangs', action="store_true", default=False,
                                 help='do not infer nucleotides for gaps (-) on either side of the alignment')
+    return parser
 
 def run(args):
     # check alignment type, set flags, read in if VCF
diff --git a/augur/argparse_.py b/augur/argparse_.py
index b742067ef..6d1e7915a 100644
--- a/augur/argparse_.py
+++ b/augur/argparse_.py
@@ -1,6 +1,7 @@
 """
 Custom helpers for the argparse standard library.
 """
+from argparse import ArgumentDefaultsHelpFormatter
 
 
 def add_default_command(parser):
@@ -14,3 +15,36 @@ def run(args):
 
     parser.set_defaults(__command__ = default_command)
 
+
+def add_command_subparsers(subparsers, commands):
+    """
+    Add subparsers for each command module.
+
+    Parameters
+    ----------
+    subparsers: argparse._SubParsersAction
+        The special subparsers action object created by the parent parser
+        via `parser.add_subparsers()`.
+
+    commands: list[ModuleType]
+        A list of modules that are commands that require their own subparser.
+        Each module is required to have a `register_parser` function to add its own
+        subparser and arguments.
+    """
+    for command in commands:
+        # Allow each command to register its own subparser
+        subparser = command.register_parser(subparsers)
+
+        # Allows us to run commands directly with `args.__command__.run()`
+        subparser.set_defaults(__command__ = command)
+
+        # Use the same formatting class for every command for consistency.
+        # Set here to avoid repeating it in every command's register_parser().
+        subparser.formatter_class = ArgumentDefaultsHelpFormatter
+
+        if not subparser.description and command.__doc__:
+            subparser.description = command.__doc__
+
+        # If a command doesn't have its own run() function, then print its help when called.
+        if not getattr(command, "run", None):
+            add_default_command(subparser)
diff --git a/augur/clades.py b/augur/clades.py
index c040ab18e..c66724b10 100644
--- a/augur/clades.py
+++ b/augur/clades.py
@@ -9,7 +9,7 @@
 from collections import defaultdict
 import networkx as nx
 from itertools import islice
-from .utils import get_parent_name_by_child_name_for_tree, read_node_data, write_json, get_json_name
+from .utils import first_line, get_parent_name_by_child_name_for_tree, read_node_data, write_json, get_json_name
 
 def read_in_clade_definitions(clade_file):
     '''
@@ -248,12 +248,14 @@ def get_reference_sequence_from_root_node(all_muts, root_name):
     return ref
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("clades", help=first_line(__doc__))
     parser.add_argument('--tree', help="prebuilt Newick -- no tree will be built if provided")
     parser.add_argument('--mutations', nargs='+', help='JSON(s) containing ancestral and tip nucleotide and/or amino-acid mutations ')
     parser.add_argument('--reference', nargs='+', help='fasta files containing reference and tip nucleotide and/or amino-acid sequences ')
     parser.add_argument('--clades', type=str, help='TSV file containing clade definitions by amino-acid')
     parser.add_argument('--output-node-data', type=str, help='name of JSON file to save clade assignments to')
+    return parser
 
 
 def run(args):
diff --git a/augur/distance.py b/augur/distance.py
index e7ab043c0..d41be670e 100644
--- a/augur/distance.py
+++ b/augur/distance.py
@@ -159,7 +159,7 @@
 
 from .frequency_estimators import timestamp_to_float
 from .reconstruct_sequences import load_alignments
-from .utils import annotate_parents_for_tree, read_node_data, write_json
+from .utils import annotate_parents_for_tree, first_line, read_node_data, write_json
 
 
 def read_distance_map(map_file):
@@ -626,7 +626,8 @@ def get_distances_to_all_pairs(tree, sequences_by_node_and_gene, distance_map, e
     return distances_by_node
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("distance", help=first_line(__doc__))
     parser.add_argument("--tree", help="Newick tree", required=True)
     parser.add_argument("--alignment", nargs="+", help="sequence(s) to be used, supplied as FASTA files", required=True)
     parser.add_argument('--gene-names', nargs="+", type=str, help="names of the sequences in the alignment, same order assumed", required=True)
@@ -637,6 +638,7 @@ def register_arguments(parser):
     parser.add_argument("--earliest-date", help="earliest date at which samples are considered to be from previous seasons (e.g., 2019-01-01). This date is only used in pairwise comparisons. If omitted, all samples prior to the latest date will be considered.")
     parser.add_argument("--latest-date", help="latest date at which samples are considered to be from previous seasons (e.g., 2019-01-01); samples from any date after this are considered part of the current season")
     parser.add_argument("--output", help="JSON file with calculated distances stored by node name and attribute name", required=True)
+    return parser
 
 
 def run(args):
diff --git a/augur/export.py b/augur/export.py
index d4dcd9371..09d7e114b 100644
--- a/augur/export.py
+++ b/augur/export.py
@@ -1,11 +1,13 @@
 """
 Export JSON files suitable for visualization with auspice.
 """
+from .utils import first_line
 from .export_v1 import run_v1, register_arguments_v1
 from .export_v2 import run_v2, register_arguments_v2
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("export", help=first_line(__doc__))
     metavar_msg ="Augur export now needs you to define the JSON version " + \
                  "you want, e.g. `augur export v2`."
     subparsers = parser.add_subparsers(title="JSON SCHEMA",
@@ -13,6 +15,7 @@ def register_arguments(parser):
     subparsers.required = True
     register_arguments_v2(subparsers)
     register_arguments_v1(subparsers)
+    return parser
 
 
 def run(args):
diff --git a/augur/filter.py b/augur/filter.py
index effa1b998..365b690bf 100644
--- a/augur/filter.py
+++ b/augur/filter.py
@@ -21,7 +21,7 @@
 from .errors import AugurError
 from .index import index_sequences, index_vcf
 from .io import open_file, read_metadata, read_sequences, write_sequences, is_vcf as filename_is_vcf, write_vcf
-from .utils import read_strains
+from .utils import first_line, read_strains
 
 comment_char = '#'
 
@@ -30,8 +30,12 @@
     "non_nucleotide",
 )
 
-
 def register_arguments(parser):
+    """
+    Add arguments to parser.
+    Kept as a separate function than `register_parser` to continue to support
+    unit tests that use this function to create argparser.
+    """
     input_group = parser.add_argument_group("inputs", "metadata and sequences to be filtered")
     input_group.add_argument('--metadata', required=True, metavar="FILE", help="sequence metadata, as CSV or TSV")
     input_group.add_argument('--sequences', '-s', help="sequences in FASTA or VCF format")
@@ -88,6 +92,12 @@ def register_arguments(parser):
     parser.set_defaults(probabilistic_sampling=True)
 
 
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("filter", help=first_line(__doc__))
+    register_arguments(parser)
+    return parser
+
+
 class FilterException(AugurError):
     """Representation of an error that occurred during filtering.
     """
diff --git a/augur/frequencies.py b/augur/frequencies.py
index fd7696dcd..4a62b0611 100644
--- a/augur/frequencies.py
+++ b/augur/frequencies.py
@@ -11,10 +11,11 @@
 from .frequency_estimators import AlignmentKdeFrequencies, TreeKdeFrequencies, TreeKdeFrequenciesError
 from .dates import numeric_date_type, SUPPORTED_DATE_HELP_TEXT, get_numerical_dates
 from .io import read_metadata
-from .utils import read_node_data, write_json
+from .utils import first_line, read_node_data, write_json
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("frequencies", help=first_line(__doc__))
     # Shared arguments
     parser.add_argument('--method', choices=["diffusion", "kde"], required=True,
                         help="method by which frequencies should be estimated")
@@ -71,6 +72,7 @@ def register_arguments(parser):
                         help="format to export frequencies JSON depending on the viewing interface")
     parser.add_argument('--output', '-o', type=str,
                         help='JSON file to save estimated frequencies to')
+    return parser
 
 
 def format_frequencies(freq):
diff --git a/augur/import.py b/augur/import.py
index 939b6377b..fb98b2fcd 100644
--- a/augur/import.py
+++ b/augur/import.py
@@ -1,14 +1,17 @@
 """
 Import analyses into augur pipeline from other systems
 """
+from .utils import first_line
 from .import_beast import run_beast, register_arguments_beast
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("import", help=first_line(__doc__))
     metavar_msg = "Import analyses into augur pipeline from other systems"
     subparsers = parser.add_subparsers(title="TYPE",
                                        metavar=metavar_msg)
     subparsers.required = True
     register_arguments_beast(subparsers)
+    return parser
 
 def run(args):
     if "beast" in args:
diff --git a/augur/index.py b/augur/index.py
index 7abb53160..4e3328b1e 100644
--- a/augur/index.py
+++ b/augur/index.py
@@ -9,12 +9,14 @@
 import csv
 
 from .io import open_file, read_sequences, is_vcf, read_vcf
+from .utils import first_line
 
-
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("index", help=first_line(__doc__))
     parser.add_argument('--sequences', '-s', required=True, help="sequences in FASTA or VCF formats. Augur will summarize the content of FASTA sequences and only report the names of strains found in a given VCF.")
     parser.add_argument('--output', '-o', help="tab-delimited file containing the number of bases per sequence in the given file. Output columns include strain, length, and counts for A, C, G, T, N, other valid IUPAC characters, ambiguous characters ('?' and '-'), and other invalid characters.", required=True)
     parser.add_argument('--verbose', '-v', action="store_true", help="print index statistics to stdout")
+    return parser
 
 
 def index_vcf(vcf_path, index_path):
diff --git a/augur/lbi.py b/augur/lbi.py
index e26538b92..54fa5ab91 100644
--- a/augur/lbi.py
+++ b/augur/lbi.py
@@ -5,7 +5,7 @@
 from collections import defaultdict
 import json
 import numpy as np
-from .utils import write_json
+from .utils import first_line, write_json
 
 
 def select_nodes_in_season(tree, timepoint, time_window=0.6):
@@ -79,7 +79,8 @@ def calculate_LBI(tree, attr="lbi", tau=0.4, transform=lambda x:x, normalize=Tru
         setattr(node, attr, node.attr[attr])
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("lbi", help=first_line(__doc__))
     parser.add_argument("--tree", help="Newick tree", required=True)
     parser.add_argument("--branch-lengths", help="JSON with branch lengths and internal node dates estimated by TreeTime", required=True)
     parser.add_argument("--output", help="JSON file with calculated distances stored by node name and attribute name", required=True)
@@ -87,6 +88,7 @@ def register_arguments(parser):
     parser.add_argument("--tau", nargs="+", type=float, help="tau value(s) defining the neighborhood of each clade", required=True)
     parser.add_argument("--window", nargs="+", type=float, help="time window(s) to calculate LBI across", required=True)
     parser.add_argument("--no-normalization", action="store_true", help="disable normalization of LBI by the maximum value")
+    return parser
 
 
 def run(args):
diff --git a/augur/mask.py b/augur/mask.py
index afa02846f..247ef0226 100644
--- a/augur/mask.py
+++ b/augur/mask.py
@@ -11,7 +11,7 @@
 from Bio.Seq import MutableSeq
 
 from .io import open_file, read_sequences, write_sequences, run_shell_command, shquote, is_vcf
-from .utils import load_mask_sites, VALID_NUCLEOTIDES
+from .utils import first_line, load_mask_sites, VALID_NUCLEOTIDES
 
 def get_chrom_name(vcf_file):
     """Read the CHROM field from the first non-header line of a vcf file.
@@ -165,8 +165,12 @@ def mask_fasta(mask_sites, in_file, out_file, mask_from_beginning=0, mask_from_e
         "fasta"
     )
 
-
 def register_arguments(parser):
+    """
+    Add arguments to parser.
+    Kept as a separate function than `register_parser` to continue to support
+    unit tests that use this function to create argparser.
+    """
     parser.add_argument('--sequences', '-s', required=True, help="sequences in VCF or FASTA format")
     parser.add_argument('--mask', dest="mask_file", required=False, help="locations to be masked in either BED file format, DRM format, or one 1-indexed site per line.")
     parser.add_argument('--mask-from-beginning', type=int, default=0, help="FASTA Only: Number of sites to mask from beginning")
@@ -177,6 +181,12 @@ def register_arguments(parser):
     parser.add_argument('--no-cleanup', dest="cleanup", action="store_false",
                         help="Leave intermediate files around. May be useful for debugging")
 
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("mask", help=first_line(__doc__))
+    register_arguments(parser)
+    return parser
+
+
 def run(args):
     '''
     Mask specified sites from the VCF or FASTA.
diff --git a/augur/measurements.py b/augur/measurements.py
index 9cc78f1ad..27bea775f 100644
--- a/augur/measurements.py
+++ b/augur/measurements.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import sys
 
-from .utils import write_json, HideAsFalseAction
+from .utils import first_line, write_json, HideAsFalseAction
 from .validate import (
     measurements as read_measurements_json,
     measurements_collection_config as read_collection_config_json,
@@ -198,7 +198,8 @@ def concat_measurements(args):
         sys.exit(1)
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("measurements", help=first_line(__doc__))
     subparsers = parser.add_subparsers(dest='subcommand')
     subparsers.required = True
 
@@ -292,6 +293,7 @@ def register_arguments(parser):
              "If not provided, the first collection of the first JSON file will be displayed")
     concat_optional.add_argument("--minify-json", action="store_true",
         help="Concatenate JSONs without indentation or line returns.")
+    return parser
 
 
 
diff --git a/augur/parse.py b/augur/parse.py
index 78a52df60..9792329d3 100644
--- a/augur/parse.py
+++ b/augur/parse.py
@@ -7,6 +7,7 @@
 from .io import open_file, read_sequences, write_sequences
 from .dates import get_numerical_date_from_value
 from .errors import AugurError
+from .utils import first_line
 
 forbidden_characters = str.maketrans(
     {' ': None,
@@ -137,7 +138,8 @@ def parse_sequence(sequence, fields, strain_key="strain", separator="|", prettif
     return sequence, metadata
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("parse", help=first_line(__doc__))
     parser.add_argument('--sequences', '-s', required=True, help="sequences in fasta or VCF format")
     parser.add_argument('--output-sequences', help="output sequences file")
     parser.add_argument('--output-metadata', help="output metadata file")
@@ -146,6 +148,7 @@ def register_arguments(parser):
     parser.add_argument('--separator', default='|', help="separator of fasta header")
     parser.add_argument('--fix-dates', choices=['dayfirst', 'monthfirst'],
                                 help="attempt to parse non-standard dates and output them in standard YYYY-MM-DD format")
+    return parser
 
 
 def run(args):
diff --git a/augur/reconstruct_sequences.py b/augur/reconstruct_sequences.py
index 981f835b8..d8abd6244 100644
--- a/augur/reconstruct_sequences.py
+++ b/augur/reconstruct_sequences.py
@@ -6,12 +6,13 @@
 import numpy as np
 from collections import defaultdict
 from Bio import SeqIO, Seq, SeqRecord, Phylo
-from .utils import read_node_data, write_json
+from .utils import first_line, read_node_data, write_json
 from treetime.vcf_utils import read_vcf
 
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("reconstruct-sequences", help=first_line(__doc__))
     parser.add_argument('--tree', required=True, help="tree as Newick file")
     parser.add_argument('--gene', type=str, help="gene to translate (list or file containing list)")
     parser.add_argument('--mutations', required=True, type=str, help="json file containing mutations "
@@ -19,6 +20,7 @@ def register_arguments(parser):
     parser.add_argument('--vcf-aa-reference', type=str, help='fasta file of the reference gene translations for VCF format')
     parser.add_argument('--internal-nodes', action='store_true', help="include sequences of internal nodes in output")
     parser.add_argument('--output', type=str)
+    return parser
 
 
 def get_sequence(pseq, muts):
diff --git a/augur/refine.py b/augur/refine.py
index 8c702d89b..20711ff4f 100644
--- a/augur/refine.py
+++ b/augur/refine.py
@@ -6,7 +6,7 @@
 from Bio import Phylo
 from .dates import get_numerical_dates
 from .io import read_metadata
-from .utils import read_tree, write_json, InvalidTreeError
+from .utils import first_line, read_tree, write_json, InvalidTreeError
 from treetime.vcf_utils import read_vcf, write_vcf
 from treetime.seq_utils import profile_maps
 
@@ -92,7 +92,8 @@ def collect_node_data(T, attributes):
     return data
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("refine", help=first_line(__doc__))
     parser.add_argument('--alignment', '-a', help="alignment in fasta or VCF format")
     parser.add_argument('--tree', '-t', required=True, help="prebuilt Newick")
     parser.add_argument('--metadata', type=str, metavar="FILE", help="sequence metadata, as CSV or TSV")
@@ -128,6 +129,8 @@ def register_arguments(parser):
                         default='mutations-per-site', help='Units in which sequence divergences is exported.')
     parser.add_argument('--seed', type=int, help='seed for random number generation')
     parser.set_defaults(covariance=True)
+    return parser
+
 
 def run(args):
     if args.seed is not None:
diff --git a/augur/sequence_traits.py b/augur/sequence_traits.py
index 66317d89d..cfa1da3ee 100644
--- a/augur/sequence_traits.py
+++ b/augur/sequence_traits.py
@@ -6,7 +6,7 @@
 import numpy as np
 from treetime.vcf_utils import read_vcf
 from collections import defaultdict
-from .utils import write_json, get_json_name
+from .utils import first_line, write_json, get_json_name
 
 def read_in_translate_vcf(vcf_file, ref_file):
     """
@@ -290,7 +290,8 @@ def attach_features(annotations, label, count):
     return seq_feature_dict
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("sequence-traits", help=first_line(__doc__))
     parser.add_argument('--ancestral-sequences', type=str, help="nucleotide alignment (VCF) to search for sequence traits in (can be generated from 'ancestral' using '--output-vcf')")
     parser.add_argument('--translations', type=str, help="AA alignment to search for sequence traits in (can include ancestral sequences)")
     parser.add_argument('--vcf-reference', type=str, help='fasta file of the sequence the nucleotide VCF was mapped to')
@@ -300,6 +301,7 @@ def register_arguments(parser):
     parser.add_argument('--count', type=str, choices=['traits','mutations'], default='traits', help='Whether to count traits (ex: # drugs resistant to) or mutations')
     parser.add_argument('--label', type=str, default="# Traits", help='How to label the counts (ex: Drug_Resistance)')
     parser.add_argument('--output-node-data', type=str, help='name of JSON file to save sequence features to')
+    return parser
 
 
 def run(args):
diff --git a/augur/titers.py b/augur/titers.py
index 5ec762dee..27f622226 100644
--- a/augur/titers.py
+++ b/augur/titers.py
@@ -9,11 +9,12 @@
 
 from .reconstruct_sequences import load_alignments
 from .titer_model import InsufficientDataException
-from .utils import read_node_data, write_json
+from .utils import first_line, read_node_data, write_json
 from .argparse_ import add_default_command
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("titers", help=first_line(__doc__))
     subparsers = parser.add_subparsers()
     add_default_command(parser)
 
@@ -36,6 +37,7 @@ def register_arguments(parser):
     sub_model.set_defaults(
         __command__ = infer_substitution_model
     )
+    return parser
 
 
 class infer_substitution_model():
diff --git a/augur/traits.py b/augur/traits.py
index 77bee5646..8f87912fd 100644
--- a/augur/traits.py
+++ b/augur/traits.py
@@ -7,7 +7,7 @@
 import os, sys
 import pandas as pd
 from .io import read_metadata
-from .utils import write_json, get_json_name
+from .utils import first_line, write_json, get_json_name
 TINY = 1e-12
 
 def mugration_inference(tree=None, seq_meta=None, field='country', confidence=True,
@@ -97,7 +97,7 @@ def mugration_inference(tree=None, seq_meta=None, field='country', confidence=Tr
     return tt.tree, tt.gtr, letter_to_state
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
     """Add subcommand specific arguments
 
     Parameters
@@ -105,6 +105,7 @@ def register_arguments(parser):
     parser : argparse
         subcommand argument parser
     """
+    parser = parent_subparsers.add_parser("traits", help=first_line(__doc__))
     parser.add_argument('--tree', '-t', required=True, help="tree to perform trait reconstruction on")
     parser.add_argument('--metadata', required=True, metavar="FILE", help="table with metadata, as CSV or TSV")
     parser.add_argument('--weights', required=False, help="tsv/csv table with equilibrium probabilities of discrete states")
@@ -120,6 +121,8 @@ def register_arguments(parser):
                              '(or rather the time spent in a particular state on the tree)')
     parser.add_argument('--output-node-data', type=str, help='name of JSON file to save trait inferences to')
     parser.epilog = "Note that missing data must be represented by a `?` character. Missing data will currently be inferred."
+    return parser
+
 
 def run(args):
     """run mugration inference
diff --git a/augur/translate.py b/augur/translate.py
index d2f1293b6..fc70ade5e 100644
--- a/augur/translate.py
+++ b/augur/translate.py
@@ -6,7 +6,7 @@
 import numpy as np
 from Bio import SeqIO, SeqFeature, Seq, SeqRecord, Phylo
 from .io import write_VCF_translation
-from .utils import read_node_data, load_features, write_json, get_json_name
+from .utils import first_line, read_node_data, load_features, write_json, get_json_name
 from treetime.vcf_utils import read_vcf
 
 class MissingNodeError(Exception):
@@ -302,7 +302,8 @@ def get_genes_from_file(fname):
     return unique_genes
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("translate", help=first_line(__doc__))
     parser.add_argument('--tree', help="prebuilt Newick -- no tree will be built if provided")
     parser.add_argument('--ancestral-sequences', type=str, help='JSON (fasta input) or VCF (VCF input) containing ancestral and tip sequences')
     parser.add_argument('--reference-sequence', required=True,
@@ -314,6 +315,7 @@ def register_arguments(parser):
                                    "like so: 'my_alignment_%%GENE.fasta', where '%%GENE' will be replaced by the name of the gene")
     parser.add_argument('--vcf-reference-output', type=str, help="fasta file where reference sequence translations for VCF input will be written")
     parser.add_argument('--vcf-reference', type=str, help='fasta file of the sequence the VCF was mapped to')
+    return parser
 
 
 def run(args):
diff --git a/augur/tree.py b/augur/tree.py
index fdd0d48bd..15d31b311 100644
--- a/augur/tree.py
+++ b/augur/tree.py
@@ -16,7 +16,7 @@
 from pathlib import Path
 
 from .io import read_sequences, run_shell_command, shquote
-from .utils import nthreads_value, load_mask_sites
+from .utils import first_line, nthreads_value, load_mask_sites
 
 DEFAULT_ARGS = {
     "fasttree": "-nt -nosupport",
@@ -395,7 +395,8 @@ def mask_sites_in_multiple_sequence_alignment(alignment_file, excluded_sites_fil
     return masked_alignment_file
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("tree", help=first_line(__doc__))
     parser.add_argument('--alignment', '-a', required=True, help="alignment in fasta or VCF format")
     parser.add_argument('--method', default='iqtree', choices=["fasttree", "raxml", "iqtree"], help="tree builder to use")
     parser.add_argument('--output', '-o', type=str, help='file name to write tree to')
@@ -416,6 +417,7 @@ def register_arguments(parser):
     parser.epilog = """For example, to build a tree with IQ-TREE, use the following format:
     augur tree --method iqtree --alignment <alignment> --substitution-model <model> --output <tree> --tree-builder-args="<extra arguments>"
     """
+    return parser
 
 def run(args):
     # check alignment type, set flags, read in if VCF
diff --git a/augur/validate.py b/augur/validate.py
index 07ca06c5c..fca37713f 100644
--- a/augur/validate.py
+++ b/augur/validate.py
@@ -288,7 +288,9 @@ def measurements_collection_config(collection_config_json, **kwargs):
     return collection_config
 
 
-def register_arguments(parser):
+def register_parser(parent_subparsers):
+    # Not using utils.first_line for help here because it results in a circular import
+    parser = parent_subparsers.add_parser("validate", help=__doc__)
     subparsers = parser.add_subparsers(dest="subcommand", help="Which file(s) do you want to validate?")
 
     subparsers.add_parser("export-v2", help="validate JSON intended for auspice v2") \
@@ -306,6 +308,8 @@ def register_arguments(parser):
 
     subparsers.add_parser("measurements-collection-config", help="validate measurement collection config intended for `augur measurements export`") \
         .add_argument("collection_config_json", metavar="JSON", help="collection config JSON")
+    return parser
+
 
 def run(args):
     try:
diff --git a/augur/version.py b/augur/version.py
index 4f0299105..e71669f2c 100644
--- a/augur/version.py
+++ b/augur/version.py
@@ -1,11 +1,11 @@
 """
 Print the version of augur.
 """
-
+from .utils import first_line
 from .__version__ import __version__
 
-def register_arguments(parser):
-    pass
+def register_parser(parent_subparsers):
+    return parent_subparsers.add_parser("version", help=first_line(__doc__))
 
 def run(args):
     print("augur", __version__)

From 311cccb52e00043d80f54544d9ce9c604e25cf6d Mon Sep 17 00:00:00 2001
From: Jover <joverlee521@gmail.com>
Date: Tue, 12 Jul 2022 09:44:22 -0700
Subject: [PATCH 03/11] refactor: rename `import.py` to `import_.py`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since `import` is a Python keyword, the best practice is to name the
module as `import_` according to PEP-8 guidelines.¹

This should not affect the usage of the `augur import` command since
the command name remains unchanged. Since the module only has
`register_parser` and `run` functions, there are unlikely to be any
external uses of `augur.import`.

This change came from a suggestion by @tsibley in review.²

¹ https://peps.python.org/pep-0008/#descriptive-naming-styles
² https://github.com/nextstrain/augur/pull/1002#discussion_r918328749
---
 augur/__init__.py               | 2 +-
 augur/{import.py => import_.py} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename augur/{import.py => import_.py} (100%)

diff --git a/augur/__init__.py b/augur/__init__.py
index cafeed1eb..f5e6be682 100644
--- a/augur/__init__.py
+++ b/augur/__init__.py
@@ -39,7 +39,7 @@
     "export",
     "validate",
     "version",
-    "import",
+    "import_",
     "measurements",
 ]
 
diff --git a/augur/import.py b/augur/import_.py
similarity index 100%
rename from augur/import.py
rename to augur/import_.py

From d531d48e12ec13ded19bdcbf437685d38e5df958 Mon Sep 17 00:00:00 2001
From: Jover <joverlee521@gmail.com>
Date: Mon, 11 Jul 2022 17:49:27 -0700
Subject: [PATCH 04/11] Move `HideAsFalseAction` to `argparse_` module

This is a custom argparse.Action so it belongs with the other argparse
helpers. This was added with the augur measurements command so it's
highly unlikely to be used externally.
---
 augur/argparse_.py    | 11 ++++++++++-
 augur/measurements.py |  3 ++-
 augur/utils.py        |  8 --------
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/augur/argparse_.py b/augur/argparse_.py
index 6d1e7915a..bdb34e914 100644
--- a/augur/argparse_.py
+++ b/augur/argparse_.py
@@ -1,7 +1,7 @@
 """
 Custom helpers for the argparse standard library.
 """
-from argparse import ArgumentDefaultsHelpFormatter
+from argparse import Action, ArgumentDefaultsHelpFormatter
 
 
 def add_default_command(parser):
@@ -48,3 +48,12 @@ def add_command_subparsers(subparsers, commands):
         # If a command doesn't have its own run() function, then print its help when called.
         if not getattr(command, "run", None):
             add_default_command(subparser)
+
+
+class HideAsFalseAction(Action):
+    """
+    Custom argparse Action that stores False for arguments passed as `--hide*`
+    and stores True for all other argument patterns.
+    """
+    def __call__(self, parser, namespace, values, option_string=None):
+        setattr(namespace, self.dest, option_string[2:6] != 'hide')
diff --git a/augur/measurements.py b/augur/measurements.py
index 27bea775f..8bf75cd5c 100644
--- a/augur/measurements.py
+++ b/augur/measurements.py
@@ -5,7 +5,8 @@
 import pandas as pd
 import sys
 
-from .utils import first_line, write_json, HideAsFalseAction
+from .argparse_ import HideAsFalseAction
+from .utils import first_line, write_json
 from .validate import (
     measurements as read_measurements_json,
     measurements_collection_config as read_collection_config_json,
diff --git a/augur/utils.py b/augur/utils.py
index 2421989f5..9f4a155b2 100644
--- a/augur/utils.py
+++ b/augur/utils.py
@@ -548,11 +548,3 @@ def read_strains(*files, comment_char="#"):
                     strains.add(strain_name)
 
     return strains
-
-class HideAsFalseAction(argparse.Action):
-    """
-    Custom argparse Action that stores False for arguments passed as `--hide*`
-    and stores True for all other argument patterns.
-    """
-    def __call__(self, parser, namespace, values, option_string=None):
-        setattr(namespace, self.dest, option_string[2:6] != 'hide')

From dc4f58b014563bc7805272886c6962d2144104fe Mon Sep 17 00:00:00 2001
From: Jover <joverlee521@gmail.com>
Date: Mon, 11 Jul 2022 18:00:55 -0700
Subject: [PATCH 05/11] Refactor measurements module to a package

Moves the measurements module to its own package where the subcommands
are split out into their own modules. The subcommand subparsers are
added with the new `add_command_subparsers` function.

This is an effort to reorganize augur commands with subcommands as
packages instead of continuously adding subcommands as modules.
Starting with the measurements command since it is a new command that is
unlikely to already be used by anyone outside of Nextstrain. In the
future, we may want to consider refactoring export and validate
in the same way. Putting that off for now since reorganizing those
modules may result in breaking outside uses of their APIs.
---
 augur/measurements/__init__.py                |  19 ++
 augur/measurements/concat.py                  |  56 +++++
 .../export.py}                                | 215 +++++++-----------
 3 files changed, 156 insertions(+), 134 deletions(-)
 create mode 100644 augur/measurements/__init__.py
 create mode 100644 augur/measurements/concat.py
 rename augur/{measurements.py => measurements/export.py} (82%)

diff --git a/augur/measurements/__init__.py b/augur/measurements/__init__.py
new file mode 100644
index 000000000..3fbc93273
--- /dev/null
+++ b/augur/measurements/__init__.py
@@ -0,0 +1,19 @@
+"""
+Create JSON files suitable for visualization within the measurements panel of Auspice.
+"""
+from augur.argparse_ import add_command_subparsers
+from augur.utils import first_line
+from . import export, concat
+
+SUBCOMMANDS = [
+    export,
+    concat,
+]
+
+
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("measurements", help=first_line(__doc__))
+    # Add subparsers for subcommands
+    subparsers = parser.add_subparsers(dest='subcommand')
+    add_command_subparsers(subparsers, SUBCOMMANDS)
+    return parser
diff --git a/augur/measurements/concat.py b/augur/measurements/concat.py
new file mode 100644
index 000000000..fd4f6751d
--- /dev/null
+++ b/augur/measurements/concat.py
@@ -0,0 +1,56 @@
+"""
+Concatenate multiple measurements JSONs into a single JSON file
+"""
+import sys
+
+from augur.utils import first_line, write_json
+from augur.validate import (
+    measurements as read_measurements_json,
+    ValidateError
+)
+
+
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("concat", help=first_line(__doc__))
+
+    concat_required = parser.add_argument_group(
+        title="REQUIRED"
+    )
+    concat_required.add_argument("--jsons", required=True, type=str, nargs="+", metavar="JSONs",
+        help="Measurement JSON files to concatenate.")
+    concat_required.add_argument("--output-json", required=True, metavar="JSON", type=str,
+        help="Output JSON file")
+
+    concat_optional = parser.add_argument_group(
+        title="OPTIONAL SETTINGS"
+    )
+    concat_optional.add_argument("--default-collection", type=str,
+        help="The key of the default collection to display. " +
+             "If not provided, the first collection of the first JSON file will be displayed")
+    concat_optional.add_argument("--minify-json", action="store_true",
+        help="Concatenate JSONs without indentation or line returns.")
+
+    return parser
+
+
+def run(args):
+    output = {
+        'collections': []
+    }
+    if args.default_collection is not None:
+        output['default_collection'] = args.default_collection
+
+    for json in args.jsons:
+        measurements = read_measurements_json(json)
+        output['collections'].extend(measurements['collections'])
+
+    indent = {"indent": None} if args.minify_json else {}
+    write_json(output, args.output_json, include_version=False, **indent)
+    try:
+        read_measurements_json(measurements_json=args.output_json)
+    except ValidateError:
+        print(
+            "ERROR: Validation of output JSON failed. See detailed errors above.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
diff --git a/augur/measurements.py b/augur/measurements/export.py
similarity index 82%
rename from augur/measurements.py
rename to augur/measurements/export.py
index 8bf75cd5c..b54579a1b 100644
--- a/augur/measurements.py
+++ b/augur/measurements/export.py
@@ -1,13 +1,13 @@
 """
-Create JSON files suitable for visualization within the measurements panel of Auspice.
+Export a measurements JSON for a single collection
 """
 import os
 import pandas as pd
 import sys
 
-from .argparse_ import HideAsFalseAction
-from .utils import first_line, write_json
-from .validate import (
+from augur.argparse_ import HideAsFalseAction
+from augur.utils import first_line, write_json
+from augur.validate import (
     measurements as read_measurements_json,
     measurements_collection_config as read_collection_config_json,
     ValidateError
@@ -22,7 +22,83 @@
 }
 
 
-def export_measurements(args):
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("export", help=first_line(__doc__))
+
+    export_required = parser.add_argument_group(
+        title="REQUIRED"
+    )
+    export_required.add_argument("--collection", required=True, metavar="TSV",
+        help="Collection of measurements and metadata in a TSV file. " +
+             "Keep in mind duplicate columns will be renamed as 'X', 'X.1', 'X.2'...'X.N'")
+    export_required.add_argument("--strain-column", default="strain",
+        help="Name of the column containing strain names. " +
+             "Provided column will be renamed to `strain` so please make sure no other columns are named `strain`. " +
+             "Strain names in this column should match the strain names in the corresponding Auspice dataset JSON. " +
+             "(default: %(default)s)")
+    export_required.add_argument("--value-column", default="value",
+        help="Name of the column containing the numeric values to be plotted for the given collection. " +
+             "Provided column will be renamed to `value` so please make sure no other columns are named `value`. " +
+             "(default: %(default)s)")
+    export_required.add_argument("--output-json", required=True, metavar="JSON", type=str,
+        help="Output JSON file. " +
+             "The file name must follow the Auspice sidecar file naming convention to be recognized as a sidecar file. " +
+             "See Nextstrain data format docs for more details.")
+
+    export_config = parser.add_argument_group(
+        title="COLLECTION CONFIGURATION",
+        description="These options control the configuration of the collection for Auspice. " +
+                    "You can provide a config JSON (which includes all available options) or " +
+                    "command line arguments (which are more limited). " +
+                    "Command line arguments will override the values set in the config JSON."
+    )
+    export_config.add_argument("--collection-config", metavar="JSON",
+        help="Collection configuration file for advanced configurations. ")
+    export_config.add_argument("--grouping-column", nargs="+",
+        help="Name of the column(s) that should be used as grouping(s) for measurements. " +
+             "Note that if groupings are provided via command line args, the default group-by " +
+             "field in the config JSON will be dropped.")
+    export_config.add_argument("--key",
+        help="A short key name of the collection for internal use within Auspice. " +
+             "If not provided via config or command line option, the collection TSV filename will be used. ")
+    export_config.add_argument("--title",
+        help="The full title of the collection to display in the measurements panel title. " +
+             f"If not provided via config or command line option, the panel's default title is {DEFAULT_ARGS['title']!r}.")
+    export_config.add_argument("--x-axis-label",
+        help="The short label to display for the x-axis that describles the value of the measurements. " +
+             "If not provided via config or command line option, the panel's default " +
+             f"x-axis label is {DEFAULT_ARGS['x_axis_label']!r}.")
+    export_config.add_argument("--threshold", type=float,
+        help="A measurements value threshold to be displayed in the measurements panel.")
+    export_config.add_argument("--filters", nargs="+",
+        help="The columns that are to be used a filters for measurements. " +
+             "If not provided, all columns will be available as filters.")
+    export_config.add_argument("--group-by", type=str,
+        help="The default grouping column. If not provided, the first grouping will be used.")
+    export_config.add_argument("--measurements-display", type=str, choices=["raw", "mean"],
+        help="The default display of the measurements")
+
+    export_config.add_argument("--show-overall-mean", "--hide-overall-mean",
+        dest="show_overall_mean", action=HideAsFalseAction, nargs=0,
+        help="Show or hide the overall mean per group by default")
+    export_config.add_argument("--show-threshold", "--hide-threshold",
+        dest="show_threshold", action=HideAsFalseAction, nargs=0,
+        help="Show or hide the threshold by default. This will be ignored if no threshold is provided.")
+
+    export_optional = parser.add_argument_group(
+        title="OPTIONAL SETTINGS"
+    )
+    export_optional.add_argument("--include-columns", nargs="+",
+        help="The columns to include from the collection TSV in the measurements JSON. " +
+             "Be sure to list columns that are used as groupings and/or filters. " +
+             "If no columns are provided, then all columns will be included by default.")
+    export_optional.add_argument("--minify-json", action="store_true",
+        help="Export JSON without indentation or line returns.")
+
+    return parser
+
+
+def run(args):
     # Default value to None so all columns will be read
     columns_to_include = None
     if args.include_columns is not None:
@@ -174,132 +250,3 @@ def export_measurements(args):
             file=sys.stderr,
         )
         sys.exit(1)
-
-
-def concat_measurements(args):
-    output = {
-        'collections': []
-    }
-    if args.default_collection is not None:
-        output['default_collection'] = args.default_collection
-
-    for json in args.jsons:
-        measurements = read_measurements_json(json)
-        output['collections'].extend(measurements['collections'])
-
-    indent = {"indent": None} if args.minify_json else {}
-    write_json(output, args.output_json, include_version=False, **indent)
-    try:
-        read_measurements_json(measurements_json=args.output_json)
-    except ValidateError:
-        print(
-            "ERROR: Validation of output JSON failed. See detailed errors above.",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-
-
-def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("measurements", help=first_line(__doc__))
-    subparsers = parser.add_subparsers(dest='subcommand')
-    subparsers.required = True
-
-    export = subparsers.add_parser("export", help="Export a measurements JSON for a single collection")
-
-    export_required = export.add_argument_group(
-        title="REQUIRED"
-    )
-    export_required.add_argument("--collection", required=True, metavar="TSV",
-        help="Collection of measurements and metadata in a TSV file. " +
-             "Keep in mind duplicate columns will be renamed as 'X', 'X.1', 'X.2'...'X.N'")
-    export_required.add_argument("--strain-column", default="strain",
-        help="Name of the column containing strain names. " +
-             "Provided column will be renamed to `strain` so please make sure no other columns are named `strain`. " +
-             "Strain names in this column should match the strain names in the corresponding Auspice dataset JSON. " +
-             "(default: %(default)s)")
-    export_required.add_argument("--value-column", default="value",
-        help="Name of the column containing the numeric values to be plotted for the given collection. " +
-             "Provided column will be renamed to `value` so please make sure no other columns are named `value`. " +
-             "(default: %(default)s)")
-    export_required.add_argument("--output-json", required=True, metavar="JSON", type=str,
-        help="Output JSON file. " +
-             "The file name must follow the Auspice sidecar file naming convention to be recognized as a sidecar file. " +
-             "See Nextstrain data format docs for more details.")
-
-    export_config = export.add_argument_group(
-        title="COLLECTION CONFIGURATION",
-        description="These options control the configuration of the collection for Auspice. " +
-                    "You can provide a config JSON (which includes all available options) or " +
-                    "command line arguments (which are more limited). " +
-                    "Command line arguments will override the values set in the config JSON."
-    )
-    export_config.add_argument("--collection-config", metavar="JSON",
-        help="Collection configuration file for advanced configurations. ")
-    export_config.add_argument("--grouping-column", nargs="+",
-        help="Name of the column(s) that should be used as grouping(s) for measurements. " +
-             "Note that if groupings are provided via command line args, the default group-by " +
-             "field in the config JSON will be dropped.")
-    export_config.add_argument("--key",
-        help="A short key name of the collection for internal use within Auspice. " +
-             "If not provided via config or command line option, the collection TSV filename will be used. ")
-    export_config.add_argument("--title",
-        help="The full title of the collection to display in the measurements panel title. " +
-             f"If not provided via config or command line option, the panel's default title is {DEFAULT_ARGS['title']!r}.")
-    export_config.add_argument("--x-axis-label",
-        help="The short label to display for the x-axis that describles the value of the measurements. " +
-             "If not provided via config or command line option, the panel's default " +
-             f"x-axis label is {DEFAULT_ARGS['x_axis_label']!r}.")
-    export_config.add_argument("--threshold", type=float,
-        help="A measurements value threshold to be displayed in the measurements panel.")
-    export_config.add_argument("--filters", nargs="+",
-        help="The columns that are to be used a filters for measurements. " +
-             "If not provided, all columns will be available as filters.")
-    export_config.add_argument("--group-by", type=str,
-        help="The default grouping column. If not provided, the first grouping will be used.")
-    export_config.add_argument("--measurements-display", type=str, choices=["raw", "mean"],
-        help="The default display of the measurements")
-
-    export_config.add_argument("--show-overall-mean", "--hide-overall-mean",
-        dest="show_overall_mean", action=HideAsFalseAction, nargs=0,
-        help="Show or hide the overall mean per group by default")
-    export_config.add_argument("--show-threshold", "--hide-threshold",
-        dest="show_threshold", action=HideAsFalseAction, nargs=0,
-        help="Show or hide the threshold by default. This will be ignored if no threshold is provided.")
-
-    export_optional = export.add_argument_group(
-        title="OPTIONAL SETTINGS"
-    )
-    export_optional.add_argument("--include-columns", nargs="+",
-        help="The columns to include from the collection TSV in the measurements JSON. " +
-             "Be sure to list columns that are used as groupings and/or filters. " +
-             "If no columns are provided, then all columns will be included by default.")
-    export_optional.add_argument("--minify-json", action="store_true",
-        help="Export JSON without indentation or line returns.")
-
-
-    concat = subparsers.add_parser("concat", help="Concatenate multiple measurements JSONs into a single JSON file")
-    concat_required = concat.add_argument_group(
-        title="REQUIRED"
-    )
-    concat_required.add_argument("--jsons", required=True, type=str, nargs="+", metavar="JSONs",
-        help="Measurement JSON files to concatenate.")
-    concat_required.add_argument("--output-json", required=True, metavar="JSON", type=str,
-        help="Output JSON file")
-
-    concat_optional = concat.add_argument_group(
-        title="OPTIONAL SETTINGS"
-    )
-    concat_optional.add_argument("--default-collection", type=str,
-        help="The key of the default collection to display. " +
-             "If not provided, the first collection of the first JSON file will be displayed")
-    concat_optional.add_argument("--minify-json", action="store_true",
-        help="Concatenate JSONs without indentation or line returns.")
-    return parser
-
-
-
-def run(args):
-    if args.subcommand == 'export':
-        return export_measurements(args)
-    if args.subcommand == "concat":
-        return concat_measurements(args)

From 1f0d8234549ada754264f2469d21d3cf51a96a66 Mon Sep 17 00:00:00 2001
From: Jover <joverlee521@gmail.com>
Date: Tue, 12 Jul 2022 09:23:34 -0700
Subject: [PATCH 06/11] Refactor import_ and import_beast to a package
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Moves the import_ and import_beast modules to an import_ package and
uses the new `add_command_subparsers` function to add the beast
subparser.

Search via cs.github.com¹ shows that there are no external uses of
the `augur.import` or `augur.import_beast` APIs, so this should be
safe to move to a package.

¹ https://cs.github.com/?scopeName=All+repos&scope=&q=%22augur.import%22
---
 augur/{import_.py => import_/__init__.py}   | 16 ++++++++--------
 augur/{import_beast.py => import_/beast.py} | 10 ++++++----
 2 files changed, 14 insertions(+), 12 deletions(-)
 rename augur/{import_.py => import_/__init__.py} (62%)
 rename augur/{import_beast.py => import_/beast.py} (99%)

diff --git a/augur/import_.py b/augur/import_/__init__.py
similarity index 62%
rename from augur/import_.py
rename to augur/import_/__init__.py
index fb98b2fcd..d2f925a29 100644
--- a/augur/import_.py
+++ b/augur/import_/__init__.py
@@ -1,18 +1,18 @@
 """
 Import analyses into augur pipeline from other systems
 """
-from .utils import first_line
-from .import_beast import run_beast, register_arguments_beast
+from augur.argparse_ import add_command_subparsers
+from augur.utils import first_line
+from . import beast
+
+SUBCOMMANDS = [
+    beast,
+]
 
 def register_parser(parent_subparsers):
     parser = parent_subparsers.add_parser("import", help=first_line(__doc__))
     metavar_msg = "Import analyses into augur pipeline from other systems"
     subparsers = parser.add_subparsers(title="TYPE",
                                        metavar=metavar_msg)
-    subparsers.required = True
-    register_arguments_beast(subparsers)
+    add_command_subparsers(subparsers, SUBCOMMANDS)
     return parser
-
-def run(args):
-    if "beast" in args:
-        return run_beast(args)
diff --git a/augur/import_beast.py b/augur/import_/beast.py
similarity index 99%
rename from augur/import_beast.py
rename to augur/import_/beast.py
index f53e983ee..acef81269 100644
--- a/augur/import_beast.py
+++ b/augur/import_/beast.py
@@ -11,13 +11,13 @@
 import numpy as np
 from Bio import Phylo
 from treetime import TreeAnc
-from .utils import write_json
+from augur.utils import write_json
 
-def register_arguments_beast(subparsers):
+def register_parser(parent_subparsers):
     """
     Arguments available to `augur import beast`
     """
-    beast_parser = subparsers.add_parser('beast', help="Import beast analysis")
+    beast_parser = parent_subparsers.add_parser('beast', help="Import beast analysis")
     beast_parser.add_argument("--beast", help=SUPPRESS, default=True) # used to disambiguate subcommands
     beast_parser.add_argument('--mcc', required=True, help="BEAST MCC tree")
     beast_parser.add_argument('--most-recent-tip-date', default=0, type=float, help='Numeric date of most recent tip in tree (--tip-date-regex, --tip-date-format and --tip-date-delimeter are ignored if this is set)')
@@ -28,6 +28,8 @@ def register_arguments_beast(subparsers):
     beast_parser.add_argument('--recursion-limit', default=False, type=int, help="Set a custom recursion limit (dangerous!)")
     beast_parser.add_argument('--output-tree', required=True, type=str, help='file name to write tree to')
     beast_parser.add_argument('--output-node-data', required=True, type=str, help='file name to write (temporal) branch lengths & BEAST traits as node data')
+    return beast_parser
+
 
 def parse_beast_tree(data, tipMap, verbose=False):
     """
@@ -568,7 +570,7 @@ def make_color_block(attr):
 
 
 
-def run_beast(args):
+def run(args):
     '''
     BEAST MCC tree to newick and node-data JSON for further augur processing / export
     '''

From 7de1fcd80516d70e2268954d523dfd475fdcd8ca Mon Sep 17 00:00:00 2001
From: Jover <joverlee521@gmail.com>
Date: Mon, 11 Jul 2022 19:01:42 -0700
Subject: [PATCH 07/11] Refactor `export` to use `add_command_subparsers`

Simplifies the `register_parser` function and removes the now unneeded
`run` function in export.py

Ideally, export and its subcommands would be moved to their own
package, but we are keeping these files in place since moving them
could break external uses of their APIs.
---
 augur/export.py    | 21 +++++++++------------
 augur/export_v1.py | 16 ++++++++--------
 augur/export_v2.py | 16 ++++++++--------
 3 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/augur/export.py b/augur/export.py
index 09d7e114b..35a7f360c 100644
--- a/augur/export.py
+++ b/augur/export.py
@@ -1,25 +1,22 @@
 """
 Export JSON files suitable for visualization with auspice.
 """
+from .argparse_ import add_command_subparsers
 from .utils import first_line
-from .export_v1 import run_v1, register_arguments_v1
-from .export_v2 import run_v2, register_arguments_v2
+from . import export_v1, export_v2
+
+SUBCOMMANDS = [
+    export_v1,
+    export_v2,
+]
 
 
 def register_parser(parent_subparsers):
     parser = parent_subparsers.add_parser("export", help=first_line(__doc__))
+    # Add subparsers for subcommands
     metavar_msg ="Augur export now needs you to define the JSON version " + \
                  "you want, e.g. `augur export v2`."
     subparsers = parser.add_subparsers(title="JSON SCHEMA",
                                        metavar=metavar_msg)
-    subparsers.required = True
-    register_arguments_v2(subparsers)
-    register_arguments_v1(subparsers)
+    add_command_subparsers(subparsers, SUBCOMMANDS)
     return parser
-
-
-def run(args):
-    if "v1" in args:
-        return run_v1(args)
-    else:
-        return run_v2(args)
diff --git a/augur/export_v1.py b/augur/export_v1.py
index 0975e5f5e..0822f9c20 100644
--- a/augur/export_v1.py
+++ b/augur/export_v1.py
@@ -261,7 +261,7 @@ def add_tsv_metadata_to_nodes(nodes, meta_tsv, meta_json, extra_fields=['authors
         fields = [x for x in meta_json["color_options"].keys() if x != "gt"] + extra_fields
     else:
         fields = list(extra_fields)
-        
+
     if "geo" in meta_json:
         fields += meta_json["geo"]
 
@@ -331,15 +331,15 @@ def add_option_args(parser):
     return options
 
 
-def register_arguments_v1(subparsers):
-    # V1 sub-command
-    v1 = subparsers.add_parser('v1', help="Export version 1 JSON schema (separate meta and tree JSONs)")
-    v1_core = add_core_args(v1)
-    v1_options = add_option_args(v1)
-    v1.add_argument("--v1", help=SUPPRESS, default=True)
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("v1", help="Export version 1 JSON schema (separate meta and tree JSONs)")
+    add_core_args(parser)
+    add_option_args(parser)
+    parser.add_argument("--v1", help=SUPPRESS, default=True)
+    return parser
 
 
-def run_v1(args):
+def run(args):
     T = Phylo.read(args.tree, 'newick')
     node_data = read_node_data(args.node_data) # args.node_data is an array of multiple files (or a single file)
     nodes = node_data["nodes"] # this is the per-node metadata produced by various augur modules
diff --git a/augur/export_v2.py b/augur/export_v2.py
index db32d45a6..41d30efeb 100644
--- a/augur/export_v2.py
+++ b/augur/export_v2.py
@@ -819,17 +819,17 @@ def node_data_prop_is_normal_trait(name):
     return True
 
 
-def register_arguments_v2(subparsers):
-    v2 = subparsers.add_parser("v2", help="Export version 2 JSON schema")
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("v2", help="Export version 2 JSON schema")
 
-    required = v2.add_argument_group(
+    required = parser.add_argument_group(
         title="REQUIRED"
     )
     required.add_argument('--tree','-t', metavar="newick", required=True, help="Phylogenetic tree, usually output from `augur refine`")
     required.add_argument('--node-data', metavar="JSON", required=True, nargs='+', help="JSON files containing metadata for nodes in the tree")
     required.add_argument('--output', metavar="JSON", required=True, help="Ouput file (typically for visualisation in auspice)")
 
-    config = v2.add_argument_group(
+    config = parser.add_argument_group(
         title="DISPLAY CONFIGURATION",
         description="These control the display settings for auspice. \
             You can supply a config JSON (which has all available options) or command line arguments (which are more limited but great to get started). \
@@ -844,21 +844,21 @@ def register_arguments_v2(subparsers):
     config.add_argument('--color-by-metadata', metavar="trait", nargs='+', help="Metadata columns to include as coloring options")
     config.add_argument('--panels', metavar="panels", nargs='+', choices=['tree', 'map', 'entropy', 'frequencies', 'measurements'], help="Restrict panel display in auspice. Options are %(choices)s. Ignore this option to display all available panels.")
 
-    optional_inputs = v2.add_argument_group(
+    optional_inputs = parser.add_argument_group(
         title="OPTIONAL INPUT FILES"
     )
     optional_inputs.add_argument('--metadata', metavar="FILE", help="Additional metadata for strains in the tree, as CSV or TSV")
     optional_inputs.add_argument('--colors', metavar="FILE", help="Custom color definitions, one per line in the format `TRAIT_TYPE\\tTRAIT_VALUE\\tHEX_CODE`")
     optional_inputs.add_argument('--lat-longs', metavar="TSV", help="Latitudes and longitudes for geography traits (overrides built in mappings)")
 
-    optional_settings = v2.add_argument_group(
+    optional_settings = parser.add_argument_group(
         title="OPTIONAL SETTINGS"
     )
     optional_settings.add_argument('--minify-json', action="store_true", help="export JSONs without indentation or line returns")
     optional_settings.add_argument('--include-root-sequence', action="store_true", help="Export an additional JSON containing the root sequence (reference sequence for vcf) used to identify mutations. The filename will follow the pattern of <OUTPUT>_root-sequence.json for a main auspice JSON of <OUTPUT>.json")
     optional_settings.add_argument('--skip-validation', action="store_true", help="skip validation of input/output files. Use at your own risk!")
 
-    return v2
+    return parser
 
 
 def set_display_defaults(data_json, config):
@@ -982,7 +982,7 @@ def get_config(args):
         del config["vaccine_choices"]
     return config
 
-def run_v2(args):
+def run(args):
     configure_warnings()
     data_json = {"version": "v2", "meta": {"updated": time.strftime('%Y-%m-%d')}}
 

From 9b6107d5595ced9a56ab649a0c94cc9cd08e1805 Mon Sep 17 00:00:00 2001
From: Jover <joverlee521@gmail.com>
Date: Tue, 19 Jul 2022 12:32:58 -0700
Subject: [PATCH 08/11] measurements: rename parser group variables

These were carried over from when the two command subparsers were
created within the same parent parser. Now that they are separated,
the argument group variables can be renamed to remove redundant words.
---
 augur/measurements/concat.py | 12 +++++------
 augur/measurements/export.py | 40 ++++++++++++++++++------------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/augur/measurements/concat.py b/augur/measurements/concat.py
index fd4f6751d..4b686094a 100644
--- a/augur/measurements/concat.py
+++ b/augur/measurements/concat.py
@@ -13,21 +13,21 @@
 def register_parser(parent_subparsers):
     parser = parent_subparsers.add_parser("concat", help=first_line(__doc__))
 
-    concat_required = parser.add_argument_group(
+    required = parser.add_argument_group(
         title="REQUIRED"
     )
-    concat_required.add_argument("--jsons", required=True, type=str, nargs="+", metavar="JSONs",
+    required.add_argument("--jsons", required=True, type=str, nargs="+", metavar="JSONs",
         help="Measurement JSON files to concatenate.")
-    concat_required.add_argument("--output-json", required=True, metavar="JSON", type=str,
+    required.add_argument("--output-json", required=True, metavar="JSON", type=str,
         help="Output JSON file")
 
-    concat_optional = parser.add_argument_group(
+    optional = parser.add_argument_group(
         title="OPTIONAL SETTINGS"
     )
-    concat_optional.add_argument("--default-collection", type=str,
+    optional.add_argument("--default-collection", type=str,
         help="The key of the default collection to display. " +
              "If not provided, the first collection of the first JSON file will be displayed")
-    concat_optional.add_argument("--minify-json", action="store_true",
+    optional.add_argument("--minify-json", action="store_true",
         help="Concatenate JSONs without indentation or line returns.")
 
     return parser
diff --git a/augur/measurements/export.py b/augur/measurements/export.py
index b54579a1b..fa55efaeb 100644
--- a/augur/measurements/export.py
+++ b/augur/measurements/export.py
@@ -25,74 +25,74 @@
 def register_parser(parent_subparsers):
     parser = parent_subparsers.add_parser("export", help=first_line(__doc__))
 
-    export_required = parser.add_argument_group(
+    required = parser.add_argument_group(
         title="REQUIRED"
     )
-    export_required.add_argument("--collection", required=True, metavar="TSV",
+    required.add_argument("--collection", required=True, metavar="TSV",
         help="Collection of measurements and metadata in a TSV file. " +
              "Keep in mind duplicate columns will be renamed as 'X', 'X.1', 'X.2'...'X.N'")
-    export_required.add_argument("--strain-column", default="strain",
+    required.add_argument("--strain-column", default="strain",
         help="Name of the column containing strain names. " +
              "Provided column will be renamed to `strain` so please make sure no other columns are named `strain`. " +
              "Strain names in this column should match the strain names in the corresponding Auspice dataset JSON. " +
              "(default: %(default)s)")
-    export_required.add_argument("--value-column", default="value",
+    required.add_argument("--value-column", default="value",
         help="Name of the column containing the numeric values to be plotted for the given collection. " +
              "Provided column will be renamed to `value` so please make sure no other columns are named `value`. " +
              "(default: %(default)s)")
-    export_required.add_argument("--output-json", required=True, metavar="JSON", type=str,
+    required.add_argument("--output-json", required=True, metavar="JSON", type=str,
         help="Output JSON file. " +
              "The file name must follow the Auspice sidecar file naming convention to be recognized as a sidecar file. " +
              "See Nextstrain data format docs for more details.")
 
-    export_config = parser.add_argument_group(
+    config = parser.add_argument_group(
         title="COLLECTION CONFIGURATION",
         description="These options control the configuration of the collection for Auspice. " +
                     "You can provide a config JSON (which includes all available options) or " +
                     "command line arguments (which are more limited). " +
                     "Command line arguments will override the values set in the config JSON."
     )
-    export_config.add_argument("--collection-config", metavar="JSON",
+    config.add_argument("--collection-config", metavar="JSON",
         help="Collection configuration file for advanced configurations. ")
-    export_config.add_argument("--grouping-column", nargs="+",
+    config.add_argument("--grouping-column", nargs="+",
         help="Name of the column(s) that should be used as grouping(s) for measurements. " +
              "Note that if groupings are provided via command line args, the default group-by " +
              "field in the config JSON will be dropped.")
-    export_config.add_argument("--key",
+    config.add_argument("--key",
         help="A short key name of the collection for internal use within Auspice. " +
              "If not provided via config or command line option, the collection TSV filename will be used. ")
-    export_config.add_argument("--title",
+    config.add_argument("--title",
         help="The full title of the collection to display in the measurements panel title. " +
              f"If not provided via config or command line option, the panel's default title is {DEFAULT_ARGS['title']!r}.")
-    export_config.add_argument("--x-axis-label",
+    config.add_argument("--x-axis-label",
         help="The short label to display for the x-axis that describles the value of the measurements. " +
              "If not provided via config or command line option, the panel's default " +
              f"x-axis label is {DEFAULT_ARGS['x_axis_label']!r}.")
-    export_config.add_argument("--threshold", type=float,
+    config.add_argument("--threshold", type=float,
         help="A measurements value threshold to be displayed in the measurements panel.")
-    export_config.add_argument("--filters", nargs="+",
+    config.add_argument("--filters", nargs="+",
         help="The columns that are to be used a filters for measurements. " +
              "If not provided, all columns will be available as filters.")
-    export_config.add_argument("--group-by", type=str,
+    config.add_argument("--group-by", type=str,
         help="The default grouping column. If not provided, the first grouping will be used.")
-    export_config.add_argument("--measurements-display", type=str, choices=["raw", "mean"],
+    config.add_argument("--measurements-display", type=str, choices=["raw", "mean"],
         help="The default display of the measurements")
 
-    export_config.add_argument("--show-overall-mean", "--hide-overall-mean",
+    config.add_argument("--show-overall-mean", "--hide-overall-mean",
         dest="show_overall_mean", action=HideAsFalseAction, nargs=0,
         help="Show or hide the overall mean per group by default")
-    export_config.add_argument("--show-threshold", "--hide-threshold",
+    config.add_argument("--show-threshold", "--hide-threshold",
         dest="show_threshold", action=HideAsFalseAction, nargs=0,
         help="Show or hide the threshold by default. This will be ignored if no threshold is provided.")
 
-    export_optional = parser.add_argument_group(
+    optional = parser.add_argument_group(
         title="OPTIONAL SETTINGS"
     )
-    export_optional.add_argument("--include-columns", nargs="+",
+    optional.add_argument("--include-columns", nargs="+",
         help="The columns to include from the collection TSV in the measurements JSON. " +
              "Be sure to list columns that are used as groupings and/or filters. " +
              "If no columns are provided, then all columns will be included by default.")
-    export_optional.add_argument("--minify-json", action="store_true",
+    optional.add_argument("--minify-json", action="store_true",
         help="Export JSON without indentation or line returns.")
 
     return parser

From 888ba09d91b18561dc66ef47c352f40682adc460 Mon Sep 17 00:00:00 2001
From: Jover <joverlee521@gmail.com>
Date: Tue, 19 Jul 2022 12:54:08 -0700
Subject: [PATCH 09/11] Remove `utils.first_line` from modules with one line
 docstrings

`utils.first_line` is useful for the brief help message for
`augur/distance.py` since it has a long multiline docstring, but seems
like overkill for other modules. Keep things simple by removing
`utils.first_line` from other modules and just use their docstrings
directly as their help message.
---
 augur/align.py                 | 4 ++--
 augur/ancestral.py             | 4 ++--
 augur/clades.py                | 4 ++--
 augur/export.py                | 3 +--
 augur/filter.py                | 4 ++--
 augur/frequencies.py           | 4 ++--
 augur/index.py                 | 3 +--
 augur/lbi.py                   | 4 ++--
 augur/mask.py                  | 4 ++--
 augur/parse.py                 | 3 +--
 augur/reconstruct_sequences.py | 4 ++--
 augur/refine.py                | 4 ++--
 augur/sequence_traits.py       | 4 ++--
 augur/titers.py                | 4 ++--
 augur/traits.py                | 4 ++--
 augur/translate.py             | 4 ++--
 augur/tree.py                  | 4 ++--
 augur/version.py               | 3 +--
 18 files changed, 32 insertions(+), 36 deletions(-)

diff --git a/augur/align.py b/augur/align.py
index 96f8734a5..72240b5fd 100644
--- a/augur/align.py
+++ b/augur/align.py
@@ -7,7 +7,7 @@
 import numpy as np
 from Bio import AlignIO, SeqIO, Seq, Align
 from .io import run_shell_command, shquote
-from .utils import first_line, nthreads_value
+from .utils import nthreads_value
 from collections import defaultdict
 
 class AlignmentError(Exception):
@@ -36,7 +36,7 @@ def register_arguments(parser):
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("align", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("align", help=__doc__)
     register_arguments(parser)
     return parser
 
diff --git a/augur/ancestral.py b/augur/ancestral.py
index a95d07156..bd6457b20 100644
--- a/augur/ancestral.py
+++ b/augur/ancestral.py
@@ -7,7 +7,7 @@
 from Bio import Phylo, SeqIO
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord
-from .utils import first_line, read_tree, InvalidTreeError, write_json, get_json_name
+from .utils import read_tree, InvalidTreeError, write_json, get_json_name
 from treetime.vcf_utils import read_vcf, write_vcf
 from collections import defaultdict
 
@@ -118,7 +118,7 @@ def collect_mutations_and_sequences(tt, infer_tips=False, full_sequences=False,
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("ancestral", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("ancestral", help=__doc__)
     parser.add_argument('--tree', '-t', required=True, help="prebuilt Newick")
     parser.add_argument('--alignment', '-a', help="alignment in fasta or VCF format")
     parser.add_argument('--output-node-data', type=str, help='name of JSON file to save mutations and ancestral sequences to')
diff --git a/augur/clades.py b/augur/clades.py
index c66724b10..b3155f438 100644
--- a/augur/clades.py
+++ b/augur/clades.py
@@ -9,7 +9,7 @@
 from collections import defaultdict
 import networkx as nx
 from itertools import islice
-from .utils import first_line, get_parent_name_by_child_name_for_tree, read_node_data, write_json, get_json_name
+from .utils import get_parent_name_by_child_name_for_tree, read_node_data, write_json, get_json_name
 
 def read_in_clade_definitions(clade_file):
     '''
@@ -249,7 +249,7 @@ def get_reference_sequence_from_root_node(all_muts, root_name):
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("clades", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("clades", help=__doc__)
     parser.add_argument('--tree', help="prebuilt Newick -- no tree will be built if provided")
     parser.add_argument('--mutations', nargs='+', help='JSON(s) containing ancestral and tip nucleotide and/or amino-acid mutations ')
     parser.add_argument('--reference', nargs='+', help='fasta files containing reference and tip nucleotide and/or amino-acid sequences ')
diff --git a/augur/export.py b/augur/export.py
index 35a7f360c..8160477f5 100644
--- a/augur/export.py
+++ b/augur/export.py
@@ -2,7 +2,6 @@
 Export JSON files suitable for visualization with auspice.
 """
 from .argparse_ import add_command_subparsers
-from .utils import first_line
 from . import export_v1, export_v2
 
 SUBCOMMANDS = [
@@ -12,7 +11,7 @@
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("export", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("export", help=__doc__)
     # Add subparsers for subcommands
     metavar_msg ="Augur export now needs you to define the JSON version " + \
                  "you want, e.g. `augur export v2`."
diff --git a/augur/filter.py b/augur/filter.py
index 365b690bf..f92d41b4a 100644
--- a/augur/filter.py
+++ b/augur/filter.py
@@ -21,7 +21,7 @@
 from .errors import AugurError
 from .index import index_sequences, index_vcf
 from .io import open_file, read_metadata, read_sequences, write_sequences, is_vcf as filename_is_vcf, write_vcf
-from .utils import first_line, read_strains
+from .utils import read_strains
 
 comment_char = '#'
 
@@ -93,7 +93,7 @@ def register_arguments(parser):
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("filter", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("filter", help=__doc__)
     register_arguments(parser)
     return parser
 
diff --git a/augur/frequencies.py b/augur/frequencies.py
index 4a62b0611..96ceb8137 100644
--- a/augur/frequencies.py
+++ b/augur/frequencies.py
@@ -11,11 +11,11 @@
 from .frequency_estimators import AlignmentKdeFrequencies, TreeKdeFrequencies, TreeKdeFrequenciesError
 from .dates import numeric_date_type, SUPPORTED_DATE_HELP_TEXT, get_numerical_dates
 from .io import read_metadata
-from .utils import first_line, read_node_data, write_json
+from .utils import read_node_data, write_json
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("frequencies", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("frequencies", help=__doc__)
     # Shared arguments
     parser.add_argument('--method', choices=["diffusion", "kde"], required=True,
                         help="method by which frequencies should be estimated")
diff --git a/augur/index.py b/augur/index.py
index 4e3328b1e..e59a4a0d6 100644
--- a/augur/index.py
+++ b/augur/index.py
@@ -9,10 +9,9 @@
 import csv
 
 from .io import open_file, read_sequences, is_vcf, read_vcf
-from .utils import first_line
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("index", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("index", help=__doc__)
     parser.add_argument('--sequences', '-s', required=True, help="sequences in FASTA or VCF formats. Augur will summarize the content of FASTA sequences and only report the names of strains found in a given VCF.")
     parser.add_argument('--output', '-o', help="tab-delimited file containing the number of bases per sequence in the given file. Output columns include strain, length, and counts for A, C, G, T, N, other valid IUPAC characters, ambiguous characters ('?' and '-'), and other invalid characters.", required=True)
     parser.add_argument('--verbose', '-v', action="store_true", help="print index statistics to stdout")
diff --git a/augur/lbi.py b/augur/lbi.py
index 54fa5ab91..bf7387797 100644
--- a/augur/lbi.py
+++ b/augur/lbi.py
@@ -5,7 +5,7 @@
 from collections import defaultdict
 import json
 import numpy as np
-from .utils import first_line, write_json
+from .utils import write_json
 
 
 def select_nodes_in_season(tree, timepoint, time_window=0.6):
@@ -80,7 +80,7 @@ def calculate_LBI(tree, attr="lbi", tau=0.4, transform=lambda x:x, normalize=Tru
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("lbi", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("lbi", help=__doc__)
     parser.add_argument("--tree", help="Newick tree", required=True)
     parser.add_argument("--branch-lengths", help="JSON with branch lengths and internal node dates estimated by TreeTime", required=True)
     parser.add_argument("--output", help="JSON file with calculated distances stored by node name and attribute name", required=True)
diff --git a/augur/mask.py b/augur/mask.py
index 247ef0226..14a08596b 100644
--- a/augur/mask.py
+++ b/augur/mask.py
@@ -11,7 +11,7 @@
 from Bio.Seq import MutableSeq
 
 from .io import open_file, read_sequences, write_sequences, run_shell_command, shquote, is_vcf
-from .utils import first_line, load_mask_sites, VALID_NUCLEOTIDES
+from .utils import load_mask_sites, VALID_NUCLEOTIDES
 
 def get_chrom_name(vcf_file):
     """Read the CHROM field from the first non-header line of a vcf file.
@@ -182,7 +182,7 @@ def register_arguments(parser):
                         help="Leave intermediate files around. May be useful for debugging")
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("mask", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("mask", help=__doc__)
     register_arguments(parser)
     return parser
 
diff --git a/augur/parse.py b/augur/parse.py
index 9792329d3..acf6be999 100644
--- a/augur/parse.py
+++ b/augur/parse.py
@@ -7,7 +7,6 @@
 from .io import open_file, read_sequences, write_sequences
 from .dates import get_numerical_date_from_value
 from .errors import AugurError
-from .utils import first_line
 
 forbidden_characters = str.maketrans(
     {' ': None,
@@ -139,7 +138,7 @@ def parse_sequence(sequence, fields, strain_key="strain", separator="|", prettif
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("parse", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("parse", help=__doc__)
     parser.add_argument('--sequences', '-s', required=True, help="sequences in fasta or VCF format")
     parser.add_argument('--output-sequences', help="output sequences file")
     parser.add_argument('--output-metadata', help="output metadata file")
diff --git a/augur/reconstruct_sequences.py b/augur/reconstruct_sequences.py
index d8abd6244..85ba59bee 100644
--- a/augur/reconstruct_sequences.py
+++ b/augur/reconstruct_sequences.py
@@ -6,13 +6,13 @@
 import numpy as np
 from collections import defaultdict
 from Bio import SeqIO, Seq, SeqRecord, Phylo
-from .utils import first_line, read_node_data, write_json
+from .utils import read_node_data, write_json
 from treetime.vcf_utils import read_vcf
 
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("reconstruct-sequences", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("reconstruct-sequences", help=__doc__)
     parser.add_argument('--tree', required=True, help="tree as Newick file")
     parser.add_argument('--gene', type=str, help="gene to translate (list or file containing list)")
     parser.add_argument('--mutations', required=True, type=str, help="json file containing mutations "
diff --git a/augur/refine.py b/augur/refine.py
index 20711ff4f..caa7bfda9 100644
--- a/augur/refine.py
+++ b/augur/refine.py
@@ -6,7 +6,7 @@
 from Bio import Phylo
 from .dates import get_numerical_dates
 from .io import read_metadata
-from .utils import first_line, read_tree, write_json, InvalidTreeError
+from .utils import read_tree, write_json, InvalidTreeError
 from treetime.vcf_utils import read_vcf, write_vcf
 from treetime.seq_utils import profile_maps
 
@@ -93,7 +93,7 @@ def collect_node_data(T, attributes):
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("refine", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("refine", help=__doc__)
     parser.add_argument('--alignment', '-a', help="alignment in fasta or VCF format")
     parser.add_argument('--tree', '-t', required=True, help="prebuilt Newick")
     parser.add_argument('--metadata', type=str, metavar="FILE", help="sequence metadata, as CSV or TSV")
diff --git a/augur/sequence_traits.py b/augur/sequence_traits.py
index cfa1da3ee..87659b13e 100644
--- a/augur/sequence_traits.py
+++ b/augur/sequence_traits.py
@@ -6,7 +6,7 @@
 import numpy as np
 from treetime.vcf_utils import read_vcf
 from collections import defaultdict
-from .utils import first_line, write_json, get_json_name
+from .utils import write_json, get_json_name
 
 def read_in_translate_vcf(vcf_file, ref_file):
     """
@@ -291,7 +291,7 @@ def attach_features(annotations, label, count):
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("sequence-traits", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("sequence-traits", help=__doc__)
     parser.add_argument('--ancestral-sequences', type=str, help="nucleotide alignment (VCF) to search for sequence traits in (can be generated from 'ancestral' using '--output-vcf')")
     parser.add_argument('--translations', type=str, help="AA alignment to search for sequence traits in (can include ancestral sequences)")
     parser.add_argument('--vcf-reference', type=str, help='fasta file of the sequence the nucleotide VCF was mapped to')
diff --git a/augur/titers.py b/augur/titers.py
index 27f622226..860ecd60f 100644
--- a/augur/titers.py
+++ b/augur/titers.py
@@ -9,12 +9,12 @@
 
 from .reconstruct_sequences import load_alignments
 from .titer_model import InsufficientDataException
-from .utils import first_line, read_node_data, write_json
+from .utils import read_node_data, write_json
 from .argparse_ import add_default_command
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("titers", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("titers", help=__doc__)
     subparsers = parser.add_subparsers()
     add_default_command(parser)
 
diff --git a/augur/traits.py b/augur/traits.py
index 8f87912fd..be71c825c 100644
--- a/augur/traits.py
+++ b/augur/traits.py
@@ -7,7 +7,7 @@
 import os, sys
 import pandas as pd
 from .io import read_metadata
-from .utils import first_line, write_json, get_json_name
+from .utils import write_json, get_json_name
 TINY = 1e-12
 
 def mugration_inference(tree=None, seq_meta=None, field='country', confidence=True,
@@ -105,7 +105,7 @@ def register_parser(parent_subparsers):
     parser : argparse
         subcommand argument parser
     """
-    parser = parent_subparsers.add_parser("traits", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("traits", help=__doc__)
     parser.add_argument('--tree', '-t', required=True, help="tree to perform trait reconstruction on")
     parser.add_argument('--metadata', required=True, metavar="FILE", help="table with metadata, as CSV or TSV")
     parser.add_argument('--weights', required=False, help="tsv/csv table with equilibrium probabilities of discrete states")
diff --git a/augur/translate.py b/augur/translate.py
index fc70ade5e..c4ab6279a 100644
--- a/augur/translate.py
+++ b/augur/translate.py
@@ -6,7 +6,7 @@
 import numpy as np
 from Bio import SeqIO, SeqFeature, Seq, SeqRecord, Phylo
 from .io import write_VCF_translation
-from .utils import first_line, read_node_data, load_features, write_json, get_json_name
+from .utils import read_node_data, load_features, write_json, get_json_name
 from treetime.vcf_utils import read_vcf
 
 class MissingNodeError(Exception):
@@ -303,7 +303,7 @@ def get_genes_from_file(fname):
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("translate", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("translate", help=__doc__)
     parser.add_argument('--tree', help="prebuilt Newick -- no tree will be built if provided")
     parser.add_argument('--ancestral-sequences', type=str, help='JSON (fasta input) or VCF (VCF input) containing ancestral and tip sequences')
     parser.add_argument('--reference-sequence', required=True,
diff --git a/augur/tree.py b/augur/tree.py
index 15d31b311..327469865 100644
--- a/augur/tree.py
+++ b/augur/tree.py
@@ -16,7 +16,7 @@
 from pathlib import Path
 
 from .io import read_sequences, run_shell_command, shquote
-from .utils import first_line, nthreads_value, load_mask_sites
+from .utils import nthreads_value, load_mask_sites
 
 DEFAULT_ARGS = {
     "fasttree": "-nt -nosupport",
@@ -396,7 +396,7 @@ def mask_sites_in_multiple_sequence_alignment(alignment_file, excluded_sites_fil
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("tree", help=first_line(__doc__))
+    parser = parent_subparsers.add_parser("tree", help=__doc__)
     parser.add_argument('--alignment', '-a', required=True, help="alignment in fasta or VCF format")
     parser.add_argument('--method', default='iqtree', choices=["fasttree", "raxml", "iqtree"], help="tree builder to use")
     parser.add_argument('--output', '-o', type=str, help='file name to write tree to')
diff --git a/augur/version.py b/augur/version.py
index e71669f2c..4ae47233a 100644
--- a/augur/version.py
+++ b/augur/version.py
@@ -1,11 +1,10 @@
 """
 Print the version of augur.
 """
-from .utils import first_line
 from .__version__ import __version__
 
 def register_parser(parent_subparsers):
-    return parent_subparsers.add_parser("version", help=first_line(__doc__))
+    return parent_subparsers.add_parser("version", help=__doc__)
 
 def run(args):
     print("augur", __version__)

From 6df20666f8187e7c452c5c1c9d83c18a28c150d0 Mon Sep 17 00:00:00 2001
From: Jover <joverlee521@gmail.com>
Date: Tue, 19 Jul 2022 13:04:27 -0700
Subject: [PATCH 10/11] export: create help message from module docstrings

Keep the export modules consistent with the other modules in Augur and
create help messages from the module docstrings.
---
 augur/export_v1.py | 5 ++---
 augur/export_v2.py | 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/augur/export_v1.py b/augur/export_v1.py
index 0822f9c20..9f9fcc180 100644
--- a/augur/export_v1.py
+++ b/augur/export_v1.py
@@ -1,6 +1,5 @@
 """
-Augur Version 1-specific helpers for exporting JSON files suitable for
-visualization with auspice.
+Export version 1 JSON schema (separate meta and tree JSONs) for visualization with Auspice
 """
 
 import os, sys
@@ -332,7 +331,7 @@ def add_option_args(parser):
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("v1", help="Export version 1 JSON schema (separate meta and tree JSONs)")
+    parser = parent_subparsers.add_parser("v1", help=__doc__)
     add_core_args(parser)
     add_option_args(parser)
     parser.add_argument("--v1", help=SUPPRESS, default=True)
diff --git a/augur/export_v2.py b/augur/export_v2.py
index 41d30efeb..aad9d68a4 100644
--- a/augur/export_v2.py
+++ b/augur/export_v2.py
@@ -1,5 +1,5 @@
 """
-Export JSON files suitable for visualization with auspice.
+Export version 2 JSON schema for visualization with Auspice
 """
 from pathlib import Path
 import os, sys
@@ -820,7 +820,7 @@ def node_data_prop_is_normal_trait(name):
 
 
 def register_parser(parent_subparsers):
-    parser = parent_subparsers.add_parser("v2", help="Export version 2 JSON schema")
+    parser = parent_subparsers.add_parser("v2", help=__doc__)
 
     required = parser.add_argument_group(
         title="REQUIRED"

From 60ba754ca19df4e8e50602fd8fa4afa192f0dd83 Mon Sep 17 00:00:00 2001
From: Jover <joverlee521@gmail.com>
Date: Tue, 19 Jul 2022 15:06:08 -0700
Subject: [PATCH 11/11] Update changelog

---
 CHANGES.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/CHANGES.md b/CHANGES.md
index 33a8041cb..286be1655 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -2,6 +2,19 @@
 
 ## __NEXT__
 
+### Major Changes
+
+* Moved the following modules to subpackages [#1002][]. (@joverlee521)
+  These are technically breaking changes for the API, but they do not change the Augur CLI commands.
+    * `import.py` -> `import_/__init__.py`
+    * `import_beast.py` -> `import_/beast.py`
+    * `measurements.py` -> `measurements/__init__.py` + `measurements/concat.py` + `measurements/export.py`
+* Moved the following internal functions/classes [#1002][]. (@joverlee521)
+    * `augur.add_default_command` -> `argparse_.add_default_command`
+    * `utils.HideAsFalseAction` -> `argparse_.HideAsFalseAction`
+* Subcommands must include a `register_parser` function to add their own parser instead of a `register_arguments` function [#1002][]. (@joverlee521)
+
+[#1002]: https://github.com/nextstrain/augur/pull/1002
 
 ## 16.0.3 (6 July 2022)