From 6a998d91c9ba6b65296a31de196a2024e7337e9d Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Mon, 15 Apr 2024 09:57:05 -0700
Subject: [PATCH] Allow repeating an option that takes multiple values

Many command line arguments take multiple values. Previously, to utilize
this feature, all values had to be specified after a single option flag
(e.g. --exclude-where 'region=A' 'region=B'). If values were given using
separate option flags (e.g. --exclude-where 'region=A' --exclude-where
'region=B'), only the values from the last flag were used, which is
unintuitive. This commit makes repeated flags accumulate their values.

This is done across the codebase by adding action='extend' to all options
that use nargs='+' and do not already specify an action.
---
 augur/ancestral.py                                     |  2 +-
 augur/clades.py                                        |  4 ++--
 augur/export_v2.py                                     |  6 +++---
 augur/filter/__init__.py                               |  6 +++---
 augur/frequencies.py                                   |  6 +++---
 augur/mask.py                                          |  2 +-
 augur/parse.py                                         |  4 ++--
 augur/refine.py                                        |  2 +-
 augur/titers.py                                        |  8 ++++----
 augur/traits.py                                        |  2 +-
 augur/translate.py                                     |  2 +-
 scripts/swap_colors.py                                 |  2 +-
 scripts/tree_to_JSON.py                                | 10 +++++-----
 .../filter/cram/filter-exclude-where-multiple.t        |  3 +--
 14 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/augur/ancestral.py b/augur/ancestral.py
index bf0fba0f2..932d86072 100644
--- a/augur/ancestral.py
+++ b/augur/ancestral.py
@@ -315,7 +315,7 @@ def register_parser(parent_subparsers):
     )
     amino_acid_options_group.add_argument('--annotation',
                         help='GenBank or GFF file containing the annotation')
-    amino_acid_options_group.add_argument('--genes', nargs='+', help="genes to translate (list or file containing list)")
+    amino_acid_options_group.add_argument('--genes', nargs='+', action='extend', help="genes to translate (list or file containing list)")
     amino_acid_options_group.add_argument('--translations', type=str, help="translated alignments for each CDS/Gene. "
                            "Currently only supported for FASTA-input. Specify the file name via a "
                            "template like 'aa_sequences_%%GENE.fasta' where %%GENE will be replaced "
diff --git a/augur/clades.py b/augur/clades.py
index bb39ec511..71b868713 100644
--- a/augur/clades.py
+++ b/augur/clades.py
@@ -341,8 +341,8 @@ def parse_nodes(tree_file, node_data_files):
 def register_parser(parent_subparsers):
     parser = parent_subparsers.add_parser("clades", help=__doc__)
     parser.add_argument('--tree', required=True, help="prebuilt Newick -- no tree will be built if provided")
-    parser.add_argument('--mutations', required=True, metavar="NODE_DATA_JSON", nargs='+', help='JSON(s) containing ancestral and tip nucleotide and/or amino-acid mutations ')
-    parser.add_argument('--reference', nargs='+', help=SUPPRESS)
+    parser.add_argument('--mutations', required=True, metavar="NODE_DATA_JSON", nargs='+', action='extend', help='JSON(s) containing ancestral and tip nucleotide and/or amino-acid mutations ')
+    parser.add_argument('--reference', nargs='+', action='extend', help=SUPPRESS)
     parser.add_argument('--clades', required=True, metavar="TSV", type=str, help='TSV file containing clade definitions by amino-acid')
     parser.add_argument('--output-node-data', type=str,  metavar="NODE_DATA_JSON", help='name of JSON file to save clade assignments to')
     parser.add_argument('--membership-name', type=str, default="clade_membership", help='Key to store clade membership under; use "None" to not export this')
diff --git a/augur/export_v2.py b/augur/export_v2.py
index af563bf50..e11d95f95 100644
--- a/augur/export_v2.py
+++ b/augur/export_v2.py
@@ -889,12 +889,12 @@ def register_parser(parent_subparsers):
     config.add_argument('--maintainers', metavar="name", action="append", nargs='+', help="Analysis maintained by, in format 'Name <URL>' 'Name2 <URL>', ...")
     config.add_argument('--build-url', type=str, metavar="url", help="Build URL/repository to be displayed by Auspice")
     config.add_argument('--description', metavar="description.md", help="Markdown file with description of build and/or acknowledgements to be displayed by Auspice")
-    config.add_argument('--geo-resolutions', metavar="trait", nargs='+', help="Geographic traits to be displayed on map")
-    config.add_argument('--color-by-metadata', metavar="trait", nargs='+', help="Metadata columns to include as coloring options")
+    config.add_argument('--geo-resolutions', metavar="trait", nargs='+', action='extend', help="Geographic traits to be displayed on map")
+    config.add_argument('--color-by-metadata', metavar="trait", nargs='+', action='extend', help="Metadata columns to include as coloring options")
     config.add_argument('--metadata-columns', nargs="+",
                                  help="Metadata columns to export in addition to columns provided by --color-by-metadata or colorings in the Auspice configuration file. " +
                                       "These columns will not be used as coloring options in Auspice but will be visible in the tree.")
-    config.add_argument('--panels', metavar="panels", nargs='+', choices=['tree', 'map', 'entropy', 'frequencies', 'measurements'], help="Restrict panel display in auspice. Options are %(choices)s. Ignore this option to display all available panels.")
+    config.add_argument('--panels', metavar="panels", nargs='+', action='extend', choices=['tree', 'map', 'entropy', 'frequencies', 'measurements'], help="Restrict panel display in auspice. Options are %(choices)s. Ignore this option to display all available panels.")
 
     optional_inputs = parser.add_argument_group(
         title="OPTIONAL INPUT FILES"
diff --git a/augur/filter/__init__.py b/augur/filter/__init__.py
index 6fb09cae6..6edeff724 100644
--- a/augur/filter/__init__.py
+++ b/augur/filter/__init__.py
@@ -39,11 +39,11 @@ def register_arguments(parser):
     metadata_filter_group.add_argument('--exclude-ambiguous-dates-by', choices=['any', 'day', 'month', 'year'],
                                 help='Exclude ambiguous dates by day (e.g., 2020-09-XX), month (e.g., 2020-XX-XX), year (e.g., 200X-10-01), or any date fields. An ambiguous year makes the corresponding month and day ambiguous, too, even if those fields have unambiguous values (e.g., "201X-10-01"). Similarly, an ambiguous month makes the corresponding day ambiguous (e.g., "2010-XX-01").')
     metadata_filter_group.add_argument('--exclude', type=str, nargs="+", help="file(s) with list of strains to exclude")
-    metadata_filter_group.add_argument('--exclude-where', nargs='+',
+    metadata_filter_group.add_argument('--exclude-where', nargs='+', action='extend',
                                 help="Exclude samples matching these conditions. Ex: \"host=rat\" or \"host!=rat\". Multiple values are processed as OR (matching any of those specified will be excluded), not AND")
     metadata_filter_group.add_argument('--exclude-all', action="store_true", help="exclude all strains by default. Use this with the include arguments to select a specific subset of strains.")
     metadata_filter_group.add_argument('--include', type=str, nargs="+", help="file(s) with list of strains to include regardless of priorities, subsampling, or absence of an entry in --sequences.")
-    metadata_filter_group.add_argument('--include-where', nargs='+', help="""
+    metadata_filter_group.add_argument('--include-where', nargs='+', action='extend', help="""
         Include samples with these values. ex: host=rat. Multiple values are
         processed as OR (having any of those specified will be included), not
         AND. This rule is applied last and ensures any strains matching these
@@ -56,7 +56,7 @@ def register_arguments(parser):
     sequence_filter_group.add_argument('--non-nucleotide', action='store_true', help="exclude sequences that contain illegal characters")
 
     subsample_group = parser.add_argument_group("subsampling", "options to subsample filtered data")
-    subsample_group.add_argument('--group-by', nargs='+', help=f"""
+    subsample_group.add_argument('--group-by', nargs='+', action='extend', help=f"""
         categories with respect to subsample.
         Notes:
         (1) Grouping by {sorted(constants.GROUP_BY_GENERATED_COLUMNS)} is only supported when there is a {METADATA_DATE_COLUMN!r} column in the metadata.
diff --git a/augur/frequencies.py b/augur/frequencies.py
index 75a11ee3c..aa9b52e35 100644
--- a/augur/frequencies.py
+++ b/augur/frequencies.py
@@ -28,7 +28,7 @@ def register_parser(parent_subparsers):
                         help="delimiters to accept when reading a metadata file. Only one delimiter will be inferred.")
     parser.add_argument('--metadata-id-columns', default=DEFAULT_ID_COLUMNS, nargs="+",
                         help="names of possible metadata columns containing identifier information, ordered by priority. Only one ID column will be inferred.")
-    parser.add_argument('--regions', type=str, nargs='+', default=[DEFAULT_REGION],
+    parser.add_argument('--regions', type=str, nargs='+', action='extend', default=[DEFAULT_REGION],
                         help="region to filter to. " \
                             f"Regions should match values in the {REGION_COLUMN!r} column of the metadata file " \
                             f"if specifying values other than the default {DEFAULT_REGION!r} region.")
@@ -48,9 +48,9 @@ def register_parser(parent_subparsers):
                         help="calculate frequencies for internal nodes as well as tips")
 
     # Alignment-specific arguments
-    parser.add_argument('--alignments', type=str, nargs='+',
+    parser.add_argument('--alignments', type=str, nargs='+', action='extend',
                         help="alignments to estimate mutations frequencies for")
-    parser.add_argument('--gene-names', nargs='+', type=str,
+    parser.add_argument('--gene-names', nargs='+', action='extend', type=str,
                         help="names of the sequences in the alignment, same order assumed")
     parser.add_argument('--ignore-char', type=str, default='',
                         help="character to be ignored in frequency calculations")
diff --git a/augur/mask.py b/augur/mask.py
index e796393b4..d7f880ffe 100644
--- a/augur/mask.py
+++ b/augur/mask.py
@@ -176,7 +176,7 @@ def register_arguments(parser):
     parser.add_argument('--mask-from-beginning', type=int, default=0, help="FASTA Only: Number of sites to mask from beginning")
     parser.add_argument('--mask-from-end', type=int, default=0, help="FASTA Only: Number of sites to mask from end")
     parser.add_argument('--mask-invalid', action='store_true', help="FASTA Only: Mask invalid nucleotides")
-    parser.add_argument("--mask-sites", nargs='+', type = int,  help="1-indexed list of sites to mask")
+    parser.add_argument("--mask-sites", nargs='+', action='extend', type = int,  help="1-indexed list of sites to mask")
     parser.add_argument('--output', '-o', help="output file")
     parser.add_argument('--no-cleanup', dest="cleanup", action="store_false",
                         help="Leave intermediate files around. May be useful for debugging")
diff --git a/augur/parse.py b/augur/parse.py
index 6b0cbb203..5d3cd34a9 100644
--- a/augur/parse.py
+++ b/augur/parse.py
@@ -150,8 +150,8 @@ def register_parser(parent_subparsers):
     parser.add_argument('--output-metadata', required=True, help="output metadata file")
     parser.add_argument('--output-id-field', required=False,
                         help=f"The record field to use as the sequence identifier in the FASTA output. If not provided, this will use the first available of {PARSE_DEFAULT_ID_COLUMNS}. If none of those are available, this will use the first field in the fasta header.")
-    parser.add_argument('--fields', required=True, nargs='+', help="fields in fasta header")
-    parser.add_argument('--prettify-fields', nargs='+', help="apply string prettifying operations (underscores to spaces, capitalization, etc) to specified metadata fields")
+    parser.add_argument('--fields', required=True, nargs='+', action='extend', help="fields in fasta header")
+    parser.add_argument('--prettify-fields', nargs='+', action='extend', help="apply string prettifying operations (underscores to spaces, capitalization, etc) to specified metadata fields")
     parser.add_argument('--separator', default='|', help="separator of fasta header")
     parser.add_argument('--fix-dates', choices=['dayfirst', 'monthfirst'],
                                 help="attempt to parse non-standard dates and output them in standard YYYY-MM-DD format")
diff --git a/augur/refine.py b/augur/refine.py
index 95717861f..69e151eee 100644
--- a/augur/refine.py
+++ b/augur/refine.py
@@ -138,7 +138,7 @@ def register_parser(parent_subparsers):
     parser.add_argument('--clock-filter-iqd', type=float, help='clock-filter: remove tips that deviate more than n_iqd '
                                 'interquartile ranges from the root-to-tip vs time regression')
     parser.add_argument('--vcf-reference', type=str, help='fasta file of the sequence the VCF was mapped to')
-    parser.add_argument('--year-bounds', type=int, nargs='+', help='specify min or max & min prediction bounds for samples with XX in year')
+    parser.add_argument('--year-bounds', type=int, nargs='+', action='extend', help='specify min or max & min prediction bounds for samples with XX in year')
     parser.add_argument('--divergence-units', type=str, choices=['mutations', 'mutations-per-site'],
                         default='mutations-per-site', help='Units in which sequence divergences is exported.')
     parser.add_argument('--seed', type=int, help='seed for random number generation')
diff --git a/augur/titers.py b/augur/titers.py
index b9f3b3791..2b06fb468 100644
--- a/augur/titers.py
+++ b/augur/titers.py
@@ -17,7 +17,7 @@ def register_parser(parent_subparsers):
     add_default_command(parser)
 
     tree_model = subparsers.add_parser('tree', help='tree model')
-    tree_model.add_argument('--titers', nargs='+', type=str, required=True, help="file with titer measurements")
+    tree_model.add_argument('--titers', nargs='+', action='extend', type=str, required=True, help="file with titer measurements")
     tree_model.add_argument('--tree', '-t', type=str, required=True, help="tree to perform fit titer model to")
     tree_model.add_argument('--allow-empty-model', action="store_true", help="allow model to be empty")
     tree_model.add_argument('--attribute-prefix', default="", help="prefix for node attributes in the JSON output including cumulative titer drop ('cTiter') and per-branch titer drop ('dTiter'). Set a prefix to disambiguate annotations from multiple tree model JSONs in the final Auspice JSON.")
@@ -27,9 +27,9 @@ def register_parser(parent_subparsers):
     )
 
     sub_model = subparsers.add_parser('sub', help='substitution model')
-    sub_model.add_argument('--titers', nargs='+', type=str, required=True, help="file with titer measurements")
-    sub_model.add_argument('--alignment', nargs='+', type=str, required=True, help="sequence to be used in the substitution model, supplied as fasta files")
-    sub_model.add_argument('--gene-names', nargs='+', type=str, required=True, help="names of the sequences in the alignment, same order assumed")
+    sub_model.add_argument('--titers', nargs='+', action='extend', type=str, required=True, help="file with titer measurements")
+    sub_model.add_argument('--alignment', nargs='+', action='extend', type=str, required=True, help="sequence to be used in the substitution model, supplied as fasta files")
+    sub_model.add_argument('--gene-names', nargs='+', action='extend', type=str, required=True, help="names of the sequences in the alignment, same order assumed")
     sub_model.add_argument('--tree', '-t', type=str, help="optional tree to annotate fit titer model to")
     sub_model.add_argument('--allow-empty-model', action="store_true", help="allow model to be empty")
     sub_model.add_argument('--attribute-prefix', default="", help="prefix for node attributes in the JSON output including cumulative titer drop ('cTiterSub') and per-substitution titer drop ('dTiterSub'). Set a prefix to disambiguate annotations from multiple substitution model JSONs in the final Auspice JSON.")
diff --git a/augur/traits.py b/augur/traits.py
index 0b520c600..efc2e9300 100644
--- a/augur/traits.py
+++ b/augur/traits.py
@@ -104,7 +104,7 @@ def register_parser(parent_subparsers):
     parser.add_argument('--metadata-id-columns', default=DEFAULT_ID_COLUMNS, nargs="+",
                         help="names of possible metadata columns containing identifier information, ordered by priority. Only one ID column will be inferred.")
     parser.add_argument('--weights', required=False, help="tsv/csv table with equilibrium probabilities of discrete states")
-    parser.add_argument('--columns', required=True, nargs='+',
+    parser.add_argument('--columns', required=True, nargs='+', action='extend',
                         help='metadata fields to perform discrete reconstruction on')
     parser.add_argument('--confidence',action="store_true",
                         help='record the distribution of subleading mugration states')
diff --git a/augur/translate.py b/augur/translate.py
index a7fc8c091..6ec4d4b3d 100644
--- a/augur/translate.py
+++ b/augur/translate.py
@@ -368,7 +368,7 @@ def register_parser(parent_subparsers):
     parser.add_argument('--ancestral-sequences', required=True, type=str, help='JSON (fasta input) or VCF (VCF input) containing ancestral and tip sequences')
     parser.add_argument('--reference-sequence', required=True,
                         help='GenBank or GFF file containing the annotation')
-    parser.add_argument('--genes', nargs='+', help="genes to translate (list or file containing list)")
+    parser.add_argument('--genes', nargs='+', action='extend', help="genes to translate (list or file containing list)")
     parser.add_argument('--output-node-data', type=str, help='name of JSON file to save aa-mutations to')
     parser.add_argument('--alignment-output', type=str, help="write out translated gene alignments. "
                                    "If a VCF-input, a .vcf or .vcf.gz will be output here (depending on file ending). If fasta-input, specify the file name "
diff --git a/scripts/swap_colors.py b/scripts/swap_colors.py
index fc84743c4..24a9ad78a 100644
--- a/scripts/swap_colors.py
+++ b/scripts/swap_colors.py
@@ -4,7 +4,7 @@
 from glob import glob
 
 parser = argparse.ArgumentParser(description ="Update color ramps in processed meta.JSON files")
-parser.add_argument('--jsons', '--json', default=None, nargs='+', type=str, help="Path to prepared JSON(s) to edit. If none, will update all files like ./*meta.json")
+parser.add_argument('--jsons', '--json', default=None, nargs='+', action='extend', type=str, help="Path to prepared JSON(s) to edit. If none, will update all files like ./*meta.json")
 parser.add_argument('--custom_colors', default=None, type=str, help="Path to .tsv or .csv with custom color ramps; will fall back to nextstrain default colors if not provided.")
 args = parser.parse_args().__dict__
 
diff --git a/scripts/tree_to_JSON.py b/scripts/tree_to_JSON.py
index 39bc1d0a6..9657b5c51 100644
--- a/scripts/tree_to_JSON.py
+++ b/scripts/tree_to_JSON.py
@@ -19,9 +19,9 @@ def get_command_line_args():
     beast = parser.add_argument_group('beast')
     beast.add_argument('--nexus', type=str, help="Path to nexus file")
     beast.add_argument('--most_recent_tip', type=float, help="Date of the most recent tip (in decimal format)")
-    beast.add_argument('--discrete_traits', type=str, nargs='+', default=[], help="Discrete traits to extract from the BEAST annotations")
-    beast.add_argument('--continuous_traits', type=str, nargs='+', default=[], help="Continuous traits to extract from the BEAST annotations")
-    beast.add_argument('--make_traits_log', type=str, nargs='+', default=[], help="Convert these (continous) traits to log space: y=-ln(x)")
+    beast.add_argument('--discrete_traits', type=str, nargs='+', action='extend', default=[], help="Discrete traits to extract from the BEAST annotations")
+    beast.add_argument('--continuous_traits', type=str, nargs='+', action='extend', default=[], help="Continuous traits to extract from the BEAST annotations")
+    beast.add_argument('--make_traits_log', type=str, nargs='+', action='extend', default=[], help="Convert these (continous) traits to log space: y=-ln(x)")
     beast.add_argument("--fake_divergence", action="store_true", help="Set the divergence as time (prevents auspice crashing)")
 
 
@@ -34,8 +34,8 @@ def get_command_line_args():
     general.add_argument("--debug", action="store_const", dest="loglevel", const=logging.DEBUG, help="Enable debugging logging")
     general.add_argument('--output_prefix', '-o', required=True, type=str, help="Output prefix (i.e. \"_meta.json\" will be appended to this)")
     general.add_argument('--title', default=None, type=str, help="Title (to be displayed by auspice)")
-    general.add_argument("--defaults", type=str, nargs='+', default=[], help="Auspice defaults. Format: \"key:value\"")
-    general.add_argument("--filters", type=str, nargs='+', default=[], help="Auspice filters.")
+    general.add_argument("--defaults", type=str, nargs='+', action='extend', default=[], help="Auspice defaults. Format: \"key:value\"")
+    general.add_argument("--filters", type=str, nargs='+', action='extend', default=[], help="Auspice filters.")
     general.add_argument('--geo', type=str, help="CSV File w. header \"trait,value,latitude,longitude\". Turns on the map panel.")
 
 
diff --git a/tests/functional/filter/cram/filter-exclude-where-multiple.t b/tests/functional/filter/cram/filter-exclude-where-multiple.t
index fdd465986..bf21d8a75 100644
--- a/tests/functional/filter/cram/filter-exclude-where-multiple.t
+++ b/tests/functional/filter/cram/filter-exclude-where-multiple.t
@@ -19,8 +19,7 @@ Run command
   >  --exclude-where "region=B" \
   >  --output-strains filtered_strains.txt > /dev/null
 
-Only the last exclusion is applied
+Both exclusions are applied
 
   $ cat filtered_strains.txt
-  SEQ_1
   SEQ_3