From 7debad4e3f57827a2be4cb2e62ffce599f9f20c6 Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 5 Mar 2024 11:19:46 -0800
Subject: [PATCH 1/7] Remove redundant test

This is the same as filter-min-length-output-strains.t but using
--output-metadata. The contents of --output-metadata are already tested
in other tests such as filter-query-example.t.
---
 .../cram/filter-min-length-output-metadata.t    | 17 -----------------
 1 file changed, 17 deletions(-)
 delete mode 100644 tests/functional/filter/cram/filter-min-length-output-metadata.t

diff --git a/tests/functional/filter/cram/filter-min-length-output-metadata.t b/tests/functional/filter/cram/filter-min-length-output-metadata.t
deleted file mode 100644
index 9cb7f26c2..000000000
--- a/tests/functional/filter/cram/filter-min-length-output-metadata.t
+++ /dev/null
@@ -1,17 +0,0 @@
-Setup
-
-  $ source "$TESTDIR"/_setup.sh
-
-Filter using only metadata without sequence input or output and save results as filtered metadata.
-
-  $ ${AUGUR} filter \
-  >  --sequence-index "$TESTDIR/../data/sequence_index.tsv" \
-  >  --metadata "$TESTDIR/../data/metadata.tsv" \
-  >  --min-date 2012 \
-  >  --min-length 10500 \
-  >  --output-metadata filtered_metadata.tsv > /dev/null
-
-Output should include the 8 sequences matching the filters and a header line.
-
-  $ wc -l filtered_metadata.tsv
-  \s*9 .* (re)

From 7d2c10682a60ecae6bf6a56d3597775dffcefa27 Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 5 Mar 2024 11:26:13 -0800
Subject: [PATCH 2/7] Test all missing sequence index errors

Repurpose the --min-length test to also include --non-nucleotide.
---
 ...-index-error.t => filter-no-sequence-index-error.t} | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
 rename tests/functional/filter/cram/{filter-min-length-no-sequence-index-error.t => filter-no-sequence-index-error.t} (51%)

diff --git a/tests/functional/filter/cram/filter-min-length-no-sequence-index-error.t b/tests/functional/filter/cram/filter-no-sequence-index-error.t
similarity index 51%
rename from tests/functional/filter/cram/filter-min-length-no-sequence-index-error.t
rename to tests/functional/filter/cram/filter-no-sequence-index-error.t
index 050db8b26..01b2ff999 100644
--- a/tests/functional/filter/cram/filter-min-length-no-sequence-index-error.t
+++ b/tests/functional/filter/cram/filter-no-sequence-index-error.t
@@ -3,7 +3,8 @@ Setup
   $ source "$TESTDIR"/_setup.sh
 
 Try to filter using only metadata without a sequence index.
-This should fail because the requested filters rely on sequence information.
+
+These should fail because the requested filters rely on sequence information.
 
   $ ${AUGUR} filter \
   >  --metadata "$TESTDIR/../data/metadata.tsv" \
@@ -11,3 +12,10 @@ This should fail because the requested filters rely on sequence information.
   >  --output-strains filtered_strains.txt > /dev/null
   ERROR: You need to provide a sequence index or sequences to filter on sequence-specific information.
   [2]
+
+  $ ${AUGUR} filter \
+  >  --metadata "$TESTDIR/../data/metadata.tsv" \
+  >  --non-nucleotide \
+  >  --output-strains filtered_strains.txt > /dev/null
+  ERROR: You need to provide a sequence index or sequences to filter on sequence-specific information.
+  [2]

From ea692b8d392247a68aafe365524292cedc3445fd Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 5 Mar 2024 11:33:13 -0800
Subject: [PATCH 3/7] Repurpose --min-length functional test

The purpose of this file is to test --min-length and not other features.
---
 .../cram/filter-min-length-output-strains.t     | 17 -----------------
 .../functional/filter/cram/filter-min-length.t  | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 17 deletions(-)
 delete mode 100644 tests/functional/filter/cram/filter-min-length-output-strains.t
 create mode 100644 tests/functional/filter/cram/filter-min-length.t

diff --git a/tests/functional/filter/cram/filter-min-length-output-strains.t b/tests/functional/filter/cram/filter-min-length-output-strains.t
deleted file mode 100644
index e0486eb89..000000000
--- a/tests/functional/filter/cram/filter-min-length-output-strains.t
+++ /dev/null
@@ -1,17 +0,0 @@
-Setup
-
-  $ source "$TESTDIR"/_setup.sh
-
-Filter using only metadata and save results as a list of filtered strains.
-
-  $ ${AUGUR} filter \
-  >  --sequence-index "$TESTDIR/../data/sequence_index.tsv" \
-  >  --metadata "$TESTDIR/../data/metadata.tsv" \
-  >  --min-date 2012 \
-  >  --min-length 10500 \
-  >  --output-strains filtered_strains.txt > /dev/null
-
-Output should include only the 8 sequences matching the filters (without a header line).
-
-  $ wc -l filtered_strains.txt
-  \s*8 .* (re)
diff --git a/tests/functional/filter/cram/filter-min-length.t b/tests/functional/filter/cram/filter-min-length.t
new file mode 100644
index 000000000..cbbe4d713
--- /dev/null
+++ b/tests/functional/filter/cram/filter-min-length.t
@@ -0,0 +1,16 @@
+Setup
+
+  $ source "$TESTDIR"/_setup.sh
+
+Filter using --min-length.
+
+  $ ${AUGUR} filter \
+  >  --sequence-index "$TESTDIR/../data/sequence_index.tsv" \
+  >  --metadata "$TESTDIR/../data/metadata.tsv" \
+  >  --min-length 10500 \
+  >  --output-strains filtered_strains.txt
+  4 strains were dropped during filtering
+  	1 had no metadata
+  	1 had no sequence data
+  	2 were dropped because they were shorter than minimum length of 10500bp when only counting standard nucleotide characters A, C, G, or T (case-insensitive)
+  9 strains passed all filters

From e40a340a85d1eef0f0bde37ac7ef389de68e38c2 Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 5 Mar 2024 11:43:34 -0800
Subject: [PATCH 4/7] Fix grammar in filter summary

---
 augur/filter/_run.py                             | 2 +-
 tests/functional/filter/cram/filter-min-length.t | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/augur/filter/_run.py b/augur/filter/_run.py
index 57a25526e..ca351ab63 100644
--- a/augur/filter/_run.py
+++ b/augur/filter/_run.py
@@ -428,7 +428,7 @@ def run(args):
         include_exclude_rules.filter_by_ambiguous_date.__name__: "{count} {were} dropped because of their ambiguous date in {ambiguity}",
         include_exclude_rules.filter_by_min_date.__name__: "{count} {were} dropped because {they} {were} earlier than {min_date} or missing a date",
         include_exclude_rules.filter_by_max_date.__name__: "{count} {were} dropped because {they} {were} later than {max_date} or missing a date",
-        include_exclude_rules.filter_by_sequence_length.__name__: "{count} {were} dropped because {they} {were} shorter than minimum length of {min_length}bp when only counting standard nucleotide characters A, C, G, or T (case-insensitive)",
+        include_exclude_rules.filter_by_sequence_length.__name__: "{count} {were} dropped because {they} {were} shorter than the minimum length of {min_length}bp when only counting standard nucleotide characters A, C, G, or T (case-insensitive)",
         include_exclude_rules.filter_by_non_nucleotide.__name__: "{count} {were} dropped because {they} had non-nucleotide characters",
         include_exclude_rules.skip_group_by_with_ambiguous_year.__name__: "{count} {were} dropped during grouping due to ambiguous year information",
         include_exclude_rules.skip_group_by_with_ambiguous_month.__name__: "{count} {were} dropped during grouping due to ambiguous month information",
diff --git a/tests/functional/filter/cram/filter-min-length.t b/tests/functional/filter/cram/filter-min-length.t
index cbbe4d713..917232edf 100644
--- a/tests/functional/filter/cram/filter-min-length.t
+++ b/tests/functional/filter/cram/filter-min-length.t
@@ -12,5 +12,5 @@ Filter using --min-length.
   4 strains were dropped during filtering
   	1 had no metadata
   	1 had no sequence data
-  	2 were dropped because they were shorter than minimum length of 10500bp when only counting standard nucleotide characters A, C, G, or T (case-insensitive)
+  	2 were dropped because they were shorter than the minimum length of 10500bp when only counting standard nucleotide characters A, C, G, or T (case-insensitive)
   9 strains passed all filters

From 748417e6a628fef37af36835980a80496828832d Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 5 Mar 2024 11:06:25 -0800
Subject: [PATCH 5/7] Rename min length filter function

Rename with "min" to pair it with a "max" filter, coming in a following
commit.
---
 augur/filter/_run.py                  |  2 +-
 augur/filter/include_exclude_rules.py | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/augur/filter/_run.py b/augur/filter/_run.py
index ca351ab63..c12606a91 100644
--- a/augur/filter/_run.py
+++ b/augur/filter/_run.py
@@ -428,7 +428,7 @@ def run(args):
         include_exclude_rules.filter_by_ambiguous_date.__name__: "{count} {were} dropped because of their ambiguous date in {ambiguity}",
         include_exclude_rules.filter_by_min_date.__name__: "{count} {were} dropped because {they} {were} earlier than {min_date} or missing a date",
         include_exclude_rules.filter_by_max_date.__name__: "{count} {were} dropped because {they} {were} later than {max_date} or missing a date",
-        include_exclude_rules.filter_by_sequence_length.__name__: "{count} {were} dropped because {they} {were} shorter than the minimum length of {min_length}bp when only counting standard nucleotide characters A, C, G, or T (case-insensitive)",
+        include_exclude_rules.filter_by_min_length.__name__: "{count} {were} dropped because {they} {were} shorter than the minimum length of {min_length}bp when only counting standard nucleotide characters A, C, G, or T (case-insensitive)",
         include_exclude_rules.filter_by_non_nucleotide.__name__: "{count} {were} dropped because {they} had non-nucleotide characters",
         include_exclude_rules.skip_group_by_with_ambiguous_year.__name__: "{count} {were} dropped during grouping due to ambiguous year information",
         include_exclude_rules.skip_group_by_with_ambiguous_month.__name__: "{count} {were} dropped during grouping due to ambiguous month information",
diff --git a/augur/filter/include_exclude_rules.py b/augur/filter/include_exclude_rules.py
index a8de9aba7..b1e11cf5a 100644
--- a/augur/filter/include_exclude_rules.py
+++ b/augur/filter/include_exclude_rules.py
@@ -424,7 +424,7 @@ def filter_by_sequence_index(metadata, sequence_index) -> FilterFunctionReturn:
     return metadata_strains & sequence_index_strains
 
 
-def filter_by_sequence_length(metadata, sequence_index, min_length) -> FilterFunctionReturn:
+def filter_by_min_length(metadata, sequence_index, min_length) -> FilterFunctionReturn:
     """Filter metadata by sequence length from a given sequence index.
 
     Parameters
@@ -440,13 +440,13 @@ def filter_by_sequence_length(metadata, sequence_index, min_length) -> FilterFun
     --------
     >>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-01"}, {"region": "Europe", "date": "2020-01-02"}], index=["strain1", "strain2"])
     >>> sequence_index = pd.DataFrame([{"strain": "strain1", "A": 7000, "C": 7000, "G": 7000, "T": 7000}, {"strain": "strain2", "A": 6500, "C": 6500, "G": 6500, "T": 6500}]).set_index("strain")
-    >>> filter_by_sequence_length(metadata, sequence_index, min_length=27000)
+    >>> filter_by_min_length(metadata, sequence_index, min_length=27000)
     {'strain1'}
 
     It is possible for the sequence index to be missing strains present in the metadata.
 
     >>> sequence_index = pd.DataFrame([{"strain": "strain3", "A": 7000, "C": 7000, "G": 7000, "T": 7000}, {"strain": "strain2", "A": 6500, "C": 6500, "G": 6500, "T": 6500}]).set_index("strain")
-    >>> filter_by_sequence_length(metadata, sequence_index, min_length=27000)
+    >>> filter_by_min_length(metadata, sequence_index, min_length=27000)
     set()
 
     """
@@ -676,7 +676,7 @@ def construct_filters(args, sequence_index) -> Tuple[List[FilterOption], List[Fi
             print_err("WARNING: Cannot use min_length for VCF files. Ignoring...")
         else:
             exclude_by.append((
-                filter_by_sequence_length,
+                filter_by_min_length,
                 {
                     "sequence_index": sequence_index,
                     "min_length": args.min_length,
@@ -755,13 +755,13 @@ def apply_filters(metadata, exclude_by: List[FilterOption], include_by: List[Fil
     annotated in a sequence index.
 
     >>> sequence_index = pd.DataFrame([{"strain": "strain1", "A": 7000, "C": 7000, "G": 7000, "T": 7000}, {"strain": "strain2", "A": 6500, "C": 6500, "G": 6500, "T": 6500}, {"strain": "strain3", "A": 1250, "C": 1250, "G": 1250, "T": 1250}]).set_index("strain")
-    >>> exclude_by = [(filter_by_sequence_length, {"sequence_index": sequence_index, "min_length": 27000})]
+    >>> exclude_by = [(filter_by_min_length, {"sequence_index": sequence_index, "min_length": 27000})]
     >>> include_by = [(force_include_where, {"include_where": "region=Europe"})]
     >>> strains_to_keep, strains_to_exclude, strains_to_include = apply_filters(metadata, exclude_by, include_by)
     >>> strains_to_keep
     {'strain1'}
     >>> sorted(strains_to_exclude, key=lambda record: record["strain"])
-    [{'strain': 'strain2', 'filter': 'filter_by_sequence_length', 'kwargs': '[["min_length", 27000]]'}, {'strain': 'strain3', 'filter': 'filter_by_sequence_length', 'kwargs': '[["min_length", 27000]]'}]
+    [{'strain': 'strain2', 'filter': 'filter_by_min_length', 'kwargs': '[["min_length", 27000]]'}, {'strain': 'strain3', 'filter': 'filter_by_min_length', 'kwargs': '[["min_length", 27000]]'}]
     >>> strains_to_include
     [{'strain': 'strain2', 'filter': 'force_include_where', 'kwargs': '[["include_where", "region=Europe"]]'}]
 
@@ -847,9 +847,9 @@ def _filter_kwargs_to_str(kwargs: FilterFunctionKwargs):
     Examples
     --------
     >>> from augur.dates import numeric_date
-    >>> from augur.filter.include_exclude_rules import filter_by_sequence_length, filter_by_min_date
+    >>> from augur.filter.include_exclude_rules import filter_by_min_length, filter_by_min_date
     >>> sequence_index = pd.DataFrame([{"strain": "strain1", "ACGT": 28000}, {"strain": "strain2", "ACGT": 26000}, {"strain": "strain3", "ACGT": 5000}]).set_index("strain")
-    >>> exclude_by = [(filter_by_sequence_length, {"sequence_index": sequence_index, "min_length": 27000})]
+    >>> exclude_by = [(filter_by_min_length, {"sequence_index": sequence_index, "min_length": 27000})]
     >>> _filter_kwargs_to_str(exclude_by[0][1])
     '[["min_length", 27000]]'
     >>> exclude_by = [(filter_by_min_date, {"date_column": "date", "min_date": numeric_date("2020-03-01")})]

From 64eb5ac301cccd26a850193704b539bdb0cbf312 Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 5 Mar 2024 11:31:19 -0800
Subject: [PATCH 6/7] Add --max-length option

This complements --min-length.
---
 augur/filter/__init__.py                      |  1 +
 augur/filter/_run.py                          |  1 +
 augur/filter/include_exclude_rules.py         | 48 +++++++++++++++++--
 augur/filter/validate_arguments.py            |  1 +
 .../cram/filter-no-sequence-index-error.t     |  7 +++
 ...-min-length.t => filter-sequence-length.t} |  8 ++--
 6 files changed, 58 insertions(+), 8 deletions(-)
 rename tests/functional/filter/cram/{filter-min-length.t => filter-sequence-length.t} (59%)

diff --git a/augur/filter/__init__.py b/augur/filter/__init__.py
index 3e647f2c3..6fb09cae6 100644
--- a/augur/filter/__init__.py
+++ b/augur/filter/__init__.py
@@ -52,6 +52,7 @@ def register_arguments(parser):
 
     sequence_filter_group = parser.add_argument_group("sequence filters", "filters to apply to sequence data")
     sequence_filter_group.add_argument('--min-length', type=int, help="minimal length of the sequences, only counting standard nucleotide characters A, C, G, or T (case-insensitive)")
+    sequence_filter_group.add_argument('--max-length', type=int, help="maximum length of the sequences, only counting standard nucleotide characters A, C, G, or T (case-insensitive)")
     sequence_filter_group.add_argument('--non-nucleotide', action='store_true', help="exclude sequences that contain illegal characters")
 
     subsample_group = parser.add_argument_group("subsampling", "options to subsample filtered data")
diff --git a/augur/filter/_run.py b/augur/filter/_run.py
index c12606a91..3cab70cae 100644
--- a/augur/filter/_run.py
+++ b/augur/filter/_run.py
@@ -429,6 +429,7 @@ def run(args):
         include_exclude_rules.filter_by_min_date.__name__: "{count} {were} dropped because {they} {were} earlier than {min_date} or missing a date",
         include_exclude_rules.filter_by_max_date.__name__: "{count} {were} dropped because {they} {were} later than {max_date} or missing a date",
         include_exclude_rules.filter_by_min_length.__name__: "{count} {were} dropped because {they} {were} shorter than the minimum length of {min_length}bp when only counting standard nucleotide characters A, C, G, or T (case-insensitive)",
+        include_exclude_rules.filter_by_max_length.__name__: "{count} {were} dropped because {they} {were} longer than the maximum length of {max_length}bp when only counting standard nucleotide characters A, C, G, or T (case-insensitive)",
         include_exclude_rules.filter_by_non_nucleotide.__name__: "{count} {were} dropped because {they} had non-nucleotide characters",
         include_exclude_rules.skip_group_by_with_ambiguous_year.__name__: "{count} {were} dropped during grouping due to ambiguous year information",
         include_exclude_rules.skip_group_by_with_ambiguous_month.__name__: "{count} {were} dropped during grouping due to ambiguous month information",
diff --git a/augur/filter/include_exclude_rules.py b/augur/filter/include_exclude_rules.py
index b1e11cf5a..f6d362779 100644
--- a/augur/filter/include_exclude_rules.py
+++ b/augur/filter/include_exclude_rules.py
@@ -459,6 +459,34 @@ def filter_by_min_length(metadata, sequence_index, min_length) -> FilterFunction
     return set(filtered_sequence_index[filtered_sequence_index["ACGT"] >= min_length].index.values)
 
 
+def filter_by_max_length(metadata, sequence_index, max_length) -> FilterFunctionReturn:
+    """Filter metadata by sequence length from a given sequence index.
+
+    Parameters
+    ----------
+    metadata : pandas.DataFrame
+        Metadata indexed by strain name
+    sequence_index : pandas.DataFrame
+        Sequence index
+    max_length : int
+        Maximum number of standard nucleotide characters (A, C, G, or T) in each sequence
+
+    Examples
+    --------
+    >>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-01"}, {"region": "Europe", "date": "2020-01-02"}], index=["strain1", "strain2"])
+    >>> sequence_index = pd.DataFrame([{"strain": "strain1", "A": 7000, "C": 7000, "G": 7000, "T": 7000}, {"strain": "strain2", "A": 6500, "C": 6500, "G": 6500, "T": 6500}]).set_index("strain")
+    >>> filter_by_max_length(metadata, sequence_index, max_length=27000)
+    {'strain2'}
+    """
+    strains = set(metadata.index.values)
+    filtered_sequence_index = sequence_index.loc[
+        sequence_index.index.intersection(strains)
+    ]
+    filtered_sequence_index["ACGT"] = filtered_sequence_index.loc[:, ["A", "C", "G", "T"]].sum(axis=1)
+
+    return set(filtered_sequence_index[filtered_sequence_index["ACGT"] <= max_length].index.values)
+
+
 def filter_by_non_nucleotide(metadata, sequence_index) -> FilterFunctionReturn:
     """Filter metadata for strains with invalid nucleotide content.
 
@@ -667,12 +695,11 @@ def construct_filters(args, sequence_index) -> Tuple[List[FilterOption], List[Fi
         ))
 
     # Filter by sequence length.
+    # Skip VCF files and warn the user that length filters do not
+    # make sense for VCFs.
+    is_vcf = filename_is_vcf(args.sequences)
     if args.min_length:
-        # Skip VCF files and warn the user that the min length filter does not
-        # make sense for VCFs.
-        is_vcf = filename_is_vcf(args.sequences)
-
-        if is_vcf: #doesn't make sense for VCF, ignore.
+        if is_vcf:
             print_err("WARNING: Cannot use min_length for VCF files. Ignoring...")
         else:
             exclude_by.append((
@@ -682,6 +709,17 @@ def construct_filters(args, sequence_index) -> Tuple[List[FilterOption], List[Fi
                     "min_length": args.min_length,
                 }
             ))
+    if args.max_length:
+        if is_vcf:
+            print_err("WARNING: Cannot use max_length for VCF files. Ignoring...")
+        else:
+            exclude_by.append((
+                filter_by_max_length,
+                {
+                    "sequence_index": sequence_index,
+                    "max_length": args.max_length,
+                }
+            ))
 
     # Exclude sequences with non-nucleotide characters.
     if args.non_nucleotide:
diff --git a/augur/filter/validate_arguments.py b/augur/filter/validate_arguments.py
index 866989303..49ee310bd 100644
--- a/augur/filter/validate_arguments.py
+++ b/augur/filter/validate_arguments.py
@@ -4,6 +4,7 @@
 
 SEQUENCE_ONLY_FILTERS = (
     "min_length",
+    "max_length",
     "non_nucleotide",
 )
 
diff --git a/tests/functional/filter/cram/filter-no-sequence-index-error.t b/tests/functional/filter/cram/filter-no-sequence-index-error.t
index 01b2ff999..657463da5 100644
--- a/tests/functional/filter/cram/filter-no-sequence-index-error.t
+++ b/tests/functional/filter/cram/filter-no-sequence-index-error.t
@@ -13,6 +13,13 @@ These should fail because the requested filters rely on sequence information.
   ERROR: You need to provide a sequence index or sequences to filter on sequence-specific information.
   [2]
 
+  $ ${AUGUR} filter \
+  >  --metadata "$TESTDIR/../data/metadata.tsv" \
+  >  --max-length 10000 \
+  >  --output-strains filtered_strains.txt > /dev/null
+  ERROR: You need to provide a sequence index or sequences to filter on sequence-specific information.
+  [2]
+
   $ ${AUGUR} filter \
   >  --metadata "$TESTDIR/../data/metadata.tsv" \
   >  --non-nucleotide \
diff --git a/tests/functional/filter/cram/filter-min-length.t b/tests/functional/filter/cram/filter-sequence-length.t
similarity index 59%
rename from tests/functional/filter/cram/filter-min-length.t
rename to tests/functional/filter/cram/filter-sequence-length.t
index 917232edf..abd5303bd 100644
--- a/tests/functional/filter/cram/filter-min-length.t
+++ b/tests/functional/filter/cram/filter-sequence-length.t
@@ -2,15 +2,17 @@ Setup
 
   $ source "$TESTDIR"/_setup.sh
 
-Filter using --min-length.
+Filter using --min-length and --max-length.
 
   $ ${AUGUR} filter \
   >  --sequence-index "$TESTDIR/../data/sequence_index.tsv" \
   >  --metadata "$TESTDIR/../data/metadata.tsv" \
   >  --min-length 10500 \
+  >  --max-length 10700 \
   >  --output-strains filtered_strains.txt
-  4 strains were dropped during filtering
+  7 strains were dropped during filtering
   	1 had no metadata
   	1 had no sequence data
   	2 were dropped because they were shorter than the minimum length of 10500bp when only counting standard nucleotide characters A, C, G, or T (case-insensitive)
-  9 strains passed all filters
+  	3 were dropped because they were longer than the maximum length of 10700bp when only counting standard nucleotide characters A, C, G, or T (case-insensitive)
+  6 strains passed all filters

From 7c7353b83c66e12b335915fb0103185c3dfb30ec Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 5 Mar 2024 11:50:00 -0800
Subject: [PATCH 7/7] Update changelog

---
 CHANGES.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGES.md b/CHANGES.md
index 2ffb17681..40d55200d 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -2,11 +2,16 @@
 
 ## __NEXT__
 
+### Features
+
+* filter: Added a new option `--max-length` to filter out sequences that are longer than a certain amount of base pairs. [#1429][] (@victorlin)
+
 ### Bug Fixes
 
 * filter: Updated docs with an example of tiered subsampling. [#1425][] (@victorlin)
 
 [#1425]: https://github.com/nextstrain/augur/pull/1425
+[#1429]: https://github.com/nextstrain/augur/pull/1429
 
 ## 24.2.3 (23 February 2024)