Skip to content

Commit

Permalink
Merge pull request #708 from nextstrain/filter-date-bounds-inclusive
Browse files (browse the repository at this point in the history)
Filter date bounds inclusive
  • Loading branch information
huddlej authored Apr 2, 2021
2 parents f0ff957 + 8d7e2df commit 24baeaf
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 4 deletions.
8 changes: 4 additions & 4 deletions augur/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ def register_arguments(parser):
Uses Pandas Dataframe querying, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-query for syntax.
(e.g., --query "country == 'Colombia'" or --query "(country == 'USA' & (division == 'Washington'))")"""
)
metadata_filter_group.add_argument('--min-date', type=numeric_date, help="minimal cutoff for date; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD")
metadata_filter_group.add_argument('--max-date', type=numeric_date, help="maximal cutoff for date; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD")
metadata_filter_group.add_argument('--min-date', type=numeric_date, help="minimal cutoff for date, the cutoff date is inclusive; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD")
metadata_filter_group.add_argument('--max-date', type=numeric_date, help="maximal cutoff for date, the cutoff date is inclusive; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD")
metadata_filter_group.add_argument('--exclude-ambiguous-dates-by', choices=['any', 'day', 'month', 'year'],
help='Exclude ambiguous dates by day (e.g., 2020-09-XX), month (e.g., 2020-XX-XX), year (e.g., 200X-10-01), or any date fields. An ambiguous year makes the corresponding month and day ambiguous, too, even if those fields have unambiguous values (e.g., "201X-10-01"). Similarly, an ambiguous month makes the corresponding day ambiguous (e.g., "2010-XX-01").')
metadata_filter_group.add_argument('--exclude', type=str, nargs="+", help="file(s) with list of strains to exclude")
Expand Down Expand Up @@ -348,9 +348,9 @@ def run(args):
dates = get_numerical_dates(meta_dict, fmt="%Y-%m-%d")
tmp = {s for s in seq_keep if dates[s] is not None}
if args.min_date:
tmp = {s for s in tmp if (np.isscalar(dates[s]) or all(dates[s])) and np.max(dates[s])>args.min_date}
tmp = {s for s in tmp if (np.isscalar(dates[s]) or all(dates[s])) and np.max(dates[s])>=args.min_date}
if args.max_date:
tmp = {s for s in tmp if (np.isscalar(dates[s]) or all(dates[s])) and np.min(dates[s])<args.max_date}
tmp = {s for s in tmp if (np.isscalar(dates[s]) or all(dates[s])) and np.min(dates[s])<=args.max_date}
num_excluded_by_date = len(seq_keep) - len(tmp)
seq_keep = tmp

Expand Down
28 changes: 28 additions & 0 deletions tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,3 +215,31 @@ def test_filter_run_with_query_and_include_where(self, tmpdir, fasta_fn, argpars
augur.filter.run(args)
output = SeqIO.to_dict(SeqIO.parse(out_fn, "fasta"))
assert list(output.keys()) == ["SEQ_1", "SEQ_3"]

def test_filter_run_min_date(self, tmpdir, fasta_fn, argparser):
    """Check that a strain dated exactly on --min-date is kept.

    The metadata holds one strain with an ambiguous day (SEQ_1), one dated
    exactly on the cutoff (SEQ_2) and one dated the day before it (SEQ_3).
    Only SEQ_3 is expected to be dropped from the filtered FASTA.
    """
    metadata_rows = (
        ("strain", "date"),
        ("SEQ_1", "2020-02-XX"),
        ("SEQ_2", "2020-02-26"),
        ("SEQ_3", "2020-02-25"),
    )
    metadata_path = write_metadata(tmpdir, metadata_rows)
    output_path = str(tmpdir / "out.fasta")
    cutoff = "2020-02-26"

    command = '-s %s --metadata %s -o %s --min-date %s' % (
        fasta_fn, metadata_path, output_path, cutoff)
    augur.filter.run(argparser(command))

    # Read the filtered sequences back and compare the surviving strain names.
    kept = SeqIO.to_dict(SeqIO.parse(output_path, "fasta"))
    assert list(kept) == ["SEQ_1", "SEQ_2"]

def test_filter_run_max_date(self, tmpdir, fasta_fn, argparser):
    """Check that a strain dated exactly on --max-date is kept.

    The metadata holds one strain with an ambiguous day (SEQ_1), one dated
    exactly on the cutoff (SEQ_2) and one dated the day after it (SEQ_3).
    Only SEQ_3 is expected to be dropped from the filtered FASTA.
    """
    metadata_rows = (
        ("strain", "date"),
        ("SEQ_1", "2020-03-XX"),
        ("SEQ_2", "2020-03-01"),
        ("SEQ_3", "2020-03-02"),
    )
    metadata_path = write_metadata(tmpdir, metadata_rows)
    output_path = str(tmpdir / "out.fasta")
    cutoff = "2020-03-01"

    command = '-s %s --metadata %s -o %s --max-date %s' % (
        fasta_fn, metadata_path, output_path, cutoff)
    augur.filter.run(argparser(command))

    # Read the filtered sequences back and compare the surviving strain names.
    kept = SeqIO.to_dict(SeqIO.parse(output_path, "fasta"))
    assert list(kept) == ["SEQ_1", "SEQ_2"]

0 comments on commit 24baeaf

Please sign in to comment.