From 25927e4290053e6872e5c9fdbfdd4d98f516854e Mon Sep 17 00:00:00 2001 From: Benjamin Date: Wed, 31 Mar 2021 10:53:09 +0200 Subject: [PATCH 1/2] Change date bounds for filter --min-date and --max-date to be inclusive. Updates help to document that the date bounds are inclusive. --- augur/filter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/augur/filter.py b/augur/filter.py index 3f4014f65..02d14af62 100644 --- a/augur/filter.py +++ b/augur/filter.py @@ -105,8 +105,8 @@ def register_arguments(parser): Uses Pandas Dataframe querying, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-query for syntax. (e.g., --query "country == 'Colombia'" or --query "(country == 'USA' & (division == 'Washington'))")""" ) - metadata_filter_group.add_argument('--min-date', type=numeric_date, help="minimal cutoff for date; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD") - metadata_filter_group.add_argument('--max-date', type=numeric_date, help="maximal cutoff for date; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD") + metadata_filter_group.add_argument('--min-date', type=numeric_date, help="minimal cutoff for date, the cutoff date is inclusive; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD") + metadata_filter_group.add_argument('--max-date', type=numeric_date, help="maximal cutoff for date, the cutoff date is inclusive; may be specified as an Augur-style numeric date (with the year as the integer part) or YYYY-MM-DD") metadata_filter_group.add_argument('--exclude-ambiguous-dates-by', choices=['any', 'day', 'month', 'year'], help='Exclude ambiguous dates by day (e.g., 2020-09-XX), month (e.g., 2020-XX-XX), year (e.g., 200X-10-01), or any date fields. An ambiguous year makes the corresponding month and day ambiguous, too, even if those fields have unambiguous values (e.g., "201X-10-01"). Similarly, an ambiguous month makes the corresponding day ambiguous (e.g., "2010-XX-01").') metadata_filter_group.add_argument('--exclude', type=str, nargs="+", help="file(s) with list of strains to exclude") @@ -348,9 +348,9 @@ def run(args): dates = get_numerical_dates(meta_dict, fmt="%Y-%m-%d") tmp = {s for s in seq_keep if dates[s] is not None} if args.min_date: - tmp = {s for s in tmp if (np.isscalar(dates[s]) or all(dates[s])) and np.max(dates[s])>args.min_date} + tmp = {s for s in tmp if (np.isscalar(dates[s]) or all(dates[s])) and np.max(dates[s])>=args.min_date} if args.max_date: - tmp = {s for s in tmp if (np.isscalar(dates[s]) or all(dates[s])) and np.min(dates[s]) Date: Wed, 31 Mar 2021 14:27:43 +0200 Subject: [PATCH 2/2] Adds tests to assure filter --min-date and --max-date are inclusive --- tests/test_filter.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/test_filter.py b/tests/test_filter.py index 1d4399bbb..106617e46 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -215,3 +215,31 @@ def test_filter_run_with_query_and_include_where(self, tmpdir, fasta_fn, argpars augur.filter.run(args) output = SeqIO.to_dict(SeqIO.parse(out_fn, "fasta")) assert list(output.keys()) == ["SEQ_1", "SEQ_3"] + + def test_filter_run_min_date(self, tmpdir, fasta_fn, argparser): + """Test that filter --min-date is inclusive""" + out_fn = str(tmpdir / "out.fasta") + min_date = "2020-02-26" + meta_fn = write_metadata(tmpdir, (("strain","date"), + ("SEQ_1","2020-02-XX"), + ("SEQ_2","2020-02-26"), + ("SEQ_3","2020-02-25"))) + args = argparser('-s %s --metadata %s -o %s --min-date %s' + % (fasta_fn, meta_fn, out_fn, min_date)) + augur.filter.run(args) + output = SeqIO.to_dict(SeqIO.parse(out_fn, "fasta")) + assert list(output.keys()) == ["SEQ_1", "SEQ_2"] + + def test_filter_run_max_date(self, tmpdir, fasta_fn, argparser): + """Test that filter --max-date is inclusive""" + out_fn = str(tmpdir / "out.fasta") + max_date = "2020-03-01" + meta_fn = write_metadata(tmpdir, (("strain","date"), + ("SEQ_1","2020-03-XX"), + ("SEQ_2","2020-03-01"), + ("SEQ_3","2020-03-02"))) + args = argparser('-s %s --metadata %s -o %s --max-date %s' + % (fasta_fn, meta_fn, out_fn, max_date)) + augur.filter.run(args) + output = SeqIO.to_dict(SeqIO.parse(out_fn, "fasta")) + assert list(output.keys()) == ["SEQ_1", "SEQ_2"] \ No newline at end of file