Skip to content

Commit

Permalink
Rename ambiguity option to "any" and update logic and tests accordingly
Browse files Browse the repository at this point in the history
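Replaces the "all" option for `is_date_ambiguous` with "any" and updates
the help documentation, ambiguity boolean logic, and unit tests to
clarify the hierarchical treatment of ambiguity: an ambiguous year makes
the month and day ambiguous, and an ambiguous month makes the day ambiguous.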
  • Loading branch information
huddlej committed Nov 12, 2020
1 parent ca1b9c5 commit 2ac2223
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 14 deletions.
4 changes: 2 additions & 2 deletions augur/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ def register_arguments(parser):
help="Exclude samples matching these conditions. Ex: \"host=rat\" or \"host!=rat\". Multiple values are processed as OR (matching any of those specified will be excluded), not AND")
parser.add_argument('--include-where', nargs='+',
help="Include samples with these values. ex: host=rat. Multiple values are processed as OR (having any of those specified will be included), not AND. This rule is applied last and ensures any sequences matching these rules will be included.")
parser.add_argument('--exclude-ambiguous-dates-by', choices=['all', 'day', 'month', 'year'],
help='Exclude ambiguous dates by day (e.g., 2020-09-XX), month (e.g., 2020-XX-XX), year (e.g., 200X-10-01), or all date fields')
parser.add_argument('--exclude-ambiguous-dates-by', choices=['any', 'day', 'month', 'year'],
help='Exclude ambiguous dates by day (e.g., 2020-09-XX), month (e.g., 2020-XX-XX), year (e.g., 200X-10-01), or any date fields. An ambiguous year makes the corresponding month and day ambiguous, too, even if those fields have unambiguous values (e.g., "201X-10-01"). Similarly, an ambiguous month makes the corresponding day ambiguous (e.g., "2010-XX-01").')
parser.add_argument('--query', help="Filter samples by attribute. Uses Pandas Dataframe querying, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-query for syntax.")
parser.add_argument('--output', '-o', help="output file", required=True)

Expand Down
19 changes: 10 additions & 9 deletions augur/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,16 @@ def ambiguous_date_to_date_range(uncertain_date, fmt, min_max_year=None):
def read_metadata(fname, query=None):
return MetadataFile(fname, query).read()

def is_date_ambiguous(date, ambiguous_by="all"):
def is_date_ambiguous(date, ambiguous_by="any"):
"""
Returns whether a given date string in the format of YYYY-MM-DD is ambiguous by a given part of the date (e.g., day, month, year, or all parts).
Returns whether a given date string in the format of YYYY-MM-DD is ambiguous by a given part of the date (e.g., day, month, year, or any parts).
Parameters
----------
date : str
Date string in the format of YYYY-MM-DD
ambiguous_by : str
Field of the date string to test for ambiguity ("day", "month", "year", "all")
Field of the date string to test for ambiguity ("day", "month", "year", "any")
"""
date_components = date.split('-', 2)

Expand All @@ -96,12 +96,13 @@ def is_date_ambiguous(date, ambiguous_by="all"):
month = "XX"
day = "XX"

return (
(ambiguous_by == 'all' and ('X' in year or 'X' in month or 'X' in day)) or
(ambiguous_by == 'day' and 'X' in day) or
(ambiguous_by == 'month' and 'X' in month) or
(ambiguous_by == 'year' and 'X' in year)
)
# Determine ambiguity hierarchically such that, for example, an ambiguous
# month implies an ambiguous day even when day information is available.
return any((
"X" in year,
"X" in month and ambiguous_by in ("any", "month", "day"),
"X" in day and ambiguous_by in ("any", "day")
))

def get_numerical_dates(meta_dict, name_col = None, date_col='date', fmt=None, min_max_year=None):
if fmt:
Expand Down
11 changes: 8 additions & 3 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,19 +104,24 @@ def test_read_mask_file_drm_file(self, tmpdir):
def test_is_date_ambiguous(self):
"""is_date_ambiguous should return true for ambiguous dates and false for valid dates."""
# Test complete date strings with ambiguous values.
assert utils.is_date_ambiguous("2019-0X-0X", "all")
assert utils.is_date_ambiguous("2019-0X-0X", "any")
assert utils.is_date_ambiguous("2019-XX-09", "month")
assert utils.is_date_ambiguous("2019-03-XX", "day")
assert utils.is_date_ambiguous("201X-03-09", "year")
assert utils.is_date_ambiguous("20XX-01-09", "month")
assert utils.is_date_ambiguous("2019-XX-03", "day")
assert utils.is_date_ambiguous("20XX-01-03", "day")

# Test incomplete date strings with ambiguous values.
assert utils.is_date_ambiguous("2019", "all")
assert utils.is_date_ambiguous("2019", "any")
assert utils.is_date_ambiguous("201X", "year")
assert utils.is_date_ambiguous("2019-XX", "month")
assert utils.is_date_ambiguous("2019-10", "day")
assert utils.is_date_ambiguous("2019-XX", "any")
assert utils.is_date_ambiguous("2019-XX", "day")

# Test complete date strings without ambiguous dates for the requested field.
assert not utils.is_date_ambiguous("2019-09-03", "all")
assert not utils.is_date_ambiguous("2019-09-03", "any")
assert not utils.is_date_ambiguous("2019-03-XX", "month")
assert not utils.is_date_ambiguous("2019-09-03", "day")
assert not utils.is_date_ambiguous("2019-XX-XX", "year")
Expand Down

0 comments on commit 2ac2223

Please sign in to comment.