Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow exception to bubble up when priority file is bad. #487

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 10 additions & 15 deletions augur/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,15 @@ def write_vcf(compressed, input_file, output_file, dropped_samps):
pass

def read_priority_scores(fname):
    """Read per-strain priority scores from a whitespace-delimited file.

    Every non-blank line must contain a strain name followed by a numeric
    priority; any further fields on the line are ignored. Blank lines are
    skipped.

    Parameters
    ----------
    fname : str
        Path to the priority scores file.

    Returns
    -------
    collections.defaultdict
        Mapping of strain name to priority as ``float``. Strains that are
        not listed in the file read as ``0.0`` instead of raising
        ``KeyError``.

    Raises
    ------
    FileNotFoundError
        If ``fname`` does not exist (other ``OSError`` subclasses from
        ``open`` propagate as well).
    ValueError
        If a priority value cannot be converted to ``float``.
    IndexError
        If a non-blank line has a strain name but no priority field.
    """
    priorities = defaultdict(float)
    try:
        with open(fname) as pfile:
            for line in pfile.readlines():
                fields = line.split()
                if not fields:
                    # A blank (e.g. trailing) line carries no data.
                    continue
                priorities[fields[0]] = float(fields[1])
    except Exception:
        print(f"ERROR: missing or malformed priority scores file {fname}", file=sys.stderr)
        # Bare ``raise`` re-raises the active exception with its traceback intact.
        raise

    return priorities


def register_arguments(parser):
Expand All @@ -72,7 +67,7 @@ def register_arguments(parser):
parser.add_argument('--non-nucleotide', action='store_true', help="exclude sequences that contain illegal characters")
parser.add_argument('--exclude', type=str, help="file with list of strains that are to be excluded")
parser.add_argument('--include', type=str, help="file with list of strains that are to be included regardless of priorities or subsampling")
parser.add_argument('--priority', type=str, help="file with list priority scores for sequences (strain\tpriority)")
parser.add_argument('--priority', type=str, help="file with list of priority scores for sequences (strain\tpriority)")
parser.add_argument('--sequences-per-group', type=int, help="subsample to no more than this number of sequences per category")
parser.add_argument('--group-by', nargs='+', help="categories with respect to subsample; two virtual fields, \"month\" and \"year\", are supported if they don't already exist as real fields but a \"date\" field does exist")
parser.add_argument('--subsample-seed', help="random number generator seed to allow reproducible sub-sampling (with same input data). Can be number or string.")
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
"pylint >=1.7.6, ==1.7.*",
"pytest >=5.4.1, ==5.4.*",
"pytest-cov >=2.8.1, ==2.8.*",
"pytest-mock >= 2.0.0, ==2.0.*",
"recommonmark >=0.5.0, ==0.*",
"Sphinx >=2.0.1, ==2.*",
"sphinx-argparse >=0.2.5, ==0.*",
Expand Down
30 changes: 30 additions & 0 deletions tests/test_filter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,37 @@
import augur.filter
import pytest


@pytest.fixture
def mock_priorities_file_valid(mocker):
    """Patch ``builtins.open`` with a mock that serves three well-formed priority rows."""
    well_formed_rows = "strain1 5\nstrain2 6\nstrain3 8\n"
    fake_open = mocker.mock_open(read_data=well_formed_rows)
    mocker.patch("builtins.open", fake_open)


@pytest.fixture
def mock_priorities_file_malformed(mocker):
    """Patch ``builtins.open`` with a mock that serves a row whose priority is not numeric."""
    malformed_rows = "strain1 X\n"
    fake_open = mocker.mock_open(read_data=malformed_rows)
    mocker.patch("builtins.open", fake_open)


class TestFilter:
def test_read_priority_scores_valid(self, mock_priorities_file_valid):
    """Well-formed rows are parsed into a strain-to-priority mapping."""
    # builtins.open is stubbed by the fixture, so the rows come from the
    # mock's read_data rather than from the file at this path.
    stubbed_path = "tests/builds/tb/data/lee_2015.vcf"
    expected = {"strain1": 5, "strain2": 6, "strain3": 8}

    priorities = augur.filter.read_priority_scores(stubbed_path)

    assert priorities == expected

def test_read_priority_scores_malformed(self, mock_priorities_file_malformed):
    """A non-numeric priority value surfaces as ``ValueError``."""
    # builtins.open is stubbed by the fixture, so the rows come from the
    # mock's read_data rather than from the file at this path.
    stubbed_path = "tests/builds/tb/data/lee_2015.vcf"
    with pytest.raises(ValueError):
        augur.filter.read_priority_scores(stubbed_path)

def test_read_priority_scores_does_not_exist(self):
    """A path that does not exist surfaces as ``FileNotFoundError``."""
    missing_path = "/does/not/exist.txt"
    with pytest.raises(FileNotFoundError):
        augur.filter.read_priority_scores(missing_path)

def test_read_vcf_compressed(self):
seq_keep, all_seq = augur.filter.read_vcf(
"tests/builds/tb/data/lee_2015.vcf.gz"
Expand Down