Skip to content

Commit

Permalink
[clades] check for multiple mutations at same pos
Browse files Browse the repository at this point in the history
Multiple mutations at the same position on a single branch are now a
fatal error. Previous behaviour was to overwrite such mutations when
parsing. Suggested by #735.
  • Loading branch information
jameshadfield committed Apr 11, 2023
1 parent 40e549d commit e5cfc3a
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 1 deletion.
15 changes: 15 additions & 0 deletions augur/clades.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,20 @@ def is_node_in_clade(clade_alleles, node, root_sequence):

return all(conditions)

def ensure_no_multiple_mutations(all_muts):
    '''
    Abort if any node lists more than one mutation at the same position.

    Parameters
    ----------
    all_muts : dict
        Mapping of node name -> node data. Each node's data may carry a
        ``'muts'`` list (nucleotide mutations) and/or an ``'aa_muts'``
        mapping of gene name -> list of mutations. Both keys are optional.
        Every mutation is a string whose first and last characters are
        dropped and whose remaining middle part is parsed as an integer
        position (e.g. ``"A10T"`` -> 10).

    Raises
    ------
    AugurError
        If at least one node has a repeated position, either among its
        nucleotide mutations or within a single gene. The message names
        every offending node together with ``nuc`` or the gene name.
    '''
    def _has_repeated_position(mutations):
        # Parse every mutation up front so that a malformed entry is
        # reported (via int()) regardless of where it sits in the list.
        positions = [int(mutation[1:-1]) - 1 for mutation in mutations]
        return len(positions) != len(set(positions))

    offenders = []

    for node_name, node_data in all_muts.items():
        # Nucleotide mutations are checked first, so that a node's "(nuc)"
        # entry precedes its per-gene entries in the final message.
        if _has_repeated_position(node_data.get('muts', [])):
            offenders.append(f"Node {node_name} (nuc)")

        # Positions are only compared within a gene, never across genes.
        for gene, gene_mutations in node_data.get('aa_muts', {}).items():
            if _has_repeated_position(gene_mutations):
                offenders.append(f"Node {node_name} ({gene})")

    if not offenders:
        return

    raise AugurError(f"Multiple mutations at the same position on a single branch were found: {', '.join(offenders)}")

def assign_clades(clade_designations, all_muts, tree, ref=None):
'''
Expand Down Expand Up @@ -314,6 +328,7 @@ def parse_nodes(tree_file, node_data_files):
tree_nodes = set([clade.name for clade in tree.find_clades()])
if not json_nodes.issubset(tree_nodes):
raise AugurError(f"The following nodes in the node_data files ({', '.join(node_data_files)}) are not found in the tree ({tree_file}): {', '.join(json_nodes - tree_nodes)}")
ensure_no_multiple_mutations(node_data['nodes'])
return (tree, node_data['nodes'])

def register_parser(parent_subparsers):
Expand Down
11 changes: 10 additions & 1 deletion tests/functional/clades.t
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,13 @@ if the (branch leading to the) root has the clade-defining mutation.

$ python3 "$TESTDIR/../../scripts/diff_jsons.py" clades/toy_clades_2.json "$TMP/toy_clades_2b.json" \
> --exclude-paths "root['generated_by']"
{}
{}

Multiple mutations at the same position on a single branch are a fatal error

$ ${AUGUR} clades \
> --tree clades/toy_tree.nwk \
> --mutations clades/toy_muts_multiple.json \
> --clades clades/toy_clades_nuc.tsv
ERROR: Multiple mutations at the same position on a single branch were found: Node A (nuc), Node AB (geneName)
[2]
15 changes: 15 additions & 0 deletions tests/functional/clades/toy_muts_multiple.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"nodes": {
"A": {
"muts": ["A10T", "T10C"]
},
"AB": {
"aa_muts": {
"geneName": ["S42L", "R42H", "Y50W"]
}
},
"B": {
"muts": ["A10T", "T11C"]
}
}
}

0 comments on commit e5cfc3a

Please sign in to comment.