diff --git a/CHANGES.md b/CHANGES.md index 2cf2ff234..388368a7d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,15 +5,20 @@ ### Features * Support treetime 0.11.* [#1310][] (@corneliusroemer) -* Allow minimal export using only a (newick) tree in `augur export v2`. [#1299][] (@jameshadfield) +* export: Allow minimal export using only a (newick) tree in `augur export v2`. [#1299][] (@jameshadfield) * A number of schema updates and improvements [#1299][] (@jameshadfield) * We now require all nodes to have `node_attrs` on them with one of `div` or `num_date` present * Some never-used properties are removed from the schemas, including a pattern for defining nucleotide INDELs which was never used by augur or auspice. * Tip label defaults are now settable within the auspice-config JSON * Empty colorings definitions are allowed (the tree will be grey in Auspice) -[#1310]: https://github.com/nextstrain/augur/pull/1310 +### Bug fixes + +* ancestral: Export amino acid sequences inferred for the root node of the tree in the node data JSON output for compatibility with `augur translate` output. [#1317][] (@huddlej) + [#1299]: https://github.com/nextstrain/augur/pull/1299 +[#1310]: https://github.com/nextstrain/augur/pull/1310 +[#1317]: https://github.com/nextstrain/augur/pull/1317 ## 23.0.0 (5 September 2023) diff --git a/augur/ancestral.py b/augur/ancestral.py index 389bb68a5..64b25d6fe 100644 --- a/augur/ancestral.py +++ b/augur/ancestral.py @@ -323,6 +323,14 @@ def run(args): for key, node in anc_seqs['nodes'].items(): if 'aa_muts' not in node: node['aa_muts'] = {} node['aa_muts'][gene] = aa_result['mutations']['nodes'][key]['muts'] + + # Add amino acid sequences to the root node of the tree. + if key == T.root.name: + if "aa_sequences" not in node: + node["aa_sequences"] = {} + + node["aa_sequences"][gene] = aa_result['tt'].sequence(T.root, as_string=True, reconstructed=True) + anc_seqs['reference'][gene] = aa_result['root_seq'] # FIXME: Note that this is calculating the end of the CDS as 3*length of translation # this is equivalent to the annotation for single segment CDS, but not for cds diff --git a/tests/functional/ancestral/cram/infer-amino-acid-sequences.t b/tests/functional/ancestral/cram/infer-amino-acid-sequences.t index 5564d9781..35804fdf1 100644 --- a/tests/functional/ancestral/cram/infer-amino-acid-sequences.t +++ b/tests/functional/ancestral/cram/infer-amino-acid-sequences.t @@ -21,6 +21,13 @@ Check that the reference length was correctly exported as the nuc annotation "PRO": { "nuc": { +Check that amino acid sequences exist for the root node of the tree. + + $ grep -A 2 "aa_sequences" "$CRAMTMP/$TESTFILE/ancestral_mutations.json" + "aa_sequences": { + "ENV": .* (re) + "PRO": .* (re) + Check that internal nodes have ancestral amino acid sequences. $ grep "NODE" "$CRAMTMP/$TESTFILE/ancestral_aa_sequences_ENV.fasta" | wc -l