From 3f54ef5b397ed66bc9a12b1496ce23f54113b57a Mon Sep 17 00:00:00 2001 From: Nate Date: Wed, 28 Aug 2024 05:57:22 -0700 Subject: [PATCH 1/2] Allow spaces in BEAST annotations Spaces can be included in discrete states during BEAST runs, even if they aren't proper according to NEXUS specs. I've added a space to the regexes for comments, strings in comments, and sets in comments. --- augur/import_/beast.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/augur/import_/beast.py b/augur/import_/beast.py index 83ed54ae9..cc69f2473 100644 --- a/augur/import_/beast.py +++ b/augur/import_/beast.py @@ -116,15 +116,15 @@ def parse_beast_tree(data, tipMap, verbose=False): print('%d adding multitype node %s'%(i,multitypeNode.group(1))) i+=len(multitypeNode.group(1)) - commentBlock=re.match(r'(\:)*\[(&[A-Za-z\_\-{}\,0-9\.\%=\"\'\+!#]+)\]',data[i:])## look for MCC comments + commentBlock=re.match(r'(\:)*\[(&[A-Za-z\_\-{}\,0-9\.\%=\"\'\+ !#]+)\]',data[i:])## look for MCC comments if commentBlock is not None: if verbose==True: print('%d comment: %s'%(i,commentBlock.group(2))) comment=commentBlock.group(2) numerics=re.findall(r'[,&][A-Za-z\_\.0-9]+=[0-9\-Ee\.]+',comment) ## find all entries that have values as floats - strings=re.findall(r'[,&][A-Za-z\_\.0-9]+=["|\']*[A-Za-z\_0-9\.\+]+["|\']*',comment) ## strings + strings=re.findall(r'[,&][A-Za-z\_\.0-9]+=["|\']*[A-Za-z\_0-9\.\+ ]+["|\']*',comment) ## strings treelist=re.findall(r'[,&][A-Za-z\_\.0-9]+={[A-Za-z\_,{}0-9\.]+}',comment) ## complete history logged robust counting (MCMC trees) - sets=re.findall(r'[,&][A-Za-z\_\.0-9\%]+={[A-Za-z\.\-0-9eE,\"\_]+}',comment) ## sets and ranges + sets=re.findall(r'[,&][A-Za-z\_\.0-9\%]+={[A-Za-z\.\-0-9 ,\"\_]+}',comment) ## sets and ranges figtree=re.findall(r'\![A-Za-z]+=[A-Za-z0-9#]+',comment) ## figtree comments, in case MCC was manipulated in FigTree for vals in strings: ## string states go here From 4604976fa0500994c2f23bde8aedf8a92fcab645 Mon Sep 17 00:00:00 2001 From: Nate Date: Thu, 29 Aug 2024 05:36:07 -0700 Subject: [PATCH 2/2] Indicate PR #1610 in changes --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 8080c6eb4..7b26e0f21 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,11 +11,13 @@ * filter: Improved warning and error messages in the case of missing columns. [#1604] (@victorlin) * merge: Any user-customized `~/.sqliterc` file is now ignored so it doesn't break `augur merge`'s internal use of SQLite. [#1608][] (@tsibley) * merge: Non-id columns in metadata inputs that would conflict with the output id column are now forbidden and will cause an error if present. Previously they would overwrite values in the output id column, causing incorrect output. [#1593][] (@tsibley) +* import: Spaces in BEAST MCC tree annotations (for example, from a discrete state reconstruction) no longer break `augur import beast`'s parsing. [#1610][] (@watronfire) [#1593]: https://github.com/nextstrain/augur/pull/1593 [#1594]: https://github.com/nextstrain/augur/pull/1594 [#1604]: https://github.com/nextstrain/augur/pull/1604 [#1608]: https://github.com/nextstrain/augur/pull/1608 +[#1610]: https://github.com/nextstrain/augur/pull/1610 ## 25.3.0 (22 August 2024)