Skip to content

Commit

Permalink
Merge pull request #61 from nextstrain/feat/nextclade_compatibility
Browse files Browse the repository at this point in the history
inline root sequence, add pathogen json and nextclade extension
  • Loading branch information
rneher authored May 24, 2024
2 parents a1788ce + 5967140 commit 619455b
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 15 deletions.
6 changes: 3 additions & 3 deletions Snakefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
configfile: "config/configfile.yaml"

wildcard_constraints:
a_or_b = r"a|b"

build_dir = 'results'
auspice_dir = 'auspice'

Expand All @@ -8,9 +11,6 @@ rule all:
expand("auspice/rsv_{subtype}_{build}.json",
subtype = config.get("subtypes",['a']),
build = config.get("buildstorun", ['genome'])),
expand("auspice/rsv_{subtype}_{build}_root-sequence.json",
subtype = config.get("subtypes",['a']),
build = config.get("buildstorun", ['genome']))

include: "workflow/snakemake_rules/chores.smk"

Expand Down
12 changes: 11 additions & 1 deletion config/configfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,14 @@ ancestral:
inference: "joint"

traits:
columns: "country"
columns: "country"

nextclade_attributes:
a:
name: "RSV-A NextClade using real-time tree"
reference_name: "hRSV/A/England/397/2017"
accession: "EPI_ISL_412866"
b:
name: "RSV-B NextClade using real-time tree"
reference_name: "hRSV/B/Australia/VIC-RCH056/2019"
accession: "EPI_ISL_1653999"
1 change: 1 addition & 0 deletions nextclade/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ rule export:
--node-data {input.node_data}\
--auspice-config {input.auspice_config} \
--color-by-metadata {params.fields} \
--include-root-sequence-inline \
--minify-json \
--title "Nextclade reference tree for RSV-{wildcards.a_or_b} with root {wildcards.reference} built on {params.date}" \
--output {output.auspice_json} 2>&1;
Expand Down
15 changes: 15 additions & 0 deletions scripts/clade_names.py → scripts/rename_and_nextclade.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,30 @@ def replace_clade_recursive(node):
)

parser.add_argument('--input-auspice-json', type=str, required=True, help="input auspice_json")
parser.add_argument('--pathogen-json', type=str, required=True, help="pathogen json")
parser.add_argument('--reference', type=str, required=True, help="reference")
parser.add_argument('--build-name', type=str, required=True, help="nextclade build name")
parser.add_argument('--reference-accession', type=str, required=True, help="reference accession")
parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON")
args = parser.parse_args()

# Read the Nextclade pathogen configuration that will be embedded in the
# Auspice JSON. JSON is UTF-8 by specification, so do not rely on the locale.
with open(args.pathogen_json, 'r', encoding='utf-8') as fh:
    pathogen_data = json.load(fh)

# Read the Auspice JSON that is post-processed below.
with open(args.input_auspice_json, 'r', encoding='utf-8') as fh:
    data = json.load(fh)

# Drop the "genome_clade_annotation" coloring from the exported metadata.
data["meta"]["colorings"] = [x for x in data["meta"]["colorings"]
                             if x["key"] != "genome_clade_annotation"]
# NOTE(review): replace_clade_recursive is defined earlier in this script and
# presumably rewrites clade labels in place on every node -- confirm there.
replace_clade_recursive(data['tree'])

# remove unneeded files structure; a pathogen JSON without a "files" entry
# is fine, so do not fail with KeyError when it is absent
pathogen_data.pop("files", None)

# Attributes come from the command line (per-subtype values in the config).
pathogen_data["attributes"] = {
    "reference accession": args.reference_accession,
    "reference name": args.reference,
    "name": args.build_name,
}
pathogen_data["experimental"] = True

# Attach the pathogen description as the "nextclade" extension while keeping
# any other extensions that are already present in the input JSON.
data["meta"].setdefault("extensions", {})["nextclade"] = {'pathogen': pathogen_data}

with open(args.output, 'w', encoding='utf-8') as fh:
    json.dump(data, fh, indent=0)
4 changes: 3 additions & 1 deletion workflow/snakemake_rules/core.smk
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ rule ancestral:
"""
input:
tree = rules.refine.output.tree,
alignment = get_alignment
alignment = get_alignment,
root_sequence = build_dir + "/{a_or_b}/{build_name}/{build_name}_reference.gbk"
output:
node_data = build_dir + "/{a_or_b}/{build_name}/nt_muts.json"
params:
Expand All @@ -224,6 +225,7 @@ rule ancestral:
--tree {input.tree} \
--alignment {input.alignment} \
--output-node-data {output.node_data} \
--root-sequence {input.root_sequence} \
--inference {params.inference}
"""

Expand Down
24 changes: 14 additions & 10 deletions workflow/snakemake_rules/export.smk
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ rule export:
auspice_config = config["files"]["auspice_config"],
description = config["description"]
output:
auspice_json = build_dir + "/{a_or_b}/{build_name}/tree.json",
root_sequence = build_dir + "/{a_or_b}/{build_name}/tree_root-sequence.json"
auspice_json = build_dir + "/{a_or_b}/{build_name}/tree.json"
params:
title = lambda w: f"RSV-{w.a_or_b.upper()} phylogeny",
strain_id=config["strain_id_field"],
Expand All @@ -53,7 +52,7 @@ rule export:
--description {input.description} \
--colors {input.colors} \
--auspice-config {input.auspice_config} \
--include-root-sequence \
--include-root-sequence-inline \
--output {output.auspice_json}
"""

Expand All @@ -76,18 +75,23 @@ rule final_strain_name:
"""


rule rename_clade_labels:
rule rename_and_ready_for_nextclade:
input:
auspice_json= rules.final_strain_name.output.auspice_json,
root_sequence= rules.export.output.root_sequence
pathogen_json= "nextclade/config/pathogen.json"
output:
auspice_json= "auspice/rsv_{a_or_b}_{build_name}.json",
root_sequence= "auspice/rsv_{a_or_b}_{build_name}_root-sequence.json"
auspice_json= "auspice/rsv_{a_or_b}_{build_name}.json"
params:
accession= lambda w: config["nextclade_attributes"][w.a_or_b]["accession"],
name= lambda w: config["nextclade_attributes"][w.a_or_b]["name"],
ref_name= lambda w: config["nextclade_attributes"][w.a_or_b]["reference_name"]
shell:
"""
python3 scripts/clade_names.py \
python3 scripts/rename_and_nextclade.py \
--input-auspice-json {input.auspice_json} \
--pathogen-json {input.pathogen_json} \
--reference {params.ref_name:q} \
--build-name {params.name:q} \
--reference-accession {params.accession:q} \
--output {output.auspice_json}
cp {input.root_sequence} {output.root_sequence}
"""

0 comments on commit 619455b

Please sign in to comment.