"""Move genome clade annotations onto branch labels and embed Nextclade metadata.

Reads an Auspice JSON and a pathogen JSON, drops the
``genome_clade_annotation`` coloring, turns every per-node
``genome_clade_annotation`` into a ``genome_clade`` branch label, and stores
the pathogen JSON under ``meta.extensions.nextclade.pathogen`` of the output.
"""
# Provenance: scripts/rename_and_nextclade.py as added by the patch
# "add rename/nextclade script" (commit 5967140ec4b80819e37f5ba94ea71b75067b5ca6).
import argparse
import json


def replace_clade_recursive(node):
    """Convert ``genome_clade_annotation`` node attributes into branch labels.

    For ``node`` and every descendant reachable through ``"children"``, a
    ``node_attrs["genome_clade_annotation"]`` entry is removed and its
    ``"value"`` is stored as ``branch_attrs["labels"]["genome_clade"]``.
    The tree is modified in place and nothing is returned.

    The name is kept for compatibility, but the walk uses an explicit stack
    so that very deep trees do not hit Python's recursion limit. Nodes that
    lack ``node_attrs`` are skipped, and missing ``branch_attrs`` / ``labels``
    dicts are created on demand.

    Raises:
        KeyError: if an annotation is present but has no ``"value"`` key.
    """
    pending = [node]
    while pending:
        current = pending.pop()

        node_attrs = current.get("node_attrs") or {}
        if "genome_clade_annotation" in node_attrs:
            # Read the value before deleting, so a malformed annotation
            # leaves the node untouched when the KeyError propagates.
            clade = node_attrs["genome_clade_annotation"]["value"]
            labels = current.setdefault("branch_attrs", {}).setdefault("labels", {})
            labels["genome_clade"] = clade
            del node_attrs["genome_clade_annotation"]

        # Each node is processed independently, so visiting order is irrelevant.
        pending.extend(current.get("children") or ())


def _parse_args(argv=None):
    """Build the command-line parser and parse ``argv`` (``sys.argv`` if None)."""
    parser = argparse.ArgumentParser(
        description="fix genome clade info",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--input-auspice-json', type=str, required=True, help="input auspice_json")
    parser.add_argument('--pathogen-json', type=str, required=True, help="pathogen json")
    parser.add_argument('--reference', type=str, required=True, help="reference")
    parser.add_argument('--build-name', type=str, required=True, help="nextclade build name")
    parser.add_argument('--reference-accession', type=str, required=True, help="reference accession")
    parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON")
    return parser.parse_args(argv)


def main(argv=None):
    """Run the conversion described in the module docstring.

    Args:
        argv: optional list of command-line arguments; defaults to
            ``sys.argv[1:]`` when None.
    """
    args = _parse_args(argv)

    # JSON is read and written as UTF-8 explicitly rather than relying on
    # the platform's default locale encoding.
    with open(args.pathogen_json, 'r', encoding="utf-8") as fh:
        pathogen_data = json.load(fh)

    with open(args.input_auspice_json, 'r', encoding="utf-8") as fh:
        data = json.load(fh)

    meta = data["meta"]
    if "colorings" in meta:
        # The per-node annotation is removed below, so its coloring entry
        # would no longer have any data behind it.
        meta["colorings"] = [
            coloring for coloring in meta["colorings"]
            if coloring.get("key") != "genome_clade_annotation"
        ]
    replace_clade_recursive(data['tree'])

    # remove unneeded files structure (tolerate its absence)
    pathogen_data.pop("files", None)

    pathogen_data["attributes"] = {
        "reference accession": args.reference_accession,
        "reference name": args.reference,
        "name": args.build_name,
    }
    pathogen_data["experimental"] = True
    # NOTE: this replaces any "extensions" already present in the input meta.
    meta["extensions"] = {'nextclade': {'pathogen': pathogen_data}}

    with open(args.output, 'w', encoding="utf-8") as fh:
        # indent=0 inserts newlines between items but no leading indentation.
        json.dump(data, fh, indent=0)


if __name__ == "__main__":
    main()