Skip to content

Commit

Permalink
fix: refactor to use augur library
Browse files Browse the repository at this point in the history
Refactored to use the augur library to load the metadata file.

* Using augur.io.read_metadata simplifies the creation of the name_lookup dictionary
* To keep coding style consistent, separated out main, parse_args, and methods
* To be consistent with augur export v2, write out a minified JSON file
  • Loading branch information
j23414 committed Apr 5, 2023
1 parent 430ff78 commit e74fc6b
Showing 1 changed file with 46 additions and 21 deletions.
67 changes: 46 additions & 21 deletions bin/set_final_strain_name.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,29 @@
import pandas as pd
#! /usr/bin/env python

import json, argparse
import augur


def parse_args(argv=None):
    """Parse the command-line options of the strain-renaming script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default) argparse
            falls back to ``sys.argv[1:]``, so existing zero-argument
            callers behave exactly as before.

    Returns:
        An ``argparse.Namespace`` with the string attributes
        ``input_auspice_json``, ``metadata``, ``display_strain_name`` and
        ``output``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing
            or an unknown option is supplied.
    """
    parser = argparse.ArgumentParser(
        description="Swaps out the strain names in the Auspice JSON with the final strain name",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--input-auspice-json", type=str, required=True, help="input auspice_json"
    )
    parser.add_argument("--metadata", type=str, required=True, help="input data")
    parser.add_argument(
        "--display-strain-name",
        type=str,
        required=True,
        help="field to use as strain name in auspice",
    )
    parser.add_argument(
        "--output", type=str, metavar="JSON", required=True, help="output Auspice JSON"
    )
    # Passing argv through lets callers and tests supply arguments directly.
    return parser.parse_args(argv)


def replace_name_recursive(node, lookup):
if node["name"] in lookup:
Expand All @@ -9,28 +33,29 @@ def replace_name_recursive(node, lookup):
for child in node["children"]:
replace_name_recursive(child, lookup)

if __name__=="__main__":
parser = argparse.ArgumentParser(
description="Swaps out the strain names in the Auspice JSON with the final strain name",
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)

parser.add_argument('--input-auspice-json', type=str, required=True, help="input auspice_json")
parser.add_argument('--metadata', type=str, required=True, help="input data")
parser.add_argument('--display-strain-name', type=str, required=True, help="field to use as strain name in auspice")
parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON")
args = parser.parse_args()
def set_final_strain_name(auspice_json, metadata_file, display_strain_name, output):
with open(auspice_json, "r") as fh:
data = json.load(fh)

metadata = pd.read_csv(args.metadata, sep='\t')
name_lookup = {}
for ri, row in metadata.iterrows():
strain_id = row['strain']
name_lookup[strain_id] = args.display_strain_name if pd.isna(row[args.display_strain_name]) else row[args.display_strain_name]
metadata = augur.io.read_metadata(metadata_file)
if display_strain_name not in metadata.columns:
with open(output, "w") as fh:
json.dump(data, fh, allow_nan=False, indent=None, separators=",:")
return

with open(args.input_auspice_json, 'r') as fh:
data = json.load(fh)
name_lookup = metadata[[display_strain_name]].to_dict()[display_strain_name]
replace_name_recursive(data["tree"], name_lookup)
with open(output, "w") as fh:
json.dump(data, fh, allow_nan=False, indent=None, separators=",:")


def main():
    """Script entry point: parse the CLI options and rewrite the strain names."""
    cli = parse_args()
    # Keyword arguments make the option-to-parameter mapping explicit.
    set_final_strain_name(
        auspice_json=cli.input_auspice_json,
        metadata_file=cli.metadata,
        display_strain_name=cli.display_strain_name,
        output=cli.output,
    )

replace_name_recursive(data['tree'], name_lookup)

with open(args.output, 'w') as fh:
json.dump(data, fh)
if __name__ == "__main__":
main()

0 comments on commit e74fc6b

Please sign in to comment.