diff --git a/post-process-ncov.py b/post-process-ncov.py deleted file mode 100644 index 8434e8e2c..000000000 --- a/post-process-ncov.py +++ /dev/null @@ -1,156 +0,0 @@ -NEW_ANNOTATION = { - "nuc": { - "start": 1, - "end": 29903, - "strand": "+" - }, - "ORF1ab": { - "gene": "ORF1ab", - "strand": "+", - "segments":[ - {"start": 266, "end": 13468, "name": "ORF1a"}, - {"start": 13468, "end": 21555, "name": "ORF1b"} - ], - "display_name": "AKA polyprotein PP1ab. -1 ribisomal frameshift. Cleaved to yield 15 nonstructural proteins (NSP1-10, 12-16)" - }, - "PP1a": { - "gene": "ORF1ab", - "start": 266, - "end": 13483, - "display_name": "Polyprotein PP1a. Cleaved to yield 11 nonstructural proteins (NSP1-11)" - }, - "NSP3": { - "gene": "ORF1ab", - "color": "#2c7fb8", - "start": 266 + (819-1)*3, - "end": 266 + (2763-1)*3 -1, - "display_name": "Cleaved from short + long polyproteins", - "strand": "+", - }, - "RdRp": { - "gene": "ORF1ab", - "color": "#41b6c4", - # Length is 2796nt (932aa) - "segments":[ - { # first segment is before the slip - "start": 266 + (4393-1)*3, # 13442 - "end": 13468, - }, - { - "start": 13468, - "end": 13468 + 2796 -1 - } - ], - "display_name": "NSP12; Cleaved from long polyprotein only; I'm not sure if the coordinates are correct, BTW!!!", - "strand": "+", - }, - "S": { - "gene": "Spike", - "end": 25384, - "display_name": "structural protein; spike protein; surface glycoprotein", - "start": 21563, - "strand": "+", - }, - "E": { - "end": 26472, - "dsiplay_name": "ORF4; structural protein; E protein", - "start": 26245, - "strand": "+", - "type": "CDS" - }, - "M": { - "end": 27191, - "start": 26523, - "strand": "+", - "gene": "M", - "display_name": "ORF5; structural protein (membrane glycoprotein)" - }, - "N": { - "end": 29533, - "display_name": "nucleocapsid phosphoprotein (ORF9)", - "start": 28274, - "strand": "+", - }, - "ORF3a": { - "end": 26220, - "start": 25393, - "strand": "+", - }, - "ORF6": { - "end": 27387, - "start": 27202, - "strand": "+", - }, - "ORF7a": { - "end": 27759, - "start": 27394, - "strand": "+", - }, - "ORF7b": { - "end": 27887, - "start": 27756, - "strand": "+", - }, - "ORF8": { - "end": 28259, - "start": 27894, - "strand": "+", - }, - "ORF9b": { - "end": 28577, - "start": 28284, - "strand": "+", - }, -} - -def a_pos_b(m): - return (m[0], int(m[1:-1]), m[-1]) - -def recurse(node): - - mutations = node.get('branch_attrs', {}).get('mutations', {}) - if 'ORF1a' in mutations: - # ORF1a -> ORF1ab is no-change - mutations['ORF1ab'] = [*mutations['ORF1a']] - mutations['PP1a'] = [*mutations['ORF1a']] - del mutations['ORF1a'] - if 'ORF1b' in mutations: - if 'ORF1ab' not in mutations: - mutations['ORF1ab'] = []; - for m in mutations['ORF1b']: - # ORF1b is in phase with ORF1a - a, pos, b = a_pos_b(m) - mutations['ORF1ab'].append(f"{a}{pos+4401}{b}") - del mutations['ORF1b'] - - # Extract mutations which fall in NSP3 - if 'ORF1ab' in mutations: - mutations['NSP3'] = [] - for m in mutations['ORF1ab']: - a, pos, b = a_pos_b(m) - # relative to PP1ab the coords are 819..2763 (in aa space) - if pos>=819 and pos<=2763: - mutations['NSP3'].append(f"{a}{pos-819+1}{b}") - - # Extract mutations which fall in RdRp - if 'ORF1ab' in mutations: - mutations['RdRp'] = [] - for m in mutations['ORF1ab']: - a, pos, b = a_pos_b(m) - # relative to PP1ab the coords are 4393..5324 (in aa space, so don't need to worry about -1 slippage) - if pos>=4393 and pos<=5324: - mutations['RdRp'].append(f"{a}{pos-4393+1}{b}") - - if "children" in node: - [recurse(child) for child in node["children"]] - - - -import json -with open("./data/nextclade_sars-cov-2.json", 'r') as fh: - dataset = json.load(fh) -recurse(dataset['tree']) -dataset['meta']['genome_annotations'] = NEW_ANNOTATION -dataset['meta']['title'] = 'nCoV with adjusted annotations (use with caution!)' -with open("./datasets/entropy2023/entropy-test-data_ncov.json", 'w') as fh: - json.dump(dataset, fh, indent=2) diff --git a/scripts/get-data.sh b/scripts/get-data.sh index e91c3edf8..7861efdc6 100755 --- a/scripts/get-data.sh +++ b/scripts/get-data.sh @@ -1,17 +1,83 @@ #!/bin/bash data_files=( - "entropy-test-data_hepB.json" \ - "entropy-test-data_ncov.json" \ + "dengue_all.json" "dengue_denv1.json" "dengue_denv2.json" "dengue_denv3.json" "dengue_denv4.json"\ + "ebola.json" "ebola_root-sequence.json" \ + "ebola_2019-09-14-no-epi-id_meta.json" "ebola_2019-09-14-no-epi-id_tree.json" \ + "lassa_s_tree.json" "lassa_s_meta.json" \ + "lassa_l_tree.json" "lassa_l_meta.json" \ + "measles.json" \ + "mers_tree.json" "mers_meta.json" \ + "mumps_global.json" "mumps_na.json" \ + "WNV_NA_tree.json" "WNV_NA_meta.json" \ "zika.json" \ - "monkeypox_mpxv.json" \ + "tb_global_meta.json" "tb_global_tree.json" \ + "enterovirus_d68_genome_meta.json" "enterovirus_d68_genome_tree.json" \ + "enterovirus_d68_vp1_meta.json" "enterovirus_d68_vp1_tree.json" \ + ############## AVIAN FLU ############## + "flu_avian_h7n9_ha.json" \ + "flu_avian_h7n9_mp.json" \ + "flu_avian_h7n9_na.json" \ + "flu_avian_h7n9_np.json" \ + "flu_avian_h7n9_ns.json" \ + "flu_avian_h7n9_pa.json" \ + "flu_avian_h7n9_pb1.json" \ + "flu_avian_h7n9_pb2.json" \ + ############## SEASONAL FLU ############## + "flu_seasonal_h3n2_ha_2y.json" "flu_seasonal_h3n2_ha_2y_tip-frequencies.json" \ + "flu_seasonal_h3n2_ha_3y.json" "flu_seasonal_h3n2_ha_3y_tip-frequencies.json" \ + "flu_seasonal_h3n2_ha_6y.json" "flu_seasonal_h3n2_ha_6y_tip-frequencies.json" \ + "flu_seasonal_h3n2_ha_12y.json" "flu_seasonal_h3n2_ha_12y_tip-frequencies.json" \ + "flu_seasonal_h3n2_na_2y.json" "flu_seasonal_h3n2_na_2y_tip-frequencies.json" \ + "flu_seasonal_h3n2_na_3y.json" "flu_seasonal_h3n2_na_3y_tip-frequencies.json" \ + "flu_seasonal_h3n2_na_6y.json" "flu_seasonal_h3n2_na_6y_tip-frequencies.json" \ + "flu_seasonal_h3n2_na_12y.json" "flu_seasonal_h3n2_na_12y_tip-frequencies.json" \ + "flu_seasonal_h1n1pdm_ha_2y.json" "flu_seasonal_h1n1pdm_ha_2y_tip-frequencies.json" \ + "flu_seasonal_h1n1pdm_ha_3y.json" "flu_seasonal_h1n1pdm_ha_3y_tip-frequencies.json" \ + "flu_seasonal_h1n1pdm_ha_6y.json" "flu_seasonal_h1n1pdm_ha_6y_tip-frequencies.json" \ + "flu_seasonal_h1n1pdm_ha_12y.json" "flu_seasonal_h1n1pdm_ha_12y_tip-frequencies.json" \ + "flu_seasonal_h1n1pdm_ha_pandemic_meta.json" "flu_seasonal_h1n1pdm_ha_pandemic_tree.json" "flu_seasonal_h1n1pdm_ha_pandemic_tip-frequencies.json" \ + "flu_seasonal_h1n1pdm_na_2y.json" "flu_seasonal_h1n1pdm_na_2y_tip-frequencies.json" \ + "flu_seasonal_h1n1pdm_na_3y.json" "flu_seasonal_h1n1pdm_na_3y_tip-frequencies.json" \ + "flu_seasonal_h1n1pdm_na_6y.json" "flu_seasonal_h1n1pdm_na_6y_tip-frequencies.json" \ + "flu_seasonal_h1n1pdm_na_12y.json" "flu_seasonal_h1n1pdm_na_12y_tip-frequencies.json" \ + "flu_seasonal_h1n1pdm_na_pandemic_tree.json" "flu_seasonal_h1n1pdm_na_pandemic_meta.json" "flu_seasonal_h1n1pdm_na_pandemic_tip-frequencies.json" \ + "flu_seasonal_vic_ha_2y.json" "flu_seasonal_vic_ha_2y_tip-frequencies.json" "flu_seasonal_vic_ha_2y_root-sequence.json" \ + "flu_seasonal_vic_ha_3y.json" "flu_seasonal_vic_ha_3y_tip-frequencies.json" "flu_seasonal_vic_ha_3y_root-sequence.json" \ + "flu_seasonal_vic_ha_6y.json" "flu_seasonal_vic_ha_6y_tip-frequencies.json" "flu_seasonal_vic_ha_6y_root-sequence.json" \ + "flu_seasonal_vic_ha_12y.json" "flu_seasonal_vic_ha_12y_tip-frequencies.json" "flu_seasonal_vic_ha_12y_root-sequence.json" \ + "flu_seasonal_vic_na_2y.json" "flu_seasonal_vic_na_2y_tip-frequencies.json" "flu_seasonal_vic_na_2y_root-sequence.json" \ + "flu_seasonal_vic_na_3y.json" "flu_seasonal_vic_na_3y_tip-frequencies.json" "flu_seasonal_vic_na_3y_root-sequence.json" \ + "flu_seasonal_vic_na_6y.json" "flu_seasonal_vic_na_6y_tip-frequencies.json" "flu_seasonal_vic_na_6y_root-sequence.json" \ + "flu_seasonal_vic_na_12y.json" "flu_seasonal_vic_na_12y_tip-frequencies.json" "flu_seasonal_vic_na_12y_root-sequence.json" \ + "flu_seasonal_yam_ha_2y.json" "flu_seasonal_yam_ha_2y_tip-frequencies.json" "flu_seasonal_yam_ha_2y_root-sequence.json" \ + "flu_seasonal_yam_ha_3y.json" "flu_seasonal_yam_ha_3y_tip-frequencies.json" "flu_seasonal_yam_ha_3y_root-sequence.json" \ + "flu_seasonal_yam_ha_6y.json" "flu_seasonal_yam_ha_6y_tip-frequencies.json" "flu_seasonal_yam_ha_6y_root-sequence.json" \ + "flu_seasonal_yam_ha_12y.json" "flu_seasonal_yam_ha_12y_tip-frequencies.json" "flu_seasonal_yam_ha_12y_root-sequence.json" \ + "flu_seasonal_yam_na_2y.json" "flu_seasonal_yam_na_2y_tip-frequencies.json" "flu_seasonal_yam_na_2y_root-sequence.json" \ + "flu_seasonal_yam_na_3y.json" "flu_seasonal_yam_na_3y_tip-frequencies.json" "flu_seasonal_yam_na_3y_root-sequence.json" \ + "flu_seasonal_yam_na_6y.json" "flu_seasonal_yam_na_6y_tip-frequencies.json" "flu_seasonal_yam_na_6y_root-sequence.json" \ + "flu_seasonal_yam_na_12y.json" "flu_seasonal_yam_na_12y_tip-frequencies.json" "flu_seasonal_yam_na_12y_root-sequence.json" \ + ############## LATEST CORE SARS-CoV-2 (COVID-19) BUILDS ############## + "ncov_gisaid_global.json" "ncov_gisaid_global_tip-frequencies.json" \ + "ncov_gisaid_africa.json" "ncov_gisaid_africa_tip-frequencies.json" \ + "ncov_gisaid_asia.json" "ncov_gisaid_asia_tip-frequencies.json" \ + "ncov_gisaid_europe.json" "ncov_gisaid_europe_tip-frequencies.json" \ + "ncov_gisaid_north-america.json" "ncov_gisaid_north-america_tip-frequencies.json" \ + "ncov_gisaid_oceania.json" "ncov_gisaid_oceania_tip-frequencies.json" \ + "ncov_gisaid_south-america.json" "ncov_gisaid_south-america_tip-frequencies.json" \ + ############## TIMESTAMPED SARS-CoV-2 BUILDS USED IN NARRATIVES ############# + "ncov_2020-01-23.json" "ncov_2020-01-25.json" "ncov_2020-01-26.json" "ncov_2020-01-30.json" \ + "ncov_2020-03-04.json" "ncov_2020-03-05.json" "ncov_2020-03-11.json" "ncov_2020-03-13.json" \ + "ncov_2020-03-20.json" "ncov_2020-03-27.json" "ncov_2020-04-03.json" \ + "ncov_global_2020-04-09.json" "ncov_north-america_2020-04-17.json" \ ) rm -rf data/ mkdir -p data/ for i in "${data_files[@]}" do - curl http://staging.nextstrain.org/"${i}" --compressed -o data/"${i}" + curl http://data.nextstrain.org/"${i}" --compressed -o data/"${i}" done echo "Copying the test datasets from test/data to data" diff --git a/src/actions/entropy.js b/src/actions/entropy.js index 165cff876..e449c3f1a 100644 --- a/src/actions/entropy.js +++ b/src/actions/entropy.js @@ -1,6 +1,6 @@ -import { debounce } from 'lodash'; +import { debounce, isEqual } from 'lodash'; import { calcEntropyInView } from "../util/entropy"; -import { nucleotide_gene, equalArrays } from "../util/globals"; +import { nucleotide_gene } from "../util/globals"; import * as types from "./types"; import { isColorByGenotype, decodeColorByGenotype, getCdsFromGenotype} from "../util/getGenotype"; @@ -18,18 +18,18 @@ export const updateEntropyVisibility = debounce((dispatch, getState) => { }, 500, { leading: true, trailing: true }); /** - * Returns a thunk which makes zero or one dispatches to update the entropy reducer + * Returns a thunk which makes zero or one dispatches to update the entropy reducer * if the selected CDS and/or positions need updating. - * + * * The argument may vary: * - It may be a colorBy string, which may or may not be a genotype coloring * - It may be a CDS (Object) * - It may be the constant `nucleotide_gene` - * + * * The expected state updates to (selectedCds, selectedPositions) are as follows: * (nuc = nucleotide_gene, x,y,z are positions, [*] means any (or no) positions selected, * no-op means that no dispatches are made and thus the state is unchanged): - * + * * ----------------------------------------------------------------------------------- * PREVIOUS STATE | EXAMPLE ARGUMENT | NEW STATE ($ = entropy bar recalc needed) * ----------------------------------------------------------------------------------- @@ -49,13 +49,16 @@ export const updateEntropyVisibility = debounce((dispatch, getState) => { * CdsA, [*] | CdsB | CdsB, [] $ * CdsA, [*] | nucleotide_gene | nuc, [] $ * ----------------------------------------------------------------------------------- - * + * * @returns {ReduxThunk} */ export const changeEntropyCdsSelection = (arg) => (dispatch, getState) => { const action = {type: types.CHANGE_ENTROPY_CDS_SELECTION} const entropy = getState().entropy; + // no-op if the entropy data isn't present + if (!entropy.loaded) return; + if (arg === nucleotide_gene) { if (entropy.selectedCds === nucleotide_gene) { return @@ -91,7 +94,7 @@ export const changeEntropyCdsSelection = (arg) => (dispatch, getState) => { const [data, maxYVal] = calcEntropyInView(state.tree.nodes, state.tree.visibility, action.selectedCds, entropy.showCounts); action.bars = data; action.maxYVal = maxYVal; - } else if (equalArrays(action.selectedPositions, entropy.selectedPositions)) { + } else if (isEqual(action.selectedPositions, entropy.selectedPositions)) { return; } diff --git a/src/components/entropy/index.js b/src/components/entropy/index.js index 3821ee242..f0ed6a0b6 100644 --- a/src/components/entropy/index.js +++ b/src/components/entropy/index.js @@ -5,6 +5,7 @@ import { select } from "d3-selection"; import { withTranslation } from "react-i18next"; import 'd3-transition'; import { FaInfoCircle } from "react-icons/fa"; +import { isEqual } from 'lodash'; import Card from "../framework/card"; import { changeColorBy } from "../../actions/colors"; import { tabGroup, tabGroupMember, tabGroupMemberSelected } from "../../globalStyles"; @@ -13,7 +14,7 @@ import InfoPanel from "./infoPanel"; import { changeEntropyCdsSelection, showCountsNotEntropy } from "../../actions/entropy"; import { timerStart, timerEnd } from "../../util/perf"; import { encodeColorByGenotype } from "../../util/getGenotype"; -import { nucleotide_gene, equalArrays } from "../../util/globals"; +import { nucleotide_gene } from "../../util/globals"; import { getCdsByName } from "../../util/entropy"; import { StyledTooltip } from "../controls/styles"; import "../../css/entropy.css"; @@ -228,7 +229,7 @@ class Entropy extends React.Component { if (this.props.showCounts !== nextProps.showCounts) { updateParams.showCounts = nextProps.showCounts; } - if (!equalArrays(this.props.selectedPositions, nextProps.selectedPositions)) { + if (!isEqual(this.props.selectedPositions, nextProps.selectedPositions)) { updateParams.selectedPositions = nextProps.selectedPositions } if (Object.keys(updateParams).length) { diff --git a/src/util/globals.js b/src/util/globals.js index aaf6281c9..da8964366 100644 --- a/src/util/globals.js +++ b/src/util/globals.js @@ -247,5 +247,3 @@ const aminoAcids = { }; export const getAminoAcidName = (x) => aminoAcids[x.toUpperCase()] || "Unknown"; - -export const equalArrays = (a,b) => a.length===b.length && a.every((el, idx) => b[idx]===el);