Skip to content

Commit

Permalink
Merge pull request #498 from PoonLab/xbbtree
Browse files Browse the repository at this point in the history
Xbbtree
  • Loading branch information
GopiGugan authored Dec 21, 2023
2 parents 89cf47d + 68c22e6 commit e2cd776
Show file tree
Hide file tree
Showing 26 changed files with 1,037 additions and 109,697 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
!.github
data/*
data_test/*
!data_test/timetree.nwk
!data_test/xbbtree.nwk
!data_test/clusters.json
!data_test/dbstats.json
!data/*mut_annotations.json
.idea/
treetime/*
Expand Down
65 changes: 59 additions & 6 deletions batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,60 @@ def process_feed(args, callback=None):
# filter data, align genomes, extract features, sort by lineage
by_lineage = process_feed(args, cb.callback)

# reconstruct time-scaled tree relating lineages
timetree, residuals = build_timetree(by_lineage, args, cb.callback)
# Separate XBB and other recombinant lineages from the non-recombinants.
# parse_alias() yields a mapping of PANGO prefix -> expansion.  A list-valued
# expansion is treated here as a recombinant (its entries are joined with '/');
# a string-valued expansion is a plain alias.
aliases = parse_alias(args.alias)
designation = {}
for prefix, truename in aliases.items():
    if isinstance(truename, list):
        designation[prefix] = {
            'type': 'XBB' if prefix == 'XBB' else 'recombinant',
            'fullname': '/'.join(truename)
        }
    else:
        # aliases that expand to an XBB sublineage are grouped with XBB
        designation[prefix] = {
            'type': 'XBB' if truename.startswith("XBB") else 'non-recombinant',
            'fullname': truename
        }

# Use the designations to partition the by_lineage database into three groups.
non_recomb = {}
xbb = {}
other_recomb = {}
for lineage, ldata in by_lineage.items():
    # Labels that are not PANGO lineages have no alias entry; keep them with
    # the non-recombinants instead of failing on the designation lookup.
    if lineage.lower() in ('unassigned', 'unclassifiable'):
        non_recomb[lineage] = ldata
        continue

    prefix = lineage.split('.')[0]
    if prefix in designation:
        category = designation[prefix]['type']
    else:
        # Prefix missing from the alias table (e.g. alias file older than the
        # data feed).  Fall back on the leading-'X' convention for recombinant
        # prefixes rather than raising KeyError.
        # NOTE(review): confirm this fallback is preferred over aborting the run.
        category = 'recombinant' if prefix.startswith('X') else 'non-recombinant'

    if category == 'non-recombinant':
        non_recomb[lineage] = ldata
    elif category == 'XBB':
        xbb[lineage] = ldata
    else:
        other_recomb[lineage] = ldata

# Fewer than two XBB lineages: fold them into the other recombinants and flag
# that no XBB tree should be built.
if len(xbb) < 2:
    other_recomb.update(xbb)
    xbb = None  # no point in building a tree


# reconstruct time-scaled trees
timetree, residuals = build_timetree(non_recomb, args, cb.callback)
timestamp = datetime.now().isoformat().split('.')[0]
nwk_file = os.path.join(args.outdir, 'timetree.{}.nwk'.format(timestamp))
with open(nwk_file, 'w') as handle:
Phylo.write(timetree, file=handle, format='newick')

xbb_file = os.path.join(args.outdir, 'xbbtree.{}.nwk'.format(timestamp))
with open(xbb_file, 'w') as handle:
if xbb is not None:
timetree_xbb, residuals_xbb = build_timetree(xbb, args, cb.callback)
residuals.update(residuals_xbb)
Phylo.write(timetree_xbb, file=handle, format='newick')
# else empty file

# clustering analysis of lineages
result, infection_prediction = make_beadplots(by_lineage, args, cb.callback, t0=cb.t0.timestamp())
clust_file = os.path.join(args.outdir, 'clusters.{}.json'.format(timestamp))
Expand All @@ -172,9 +219,7 @@ def process_feed(args, callback=None):
# write data stats
dbstat_file = os.path.join(args.outdir, 'dbstats.{}.json'.format(timestamp))

alias = parse_alias(args.alias)

with open(dbstat_file, 'w') as handle:
with (open(dbstat_file, 'w') as handle):
# total number of sequences
nseqs = 0
for records in by_lineage.values():
Expand All @@ -187,7 +232,14 @@ def process_feed(args, callback=None):
}
for lineage, records in by_lineage.items():
prefix = lineage.split('.')[0]
lname = lineage.replace(prefix, alias[prefix]) if lineage.lower() not in ['unclassifiable', 'unassigned'] and not prefix.startswith('X') and alias[prefix] != '' else lineage

# resolve PANGO prefix aliases
lname = lineage
if (lineage.lower() not in ['unclassifiable', 'unassigned']
and not prefix.startswith('X')
and aliases[prefix] != ''):
lname = lineage.replace(prefix, aliases[prefix])

samples = unpack_records(records)
ndiffs = [len(x['diffs']) for x in samples]
val['lineages'][lineage] = {
Expand All @@ -206,6 +258,7 @@ def process_feed(args, callback=None):
if not args.dry_run:
server_root = 'filogeneti.ca:/var/www/html/covizu/data'
subprocess.check_call(['scp', nwk_file, '{}/timetree.nwk'.format(server_root)])
subprocess.check_call(['scp', xbb_file, '{}/xbbtree.nwk'.format(server_root)])
subprocess.check_call(['scp', clust_file, '{}/clusters.json'.format(server_root)])
subprocess.check_call(['scp', dbstat_file, '{}/dbstats.json'.format(server_root)])

Expand Down
8 changes: 8 additions & 0 deletions config/dbconfig.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,22 @@ const $JSON_DATA_FOLDER = "data"
const $DATABASE__PRIMARY = "covizu_1";
const $DATABASE__SECONDARY = "covizu_2";
const $COLLECTION__CLUSTERS = "clusters";
const $COLLECTION__DBSTATS = "dbstats";
const $COLLECTION__BEADDATA = "beaddata";
const $COLLECTION__TIPS = "tips";
const $COLLECTION__RECOMBINANT_TIPS = "recombinant_tips";
const $COLLECTION__ACCN_TO_CID = "accn_to_cid";
const $COLLECTION__LINEAGE_TO_CID = "lineage_to_cid";
const $COLLECTION__REGION_MAP = "region_map";
const $COLLECTION__DF_TREE = "df_tree";
const $COLLECTION__XBB_TREE = "xbb_tree";
const $COLLECTION__AUTOCOMPLETE_DATA = "autocomplete_data";
const $COLLECTION__FLAT_DATA = "flat_data";

const $JSONFILE__CLUSTERS = "clusters.json";
const $JSONFILE__DBSTATS = "dbstats.json";
const $NWKFILE__TREE = "timetree.nwk";
const $XBB__TREE = "xbbtree.nwk";

var $NODE_ENV = process.env.NODE_ENV;

Expand Down Expand Up @@ -73,17 +77,21 @@ module.exports = {
$COVIZU_CONNECTION_URI,
$ACTIVE_DATABASE,
$COLLECTION__CLUSTERS,
$COLLECTION__DBSTATS,
$COLLECTION__BEADDATA,
$COLLECTION__TIPS,
$COLLECTION__RECOMBINANT_TIPS,
$COLLECTION__ACCN_TO_CID,
$COLLECTION__LINEAGE_TO_CID,
$COLLECTION__REGION_MAP,
$COLLECTION__DF_TREE,
$COLLECTION__XBB_TREE,
$COLLECTION__AUTOCOMPLETE_DATA,
$COLLECTION__FLAT_DATA,
$PROJECT_ROOT,
$JSON_DATA_FOLDER,
$JSONFILE__CLUSTERS,
$JSONFILE__DBSTATS,
$NWKFILE__TREE,
$XBB__TREE
}
2 changes: 1 addition & 1 deletion covizu/data/ProblematicSites_SARS-CoV2
Submodule ProblematicSites_SARS-CoV2 updated 33 files
+249 −245 README.md
+0 −0 archived_vcf/problematic_sites_sarsCov2.2021-10-14-11:49.vcf
+568 −0 archived_vcf/problematic_sites_sarsCov2.2021-10-27-18:39.vcf
+ compressed_vcf/problematic_sites_sarsCov2.v7.caution.vcf.gz
+ compressed_vcf/problematic_sites_sarsCov2.v7.caution.vcf.gz.csi
+ compressed_vcf/problematic_sites_sarsCov2.v7.caution.vcf.gz.tbi
+ compressed_vcf/problematic_sites_sarsCov2.v7.mask.vcf.gz
+ compressed_vcf/problematic_sites_sarsCov2.v7.mask.vcf.gz.csi
+ compressed_vcf/problematic_sites_sarsCov2.v7.mask.vcf.gz.tbi
+ compressed_vcf/problematic_sites_sarsCov2.v7.vcf.gz
+ compressed_vcf/problematic_sites_sarsCov2.v7.vcf.gz.csi
+ compressed_vcf/problematic_sites_sarsCov2.v7.vcf.gz.tbi
+ compressed_vcf/problematic_sites_sarsCov2.v8.caution.vcf.gz
+ compressed_vcf/problematic_sites_sarsCov2.v8.caution.vcf.gz.csi
+ compressed_vcf/problematic_sites_sarsCov2.v8.caution.vcf.gz.tbi
+ compressed_vcf/problematic_sites_sarsCov2.v8.mask.vcf.gz
+ compressed_vcf/problematic_sites_sarsCov2.v8.mask.vcf.gz.csi
+ compressed_vcf/problematic_sites_sarsCov2.v8.mask.vcf.gz.tbi
+ compressed_vcf/problematic_sites_sarsCov2.v8.vcf.gz
+ compressed_vcf/problematic_sites_sarsCov2.v8.vcf.gz.csi
+ compressed_vcf/problematic_sites_sarsCov2.v8.vcf.gz.tbi
+329 −0 data/problematic_sites.tsv
+255 −251 problematic_sites_sarsCov2.vcf
+7 −2 src/fill_alt_positions_from_vcf.py
+8 −4 src/generate_new_vcf.sh
+63 −0 src/generate_readme.sh
+5 −0 src/mask_alignment_using_vcf.py
+12 −12 src/site_list_to_vcf.py
+1 −1 src/vcf2markdown.py
+175 −174 subset_vcf/problematic_sites_sarsCov2.caution.vcf
+82 −78 subset_vcf/problematic_sites_sarsCov2.mask.vcf
+2 −2 test/parse_vcf.test.py
+570 −0 test/tst.vcf
2 changes: 1 addition & 1 deletion covizu/data/pango-designation
Submodule pango-designation updated 182 files
2 changes: 1 addition & 1 deletion css/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -782,7 +782,7 @@ h2.modal-title {

#tree-slider.ui-slider-horizontal {
appearance: none;
width: 79%;
width: 195px; /* Setting the slider width to match width of the tree axis */
height: 5px;
opacity: 0.7; /* Set transparency (for mouse-over effects on hover) */
transition: opacity .2s;
Expand Down
57 changes: 23 additions & 34 deletions cypress/e2e/beadplot.spec.cy.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,19 @@ import clusters from '../../data_test/clusters.json'
describe('Beadplot components', () => {
it('Edge Labels', ()=> {
cy.get('.beadplot-content>svg>g>text').should(($text) => {
expect($text).to.have.length(3)
expect($text.eq(0)).to.contain('unsampled0')
expect($text.eq(1)).to.contain('Scotland/GCVR-16F930')
expect($text.eq(2)).to.contain('Scotland/GCVR-170556')
expect($text).to.have.length(14)
expect($text.eq(0)).to.contain('USA/AZ-CDC-LC0932884')
expect($text.eq(1)).to.contain('USA/AZ-CDC-STM-TNFPTQHEE')
expect($text.eq(2)).to.contain('USA/AZ-CDC-LC0951048')
})
})
it('Samples', () => {
cy.get('.beadplot-content>svg>g>circle').then(($circ) => {
expect($circ).to.have.length(4)
for (let i = 0; i < 3; i++) {
cy.get($circ.eq(i)).should('have.attr', 'cy', 45)
expect($circ).to.have.length(23)
for (let i = 0; i < 7; i++) {
cy.get($circ.eq(i)).should('have.attr', 'cy', 20)
}
cy.get($circ.eq(3)).should('have.attr', 'cy', 70)
})

})
})
})

Expand All @@ -29,19 +27,15 @@ describe('Tables', () => {
})
it('Country details: gentable', () => {
cy.get('#tabs').contains('Countries').click()
var tips = {'allregions': {'North America': 13}, 'country': {'Canada': 8, 'USA': 5}}
var tips = {'allregions': {'North America': 23}, 'country': {'USA': 23}}

cy.window().then((win) => {
win.gentable(tips)
})
cy.get('table:visible>tbody>tr').should('have.length', 2)
cy.get('table:visible>tbody>tr').should('have.length', 1)
cy.get('table:visible>tbody>tr').eq(0).contains('North America')
cy.get('table:visible>tbody>tr').eq(0).contains('Canada')
cy.get('table:visible>tbody>tr').eq(0).contains('8')

cy.get('table:visible>tbody>tr').eq(1).contains('North America')
cy.get('table:visible>tbody>tr').eq(1).contains('USA')
cy.get('table:visible>tbody>tr').eq(1).contains('5')
cy.get('table:visible>tbody>tr').eq(0).contains('USA')
cy.get('table:visible>tbody>tr').eq(0).contains('23')
})
})

Expand All @@ -60,7 +54,7 @@ describe('Edge slider', () => {

// https://stackoverflow.com/questions/64855669/moving-slider-with-cypress

let targetValue = 12
let targetValue = 5.01
let currentValue = 2
let increment = 0.01
steps = (targetValue - currentValue) / increment + 1
Expand All @@ -74,28 +68,23 @@ describe('Edge slider', () => {
cy.get('#custom-handle').should('contain', targetValue)
})
it('All edges are visible on max slider value ', () => {
cy.get('[stroke="#bbd"]').should('have.length', 2)
})
it('No edges are visible on slider value of 11.99 ', () => {
cy.get('#left-arrow').click()
cy.get('#custom-handle').should('contain', 11.99)
cy.get('[stroke="#bbd"]').should('not.exist')
cy.get('[stroke="#bbd"]').should('have.length', 12)
})
})

describe('Tooltips', () => {
it('Horizontal Edge', () => {
cy.get('#Scotland-GCVR-16F930').first().trigger('mouseover')
cy.get('.tooltip').contains('Parent: unsampled0')
cy.get('.tooltip').contains('Europe: 3')
cy.get('.tooltip').contains('Unique collection dates: 3')
cy.get('.tooltip').contains('2020-03-19 / 2020-03-27')
cy.get('#USA-AZ-CDC-LC0983866').first().trigger('mouseover')
cy.get('.tooltip').contains('Parent: USA/AZ-CDC-LC0932884')
cy.get('.tooltip').contains('North America: 2')
cy.get('.tooltip').contains('Unique collection dates: 2')
cy.get('.tooltip').contains('2023-01-02 / 2023-01-07')
})
it('Bead', () => {
cy.get('circle:visible').first().trigger('mouseover')
cy.get('#EPI_ISL_1054790').first().trigger('mouseover')
cy.get('.tooltip').contains('Parent: unsampled0')
cy.get('.tooltip').contains('Genomic distance: 12')
cy.get('.tooltip').contains('Europe: 1')
cy.get('.tooltip').contains('Collection date: 2020-03-19')
cy.get('.tooltip').contains('Genomic distance: 1.82')
cy.get('.tooltip').contains('North America: 1')
cy.get('.tooltip').contains('Collection date: 2022-11-12')
})
})
72 changes: 54 additions & 18 deletions cypress/e2e/covizu.spec.cy.js
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ describe('Search Interface', () => {
})

// this test was failing because (i think) it was searching for a data-point ('HMH') that no longer exists in the current dataset
// I've replaced 'HMH' with 'China'
it("Searching 'China' results in selection and expected point count", () => {
cy.get('#search-input').type('China')
// I've replaced 'HMH' with 'Japan'
it("Searching 'Japan' results in selection and expected point count", () => {
cy.get('#search-input').type('Japan')
cy.get('#search-button').click({force:true})
cy.get('.selectionH').should('exist')
cy.get('.SelectedCluster').should('exist')
Expand Down Expand Up @@ -123,20 +123,27 @@ describe('Search Interface', () => {

describe('Tooltips', () => {
it('Appear on hover over cluster', () => {
cy.get('[id=id-0]').trigger('mouseover');
cy.get('.tooltip').should('be.visible').should('have.length', 1)
cy.window().then((win)=>{
win.reset_tree(true);
const tip_id = win.display_id.non_recombinants.last;
cy.get(`[id=id-${tip_id}]`).trigger('mouseover');
cy.get('.tooltip').should('be.visible').should('have.length', 1)
});
})
it('Cluster tooltips contain relevant and correct information', () => {
cy.get('[id=id-0]').trigger('mouseover');
cy.window().then((win)=>{
cy.get('.tooltip').contains(`Sampled: ${win.tips[0]['varcount']}`)
cy.get('.tooltip').contains(`Displayed: ${win.tips[0]['sampled_varcount']}`)
cy.wrap(Object.keys(win.tips[0]['allregions'])).each(($el, index) => {
cy.get('.tooltip').contains(`${$el}: ${Object.values(win.tips[0]['allregions'])[index]}`)
win.reset_tree(true);
const tip_id = win.display_id.non_recombinants.last;
const last_index = win.tips.length - 1;
cy.get(`[id=id-${tip_id}]`).trigger('mouseover');
cy.get('.tooltip').contains(`Sampled: ${win.tips[last_index]['varcount']}`)
cy.get('.tooltip').contains(`Displayed: ${win.tips[last_index]['sampled_varcount']}`)
cy.wrap(Object.keys(win.tips[last_index]['allregions'])).each(($el, index) => {
cy.get('.tooltip').contains(`${$el}: ${Object.values(win.tips[last_index]['allregions'])[index]}`)
})
cy.get('.tooltip').contains(`Mean diffs from root: ${Math.round(100*win.tips[0]['mean_ndiffs'])/100}`)
cy.get('.tooltip').contains(`Deviation from clock: ${win.tips[0]['residual'].toFixed(2)}`)
cy.get('.tooltip').contains(`${win.tips[0]['first_date'].toISOString().slice(0, 10)} / ${win.tips[0]['last_date'].toISOString().slice(0, 10)}`)
cy.get('.tooltip').contains(`Mean diffs from root: ${Math.round(100*win.tips[last_index]['mean_ndiffs'])/100}`)
cy.get('.tooltip').contains(`Deviation from clock: ${win.tips[last_index]['residual'].toFixed(2)}`)
cy.get('.tooltip').contains(`${win.tips[last_index]['first_date'].toISOString().slice(0, 10)} / ${win.tips[last_index]['last_date'].toISOString().slice(0, 10)}`)
})
})
it('Appear on hover over bead', () => {
Expand All @@ -161,9 +168,38 @@ describe('Tooltips', () => {
})
})

describe("Display options", () => {
  // Option labels in the order they are expected to appear in the dropdown.
  const expectedLabels = ['Non-Recombinants', 'XBB Lineages', 'Other Recombinants'];

  // Resets the tree, then hovers over the last and the first tip of the given
  // display group, requiring each to be visible beforehand.
  const hoverLastAndFirstTip = (win, group) => {
    win.reset_tree(true);
    const { last, first } = win.display_id[group];
    cy.get(`[id=id-${last}]`).should('be.visible').trigger('mouseover');
    cy.get(`[id=id-${first}]`).should('be.visible').trigger('mouseover');
  };

  it("Verify options within the Display dropdown", () => {
    cy.visit("http://localhost:8001");
    cy.get("#splash-button").click();
    cy.get("#display-tree").children().should(($options) => {
      expect($options).to.have.length(expectedLabels.length);
      expectedLabels.forEach((label, position) => {
        expect($options.eq(position)).to.contain(label);
      });
    });
  });

  it("Selecting 'XBB Lineages' Display option", () => {
    cy.get("#display-tree").select(1).should('have.value', 'XBB Lineages');
    cy.window().then((win) => {
      hoverLastAndFirstTip(win, 'xbb');
    });
  });

  it("Selecting 'Other Recombinants' Display option", () => {
    cy.get("#display-tree").select(2).should('have.value', 'Other Recombinants');
    cy.window().then((win) => {
      hoverLastAndFirstTip(win, 'other_recombinants');
    });
  });
});

describe("Colour tree", () => {

it("Verify <options> within <select> Colour tree", ()=>{
cy.visit("http://localhost:8001");
cy.get("#splash-button").click();
Expand All @@ -182,7 +218,7 @@ describe("Colour tree", () => {
let region_color_map = {};
cy.get("#select-tree-colours").select(0).should('have.value','Region')
cy.window().then((win)=>{
regions = [...Array.from(new Set(Object.values(win.region_map))),"China"]
regions = [...Array.from(new Set(Object.values(win.region_map))),"China", "South America", "Oceania"]
return regions;
})
.then(()=>{
Expand Down Expand Up @@ -214,14 +250,14 @@ describe("Colour tree", () => {
// check and see if this region_color_map is correctly represented in the tree graph
cy.window().then((win)=>{
let region_title;
const tip_id = 215;
win.reset_tree(true);
const tip_id = win.display_id.non_recombinants.last;
cy.get(`[id=id-${tip_id}]`).should('be.visible').trigger('mouseover');
cy.get(`[id=id-${tip_id}]`).invoke('css','fill').then((tip_color)=>{
region_title = Object.keys(win.tips[tip_id]['allregions'])[0];
region_title = Object.keys(win.tips[win.tips.length - 1]['allregions'])[0];
// expect(tip_color).to.equal(region_color_map[region_title])
})
})
})
})

})
Loading

0 comments on commit e2cd776

Please sign in to comment.