Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Xbbtree #498

Merged
merged 17 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
!.github
data/*
data_test/*
!data_test/timetree.nwk
!data_test/xbbtree.nwk
!data_test/clusters.json
!data_test/dbstats.json
!data/*mut_annotations.json
.idea/
treetime/*
Expand Down
64 changes: 58 additions & 6 deletions batch.py
GopiGugan marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,60 @@ def process_feed(args, callback=None):
# filter data, align genomes, extract features, sort by lineage
by_lineage = process_feed(args, cb.callback)

# reconstruct time-scaled tree relating lineages
timetree, residuals = build_timetree(by_lineage, args, cb.callback)
# separate XBB and other recombinant lineages
# `designation` maps every alias prefix to a category ('XBB', 'recombinant'
# or 'non-recombinant') and to the expanded name of that prefix.
aliases = parse_alias(args.alias)
designation = {}
for prefix, truename in aliases.items():
    if isinstance(truename, list):
        # list-valued alias entries are treated as recombinants; the list
        # members are joined with '/' to form the full name
        designation[prefix] = {
            'type': 'XBB' if prefix == 'XBB' else 'recombinant',
            'fullname': '/'.join(truename)
        }
    else:
        # string-valued entries: anything whose expanded name starts with
        # "XBB" is grouped with XBB, everything else is non-recombinant
        designation[prefix] = {
            'type': 'XBB' if truename.startswith("XBB") else 'non-recombinant',
            'fullname': truename
        }

# use results to partition by_lineage database
non_recomb = {}
xbb = {}
other_recomb = {}
for lineage, ldata in by_lineage.items():
    # Put unassigned/unclassifiable lineages in non-recombinant category.
    # Matched case-insensitively, like the alias resolution used for dbstats.
    if lineage.lower() in ('unassigned', 'unclassifiable'):
        non_recomb[lineage] = ldata
        continue

    prefix = lineage.split('.')[0]
    entry = designation.get(prefix)
    if entry is not None:
        category = entry['type']
    elif prefix == 'XBB':
        category = 'XBB'
    elif prefix.startswith('X'):
        # prefix missing from the alias table; an 'X' prefix is treated as
        # recombinant, the same test the dbstats alias resolution applies
        category = 'recombinant'
    else:
        # unknown prefix: fall back to the main tree rather than raising
        # KeyError and aborting the whole batch run
        category = 'non-recombinant'

    if category == 'non-recombinant':
        non_recomb[lineage] = ldata
    elif category == 'XBB':
        xbb[lineage] = ldata
    else:
        other_recomb[lineage] = ldata

if len(xbb) < 2:
    # fewer than two XBB lineages: fold them into the other recombinants
    other_recomb.update(xbb)
    xbb = None  # no point in building a tree


# reconstruct time-scaled trees
# The tree for non-recombinant lineages is built first; its residuals dict
# is later extended with the residuals of the XBB tree.
timetree, residuals = build_timetree(non_recomb, args, cb.callback)

# second-resolution timestamp shared by all output files of this run
timestamp = datetime.now().isoformat().partition('.')[0]

nwk_file = os.path.join(args.outdir, f'timetree.{timestamp}.nwk')
with open(nwk_file, 'w') as handle:
    Phylo.write(timetree, file=handle, format='newick')

# The XBB file is always created; it is left empty when no XBB tree is built.
xbb_file = os.path.join(args.outdir, f'xbbtree.{timestamp}.nwk')
with open(xbb_file, 'w') as handle:
    if xbb is not None:
        timetree_xbb, residuals_xbb = build_timetree(
            xbb, args, cb.callback
        )
        residuals.update(residuals_xbb)
        Phylo.write(timetree_xbb, file=handle, format='newick')

# clustering analysis of lineages
result, infection_prediction = make_beadplots(by_lineage, args, cb.callback, t0=cb.t0.timestamp())
clust_file = os.path.join(args.outdir, 'clusters.{}.json'.format(timestamp))
Expand All @@ -172,9 +219,7 @@ def process_feed(args, callback=None):
# write data stats
dbstat_file = os.path.join(args.outdir, 'dbstats.{}.json'.format(timestamp))

alias = parse_alias(args.alias)

with open(dbstat_file, 'w') as handle:
with (open(dbstat_file, 'w') as handle):
# total number of sequences
nseqs = 0
for records in by_lineage.values():
Expand All @@ -187,7 +232,14 @@ def process_feed(args, callback=None):
}
for lineage, records in by_lineage.items():
prefix = lineage.split('.')[0]
lname = lineage.replace(prefix, alias[prefix]) if lineage.lower() not in ['unclassifiable', 'unassigned'] and not prefix.startswith('X') and alias[prefix] != '' else lineage

# resolve PANGO prefix aliases
lname = lineage
if (lineage.lower() not in ['unclassifiable', 'unassigned']
and not prefix.startswith('X')
and aliases[prefix] != ''):
lname = lineage.replace(prefix, aliases[prefix])

samples = unpack_records(records)
ndiffs = [len(x['diffs']) for x in samples]
val['lineages'][lineage] = {
Expand Down
2 changes: 1 addition & 1 deletion covizu/data/ProblematicSites_SARS-CoV2
Submodule ProblematicSites_SARS-CoV2 updated 33 files
+249 −245 README.md
+0 −0 archived_vcf/problematic_sites_sarsCov2.2021-10-14-11:49.vcf
+568 −0 archived_vcf/problematic_sites_sarsCov2.2021-10-27-18:39.vcf
+ compressed_vcf/problematic_sites_sarsCov2.v7.caution.vcf.gz
+ compressed_vcf/problematic_sites_sarsCov2.v7.caution.vcf.gz.csi
+ compressed_vcf/problematic_sites_sarsCov2.v7.caution.vcf.gz.tbi
+ compressed_vcf/problematic_sites_sarsCov2.v7.mask.vcf.gz
+ compressed_vcf/problematic_sites_sarsCov2.v7.mask.vcf.gz.csi
+ compressed_vcf/problematic_sites_sarsCov2.v7.mask.vcf.gz.tbi
+ compressed_vcf/problematic_sites_sarsCov2.v7.vcf.gz
+ compressed_vcf/problematic_sites_sarsCov2.v7.vcf.gz.csi
+ compressed_vcf/problematic_sites_sarsCov2.v7.vcf.gz.tbi
+ compressed_vcf/problematic_sites_sarsCov2.v8.caution.vcf.gz
+ compressed_vcf/problematic_sites_sarsCov2.v8.caution.vcf.gz.csi
+ compressed_vcf/problematic_sites_sarsCov2.v8.caution.vcf.gz.tbi
+ compressed_vcf/problematic_sites_sarsCov2.v8.mask.vcf.gz
+ compressed_vcf/problematic_sites_sarsCov2.v8.mask.vcf.gz.csi
+ compressed_vcf/problematic_sites_sarsCov2.v8.mask.vcf.gz.tbi
+ compressed_vcf/problematic_sites_sarsCov2.v8.vcf.gz
+ compressed_vcf/problematic_sites_sarsCov2.v8.vcf.gz.csi
+ compressed_vcf/problematic_sites_sarsCov2.v8.vcf.gz.tbi
+329 −0 data/problematic_sites.tsv
+255 −251 problematic_sites_sarsCov2.vcf
+7 −2 src/fill_alt_positions_from_vcf.py
+8 −4 src/generate_new_vcf.sh
+63 −0 src/generate_readme.sh
+5 −0 src/mask_alignment_using_vcf.py
+12 −12 src/site_list_to_vcf.py
+1 −1 src/vcf2markdown.py
+175 −174 subset_vcf/problematic_sites_sarsCov2.caution.vcf
+82 −78 subset_vcf/problematic_sites_sarsCov2.mask.vcf
+2 −2 test/parse_vcf.test.py
+570 −0 test/tst.vcf
2 changes: 1 addition & 1 deletion covizu/data/pango-designation
Submodule pango-designation updated 182 files
57 changes: 23 additions & 34 deletions cypress/e2e/beadplot.spec.cy.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,19 @@ import clusters from '../../data_test/clusters.json'
describe('Beadplot components', () => {
it('Edge Labels', ()=> {
cy.get('.beadplot-content>svg>g>text').should(($text) => {
expect($text).to.have.length(3)
expect($text.eq(0)).to.contain('unsampled0')
expect($text.eq(1)).to.contain('Scotland/GCVR-16F930')
expect($text.eq(2)).to.contain('Scotland/GCVR-170556')
expect($text).to.have.length(14)
expect($text.eq(0)).to.contain('USA/AZ-CDC-LC0932884')
expect($text.eq(1)).to.contain('USA/AZ-CDC-STM-TNFPTQHEE')
expect($text.eq(2)).to.contain('USA/AZ-CDC-LC0951048')
})
})
it('Samples', () => {
cy.get('.beadplot-content>svg>g>circle').then(($circ) => {
expect($circ).to.have.length(4)
for (let i = 0; i < 3; i++) {
cy.get($circ.eq(i)).should('have.attr', 'cy', 45)
expect($circ).to.have.length(23)
for (let i = 0; i < 7; i++) {
cy.get($circ.eq(i)).should('have.attr', 'cy', 20)
}
cy.get($circ.eq(3)).should('have.attr', 'cy', 70)
})

})
})
})

Expand All @@ -29,19 +27,15 @@ describe('Tables', () => {
})
it('Country details: gentable', () => {
cy.get('#tabs').contains('Countries').click()
var tips = {'allregions': {'North America': 13}, 'country': {'Canada': 8, 'USA': 5}}
var tips = {'allregions': {'North America': 23}, 'country': {'USA': 23}}

cy.window().then((win) => {
win.gentable(tips)
})
cy.get('table:visible>tbody>tr').should('have.length', 2)
cy.get('table:visible>tbody>tr').should('have.length', 1)
cy.get('table:visible>tbody>tr').eq(0).contains('North America')
cy.get('table:visible>tbody>tr').eq(0).contains('Canada')
cy.get('table:visible>tbody>tr').eq(0).contains('8')

cy.get('table:visible>tbody>tr').eq(1).contains('North America')
cy.get('table:visible>tbody>tr').eq(1).contains('USA')
cy.get('table:visible>tbody>tr').eq(1).contains('5')
cy.get('table:visible>tbody>tr').eq(0).contains('USA')
cy.get('table:visible>tbody>tr').eq(0).contains('23')
})
})

Expand All @@ -60,7 +54,7 @@ describe('Edge slider', () => {

// https://stackoverflow.com/questions/64855669/moving-slider-with-cypress

let targetValue = 12
let targetValue = 5.01
let currentValue = 2
let increment = 0.01
steps = (targetValue - currentValue) / increment + 1
Expand All @@ -74,28 +68,23 @@ describe('Edge slider', () => {
cy.get('#custom-handle').should('contain', targetValue)
})
it('All edges are visible on max slider value ', () => {
cy.get('[stroke="#bbd"]').should('have.length', 2)
})
it('No edges are visible on slider value of 11.99 ', () => {
cy.get('#left-arrow').click()
cy.get('#custom-handle').should('contain', 11.99)
cy.get('[stroke="#bbd"]').should('not.exist')
cy.get('[stroke="#bbd"]').should('have.length', 12)
})
})

describe('Tooltips', () => {
it('Horizontal Edge', () => {
cy.get('#Scotland-GCVR-16F930').first().trigger('mouseover')
cy.get('.tooltip').contains('Parent: unsampled0')
cy.get('.tooltip').contains('Europe: 3')
cy.get('.tooltip').contains('Unique collection dates: 3')
cy.get('.tooltip').contains('2020-03-19 / 2020-03-27')
cy.get('#USA-AZ-CDC-LC0983866').first().trigger('mouseover')
cy.get('.tooltip').contains('Parent: USA/AZ-CDC-LC0932884')
cy.get('.tooltip').contains('North America: 2')
cy.get('.tooltip').contains('Unique collection dates: 2')
cy.get('.tooltip').contains('2023-01-02 / 2023-01-07')
})
it('Bead', () => {
cy.get('circle:visible').first().trigger('mouseover')
cy.get('#EPI_ISL_1054790').first().trigger('mouseover')
cy.get('.tooltip').contains('Parent: unsampled0')
cy.get('.tooltip').contains('Genomic distance: 12')
cy.get('.tooltip').contains('Europe: 1')
cy.get('.tooltip').contains('Collection date: 2020-03-19')
cy.get('.tooltip').contains('Genomic distance: 1.82')
cy.get('.tooltip').contains('North America: 1')
cy.get('.tooltip').contains('Collection date: 2022-11-12')
})
})
42 changes: 25 additions & 17 deletions cypress/e2e/covizu.spec.cy.js
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ describe('Search Interface', () => {
})

// this test was failing because (i think) it was searching for a data-point ('HMH') that no longer exists in the current dataset
// I've replaced 'HMH' with 'China'
it("Searching 'China' results in selection and expected point count", () => {
cy.get('#search-input').type('China')
// I've replaced 'HMH' with 'Japan'
it("Searching 'Japan' results in selection and expected point count", () => {
cy.get('#search-input').type('Japan')
cy.get('#search-button').click({force:true})
cy.get('.selectionH').should('exist')
cy.get('.SelectedCluster').should('exist')
Expand Down Expand Up @@ -123,20 +123,28 @@ describe('Search Interface', () => {

describe('Tooltips', () => {
it('Appear on hover over cluster', () => {
cy.get('[id=id-0]').trigger('mouseover');
cy.get('.tooltip').should('be.visible').should('have.length', 1)
cy.window().then((win)=>{
win.reset_tree(true);
const tip_id = win.display_id.non_recombinants.last;
cy.get(`[id=id-${tip_id}]`).trigger('mouseover');
cy.get('[id=id-563]').trigger('mouseover');
cy.get('.tooltip').should('be.visible').should('have.length', 1)
});
})
it('Cluster tooltips contain relevant and correct information', () => {
cy.get('[id=id-0]').trigger('mouseover');
cy.window().then((win)=>{
cy.get('.tooltip').contains(`Sampled: ${win.tips[0]['varcount']}`)
cy.get('.tooltip').contains(`Displayed: ${win.tips[0]['sampled_varcount']}`)
cy.wrap(Object.keys(win.tips[0]['allregions'])).each(($el, index) => {
cy.get('.tooltip').contains(`${$el}: ${Object.values(win.tips[0]['allregions'])[index]}`)
win.reset_tree(true);
const tip_id = win.display_id.non_recombinants.last;
const last_index = win.tips.length - 1;
cy.get(`[id=id-${tip_id}]`).trigger('mouseover');
cy.get('.tooltip').contains(`Sampled: ${win.tips[last_index]['varcount']}`)
cy.get('.tooltip').contains(`Displayed: ${win.tips[last_index]['sampled_varcount']}`)
cy.wrap(Object.keys(win.tips[last_index]['allregions'])).each(($el, index) => {
cy.get('.tooltip').contains(`${$el}: ${Object.values(win.tips[last_index]['allregions'])[index]}`)
})
cy.get('.tooltip').contains(`Mean diffs from root: ${Math.round(100*win.tips[0]['mean_ndiffs'])/100}`)
cy.get('.tooltip').contains(`Deviation from clock: ${win.tips[0]['residual'].toFixed(2)}`)
cy.get('.tooltip').contains(`${win.tips[0]['first_date'].toISOString().slice(0, 10)} / ${win.tips[0]['last_date'].toISOString().slice(0, 10)}`)
cy.get('.tooltip').contains(`Mean diffs from root: ${Math.round(100*win.tips[last_index]['mean_ndiffs'])/100}`)
cy.get('.tooltip').contains(`Deviation from clock: ${win.tips[last_index]['residual'].toFixed(2)}`)
cy.get('.tooltip').contains(`${win.tips[last_index]['first_date'].toISOString().slice(0, 10)} / ${win.tips[last_index]['last_date'].toISOString().slice(0, 10)}`)
})
})
it('Appear on hover over bead', () => {
Expand Down Expand Up @@ -182,7 +190,7 @@ describe("Colour tree", () => {
let region_color_map = {};
cy.get("#select-tree-colours").select(0).should('have.value','Region')
cy.window().then((win)=>{
regions = [...Array.from(new Set(Object.values(win.region_map))),"China"]
regions = [...Array.from(new Set(Object.values(win.region_map))),"China", "South America", "Oceania"]
return regions;
})
.then(()=>{
Expand Down Expand Up @@ -214,14 +222,14 @@ describe("Colour tree", () => {
// check and see if this region_color_map is correctly represented in the tree graph
cy.window().then((win)=>{
let region_title;
const tip_id = 215;
win.reset_tree(true);
const tip_id = win.display_id.non_recombinants.last;
cy.get(`[id=id-${tip_id}]`).should('be.visible').trigger('mouseover');
cy.get(`[id=id-${tip_id}]`).invoke('css','fill').then((tip_color)=>{
region_title = Object.keys(win.tips[tip_id]['allregions'])[0];
region_title = Object.keys(win.tips[win.tips.length - 1]['allregions'])[0];
// expect(tip_color).to.equal(region_color_map[region_title])
})
})
})
})

})
4 changes: 2 additions & 2 deletions cypress/e2e/utils.spec.cy.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ describe('isLineage Function', () => {
it('Retruns true if the string is a lineage ', () => {
cy.window().then((win) => {
expect(win.isLineage("B.101.511")).to.eq(false)
expect(win.isLineage("B.4.1")).to.eq(true)
expect(win.isLineage("B.1.446")).to.eq(true)
expect(win.isLineage("BA.5.3")).to.eq(true)
expect(win.isLineage("CZ.2")).to.eq(true)
})
})
})
Expand Down
Loading
Loading