Skip to content

Commit

Permalink
Fix #404 by including counts from cascade.csv.
Browse files: browse the repository at this point in the history
  • Loading branch information
donkirkby committed Aug 22, 2017
1 parent 7c3adaa commit 8cb8b34
Showing 1 changed file with 22 additions and 10 deletions.
32 changes: 22 additions & 10 deletions micall/monitor/update_qai.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import os

from micall import settings # Import first for logging configuration.

from micall.g2p.fastq_g2p import HIV_SEED_NAME
from micall.monitor import qai_helper
from micall.utils import sample_sheet_parser
from micall.core.project_config import ProjectConfig
Expand Down Expand Up @@ -120,6 +120,7 @@ def build_conseqs(conseqs_file,

def build_review_decisions(coverage_file,
collated_counts_file,
cascade_file,
sample_sheet,
sequencings,
project_regions,
Expand All @@ -129,6 +130,7 @@ def build_review_decisions(coverage_file,
@param coverage_file: CSV file with coverage scores
@param collated_counts_file: CSV file with read counts
@param cascade_file: CSV file with read counts throughout the pipeline
@param sample_sheet: the sample sheet for the run
@param sequencings: the sequencing records from QAI
@param project_regions: [{"id": project_region_id,
Expand All @@ -146,20 +148,24 @@ def build_review_decisions(coverage_file,
sample_names = dict(map(itemgetter('tags', 'filename'), sample_sheet['DataSplit']))

counts_map = {} # {tags: raw, (tags, seed): mapped}
unreported_tags = set()
# sample,type,count
for counts in csv.DictReader(collated_counts_file):
count = int(counts['count'])
tags = sample_tags[counts['sample']]
count_type = counts['type']
if count_type == 'raw':
counts_map[tags] = count
unreported_tags.add(tags)
elif count_type != 'unmapped':
if count_type not in ('raw', 'unmapped'):
seed = count_type.split(' ', 1)[1]
key = tags, seed
current_count = counts_map.get(key, 0)
counts_map[key] = max(current_count, count)
counts_map[key] = count

unreported_tags = set()
for counts in csv.DictReader(cascade_file):
tags = sample_tags[counts['sample']]
counts_map[tags] = int(counts['demultiplexed'])*2
unreported_tags.add(tags)

key = tags, HIV_SEED_NAME
counts_map[key] = int(counts['v3loop'])*2

sequencing_map = defaultdict(dict) # {tags: {project: sequencing}}
for sequencing in sequencings:
Expand Down Expand Up @@ -227,6 +233,7 @@ def build_review_decisions(coverage_file,

def upload_review_to_qai(coverage_file,
collated_counts_file,
cascade_file,
run,
sample_sheet,
conseqs,
Expand All @@ -235,6 +242,7 @@ def upload_review_to_qai(coverage_file,
@param coverage_file: the coverage scores to upload
@param collated_counts_file: CSV file of read counts to upload
@param cascade_file: CSV file of read counts throughout the pipeline
@param run: a hash with the attributes of the run record, including a
sequencing summary of all the samples and their target projects
@param sample_sheet: details of the run so we can tell which sample used
Expand All @@ -256,6 +264,7 @@ def upload_review_to_qai(coverage_file,

decisions = build_review_decisions(coverage_file,
collated_counts_file,
cascade_file,
sample_sheet,
sequencings,
project_regions,
Expand Down Expand Up @@ -330,7 +339,7 @@ def process_folder(result_folder):
logger.info('Uploading data to Oracle from {}'.format(result_folder))
collated_conseqs = os.path.join(result_folder, 'conseq.csv')
collated_counts = os.path.join(result_folder, 'remap_counts.csv')
nuc_variants = os.path.join(result_folder, 'nuc_variants.csv')
cascade = os.path.join(result_folder, 'cascade.csv')
coverage_scores = os.path.join(result_folder, 'coverage_scores.csv')
all_results_path, _ = os.path.split(os.path.normpath(result_folder))
run_path, _ = os.path.split(all_results_path)
Expand All @@ -352,9 +361,12 @@ def process_folder(result_folder):
run,
sample_sheet,
ok_sample_regions)
with open(coverage_scores, "rU") as f, open(collated_counts, "rU") as f2:
with open(coverage_scores, "rU") as f, \
open(collated_counts, "rU") as f2, \
open(cascade, "rU") as f3:
upload_review_to_qai(f,
f2,
f3,
run,
sample_sheet,
conseqs,
Expand Down

0 comments on commit 8cb8b34

Please sign in to comment.