Skip to content

Commit

Permalink
Pass bad_cycles.csv to single sample handler functions, as part of #530.
Browse files Browse the repository at this point in the history
  • Loading branch information
donkirkby committed Jun 10, 2020
1 parent d34b42a commit fed2058
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 48 deletions.
8 changes: 6 additions & 2 deletions micall/core/remap.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,13 +898,17 @@ def convert_prelim(prelim_csv,
ref_counts = defaultdict(lambda: [0, 0]) # {rname: [filtered_count, count]}
reader = csv.DictReader(prelim_csv)
for row in reader:
counts = ref_counts[row['rname']]
is_unmapped = is_unmapped_read(row['flag'])
refname = row['rname']
if is_unmapped:
refname = '*'
counts = ref_counts[refname]
counts[1] += 1 # full count

# write SAM row
target.write('\t'.join([row[field] for field in SAM_FIELDS]) + '\n')

if is_unmapped_read(row['flag']):
if is_unmapped:
continue
if is_short_read(row, max_primer_length=50):
# exclude short reads
Expand Down
47 changes: 47 additions & 0 deletions micall/tests/test_remap.py
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,53 @@ def test_star_region(self):
self.assertEqual(expected_remap_counts, self.remap_counts.getvalue())
self.assertEqual(expected_seed_counts, seed_counts)

def test_unmapped_flag(self):
"""Check how convert_prelim handles a read whose flag marks it unmapped.

The third row still names R1-seed in its rname column, but the expected
output counts it under 'prelim *' and leaves it out of the seed counts.
"""
# Show complete diffs when one of the long expected strings mismatches.
self.maxDiff = None
# example1 and example2 use flag 89; example3 uses flag 93, which also has
# the 0x4 bit set (segment unmapped, per the SAM flag definitions).
prelim_csv = StringIO("""\
qname,flag,rname,pos,mapq,cigar,rnext,pnext,tlen,seq,qual
example1,89,R1-seed,1,0,54M,=,1,0,\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2,89,R1-seed,1,0,54M,=,1,0,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3,93,R1-seed,*,*,*,*,*,*,\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT,\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
""")
count_threshold = 2
# All three rows, including the unmapped one, are expected in the SAM output.
expected_sam_file = """\
@HD VN:1.0 SO:unsorted
@SQ SN:R1-seed LN:9
@SQ SN:R2-seed LN:12
@PG ID:bowtie2 PN:bowtie2 VN:2.2.3 CL:""
example1\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTTAAACCCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example2\t89\tR1-seed\t1\t0\t54M\t=\t1\t0\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
example3\t93\tR1-seed\t*\t*\t*\t*\t*\t*\t\
AAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTTAAAACCTTT\t\
BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
"""
# The unmapped read is tallied under '*' with a filtered count of zero;
# only the two flag-89 reads are tallied under R1-seed.
expected_remap_counts = """\
type,filtered_count,count
prelim *,0,1
prelim R1-seed,2,2
"""
# R1-seed's two filtered reads match count_threshold (2) and it is the only
# seed expected back.
expected_seed_counts = {'R1-seed': 2}

seed_counts = convert_prelim(prelim_csv,
self.sam_file,
self.remap_counts_writer,
count_threshold,
self.projects)

# Check the SAM text, the remap counts CSV, and the returned seed counts.
self.assertEqual(expected_sam_file, self.sam_file.getvalue())
self.assertEqual(expected_remap_counts, self.remap_counts.getvalue())
self.assertEqual(expected_seed_counts, seed_counts)

def test_best_in_group(self):
self.maxDiff = None
prelim_csv = StringIO("""\
Expand Down
60 changes: 14 additions & 46 deletions micall_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,8 @@ def basespace_run(args):
resolved_args = MiCallArgs(args)
run_info = load_samples(resolved_args.results_folder)
process_run(run_info, args)
zip_folder(run_info.output_path, 'resistance_reports')
zip_folder(run_info.output_path, 'coverage_maps')


def process_folder(args):
Expand Down Expand Up @@ -572,47 +574,6 @@ def process_run(run_info, args):
logger.info('Done.')


def sample_process_helper(resolved_args, scratch_path, use_denovo, skip=()):
    """ Build a Sample from resolved arguments, process it, and return it.

    Each attribute named in file_fields is read from resolved_args and
    passed to Sample under the same keyword name.

    :param resolved_args: object that exposes the input and output file
        settings as attributes
    :param scratch_path: passed to Sample as scratch_path
    :param use_denovo: passed to Sample.process() as use_denovo
    :param skip: passed to Sample as skip
    :return: the Sample, after its process() method has been called
    """
    # Keep this order: the attributes are read from resolved_args and handed
    # to Sample in exactly this sequence.
    file_fields = (
        'fastq1',
        'fastq2',
        'bad_cycles_csv',
        'g2p_csv',
        'g2p_summary_csv',
        'remap_counts_csv',
        'remap_conseq_csv',
        'unmapped1_fastq',
        'unmapped2_fastq',
        'conseq_ins_csv',
        'failed_csv',
        'cascade_csv',
        'nuc_csv',
        'amino_csv',
        'coord_ins_csv',
        'conseq_csv',
        'conseq_all_csv',
        'conseq_region_csv',
        'failed_align_csv',
        'coverage_scores_csv',
        'aligned_csv',
        'g2p_aligned_csv',
        'contigs_csv',
        'genome_coverage_csv',
        'genome_coverage_svg',
        'read_entropy_csv',
        'resistance_csv',
        'resistance_pdf',
        'resistance_fail_csv',
        'resistance_consensus_csv',
        'mutations_csv',
    )
    file_settings = {field: getattr(resolved_args, field)
                     for field in file_fields}
    processed = Sample(**file_settings,
                       scratch_path=scratch_path,
                       skip=skip)

    scorer = Pssm()
    processed.process(scorer, use_denovo=use_denovo)
    return processed


def single_sample(args):
args.max_active = 1
resolved_args = MiCallArgs(args)
Expand All @@ -625,7 +586,10 @@ def single_sample(args):
output_path=args.results_folder,
scratch_path=scratch_path,
is_denovo=args.denovo)
sample = Sample(fastq1=resolved_args.fastq1, scratch_path=scratch_path)
sample = Sample(fastq1=resolved_args.fastq1,
fastq2=resolved_args.fastq2,
bad_cycles_csv=resolved_args.bad_cycles_csv,
scratch_path=scratch_path)
sample_group = SampleGroup(sample)
sample_groups.append(sample_group)

Expand All @@ -647,8 +611,14 @@ def hcv_sample(args):
output_path=args.results_folder,
scratch_path=scratch_path,
is_denovo=args.denovo)
main_sample = Sample(fastq1=resolved_args.fastq1, scratch_path=scratch_path)
midi_sample = Sample(fastq1=midi_args.fastq1, scratch_path=midi_scratch_path)
main_sample = Sample(fastq1=resolved_args.fastq1,
fastq2=resolved_args.fastq2,
bad_cycles_csv=resolved_args.bad_cycles_csv,
scratch_path=scratch_path)
midi_sample = Sample(fastq1=midi_args.fastq1,
fastq2=midi_args.fastq2,
bad_cycles_csv=resolved_args.bad_cycles_csv,
scratch_path=midi_scratch_path)
main_and_midi = SampleGroup(main_sample, midi_sample)
sample_groups.append(main_and_midi)

Expand Down Expand Up @@ -1127,8 +1097,6 @@ def collate_samples(run_info):
except OSError:
# Guess it wasn't empty.
pass
zip_folder(run_info.output_path, 'resistance_reports')
zip_folder(run_info.output_path, 'coverage_maps')


# noinspection PyTypeChecker,PyUnresolvedReferences
Expand Down

0 comments on commit fed2058

Please sign in to comment.