Skip to content

Commit

Permalink
was CG9.py. limits seq repeats from fastq files. arg1=trunc to number…
Browse files Browse the repository at this point in the history
…, arg2=repeated string, arg3=filename.
  • Loading branch information
bdklahn committed Aug 10, 2011
1 parent ab1aa58 commit 62c2abd
Showing 1 changed file with 25 additions and 0 deletions.
25 changes: 25 additions & 0 deletions trunc_rep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#! /usr/bin/env python
import sys, screed, re

# "rep" = "repeat"

def build_repeat_pattern(rep_str, rep_num):
    """Compile a regex matching a run of MORE than rep_num copies of rep_str.

    rep_str is treated as a literal string (it is escaped), and the
    quantifier is applied to the whole repeat unit via a non-capturing group.
    Concatenating ``rep_str + '+'`` would only repeat the unit's last
    character, which is wrong for multi-character units such as "AG".

    Raises ValueError if rep_str is empty.
    """
    if not rep_str:
        raise ValueError('repeated string must not be empty')
    # A negative count behaves like 0, mirroring ``rep_str * rep_num``.
    rep_num = max(rep_num, 0)
    return re.compile('(?:%s){%d,}' % (re.escape(rep_str), rep_num + 1))


def truncate_repeats(sequence, qual, rep_pat, keep_len):
    """Shorten every rep_pat match in sequence to its first keep_len chars.

    The same character positions are removed from qual so that the sequence
    and quality strings stay aligned.  Both outputs are assembled from slices
    of the ORIGINAL strings: match offsets refer to the original sequence, so
    editing qual in place while iterating would shift later offsets.

    Returns a (sequence, qual) tuple.
    """
    seq_parts = []
    qual_parts = []
    prev_end = 0
    for match in rep_pat.finditer(sequence):
        keep_end = match.start() + keep_len
        seq_parts.append(sequence[prev_end:keep_end])
        qual_parts.append(qual[prev_end:keep_end])
        prev_end = match.end()
    seq_parts.append(sequence[prev_end:])
    qual_parts.append(qual[prev_end:])
    return ''.join(seq_parts), ''.join(qual_parts)


def main(argv=None):
    """Command-line entry point.

    argv[1] = number of repeat units to keep in each run,
    argv[2] = repeated string,
    argv[3] = input filename (opened with screed).

    Writes one four-line FASTQ record per input record to stdout.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.stderr.write(
            'usage: %s <max_repeats> <repeat_string> <filename>\n' % argv[0])
        return 1

    rep_num = max(int(argv[1]), 0)
    rep_str = argv[2]

    rep_pat = build_repeat_pattern(rep_str, rep_num)
    keep_len = rep_num * len(rep_str)

    for record in screed.open(argv[3]):
        sequence, qual = truncate_repeats(
            record.sequence, record.accuracy, rep_pat, keep_len)
        # sys.stdout.write works on both Python 2 and 3, unlike the
        # print statement.
        sys.stdout.write('@%s\n%s\n+\n%s\n' % (record.name, sequence, qual))
    return 0


if __name__ == '__main__':
    sys.exit(main())

0 comments on commit 62c2abd

Please sign in to comment.