Skip to content

Commit

Permalink
Using biopython for parsing FASTA, and FASTA-like output.
Browse files Browse the repository at this point in the history
  • Loading branch information
ggirelli committed Apr 4, 2018
1 parent 51e37b3 commit 25e5e89
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 29 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [Unreleased]
### Added
- Version control of scripts and `--version` flag.
- `melt_duplex` output in FASTA format.

### Changed
- FASTA input parsed with biopython.

### Fixed
- Proper script help page formatting.
Expand Down
65 changes: 37 additions & 28 deletions bin/melt_duplex
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@
# DEPENDENCIES =================================================================

import argparse
from Bio import SeqIO
import os
import re
import sys

import oligo_melting as OligoMelt
Expand All @@ -40,8 +42,7 @@ import oligo_melting as OligoMelt
parser = argparse.ArgumentParser(
description = '''
Calculate melting temperature of a DNA duplex at provided [oligo],
[Na+], [Mg2+]. Either provide an oligo sequence or a file with one oligo
per line (and use -F option). References:
[Na+], [Mg2+]. Either provide an oligo sequence or a FASTA file. References:
[1] Freier et al, PNAS(83), 1986;
[2] Sugimoto et al, Biochemistry(34), 1995.
[3] Allawi & Santalucia, Biochemistry(36), 1997;
Expand Down Expand Up @@ -112,6 +113,10 @@ parser.add_argument('-C', '--celsius',
dest = 'celsius', action = 'store_const',
const = True, default = False,
help = 'Output temperature in Celsius degrees. Default: Kelvin')
# Boolean switch stored as args.fastaLike (False unless -F/--fasta-like is
# given); when set, results are printed as FASTA records instead of the
# tab-separated table.
parser.add_argument('-F', '--fasta-like',
dest = 'fastaLike', action = 'store_const',
const = True, default = False,
help = 'Output in FASTA format.')
parser.add_argument('-v', '--verbose',
dest = 'verbose', action = 'store_const',
const = True, default = False,
Expand Down Expand Up @@ -157,6 +162,11 @@ fa_conc = args.faconc[0]
fa_mode = args.fa_mode[0]
fa_mval_s = args.fa_mvalue[0]

# Output format
# The FASTA switch also drives the `silent` flag: it is True exactly when
# FASTA output was requested, False otherwise.
fasta_like = args.fastaLike
silent = bool(fasta_like)

# Additional checks ------------------------------------------------------------

# Check proper curve step/range pair
Expand All @@ -182,43 +192,42 @@ data = {
'curve_step' : curve_step,
'curve_range' : curve_range,
'curve_outpath' : curve_outpath,
'silent' : False
'silent' : silent
}

# CALCULATE --------------------------------------------------------------------

# Column header of the tab-separated report; emitted only when neither the
# verbose flag nor the silent flag is set.
if not (is_verbose or silent):
    print("oligo_name\tdG\tdH\tdS\tTm\tSeq")

if not use_file:
# Single sequence case
data['name'] = 'seq'
data['seq'] = seq
OligoMelt.Duplex.calc_tm(**data)
output = OligoMelt.Duplex.calc_tm(**data)
if fasta_like:
print(">seq tm:%.2f;\n%s" % (output[4], seq))
else:
print("%s\t%f\t%f\t%f\t%f\t%s" % output)
else:
if not is_verbose:
print("oligo_name\tdG\tdH\tdS\tTm\tSeq")

# Input file case
curr_head = ""
curr_seq = ""
with open(seq) as fin:
for row in fin:
if ">" == row[0]:
if not 0 == len(curr_seq) and not 0 == len(curr_head):
# Calculate before moving to the next item
if " " in curr_head: curr_head = curr_head.split(" ")[0]
data['name'] = curr_head
data['seq'] = curr_seq
OligoMelt.Duplex.calc_tm(**data)

curr_head = row[1:].strip()
curr_seq = ""
with open(seq, "r") as fin:
for record in SeqIO.parse(fin, "fasta"):
data['name'] = record.name
data['seq'] = str(record.seq)
output = OligoMelt.Duplex.calc_tm(**data)

if fasta_like:
    # FASTA output: the record header carries the melting temperature
    # (output[4], the Tm column) as a "tm:<value>;" field.
    # An existing field is overwritten in place using the same "tm:" form
    # that is appended otherwise. The previous replacement wrote "tm=",
    # which the detection pattern no longer matches, so re-processing the
    # output would have appended a second field.
    tm_field = 'tm:%.2f;' % output[4]
    header, n_replaced = re.subn(r'tm:.*?;', tm_field, record.description)
    if not n_replaced:
        # No tm field in the original description: append one.
        header = "%s %s" % (record.description, tm_field)
    print(">%s\n%s" % (header, record.seq))
else:
curr_seq += row.strip()
print("%s\t%f\t%f\t%f\t%f\t%s" % output)

# Calculate for last item
if " " in curr_head: curr_head = curr_head.split(" ")[0]
data['name'] = curr_head
data['seq'] = curr_seq
OligoMelt.Duplex.calc_tm(**data)

# END ==========================================================================

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
],
keywords='DNA chemistry melting temperature modeling RNA salt denaturant',
packages=["oligo_melting"],
install_requires=[],
install_requires=["biopython"],
scripts=["bin/melt_duplex", "bin/melt_secstr"],
test_suite="nose.collector",
tests_require=["nose"],
Expand Down

0 comments on commit 25e5e89

Please sign in to comment.