-
Notifications
You must be signed in to change notification settings - Fork 0
/
align.py
55 lines (49 loc) · 1.25 KB
/
align.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import sys
import re
from convert_to_amino import *
from get_score import *
from affine import *
# The script takes two positional arguments: the DNA sequence file and the
# protein sequence file. Fail with a usage message instead of a bare
# IndexError when either one is missing.
if len(sys.argv) < 3:
    sys.exit("usage: {} <dna_fasta> <protein_fasta>".format(sys.argv[0]))

# Open the DNA sequence file (first argument).
dna_seq_file = sys.argv[1]
f = open(dna_seq_file, "r")

# Open the protein sequence file (second argument).
protein_seq_file = sys.argv[2]
prot_file = open(protein_seq_file, "r")

# Open the codon -> amino-acid letter translation table and the BLOSUM62
# scoring matrix. Both paths are relative, so they are resolved against the
# current working directory, not the directory containing this script.
codons = open("codon.txt", "r")
blosum = open("blosum62.txt", "r")
# Build the scoring table: one row per line of the BLOSUM62 file, each row
# being the list of comma-separated fields of that line with surrounding
# whitespace removed. Fields are kept as strings; no numeric conversion is
# done here.
# Entering the already-open handle as a context manager guarantees it is
# closed once the file has been consumed, even if parsing raises.
with blosum:
    table = [row.strip().split(",") for row in blosum]
# Build the codon lookup used for DNA conversion. Each row of the codon file
# is tab-separated: column 0 becomes the dictionary key and column 2 the
# value. A column-2 value of "O" is stored as "." instead
# (presumably the stop-codon marker -- confirm against convert_to_amino).
codon_dict = {}
with codons:  # closes the handle once every row is read, even on error
    for row in codons:
        # Remove the line terminator before splitting so it can never end up
        # attached to the last field and defeat the "O" comparison below.
        cols = row.rstrip("\r\n").split("\t")
        if len(cols) < 3:
            # Blank or truncated row: there is no column 2 to map, so skip
            # it rather than fail with an IndexError.
            continue
        codon_dict[cols[0]] = "." if cols[2] == "O" else cols[2]
# Read the DNA and protein FASTA files.
def _read_fasta(handle):
    """Read one record from an open FASTA-style text stream and close it.

    The first line of the stream is taken as the record name and returned
    verbatim (line terminator included). Every remaining line is stripped of
    surrounding whitespace and concatenated into a single sequence string.
    Any later header lines are not treated specially, so the input is
    expected to hold a single record.

    Args:
        handle: An open, line-iterable text file object. It is closed before
            this function returns, including when reading fails.

    Returns:
        A ``(name, sequence)`` tuple; both are empty strings for an empty
        stream.
    """
    with handle:
        # next() with a default keeps the empty-file case from raising.
        name = next(handle, "")
        # join() builds the sequence in one pass instead of repeated +=.
        sequence = "".join(line.strip() for line in handle)
    return name, sequence


dna_name, dna_seq = _read_fasta(f)
prot_name, prot_seq = _read_fasta(prot_file)
# Convert the DNA sequence with the codon dictionary; the first three entries
# of the result are handed to the aligner
# (presumably the three forward reading frames -- confirm in convert_to_amino).
reading_frames = convert_to_amino(dna_seq, codon_dict)
frame_one = reading_frames[0]
frame_two = reading_frames[1]
frame_three = reading_frames[2]

# Align the three frames against the protein sequence using the scoring
# table. NOTE(review): the meaning of the trailing 1, 1, 1 arguments is not
# visible from this file -- check smith_waterman_gotoh's signature in affine.
smith_waterman_gotoh(
    frame_one,
    frame_two,
    frame_three,
    prot_seq,
    dna_seq,
    table,
    1,
    1,
    1,
)