-
Notifications
You must be signed in to change notification settings - Fork 4
/
complexity.py
110 lines (87 loc) · 2.93 KB
/
complexity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""
XPRESSpipe
An alignment and analysis pipeline for RNAseq data
alias: xpresspipe
Copyright (C) 2019 Jordan A. Berg
jordan <dot> berg <at> biochem <dot> utah <dot> edu
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <https://www.gnu.org/licenses/>.
"""
from __future__ import print_function
"""IMPORT DEPENDENCIES"""
import os
import sys
from math import ceil
"""IMPORT INTERNAL DEPENDENCIES"""
from .parallel import parallelize
from .compile import compile_complexity_metrics
from .utils import add_directory, get_files
__path__ = str(os.path.dirname(os.path.realpath(__file__))) + '/'
def run_complexity(args):
    """Measure library complexity for a single alignment file.

    Builds an ``Rscript`` command line for ``Rcomplexity.r`` (located next to
    this module; the script runs dupRadar in R) and executes it through the
    shell.

    Args:
        args: Indexable pair where ``args[0]`` is the alignment file name and
            ``args[1]`` is the run-configuration dictionary. Keys read:
            'type', 'input', 'gtf', 'threads', 'complexity', and 'log'.

    Returns:
        None. The exit status of the shell command is not inspected.
    """
    bam_name, args_dict = args[0], args[1]

    # The R script receives the paired-end flag as a literal 'True'/'False'
    # string; only a sequencing type of 'PE' (case-insensitive) is paired.
    paired_flag = 'True' if str(args_dict['type']).upper() == 'PE' else 'False'

    # Output table is named after the input file minus its final extension
    stem = str(bam_name).rsplit('.', 1)[0]

    # Positional arguments, in the order they are placed on the command line
    arguments = [
        'Rscript',
        str(__path__) + 'Rcomplexity.r',
        str(args_dict['input']) + str(bam_name),
        str(args_dict['gtf']),
        paired_flag,
        str(args_dict['threads']),
        str(args_dict['complexity']) + 'metrics/' + stem + '_metrics.txt',
    ]

    # NOTE(review): the log suffix is appended with no separator, so
    # args_dict['log'] presumably carries its own leading space and shell
    # redirection -- confirm against where 'log' is populated.
    os.system(' '.join(arguments) + str(args_dict['log']))
def make_complexity(args_dict):
    """Manager for running complexity analysis and summary plotting.

    Steps performed:
      1. Register the 'complexity' and 'metrics' directories through
         ``add_directory`` (presumably this creates them and stores their
         paths back into ``args_dict`` -- confirm in ``utils``).
      2. Collect the files in ``args_dict['input']`` matching
         'dedupMarked.bam' or 'UMImarked.bam' and run ``run_complexity`` on
         each one via ``parallelize``.
      3. Collect the resulting '_metrics.txt' files and hand them to
         ``compile_complexity_metrics`` in batches of at most six files, one
         call per batch, named 'library_complexity_1', '_2', ...

    Args:
        args_dict: Run-configuration dictionary. Keys read directly here are
            'input', 'complexity', and 'experiment'.

    Returns:
        None.
    """
    print('\nRunning complexity analysis of sequence libraries...')

    args_dict = add_directory(
        args_dict,
        'output',
        'complexity')
    args_dict = add_directory(
        args_dict,
        'complexity',
        'metrics')

    # Get BAM files
    files = get_files(
        args_dict['input'],
        ['dedupMarked.bam', 'UMImarked.bam'])

    # Run the complexity analysis on each BAM file
    parallelize(
        run_complexity,
        files,
        args_dict,
        mod_workers = False)

    # Get metrics to plot
    metrics_dir = str(args_dict['complexity']) + 'metrics/'
    files = get_files(
        metrics_dir,
        ['_metrics.txt'])

    # Split the metrics files into batches, one summary per batch. Stepping
    # the slice start avoids ceil()/division entirely, so the final partial
    # batch is kept regardless of the interpreter's division semantics.
    batch_size = 6
    file_lists = [
        files[start:start + batch_size]
        for start in range(0, len(files), batch_size)]

    # Batch numbering in the output name starts at 1
    for batch_number, file_list in enumerate(file_lists, 1):
        compile_complexity_metrics(
            metrics_dir,
            file_list,
            'RPK',
            'dupRate',
            'library_complexity_' + str(batch_number),
            args_dict['experiment'],
            args_dict['complexity'])