-
Notifications
You must be signed in to change notification settings - Fork 4
/
normalizeMatrix.py
88 lines (76 loc) · 3.05 KB
/
normalizeMatrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""
XPRESSpipe
An alignment and analysis pipeline for RNAseq data
alias: xpresspipe
Copyright (C) 2019 Jordan A. Berg
jordan <dot> berg <at> biochem <dot> utah <dot> edu
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <https://www.gnu.org/licenses/>.
"""
from __future__ import print_function
"""IMPORT DEPENDENCIES"""
import os
import sys
import pandas as pd
from .xpressplot import batch_normalize, rpm, tpm, r_fpkm
"""Run normalization of count dataframe"""
def run_normalization(
        args_dict,
        sep='\t'):
    """Normalize a count table and/or run batch normalization on it.

    Parameters
    ----------
    args_dict : dict
        Run options. Keys read by this function:

        - ``'input'``: path of the count table (required whenever ``'method'``
          or ``'batch'`` is set).
        - ``'method'``: optional; one of ``RPM``, ``RPKM``, ``FPKM`` or ``TPM``
          (case-insensitive). ``None`` or absent skips sample normalization.
        - ``'gtf'``: GTF reference path, required for ``RPKM``/``FPKM``/``TPM``.
        - ``'batch'``: optional path passed as second argument to
          ``batch_normalize``. ``None`` or absent skips batch normalization.
    sep : str
        Field separator used to read the input table (default: tab).

    Returns
    -------
    None
        Results are written to disk: the normalized table is saved next to
        the input as ``<input stem>_<method>Normalized.tsv``.

    Raises
    ------
    Exception
        If ``'method'`` is not recognised, or if a GTF file is required for
        the chosen method but was not provided.
    """
    method = args_dict.get('method')
    batch = args_dict.get('batch')

    # Nothing requested: do not touch 'input' at all
    if method is None and batch is None:
        return

    input_path = str(args_dict['input'])

    # File handed to batch normalization; stays the raw input unless a
    # normalized table is written below
    batch_source = input_path

    # Run sample normalization
    if method is not None:
        counts = pd.read_csv(
            input_path,
            sep=sep,
            header=0,
            index_col=0,
            comment='#',
            low_memory=False)

        method_key = method.upper()
        gtf = args_dict.get('gtf')

        # RPM normalization
        if method_key == 'RPM':
            label = 'rpm'
            counts = rpm(counts)

        # RPKM or FPKM normalization
        elif method_key in ('RPKM', 'FPKM'):
            if gtf is None:
                raise Exception('A GTF reference file is required for RPKM and FPKM normalization')
            label = 'r_fpkm'
            counts = r_fpkm(counts, gtf)

        # TPM normalization
        elif method_key == 'TPM':
            if gtf is None:
                raise Exception('A GTF reference file is required for TPM normalization')
            label = 'tpm'
            counts = tpm(counts, gtf)

        else:
            raise Exception('Unknown \"method\" argument provided')

        # splitext only strips an extension from the final path component, so
        # dots in directory names (e.g. "./data/counts") are left intact
        output_path = os.path.splitext(input_path)[0] + '_' + label + 'Normalized.tsv'
        counts.to_csv(output_path, sep='\t')

        # Batch normalization must read the file that was just written (.tsv);
        # the previous code pointed it at a non-existent ".csv" path
        batch_source = output_path

    # Run batch normalization
    if batch is not None:
        batch_normalize(batch_source, str(batch))