-
Notifications
You must be signed in to change notification settings - Fork 0
/
getBarPlot.m
136 lines (128 loc) · 5.58 KB
/
getBarPlot.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
% GETBARPLOT  Compare per-gene expression between two patient groups.
%
%   tbl = getBarPlot(numericDataA, numericDataB, geneNames, titlePlot, ...
%       inputFile, geneName, outputFile, analysisType, printAllFigures, ...)
%
%   Rows of numericDataA / numericDataB are genes and columns are patients
%   (means are taken along dimension 2 and the patient counts reported in
%   the table are size(..., 2)).
%     numericDataA - patients with high expression of / a mutation in the
%                    driver gene (geneName).
%     numericDataB - patients with low expression of / no mutation in the
%                    driver gene.
%   For every gene a two-sample t-test (ttest2) between the two groups
%   yields one p-value.
%
%   analysisType ('mutation' or 'expression') determines whether the
%   patients were split by mutation/non-mutation in the driver gene or by
%   its high/low expression. For 'expression' one extra trailing argument
%   (the top percentage used for the split) is required; it is forwarded
%   to createTable.
%
%   When printAllFigures == true, two bar plots are produced through
%   barPlot: one with all genes and one restricted to genes with
%   p < 0.05. The second plot is skipped when no gene is significant.
%   The figures are created invisible by barPlot and are not closed here.
%
%   The summary table tbl (a cell array built by createTable) is returned
%   and also written to outputFile with xlswrite.
function tbl = getBarPlot(numericDataA, numericDataB, geneNames, ...
    titlePlot, inputFile, geneName, outputFile, analysisType, ...
    printAllFigures, varargin)
    % Fail fast on an unsupported analysis type; otherwise the switch
    % below would fall through and leave tbl undefined.
    if ~any(strcmp(analysisType, {'expression', 'mutation'}))
        error('getBarPlot:invalidAnalysisType', ...
            'analysisType must be ''expression'' or ''mutation''.');
    end
    if strcmp(analysisType, 'expression') && isempty(varargin)
        error('getBarPlot:missingTopPercentage', ...
            ['analysisType ''expression'' requires the top percentage ', ...
            'as an additional trailing argument.']);
    end

    dataMeanA = mean(numericDataA, 2);
    dataMeanB = mean(numericDataB, 2);
    % ttest2 works column-wise, so transpose to get one test per gene.
    [~, pvalues] = ttest2(numericDataA', numericDataB');

    if printAllFigures == true
        % Plot with all genes.
        barPlot(dataMeanA, dataMeanB, numericDataA, numericDataB, ...
            geneNames, titlePlot, geneName, analysisType);

        % Plot with significant genes only (skipped when there are none,
        % since there would be nothing to draw).
        isSignificant = (pvalues' < 0.05);
        if any(isSignificant)
            titlePlotSignificant = sprintf('%s - significant genes only', ...
                titlePlot);
            barPlot(dataMeanA(isSignificant), dataMeanB(isSignificant), ...
                numericDataA(isSignificant, :), ...
                numericDataB(isSignificant, :), ...
                geneNames(isSignificant), titlePlotSignificant, ...
                geneName, analysisType);
        end
    end

    switch analysisType
        case 'expression'
            % varargin is forwarded as a single cell argument; createTable
            % unwraps it to read the top percentage.
            tbl = createTable(geneName, geneNames, inputFile, numericDataA, ...
                numericDataB, dataMeanA, dataMeanB, pvalues, analysisType, ...
                varargin);
        case 'mutation'
            tbl = createTable(geneName, geneNames, inputFile, numericDataA, ...
                numericDataB, dataMeanA, dataMeanB, pvalues, analysisType);
    end
    xlswrite(outputFile, tbl);
end
% BARPLOT  Grouped bar graph of two patient groups, one bar pair per gene.
%
%   barPlot(dataMeanA, dataMeanB, numericDataA, numericDataB, ...
%       geneNames, titlePlot, geneName, analysisType)
%
%   Groups are split based on mutation/non-mutation or expression high/low
%   in the main gene (geneName).
%     dataA is always high/mutated.
%     dataB is low/non-mutated.
%   dataMeanA / dataMeanB are the bar heights; numericDataA / numericDataB
%   (genes in rows, patients in columns) are used for the error bars, which
%   show the standard error of the mean: std across patients divided by
%   sqrt(number of patients in that group).
%   analysisType ('expression' or 'mutation') selects the legend labels.
%   The figure is created with 'visible' set to 'off' and is left open.
function barPlot(dataMeanA, dataMeanB, numericDataA, numericDataB, ...
    geneNames, titlePlot, geneName, analysisType)
    % Resolve legend labels first so an invalid analysisType fails before
    % any figure is created.
    switch analysisType
        case 'expression'
            groupA = sprintf('High %s', geneName);
            groupB = sprintf('Low %s', geneName);
        case 'mutation'
            groupA = sprintf('Mutated %s', geneName);
            groupB = sprintf('Non-Mutated %s', geneName);
        otherwise
            error('barPlot:invalidAnalysisType', ...
                'analysisType must be ''expression'' or ''mutation''.');
    end

    barData = cat(2, dataMeanA, dataMeanB);
    numBars = length(geneNames);
    figure('Name', titlePlot, 'visible', 'off');
    hBar = bar(1:numBars, barData);
    % x-positions of the individual bars inside each group, used to centre
    % the error bars on the bars.
    bar1Coor = bsxfun(@plus, hBar(1).XData, [hBar(1).XOffset]');
    bar2Coor = bsxfun(@plus, hBar(2).XData, [hBar(2).XOffset]');

    % Standard error of the mean per gene: the sample size is the number
    % of patients (columns), not the number of genes.
    semA = std(numericDataA, 0, 2) ./ sqrt(size(numericDataA, 2));
    semB = std(numericDataB, 0, 2) ./ sqrt(size(numericDataB, 2));
    hold on
    errorbar(bar1Coor, dataMeanA, semA, '.');
    errorbar(bar2Coor, dataMeanB, semB, '.');
    hold off

    xticks(1:numBars);
    xticklabels(geneNames);
    xtickangle(90);
    legend(groupA, groupB, 'Location', 'northeastoutside');
    xlabel('Gene Names');
    ylabel('Expression of Genes');
    title(titlePlot);
end
% CREATETABLE  Build the Excel summary table of the per-gene t-test results.
%
%   tbl = createTable(geneName, geneNames, inputFile, numericDataA, ...
%       numericDataB, dataMeanA, dataMeanB, pvalues, analysisType, ...)
%
%   Returns a 7-column cell array:
%     row 1    - inputFile
%     row 2    - 'data split according to' and the driver gene name
%     rows 3-4 - labels and values for the group sizes (and, for
%                'expression', the top/low percentages)
%     row 5    - column headers
%     rows 6+  - one row per gene: name, mean of group A, standard error of
%                group A, mean of group B, standard error of group B,
%                p-value, significance flag (p < 0.05); sorted by p-value.
%
%   numericDataA / numericDataB hold genes in rows and patients in columns;
%   the number of patients in a group is size(..., 2). The standard error
%   is std across patients divided by sqrt(number of patients).
%
%   For analysisType 'expression' one extra argument carrying the top
%   percentage is required. It may be the value itself or a cell wrapping
%   it (the latter is what getBarPlot passes when forwarding its varargin).
function tbl = createTable(geneName, geneNames, inputFile, numericDataA, ...
    numericDataB, dataMeanA, dataMeanB, pvalues, analysisType, varargin)
    numPatientsA = size(numericDataA, 2);
    numPatientsB = size(numericDataB, 2);
    geneNameChar = sprintf('%s', geneName);

    % First 2 rows
    tbl = {inputFile, '', '', '', '', '', ''; ...
        'data split according to', geneNameChar, '', '', '', '', ''};

    % 3rd, 4th and 5th rows depend on how the patients were split.
    switch analysisType
        case 'expression'
            topPerc = [];
            if ~isempty(varargin)
                topPerc = varargin{1};
            end
            % Accept the percentage wrapped in a cell (forwarded varargin)
            % as well as passed directly.
            if iscell(topPerc) && ~isempty(topPerc)
                topPerc = topPerc{1};
            end
            if isempty(topPerc)
                error('createTable:missingTopPercentage', ...
                    ['analysisType ''expression'' requires the top ', ...
                    'percentage as an additional argument.']);
            end
            numHighPatientsStr = sprintf('no. of high-%s patients', geneName);
            numLowPatientsStr = sprintf('no. of low-%s patients', geneName);
            tbl = [tbl; {'top percentage', numHighPatientsStr, ...
                'low percentage', numLowPatientsStr, '', '', ''}];
            tbl = [tbl; {topPerc, numPatientsA, 100 - topPerc, ...
                numPatientsB, '', '', ''}];
            tbl = [tbl; {'Gene Names', 'mean high expression in main gene', ...
                'standard error for patients with high expression', ...
                'mean low expression in main gene', ...
                'standard error for patients with low expression', ...
                'P-value', 'Significant'}];
        case 'mutation'
            numMutatedPatientsStr = sprintf('no. of mutated-%s patients', ...
                geneName);
            numNonMutatedPatientsStr = sprintf( ...
                'no. of non mutated-%s patients', geneName);
            tbl = [tbl; {numMutatedPatientsStr, numNonMutatedPatientsStr, ...
                '', '', '', '', ''}];
            tbl = [tbl; {numPatientsA, numPatientsB, ...
                '', '', '', '', ''}];
            tbl = [tbl; {'Gene Names', ...
                'mean expression of group with mutation in main gene', ...
                'standard error for patients with mutation', ...
                'mean expression of group without mutaion in main gene', ...
                'standard error for patients without mutation', ...
                'P-value', 'Significant'}];
        otherwise
            error('createTable:invalidAnalysisType', ...
                'analysisType must be ''expression'' or ''mutation''.');
    end

    % The rest of the rows: one per gene.
    % Standard error of the mean uses the number of patients (columns) as
    % the sample size, not the number of genes.
    semA = std(numericDataA, 0, 2) ./ sqrt(numPatientsA);
    semB = std(numericDataB, 0, 2) ./ sqrt(numPatientsB);
    isSignificant = (pvalues < 0.05);
    % (:) forces column orientation so the pieces concatenate regardless
    % of whether the inputs arrive as rows or columns.
    geneRows = [geneNames(:), num2cell(dataMeanA(:)), num2cell(semA), ...
        num2cell(dataMeanB(:)), num2cell(semB), num2cell(pvalues(:)), ...
        num2cell(isSignificant(:))];
    geneRows = sortrows(geneRows, 6); % sort by p-values
    tbl = [tbl; geneRows];
end