-
Notifications
You must be signed in to change notification settings - Fork 0
/
log_reg_GSE64016.R
56 lines (43 loc) · 2.01 KB
/
log_reg_GSE64016.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#Differential expression with logistic regression (Ntranos, et al.)
library("MultiAssayExperiment")
library("SummarizedExperiment") #for rowData()
library("lmtest") #for lrtest()
##load data
data1 = readRDS("data/GSE63818-GPL16791.rds")
#download from "http://imlspenticton.uzh.ch/robinson_lab/conquer/data-mae/GSE63818-GPL16791.rds"
##extract
data1_tx = experiments(data1)[["tx"]] #transcripts
cData = colData(data1) #column info
fData = rowData(data1@ExperimentList$tx) #transcript info
load(sca_name) #from MAST_GSE64016.R
tpm = t(assays(data1_tx)[["TPM"]]) #design matrix
cell_select = cData$geo_accession %in% getwellKey(sca)
tpm = tpm[cell_select, as.character(mcols(sca)$transcript)] #select genes
y = as.numeric(droplevels(cData$source_name_ch1[cell_select])) - 1 #0/1 binary response vector
y_samp = sample(y)
load(sets_name) #from MAST_GSE64016.R
regs = rep(NA, length(sets)) #init vector for storing results
#LR test for full model vs just mean
for (i in 1:length(sets)){
regs[i] = lrtest(glm(y~.,data = data.frame(tpm[,sets[[i]]]), family = binomial))[2,5]
} #replace y_samp with y (or vice versa)
adjusted = p.adjust(regs, 'fdr')
out1 = data.frame(gene = names(sets), p = regs, fdr = adjusted) #output
out1 = out1[out1$fdr<0.05,] #select top genes
hist(regs, main = "Distribution of logistic regression p-values", xlab = "p")
#subsample
y_samp2 = y_samp[1:60]
regs2 = rep(NA, length(sets)) #init vector for storing results
#LR test for full model vs just mean
for (i in 1:length(sets)){
regs2[i] = lrtest(glm(y_samp2~.,data = data.frame(tpm[1:60,sets[[i]]]), family = binomial))[2,5]
}
hist(regs2, main = "Distribution of logistic regression p-values", xlab = "p")
##all cells
y2 = sample(c(0,1), size = 372, replace = T)
regs3 = rep(NA, length(sets)) #init vector for storing results
#LR test for full model vs just mean
for (i in 1:length(sets)){
regs3[i] = lrtest(glm(y2~.,data = data.frame(tpm[,sets[[i]]]), family = binomial))[2,5]
}
hist(regs3, main = "Distribution of logistic regression p-values", xlab = "p")