Final Functions

MelodyJIN-Y · Oct 31, 2021 · 22bf612 · 22bf612
1 parent 62c0a9b
commit 22bf612
Show file tree

Hide file tree

Showing 7 changed files with 124 additions and 151 deletions.
diff --git a/final_files/computeAUC.R b/final_files/computeAUC.R
@@ -0,0 +1,45 @@
+computeAUC<- function(result, markers){
+  auc.values<- as.data.frame(matrix(0, nrow=2, ncol=ncol(result$obs.pval)+1))
+  colnames(auc.values)<- c("test", colnames(result$obs.pval))
+  auc.values$test<- c("limma","permutation")
+  for (ct in 1:ncol(result$obs.pval)){
+    curr.ct.name<- colnames(result$obs.pval)[ct]
+    if (curr.ct.name %in% names(markers)){
+      resp=list()
+
+      resp.g <- as.vector(ifelse(test=(row.names(result$perm.pval.adj) %in% markers[[curr.ct.name]]),  
+                                 yes=1, no=0))
+      resp[[curr.ct.name]]<-resp.g
+
+      pred.perm<-result$perm.pval.adj[,curr.ct.name]
+      pred.limma<-result$obs.pval.adj[,curr.ct.name]
+
+      roc_perm <-roc( response=resp[[curr.ct.name]], predictor= pred.perm,plot=FALSE, 
+                      legacy.axes=TRUE, percent=FALSE)
+
+
+      roc_limma <-roc( response =resp[[curr.ct.name]], predictor=pred.limma, plot=FALSE, 
+                       legacy.axes=TRUE, percent=FALSE) 
+
+      rocp_df <- as.data.frame(cbind("sensitivity" = roc_perm$sensitivities, 
+                                     "FPP" = 1-roc_perm$specificities, 
+                                     "AUC" = roc_perm$auc, 
+                                     "test.name" =rep("permutation", length(roc_perm$sensitivities))))
+      rocl_df <- as.data.frame(cbind("sensitivity" = roc_limma$sensitivities, 
+                                     "FPP" = 1- roc_limma$specificities, 
+                                     "AUC" = roc_limma$auc, 
+                                     "test.name" =rep("limma", length(roc_limma$sensitivities))))
+
+      roc_df<- rbind(rocp_df, rocl_df)
+      roc_df$sensitivity<- as.numeric(roc_df$sensitivity)
+      roc_df$FPP<- as.numeric(roc_df$FPP)
+      roc_df$AUC<- as.numeric(roc_df$AUC)
+      auc.values[auc.values$test=="limma",curr.ct.name]<- roc_limma$auc
+      auc.values[auc.values$test=="permutation",curr.ct.name]<- roc_perm$auc
+    }else{ 
+      auc.values[auc.values$test=="limma",curr.ct.name]<- 0
+      auc.values[auc.values$test=="permutation",curr.ct.name]<- 0
+    }
+  }
+  return (auc.values)
+}
diff --git a/final_files/computeComparison.R b/final_files/computeComparison.R
@@ -31,8 +31,8 @@ computeComparison<- function(obs.p, perm.p, marker.genes,
       summ[i, "num.cells"]<- cell_num[i]
       summ[i+ncol(obs.p), "num.cells"]<- cell_num[i]
 
-      summ[i, "num.true.marker"]<- length(marker.genes[[colnames(obs.p)[i]]])
-      summ[i+ncol(obs.p), "num.true.marker"]<- length(marker.genes[[colnames(obs.p)[i]]])
+      summ[i, "num.true.marker"]<- length(unique.marker.genes[[colnames(obs.p)[i]]])
+      summ[i+ncol(obs.p), "num.true.marker"]<- length(unique.marker.genes[[colnames(obs.p)[i]]])
 
       # positive genes 
       limma_upreg_genes =  row.names(obs.p[which(obs.p[,i] < pvalue.cutoff),])
@@ -48,7 +48,7 @@ computeComparison<- function(obs.p, perm.p, marker.genes,
       summ[i, "overlap"]<-overlap
       summ[i+ncol(obs.p), "overlap"]<- overlap
 
-      if (is.null(marker.genes) ==TRUE){
+      if (is.null(marker.genes[[colnames(obs.p)[i]]]) ==TRUE){
 
         summ[i, c("TP", "precision","sensitivity","F1","TP.overlap","AUC")]<- 0
         summ[i+ncol(obs.p), c("TP", "precision","sensitivity","F1","TP.overlap","AUC")]<- 0

diff --git a/final_files/pPermutation.R b/final_files/pPermutation.R
@@ -2,7 +2,7 @@
 # Input
 # 
 pPermutation<- function(cm, groups, perm.size = 1000, statistic = "os.t",  
-                 lfc.cutoff=0.1, n.cores=1){
+                 lfc.cutoff=0.1, n.cores=1, seed_value=97){
 
   logcounts.all <- lognormCounts(cm, log=TRUE, prior.count=0.5) 
   all.bct <- factor(groups)
@@ -17,13 +17,17 @@ pPermutation<- function(cm, groups, perm.size = 1000, statistic = "os.t",
   tic.clearlog()
   tic(paste(perm.size, "permutation total time"))
   doParallel::registerDoParallel(cl = my.cluster)
+  set.seed(seed_value)
+  sds<- sample(1:200000, size= perm.size, replace = T)
   # for (i in 1:perm.size) {
   t.perm.array<-foreach (i = 1:perm.size) %dopar% {
     # permutate the all.bct only 
+    set.seed(sds[i])
     all.bct.shuffled <- sample(all.bct, replace = FALSE, size =length(all.bct))
 
     #########################
-    # Limma DE framework written by Belinda Phipson 
+    # Limma DE framework written by Belinda Phipson
+
     design <- model.matrix(~0+all.bct.shuffled)
     colnames(design)[1:(length(levels(all.bct.shuffled)))] <- levels(all.bct.shuffled)
     mycont <- matrix(0,

diff --git a/final_files/runPermutation.R b/final_files/runPermutation.R
@@ -1,6 +1,6 @@
 runPermuation<-function(cm, group, perm.size, statistic = "os.t", 
                              lfc.cutoff=0.1, n.cores=1, perm.arrays = NULL, 
-                        normalise=FALSE){
+                        normalise=FALSE, seed_value=97){
   message(paste("Statistic = ", statistic))
   #obs_res<- computeObservation(cm, factor(group),statistic= statistic, lfc.cutoff=lfc.cutoff)
   tm1 <- system.time(
@@ -24,7 +24,7 @@ runPermuation<-function(cm, group, perm.size, statistic = "os.t",
     # permutation stats
     perm_stat <- pPermutation(cm, group, perm.size=perm.size, 
                               statistic= statistic,lfc.cutoff=lfc.cutoff, 
-                              n.cores=n.cores)
+                              n.cores=n.cores, seed_value=seed_value)
     # permutation stats
     perm.arrays<- perm_stat$t.perm
   }

diff --git a/lognormCounts.R b/lognormCounts.R
@@ -0,0 +1,29 @@
+#' Title
+#'
+#' @param x
+#' @param log
+#' @param prior.count
+#' @param lib.size
+#'
+#' @return
+#' @export
+#'
+#' @examples
+lognormCounts <- function(x, log = TRUE, prior.count = 0.5)
+  # Function to log normalise a counts matrix to median library size
+  # Input is counts matrix
+  # Genes as rows, cells as columns
+  # Belinda Phipson
+  # 26 February 2020
+{
+  x <- as.matrix(x)
+  lib.size <- colSums(x)
+  M <- median(lib.size)
+  if(log){
+    prior.count.scaled <- lib.size/mean(lib.size)*prior.count
+    lib.size <- lib.size + 2*prior.count.scaled
+    log2(t((t(x)+prior.count.scaled)/lib.size*M))
+  }
+  else t(t(x)/lib.size*M)
+}
+
diff --git a/runComparison.R b/runComparison.R
@@ -1,14 +1,21 @@
-runComparison<-function(sim, perm.size, statistic = "os.t", lfc.cutoff=0.1, n.cores=1){
+runComparison<-function(sim, perm.size, statistic = "os.t", sim_perm_arrays,
+                               lfc.cutoff=0.1, n.cores=1, true.upreg, ){
   message(paste("Statistic = ", statistic))
   obs_res<- computeObs(counts(sim), factor(sim$Group),statistic= statistic, lfc.cutoff=lfc.cutoff)
   obs.stat<- obs_res$obs.stat
   obs.pval<- obs_res$obs.pval
   obs.pval<- as.data.frame(obs.pval)
-
+  tm1 <- system.time(
+    {
+      obs_res<- computeObs(counts(sim), factor(sim$Group),statistic= statistic, lfc.cutoff=lfc.cutoff)
+    })
+
   if (n.cores>1){
     message(paste("Running", perm.size, "permutation with", n.cores, "cores in parallel"))
+    message(paste("Estimated waiting time: ", perm.size*tm1/n.cores))
   }else{
     message(paste("Running", perm.size, "permutation in sequential"))
+    message(paste("Estimated waiting time: ", perm.size*tm1))
   }
   # run permutation
   #sim_perm_arrays <- tPerm(counts(sim), sim$Group, perm.size=perm.size, 
@@ -56,11 +63,11 @@ runComparison<-function(sim, perm.size, statistic = "os.t", lfc.cutoff=0.1, n.co
   rownames(obs.pval) <- row.names(counts(sim))
   colnames(obs.pval) <- sim_perm_arrays$ct.names
 
-  true.sup.g1<- row.names(sim)[sim@rowRanges@elementMetadata$DEFacGroup1 > 1]
-  true.sup.g2<- row.names(sim)[sim@rowRanges@elementMetadata$DEFacGroup2 > 1]
-  true.sup.g1<- intersect(true.sup.g1, row.names(sim))
-  true.sup.g2<- intersect(true.sup.g2, row.names(sim))
-  true.upreg = list(true.sup.g1, true.sup.g2)
+  #true.sup.g1<- row.names(sim)[sim@rowRanges@elementMetadata$DEFacGroup1 > 1]
+  #true.sup.g2<- row.names(sim)[sim@rowRanges@elementMetadata$DEFacGroup2 > 1]
+  #true.sup.g1<- intersect(true.sup.g1, row.names(sim))
+  #true.sup.g2<- intersect(true.sup.g2, row.names(sim))
+  #true.upreg = list(true.sup.g1, true.sup.g2)
 
   group1.df<- obs_res$summary.tb[,c("AveExpr", "logFC.g1")] 
   group2.df<- obs_res$summary.tb[,c("AveExpr", "logFC.g2")]