Skip to content

Commit

Permalink
volcano plots edited
Browse files Browse the repository at this point in the history
  • Loading branch information
seanyoon777 committed May 4, 2023
1 parent 4934fc7 commit 0c6568a
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 77 deletions.
255 changes: 178 additions & 77 deletions Scripts/clusteranalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,38 @@ load_lib("circlize")
load_lib("Cairo")
load_lib("ComplexHeatmap")
load_lib("loessclust")
load_lib("ggrepel")

get_biodata <- function(path) {
dir <- "/Users/seonghyunyoon/Downloads/proteomics_project/data/Proteomic"
dir <- "/Users/seonghyunyoon/Downloads/proteomics_project/data"
read.csv(paste(dir, path, sep = "/"))
}

plasma_meta <- get_biodata("Plasma/WashU_Plasma_SomaScan7K_sample_metadata.csv")
plasma_prot <- get_biodata("Plasma/WashU_Plasma_SomaScan7K_sample_protein_expression.csv")
plasma_patient <- get_biodata("Plasma/Plasma_BasicDemo.csv")
CSF_meta <- get_biodata("CSF/WashU_CSF_SomaScan7K_sample_metadata.csv")
CSF_prot <- get_biodata("CSF/WashU_CSF_SomaScan7K_sample_protein_expression.csv")
CSF_patient <- get_biodata("CSF/CSF_BasicDemo.csv")
plasma_meta <- get_biodata("Proteomic/Plasma/WashU_Plasma_SomaScan7K_sample_metadata.csv")
plasma_prot <- get_biodata("Proteomic/Plasma/WashU_Plasma_SomaScan7K_sample_protein_expression.csv")
plasma_patient <- get_biodata("Proteomic/Plasma/Plasma_BasicDemo.csv")
CSF_meta <- get_biodata("Proteomic/CSF/WashU_CSF_SomaScan7K_sample_metadata.csv")
CSF_prot <- get_biodata("Proteomic/CSF/WashU_CSF_SomaScan7K_sample_protein_expression.csv")
CSF_patient <- get_biodata("Proteomic/CSF/CSF_BasicDemo.csv")

patient_meta <- get_biodata("commonfile/WashU_ADNI_demographic.csv")
patient_meta <- get_biodata("Proteomic/commonfile/WashU_ADNI_demographic.csv")

all_prots_ID <- names(plasma_prot)[4:ncol(plasma_prot)]

protMeta <- read_csv("/Users/seonghyunyoon/Downloads/proteomics_project/data/ProteinMetadata_with_LOD.csv", col_types = )
protMeta$SeqId = str_replace(protMeta$SeqId, "-", ".")
protMeta <- filter(protMeta, (Organism == "Human" | Organism == "HIV-1" | Organism == "HIV-2") & Type == "Protein")
all_prots <- str_remove(all_prots_ID, "^X")
my_order <- match(protMeta$SeqId, all_prots)
all_prots <- protMeta$Key_2
nprots <- length(all_prots)

plasma_prot <- plasma_prot[c(1:3, 3 + my_order)]
colnames(plasma_prot) <- c("PA_DB_UID", "Barcode2d", "ExtIdentifier", all_prots)

CSF_prot <- CSF_prot[c(1:3, 3 + my_order)]
colnames(CSF_prot) <- c("PA_DB_UID", "Barcode2d", "ExtIdentifier", all_prots)

nprots <- ncol(plasma_prot)
all_prots <- names(plasma_prot)[4:nprots]

# Logarithmic
plasma_prot[4:nprots] <- log10(plasma_prot[4:nprots])
Expand Down Expand Up @@ -120,12 +135,11 @@ intake_fil <- intake_fil[patientdata_summary$final_status %in% c("AD", "CO", "FT
patientdata_summary <- patientdata_summary %>%
filter(patientdata_summary$final_status %in% c("AD", "CO", "FTD", "Non-AD dementia"))

# set date window
# strict: set date window
date_window <- 180
patient_strict <- patientdata_summary[patientdata_summary$drawdate_diff <= date_window, ]
intake_strict <- intake_fil[patientdata_summary$drawdate_diff <= date_window, ]


for(i in 1:length(all_prots)) {
intake_strict_lm_models[[i]] <- summary(lm(intake_strict[, i] ~
relevel(factor(patient_strict$final_status), ref = "CO") +
Expand All @@ -149,92 +163,179 @@ for (i in 1:length(all_prots)) {
intake_strict_lm_summary <- rbind(intake_strict_lm_summary, intake_strict_tidy_result)
}

volcano_strict_AD <- intake_strict_lm_summary[intake_strict_lm_summary$xVar == "AD", ] %>%
mutate(qval = -log10(p.adjust(Pval, method = "fdr")),
log2fc = Coefficient) %>%
select(Protein, qval, log2fc)

ggplot(volcano_strict_AD, aes(x = log2fc, y = qval)) +
geom_point()


# lenient time window
date_window <- 2000
patient_lenient <- patientdata_summary[patientdata_summary$drawdate_diff <= date_window, ]
intake_lenient <- intake_fil[patientdata_summary$drawdate_diff <= date_window, ]

DEPlot <- ggplot(volcanoPlot, aes(x = log2fc, y = qvalue, color = factor(diffexpressed))) +
geom_point(size = 3, alpha = 0.8, na.rm = T) + # add gene points
geom_text_repel(max.overlaps = 10, aes(label = delabel)) +
theme_bw(base_size = 16) +
theme(legend.position = "none") +
ggtitle(label = paste(str_split(plot_title, '_', simplify = T), sep = "", collapse = " ")) +
xlab("log2 FC") +
ylab(expression(-log[10]("q"))) +
scale_color_manual(values = c("Upregulated" = "indianred1",
"Downregulated" = "royalblue1",
"none" = "grey60"))
intake_lenient_lm_models <- list()
for(i in 1:length(all_prots)) {
intake_lenient_lm_models[[i]] <- summary(lm(intake_lenient[, i] ~
relevel(factor(patient_lenient$final_status), ref = "CO") +
patient_lenient$Sex +
patient_lenient$age +
patient_lenient$storage_days))
}



## 0.2. Wrangle common patientdata
drawdate_diff <- difftime(CSF_patient_fil$DrawDate, plasma_patient_fil$drawdate, units = "days") %>%
data.frame(Diff = ., absDiff = abs(.))
intake_lenient_lm_summary <- data.frame()
for (i in 1:length(all_prots)) {
intake_lenient_tidy_result <- data.frame(intake_lenient_lm_models[[i]]$coefficients) %>%
select(Estimate, Std..Error, Pr...t..)
colnames(intake_lenient_tidy_result) <- c("Coefficient", "StdError", "Pval")
intake_lenient_tidy_result <- intake_lenient_tidy_result[2:nrow(intake_lenient_tidy_result), ] %>%
mutate(xVar = c("AD", "FTD", "Non-AD dementia", "Male", "Age", "Storage_days"),
Protein = all_prots[i]) %>%
select(Protein, xVar, Coefficient, StdError, Pval)
rownames(intake_lenient_tidy_result) <- c()

intake_lenient_lm_summary <- rbind(intake_lenient_lm_summary, intake_lenient_tidy_result)
}



generate_lmsummary <- function(data, date_window, data_type = c("ADRC", "Knight"))


generate_volcanodata <- function(data, variable) {
volcano_data <- data %>% filter(xVar == variable) %>%
#group_by(Protein) %>%
mutate(padj = p.adjust(Pval, method = "fdr")) %>%
mutate(qval = -log10(padj),
log2fc = Coefficient) %>%
select(Protein, Pval, padj, qval, log2fc) %>%
mutate(diffexpressed = ifelse(log2fc > 0 & qval >= 1,
yes = "Upregulated",
no = ifelse(log2fc < 0 & qval >= 1,
yes = "Downregulated", no="none")))
return(volcano_data)
}

# set date window
date_window <- 180
patient_strict <- patientdata_fil[patientdata_fil$drawdate_diff <= date_window, ]
intake_strict <- intake_fil[patientdata_fil$drawdate_diff <= date_window, ]

# split dataset
intake_strict_AD <- intake_strict[patient_strict$Plasma_drawstatus == "AD", ]
intake_strict_CO <- intake_strict[patient_strict$Plasma_drawstatus == "CO", ]
split_intake_strict_age <- split(patient_strict$drawage, patient_strict$Plasma_drawstatus)
intake_strict_age_AD <- split_intake_strict_age$AD
intake_strict_age_CO <- split_intake_strict_age$CO

intake_strict_agemax <- min(max(intake_strict_age_AD), max(intake_strict_age_CO))
intake_strict_agemin <- max(min(intake_strict_age_AD), min(intake_strict_age_CO))
age_resolution <- 0.25
intake_age_seq <- seq(intake_strict_agemin, intake_strict_agemax, by = age_resolution)

loess_predict <- function(x, Y) {
models <- vector("list", length(all_prots))
for (i in 1:length(all_prots)) {
models[[i]] <- loess(Y[, i] ~ x)
}
data <- data.frame(age = intake_age_seq)
for (i in 1:length(all_prots)) {
data[, i + 1] <- predict(models[[i]], intake_age_seq)
}
names(data) <- c("Age", all_prots)
return(data)
generate_volcanoplot <- function(volcano_data, variable) {
ggplot(volcano_data, aes(x = log2fc, y = qval, color = factor(diffexpressed))) +
geom_point(size = 0.5, alpha = 0.8, na.rm = T) +
#geom_text_repel(max.overlaps = 10, aes(label = delabel)) +
theme_bw(base_size = 16) +
theme(legend.position = "none") +
#ggtitle(label = paste(str_split(plot_title, '_', simplify = T), sep = "", collapse = " ")) +
xlab("log2 FC") +
ylab(expression(-log[10]("q"))) +
scale_color_manual(values = c("Upregulated" = "indianred1",
"Downregulated" = "royalblue1",
"none" = "grey60")) +
theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
plot.background = element_rect(fill = "white"),
axis.title = element_text(size = 9.5),
axis.text = element_text(size = 7),
plot.title = element_text(size = 10, hjust = 0.5),
axis.line = element_blank()) +
annotate("text", x = min(volcano_data$log2fc)*1.1, y = max(volcano_data$qval),
label = variable, hjust = 0, size = 4)
}

models <- vector("list", length(all_prots))
for (i in 1:length(all_prots)) {
models[[i]] <- loess(intake_strict_AD[, i] ~ intake_strict_age_AD)

data_knight <- c(intake_strict_lm_summary, intake_lenient_lm_summary)
xVars <- c("AD", "FTD", "Non-AD dementia", "Male", "Age", "Storage_days")

knight_strict_volcanodata <- list()
for (i in 1:length(xVars)) {
knight_strict_volcanodata[[i]] <- generate_volcanodata(intake_strict_lm_summary,
variable = xVars[i])
}
data <- data.frame(age = intake_age_seq)
for (i in 1:length(all_prots)) {
data[, i + 1] <- predict(models[[i]], intake_age_seq)
knight_strict_volcanoplot <- list()
for (i in 1:length(xVars)) {
knight_strict_volcanoplot[[i]] <- generate_volcanoplot(knight_strict_volcanodata[[i]],
variable = xVars[i])
}

names(data) <- c("Age", all_prots)
intake_strict_AD_pred <- loess_predict(intake_strict_age_AD, intake_strict_AD)
gridExtra::grid.arrange(grobs = knight_strict_volcanoplot, ncol = 3)


# 1. confirm if group_by is right?
# 2. generate new volcano plots
# 3. Send the new volcanodata datasets
# 4. Clustering

# linear model, but in two ways: one with stringent dates, and one with up to 2000
# run lm on Knight, then create volcano plots for both stanford and the knight linear models
intake_strict_lm_summary <- vector("list", length(all_prots))
for(i in 1:length(all_prots)) {
intake_strict_lm_summary[[i]] <- summary(lm(intake_filtered[, i + 3] ~
patient_strict$Plasma_drawstatus +
patient_strict$Sex +
patient_strict$drawage))






#This time for lenient datewindow
knight_lenient_volcanodata <- list()
for (i in 1:length(xVars)) {
knight_lenient_volcanodata[[i]] <- generate_volcanodata(intake_lenient_lm_summary,
variable = xVars[i])
}
knight_lenient_volcanoplot <- list()
for (i in 1:length(xVars)) {
knight_lenient_volcanoplot[[i]] <- generate_volcanoplot(knight_lenient_volcanodata[[i]],
variable = xVars[i])
}

gridExtra::grid.arrange(grobs = knight_lenient_volcanoplot, ncol = 3)


# export some data
(knight_strict_volcanodata[[1]])$Protein <- all_prots


# same thing with stanford data?
barcodes2 <- get_biodata("CSF_plasma_matched_barcodes.csv")
CSF_patient2 <- get_biodata("CSF_metadata_2023-03-04.csv")
CSF_protein2 <- get_biodata("CSFProts.log10.noLODFilter.csv")
plasma_patient2 <- get_biodata("Plasma_metadata_samplesOnly_2023-03-04.csv")
plasma_protein2 <- get_biodata("plasmaProts.log10.csv")

CSF_patient2$Age <- gsub('^"&"$', '', CSF_patient2$Age)
CSF_patient2$Age <- as.numeric(CSF_patient2$Age)

common_prots2 <- intersect(names(CSF_protein2), names(plasma_protein2))
CSF_protein2 <- data.frame(Barcode = CSF_patient2$Barcode, CSF_protein2[, common_prots2])
plasma_protein2 <- data.frame(Barcode = plasma_protein2$Barcode, plasma_protein2[, common_prots2])


CSF_protein2_fil <- CSF_protein2[CSF_protein2$Barcode %in% barcodes2$CSF_Barcode, ]
CSF_patient2_fil <- CSF_patient2[CSF_patient2$Barcode %in% barcodes2$CSF_Barcode, ]
plasma_protein2_fil <- plasma_protein2 %>%
filter(Barcode %in% barcodes2$Plasma_Barcode) %>%
arrange(match(Barcode, barcodes2$Plasma_Barcode)) %>%
slice(order(match(Barcode, barcodes2$Plasma_Barcode)))

age2_filtered <- plasma_patient2 %>%
select(Age, Barcode) %>%
rename(Plasma_Barcode = Barcode) %>%
filter(Plasma_Barcode %in% barcodes2$Plasma_Barcode) %>%
arrange(match(Plasma_Barcode, barcodes2$Plasma_Barcode)) %>%
mutate(CSF_Barcode = CSF_patient2_fil$Barcode) %>%
select(Age, CSF_Barcode, Plasma_Barcode)

patientdata_filtered <- plasma %>%
rename(Plasma_Barcode = Barcode, Plasma_Zscore = ConnectivityZscore) %>%
filter(Plasma_Barcode %in% barcodes$Plasma_Barcode) %>%
arrange(match(Plasma_Barcode, barcodes$Plasma_Barcode)) %>%
mutate(CSF_Zscore = CSF_patientdata_filtered$ConnectivityZscore,
CSF_Barcode = CSF_patientdata_filtered$Barcode) %>%
.[, c(ncol(.), 1:(ncol(.) - 1))] %>%
.[, c(1:(ncol(.) - 2), ncol(.), ncol(.) - 1)]





# CLUSTERING ANALYSIS
intake_fil










Expand Down
Binary file modified figures_draft/.DS_Store
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 0c6568a

Please sign in to comment.