From 3257515495eb6434ec0cfd6392a042f248f7c54c Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 23 Oct 2020 13:31:47 +0100 Subject: [PATCH] added all_output_plots.R, legend_adjustment.R and stats_corr.R --- all_output_plots.R | 796 ++++++++++++++++++++++++++++++++++++++++++++ legend_adjustment.R | 37 ++ stats_corr.R | 89 +++++ 3 files changed, 922 insertions(+) create mode 100755 all_output_plots.R create mode 100755 legend_adjustment.R create mode 100755 stats_corr.R diff --git a/all_output_plots.R b/all_output_plots.R new file mode 100755 index 0000000..15ae894 --- /dev/null +++ b/all_output_plots.R @@ -0,0 +1,796 @@ +#!/usr/bin/Rscript +getwd() +setwd('~/git/covid_analysis/') +getwd() +############################################################ +# TASK: basic plots +# boxplots +# spaghetti plots +# ggridges +# pairs.panel +# heatmap + +# useful links: +# http://www.sthda.com/english/wiki/ggplot2-dot-plot-quick-start-guide-r-software-and-data-visualization +############################################################ +# source data +source("read_data.R") + +# clear unwanted variables +rm(my_data) +############################################################ +#=========================== +# Output: different types +# of plots, correlation +# and heatmaps +#============================= +output_plots = paste0(outdir_plots, "output_plots.pdf") +corr_and_hmap = paste0(outdir_plots, "corr_hmap.pdf") + +#%%======================================================== +# read file +# data assignment for plots +wf = wf_data +lf = lf_data + +#===================== +# data for plots: LF +#===================== +dim(lf); str(lf) +levels(factor(lf$mediator)); table(lf$mediator) + +nice_mediator_names = c(Angiopoietin2 = "Angiopoietin2" + , PF = "PF" + #, PSELECTIN = "PSelectin" + , PSelectin = "PSelectin" + , sESelectin = "sESelectin" + , sICAM1="sICAM1" + , sRAGE= "sRAGE" + , sVCAM1="sVCAM1") +# assign nice names +lf$mediator <- as.character(nice_mediator_names[lf$mediator]) + +dim(lf); str(lf) +levels(factor(lf$mediator)); table(lf$mediator) + +lf$outcome_category = as.factor(lf$outcomes) +levels(lf$outcome_category) + +#%%======================================================== +# count na in mediator col +if (table(is.na(lf$mediator)) == nrow(lf)){ + print("PASS: No NAs detected, lf data is good for plotting") +}else{ + print("FAIL: NAs detected in mediator columna. Check source formatting for lf data") + #lf <- lf[!is.na(lf$mediator),] + #head(lf); str(lf) + quit() +} + +# FIXME : stats on log 10 value +# =========== +# stats +# =========== +# with adjustment: also gives unadjusted P-values (BH and fdr are the same) +my_stat = compare_means(value~timepoint, group.by = "mediator" + , data = lf, paired = TRUE + , p.adjust.method = "BH") + #, p.adjust.method = "bonferroni") + +# !! check: satsified!! +wilcox.test(wf$sESelectin_ngmL_t1, wf$sESelectin_ngmL_t2, paired = T) +wilcox.test(wf$sRAGE_pgmL_t1, wf$sRAGE_pgmL_t2, paired = T) +wilcox.test(wf$sVCAM1_ngmL_t1, wf$sVCAM1_ngmL_t3, paired = T) + +my_comparisons <- list( c("t1", "t2"), c("t2", "t3"), c("t1", "t3") ) + +# FIXME +#my_stat_minmax = compare_means(minmax_score~timepoint, group.by = "mediator", data = lf, paired = TRUE) + +#lf$timepoint <- as.factor(lf$timepoint) +#lf$mediator <- as.factor(lf$mediator) +#lf$outcomes <- as.factor(lf$outcomes) + +# if you really wanted to custom colour category +# currently unused +my_outcome_cols = c("0" = "red", "1" = "green") + +#==================================== +# Output plots as one pdf +cat("Output plots will be in:", output_plots) +pdf(output_plots, width=15, height=12) + +# ==================================== +# 1) Boxplot with facet wrap: +# x = time +# y = Linear (Levels) +# coloured: outcome +# ==================================== +y_value = "value" +my_title1 = "Boxplots of mediators over time: linear scale" + +p1 = ggplot(lf, aes(x = timepoint + , y = eval(parse(text=y_value)) )) + + #geom_text(aes(label=id)) + + facet_wrap(~ mediator, nrow = 2, scales = "free_y") + + geom_boxplot(fill = "white", outlier.colour = NA + # position = position_dodge(width = 0.9) + , width = 0.5) + + geom_point(position = position_jitterdodge(dodge.width=0.01) + , aes(colour = factor(outcomes))) + + theme(axis.text.x = element_text(size = 15) + , axis.text.y = element_text(size = 15 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = 15) + , axis.title.y = element_text(size = 15) + , plot.title = element_text(size = 20, hjust = 0.5) + , strip.text.x = element_text(size = 15, colour = "black") + , legend.title = element_text(color = "black", size = 20) + , legend.text = element_text(size = 15) + , legend.direction = "vertical") + + labs(title = my_title1 + , x = "" + , y = "Levels") + + scale_colour_discrete(name = "Patient outcome", labels = c("Death", "Recovered"))+ + #scale_colour_manual(values = my_outcome_cols) + stat_compare_means(comparisons = my_comparisons + , method = "wilcox.test" + , paired = TRUE + , label = "p.format") + +shift_legend2(p1) + +# ==================================== +# 2) Boxplot with facet wrap: +# x = time +# y = Log(Levels) +# coloured: outcome +# ==================================== +#y_value = "log10(value)" # shows log transformed scale +y_value = "value" +my_title2 = "Boxplots of mediators over time: log scale" + +p2 = ggplot(lf, aes(x = timepoint + , y = eval(parse(text=y_value)) )) + + #geom_text(aes(label = id)) + + facet_wrap(~ mediator, nrow = 2, scales = "free_y") + + scale_y_log10()+ + geom_boxplot(fill = "white", outlier.colour = NA + # position = position_dodge(width = 0.9) + , width = 0.5) + + geom_point(position = position_jitterdodge(dodge.width=0.01) + , aes(colour = factor(outcomes))) + + theme(axis.text.x = element_text(size = 15) + , axis.text.y = element_text(size = 15 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = 15) + , axis.title.y = element_text(size = 15) + , plot.title = element_text(size = 20, hjust = 0.5) + , strip.text.x = element_text(size = 15, colour = "black") + , legend.title = element_text(color = "black", size = 20) + , legend.text = element_text(size = 15) + , legend.direction = "vertical") + + labs(title = my_title2 + , x = "" + , y = "Levels (Log10)")+ + scale_colour_discrete(name = "Patient outcome" + , labels = c("Death", "Recovered")) + + stat_compare_means(comparisons = my_comparisons + , method = "wilcox.test" + , paired = TRUE + , label = "p.format") + +shift_legend2(p2) + +#======================== +# outlier plot: log +#======================== +# create a separate df +lf2 = lf +lf2$value2 = lf2$value +lf2$value2 = replace_na(lf2$value2,0) + +# outlier detection +is_outlier <- function(x) { + return(x < quantile(x, 0.25) - 1.5 * IQR(x) | x > quantile(x, 0.75) + 1.5 * IQR(x)) +} + +lf2 = lf2 %>% group_by(timepoint, mediator) %>% + mutate(is_outlier = ifelse(is_outlier(value2), value2 + , as.numeric(NA) )) +lf2$outlier_id=lf2$id + +lf2$outlier_id[which(is.na(lf2$is_outlier))] <- as.numeric(NA) + +#******************** +# labelled outliers +#******************** +my_outlier_title = "Boxplot with top outliers labelled" +p_outlier = ggplot(lf2, aes(x = timepoint + , y = value)) + + facet_wrap(~ mediator, nrow = 2, scales = "free_y") + + scale_y_log10() + + geom_boxplot(fill = "white", outlier.colour = NA + , width = 0.5) + + geom_point(position = position_jitterdodge(dodge.width=0.01) + , aes(colour = factor(outcomes))) + + theme(axis.text.x = element_text(size = 15) + , axis.text.y = element_text(size = 15 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = 15) + , axis.title.y = element_text(size = 15) + , plot.title = element_text(size = 20, hjust = 0.5) + , strip.text.x = element_text(size = 15, colour = "black") + , legend.title = element_text(color = "black", size = 15) + , legend.text = element_text(size = 15) + , legend.direction = "vertical") + + labs(title = my_outlier_title + , x = "" + , y = "Levels (Log10)")+ + scale_colour_discrete(name = "Patient outcome" + , labels = c("Death", "Recovered")) + + geom_text_repel(aes(label = outlier_id), na.rm = TRUE) + +shift_legend2(p_outlier) + +# ==================================== +# 3) Spaghetti plot with facet wrap: +# x = time +# y = Linear(Levels) +# coloured: outcome +# ==================================== +y_value = "value" +my_title3 = "Spaghetti plot of mediators over time: linear scale" + +p3 = ggplot(lf, aes(x = timepoint, + y = eval(parse(text=y_value)), + group = id, + colour = factor(outcomes))) + + #geom_text(aes(label=id))+ + facet_wrap(~mediator, scales = "free") + + #ylab("Levels") + + geom_line(alpha = 0.8) + + geom_beeswarm(alpha = 0.7, + size = 1.5) + + + theme(axis.text.x = element_text(size = 13) + , axis.text.y = element_text(size = 13 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = 13) + , axis.title.y = element_text(size = 13) + , plot.title = element_text(size = 20, hjust = 0.5) + , strip.text.x = element_text(size = 13, colour = "black") + , legend.title = element_text(color = "black", size = 20) + , legend.text = element_text(size = 15) + , legend.direction = "vertical") + + labs(title = my_title3 + , x = "" + , y = "Levels")+ + scale_colour_discrete(name = "Patient outcome" + , labels = c("Death", "Recovered")) + + stat_compare_means(comparisons = my_comparisons + , method = "wilcox.test" + , paired = TRUE + , label = "p.format") +shift_legend2(p3) + +# ==================================== +# Spaghetti plot with facet wrap: +# x = time +# y = Log(Levels) +# coloured: outcome +# ==================================== +#y_value = "log10(value)" +y_value = "value" +my_title4 = "Spaghetti plot of mediators over time: log scale" + +p4 = ggplot(lf, aes(x = timepoint, + y = eval(parse(text=y_value)), + group = id, + colour = factor(outcomes))) + + #geom_text(aes(label=id))+ + facet_wrap(~mediator, scales = "free") + + #ylab("Levels") + + scale_y_log10()+ + geom_line(alpha = 0.8) + + geom_beeswarm(alpha = 0.7, + size = 1.5) + + + theme(axis.text.x = element_text(size = 13) + , axis.text.y = element_text(size = 13 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = 13) + , axis.title.y = element_text(size = 13) + , plot.title = element_text(size = 20, hjust = 0.5) + , strip.text.x = element_text(size = 15, colour = "black") + , legend.title = element_text(color = "black", size = 20) + , legend.text = element_text(size = 15) + #, legend.position = "right" + #, legend.position = c(1, 0.2) + , legend.direction = "vertical") + + labs(title = my_title4 + , x = "" + , y = "Levels (Log10)")+ + scale_colour_discrete(name = "Patient outcome" + , labels = c("Death", "Recovered")) + + stat_compare_means(comparisons = my_comparisons + , method = "wilcox.test" + , paired = TRUE + , label = "p.format") +shift_legend2(p4) + +#=================================================== +# ggridges +#=================================================== +#******************************* +#5) ggrides: all timeoints +#******************************* + +my_title5 = "Distributions of mediators: all timepoints" + +p5 = ggplot(lf, aes(x = log10(value) , y = mediator)) + + geom_density_ridges(aes(fill = factor(outcomes)) + , alpha = 0.6) + + theme(axis.text.x = element_text(size = 13) + , axis.text.y = element_text(size = 13 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = 13) + , axis.title.y = element_text(size = 13) + , plot.title = element_text(size = 15, hjust = 0.5) + , strip.text.x = element_text(size = 13, colour = "black") + , legend.title = element_text(color = "black", size = 13) + , legend.text = element_text(size = 13) + , legend.position = "right" + , legend.direction = "vertical") + + labs(title = my_title5 + , x = "Log10(Levels)" + , y = "") + + scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered")) + +p5 +#================================== +# 6) ggrdiges facet +# x = levels (Log) +# y = timepoints +# colour = outcomes +#================================== + +my_title6 = "Distributions of mediators facet by time: Log" + +p6 = ggplot(lf, aes(x = log10(value) , y = mediator)) + + facet_wrap (~timepoint) + + geom_density_ridges(aes(fill = factor(outcomes)) + , alpha = 0.6) + + theme(axis.text.x = element_text(size = 13) + , axis.text.y = element_text(size = 13 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = 13) + , axis.title.y = element_text(size = 13) + , plot.title = element_text(size = 15, hjust = 0.5) + , strip.text.x = element_text(size = 13, colour = "black") + , legend.title = element_text(color = "black", size = 13) + , legend.text = element_text(size = 13) + , legend.position = "right" + , legend.direction = "vertical") + + labs(title = my_title6 + , x = "Log10(Levels)" + , y = "") + + scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered")) + +p6 + +#******************************* +#7) ggrides: at T1 +#******************************* +data_t1 = dplyr::filter(lf, timepoint == "t1") + +my_title7 = "Distributions of mediators at T1" + +p7 = ggplot(data_t1, aes(x = log10(value) , y = mediator)) + + geom_density_ridges(aes(fill = factor(outcomes)) + ,alpha = 0.6) + + +theme(axis.text.x = element_text(size = 13) + , axis.text.y = element_text(size = 13 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = 13) + , axis.title.y = element_text(size = 13) + , plot.title = element_text(size = 15, hjust = 0.5) + , strip.text.x = element_text(size = 13, colour = "black") + , legend.title = element_text(color = "black", size = 13) + , legend.text = element_text(size = 13) + , legend.position = "right" + , legend.direction = "vertical") + + labs(title = my_title7 + , x = "Log10(Levels)" + , y = "") + + scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered")) +p7 + +#******************************* +#8) ggrides: at T2 +#******************************* +data_t2 = dplyr::filter(lf, timepoint == "t2") + +my_title8 = "Distributions of mediators at T2" + +p8 = ggplot(data_t2, aes(x = log10(value) , y = mediator)) + + geom_density_ridges(aes(fill = factor(outcomes)) + , alpha = 0.6) + + + theme(axis.text.x = element_text(size = 13) + , axis.text.y = element_text(size = 13 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = 13) + , axis.title.y = element_text(size = 13) + , plot.title = element_text(size = 15, hjust = 0.5) + , strip.text.x = element_text(size = 13, colour = "black") + , legend.title = element_text(color = "black", size = 13) + , legend.text = element_text(size = 13) + , legend.position = "right" + , legend.direction = "vertical") + + labs(title = my_title8 + , x = "Log10(Levels)" + , y = "") + + scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered")) +p8 + +#******************************* +#9) ggrides: at T3 +#******************************* +data_t3 = dplyr::filter(lf, timepoint == "t3") + +my_title9 = "Distributions of mediators at T3" + +p9 = ggplot(data_t3, aes(x = log10(value) , y = mediator)) + + geom_density_ridges(aes(fill = factor(outcomes)) + , alpha = 0.6) + + + theme(axis.text.x = element_text(size = 13) + , axis.text.y = element_text(size = 13 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = 13) + , axis.title.y = element_text(size = 13) + , plot.title = element_text(size = 15, hjust = 0.5) + , strip.text.x = element_text(size = 13, colour = "black") + , legend.title = element_text(color = "black", size = 13) + , legend.text = element_text(size = 13) + , legend.position = "right" + , legend.direction = "vertical") + + labs(title = my_title9 + , x = "Log10(Levels)" + , y = "") + + scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered")) +p9 + +#================================== +# 10) ggrdiges facet +# x = levels (Linear) +# y = timepoints +# colour = outcomes +#================================== +my_title10 = "Distributions of mediators: linear" + +p10 = ggplot(lf, aes(x = value , y = timepoint)) + + geom_density_ridges(aes(fill = factor(outcomes)) + , alpha = 0.6) + + facet_wrap(~mediator, nrow = 2, scales = "free") + +theme(axis.text.x = element_text(size = 13) + , axis.text.y = element_text(size = 13 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = 13) + , axis.title.y = element_text(size = 13) + , plot.title = element_text(size = 20, hjust = 0.5) + , strip.text.x = element_text(size = 15, colour = "black") + , legend.title = element_text(color = "black", size = 20) + , legend.text = element_text(size = 15) + #, legend.position = "right" + , legend.direction = "vertical") + + labs(title = my_title10 + , x = "Levels" + , y = "") + + scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered")) + +shift_legend2(p10) + +#================================== +# 11) ggrdiges facet +# x = levels (Log) +# y = timepoints +# colour = outcomes +#================================== +my_title11 = "Distributions of mediators: Log " + +p11 = ggplot(lf, aes(x = value , y = timepoint)) + + scale_x_log10()+ + geom_density_ridges(aes(fill = factor(outcomes)) + , alpha = 0.6) + + facet_wrap(~mediator, nrow = 2, scales = "free") + + theme(axis.text.x = element_text(size = 13) + , axis.text.y = element_text(size = 13 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = 13) + , axis.title.y = element_text(size = 13) + , plot.title = element_text(size = 20, hjust = 0.5) + , strip.text.x = element_text(size = 15, colour = "black") + , legend.title = element_text(color = "black", size = 20) + , legend.text = element_text(size = 15) + #, legend.position = "right" + , legend.direction = "vertical") + + labs(title = my_title11 + , x = "Levels(Log)" + , y = "") + + scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered")) + +shift_legend2(p11) + +dev.off() +######################################################################## + + +######################################################################## +# Plots with wf data +# clustering k means good example: TO DO? +######################################################################## + +cat("Plots will be in:", corr_and_hmap) +pdf(corr_and_hmap , width = 15, height = 12) + +# ======================= +# 5) Correlation plot at T1 +#0.40 to 0.69 A moderate correlation +#0.70 to 0.89 A strong correlation +#0.90 to 1.00 A very strong correlation +# ======================= +#library(ggcorrplot) +wf = wf_data +wf <- wf[order(wf$outcomes),] + +# add colour based on outcomes column to match colour grouping with ggplots +my_red = "#F8766D" +my_green = "#00BA38" +my_blue = "#619CFF" + +str(wf$outcomes) + +wf$idcol <- mapvalues(wf$outcomes + , from = c(0,1) + , to = c(my_red, my_blue)) + +n = which(colnames(wf) == "idcol"); print(n) + +# subset t1 data +wf_t1 = cbind(wf[c(1:3,n)], wf %>% dplyr:: select(grep("_t1", names(wf)))) + +colnames(wf_t1) + +# assign nice column names +my_colnames = c("id" + , "studygroup" + , "outcomes" + , "idcol" + , "Angiopoietin2" + , "PF" + , "PSelectin" + , "sESelectin" + , "sICAM1" + , "sRAGE" + , "sVCAM1") + +if (length(my_colnames) == length(my_colnames)){ + print("PASS: Reassigning column names...") + colnames(wf_t1) = my_colnames +} else{ + cat(paste0("FAIL:Cannot assign column names, length mismatch" + , "\nExpected length: ", length(orig_colanmes) + , "\nGot:", length(my_colnames))) + quit() +} + +print('formatted colnames are:'); print(colnames(wf_t1)) + +# inspect +class(wf_t1); str(wf_t1) + +# Compute a matrix of correlation p-values +start_med = which(colnames(wf_t1) == "Angiopoietin2") +end_med = which(colnames(wf_t1) == "sVCAM1") +outcomes_col = which(colnames(wf_t1) == "outcomes") + +#=========== +# Data for corrrelations +#=========== + +### THIS PLOT CORR +# Data: But this is not needed as this is inside the function +corr_M = wf_t1[start_med:end_med] +corr_data = wf_t1[c(outcomes_col, start_med:end_med)] + +#=========== +# Corrplot with R and P values +#=========== +my_corr = psych::corr.test(corr_M + , method = "spearman" + , use = "pairwise.complete.obs")$r + +my_pval = psych::corr.test(corr_M + , method = "spearman" + , adjust = "none" + , use = "pairwise.complete.obs")$p + +# check: satisfied! +psych::corr.test(wf_t1$sICAM1, wf_t1$PSelectin + , method = "spearman" + , use = "pairwise.complete.obs")$r +psych::corr.test(wf_t1$sICAM1, wf_t1$PSelectin + , method = "spearman" + , use = "pairwise.complete.obs")$p + +#---------------- +# ggcorrplot: Not so good! +#---------------- +#ggcorr_plot = ggcorrplot(my_corr +# , p.mat = my_pval +# #, method = "circle" +# #, insig = "blank" +# , hc.order = TRUE +# , outline.col = "black" +# , lab = T +# , lab_size = 6 +# #, ggtheme = ggplot2::theme_gray +# , colors = c("#6D9EC1", "white", "#E46726") +# , title = "Spearman correlations: T1\n(X: not significant)") + + +#---------------- +# paris.panel +#---------------- + +corr_plot2 = pairs.panels(corr_data[2:length(corr_data)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = TRUE # show density plots + , ellipses = F # show correlation ellipses + , stars = TRUE + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c(my_red, my_blue)[unclass(factor(corr_data$outcomes))] + , pch = 21 + #, alpha = .05 + #, points(pch = 19, col = c("#f8766d", "#00bfc4")) + , cex =2 + , cex.axis = 1.4 + , cex.labels = 2 + , cex.cor = 1 + , smooth = F + , main = "Pairwise Spearman correlations with signifcance: T1" +) + +corr_plot2 + +# =============== +# Heatmaps of mediators at T1 +# =============== +library("RColorBrewer") +library(gplots) + +#my_palette <- colorRampPalette(brewer.pal(10, "RdYlBu"))(n=50) +#my_palette <- colorRampPalette(c("red", "white", "blue"))(n = 256) +my_palette <- cm.colors(n = 256) +my_palette <- cm.colors(n = 50) +#my_palette <- topo.colors(n=256) + +# data for heatmap +wf_t1_m = as.matrix(wf_t1[start_med:end_med]) +colnames(wf_t1_m) + +#hmap_colnames = c("Angiopoietin2" +#,"PF" +#,"PSelectin" +#,"sESelectin" +#,"sICAM1" +#,"sRAGE" +#,"sVCAM1") + +#colnames(wf_t1_m) = hmap_colnames +rownames(wf_t1_m) = wf_t1$id + +my_red = "#F8766D" +my_green = "#00BA38" +my_blue = "#619CFF" + +# =============== +# Heatmaps of mediators at T1 +# log scaled values +# NO subject ordering +# =============== + +hmap_data = "log10(wf_t1_m)" +my_title_hmap1 = "Heirarchical clustering of mediators at T1: Log scaled values" + +heatmap.2(eval(parse(text = hmap_data)) + , na.rm = T + , scale = "column" + , Rowv = F + , Colv = T + , dendrogram = "column" + , col = my_palette + , trace = "none" + , density.info = "density" + #, RowSideColors = c(rep(my_red, 11), rep(my_green, 11), rep(my_blue, 9)) + , RowSideColors = c(rep(my_red, 11), rep(my_blue, 20)) #version 3 + , keysize = 0.9 + #, key.xlab = "Z-score" + , cexRow = 1.75 + , cexCol = 1.75 + , sepcolor = "black" + , main = "" + , srtCol = 20) +par(lend = 1) +legend("topright" + , legend = c("Death", "Recovered") + , col = c(my_red, my_blue) + , lty= 1 + , lwd = 10) +title(main = my_title_hmap1, cex.main = 1.3) + +# =============== +# Heatmaps of mediators at T1 +# log scaled values +# no subject ordering +# =============== +#hmap_data = "wf_t1_m" +hmap_data = "log10(wf_t1_m)" +my_title_hmap2 = "Heirarchical clustering mediators and subjects at T1: Log scaled values" + +heatmap.2(eval(parse(text = hmap_data)) + , scale = "column" + , Rowv = T + , Colv = T + , dendrogram = "both" + , col = my_palette + , trace = "none" + , density.info = "density" + #, RowSideColors = c(rep(my_red, 11), rep(my_green, 11), rep(my_blue, 9)) + , RowSideColors = c(rep(my_red, 11), rep(my_blue, 20)) #version 3 + , keysize = 0.8 + , key.xlab = "Z-score" + , cexRow = 1.75 + , cexCol = 1.75 + , sepcolor = "black" + , main = "" + , srtCol = 20) +par(lend = 1) +legend("topright" + , legend = c("Death", "Recovered") + , col = c(my_red, my_blue) + , lty= 1 + , lwd = 10) +title(main = my_title_hmap2, cex.main = 1.3) + +dev.off() diff --git a/legend_adjustment.R b/legend_adjustment.R new file mode 100755 index 0000000..b236a72 --- /dev/null +++ b/legend_adjustment.R @@ -0,0 +1,37 @@ + +library(ggplot2) +library(gtable) +library(lemon) +# https://stackoverflow.com/questions/54438495/shift-legend-into-empty-facets-of-a-faceted-plot-in-ggplot2 + +shift_legend2 <- function(p) { + # check if p is a valid object + if(!(inherits(p, "gtable"))){ + if(inherits(p, "ggplot")){ + gp <- ggplotGrob(p) # convert to grob + } else { + message("This is neither a ggplot object nor a grob generated from ggplotGrob. Returning original plot.") + return(p) + } + } else { + gp <- p + } + + # check for unfilled facet panels + facet.panels <- grep("^panel", gp[["layout"]][["name"]]) + empty.facet.panels <- sapply(facet.panels, function(i) "zeroGrob" %in% class(gp[["grobs"]][[i]]), + USE.NAMES = F) + empty.facet.panels <- facet.panels[empty.facet.panels] + + if(length(empty.facet.panels) == 0){ + message("There are no unfilled facet panels to shift legend into. Returning original plot.") + return(p) + } + + # establish name of empty panels + empty.facet.panels <- gp[["layout"]][empty.facet.panels, ] + names <- empty.facet.panels$name + + # return repositioned legend + reposition_legend(p, 'center', panel=names) +} diff --git a/stats_corr.R b/stats_corr.R new file mode 100755 index 0000000..af08849 --- /dev/null +++ b/stats_corr.R @@ -0,0 +1,89 @@ +#!/usr/bin/Rscript +getwd() +setwd('~/git/covid_analysis/') +getwd() +############################################################ +# TASK: Pairwise correaltion and P-values: T1 +############################################################ +# source data +source("read_data.R") + +# clear unwanted variables +rm(my_data) +############################################################ +#============= +# Output +#============= +# corr analysis +my_spearman_corr = paste0(outdir_stats, "corr_spearman_t1_v3.csv") +my_pvals = paste0(outdir_stats, "corr_pvalues_t1_v3.csv") +############################################################ +# data assignment for stats +wf = wf_data +lf = lf_data +######################################################################## +# Pairwise correaltions and analyses +######################################################################## + +head(wf$outcomes) +wf <- wf[order(wf$outcomes),] + +# subset t1 data +wf_t1 = cbind(wf[c(1:3)], wf %>% dplyr:: select(grep("_t1", names(wf)))) +colnames(wf_t1) + +# assign nice column names +my_colnames = c("id" + , "studygroup" + , "outcomes" + #, "idcol" + , "Angiopoietin2" + , "PF" + , "PSelectin" + , "sESelectin" + , "sICAM1" + , "sRAGE" + , "sVCAM1") + +if (length(my_colnames) == length(my_colnames)){ + print("PASS: Reassigning column names...") + colnames(wf_t1) = my_colnames +} else{ + cat(paste0("FAIL:Cannot assign column names, length mismatch" + , "\nExpected length: ", length(orig_colanmes) + , "\nGot:", length(my_colnames))) + quit() +} + +print('formatted colnames are:'); print(colnames(wf_t1)) + +# inspect +class(wf_t1); str(wf_t1) + +# Compute a matrix of correlation p-values +start_med = which(colnames(wf_t1) == "Angiopoietin2") +end_med = which(colnames(wf_t1) == "sVCAM1") + +#--------------- +# Data for corr analysis +#--------------- +corr_M = wf_t1[start_med:end_med] + +my_corr1 = cor(corr_M + , method = "spearman" + , use = "pairwise.complete.obs") + +my_corr = psych::corr.test(corr_M + , method = "spearman" + , use = "pairwise.complete.obs")$r + +my_pval = psych::corr.test(corr_M + , method = "spearman" + , adjust = "none" + , use = "pairwise.complete.obs")$p + +#****************** +# write output file: correlations and pvalues +#****************** +write.csv(my_corr, my_spearman_corr, row.names = TRUE) +write.csv(my_pval, my_pvals, row.names = TRUE)