From 3257515495eb6434ec0cfd6392a042f248f7c54c Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Fri, 23 Oct 2020 13:31:47 +0100
Subject: [PATCH] added all_output_plots.R, legend_adjustment.R and
 stats_corr.R

---
 all_output_plots.R  | 796 ++++++++++++++++++++++++++++++++++++++++++++
 legend_adjustment.R |  37 ++
 stats_corr.R        |  89 +++++
 3 files changed, 922 insertions(+)
 create mode 100755 all_output_plots.R
 create mode 100755 legend_adjustment.R
 create mode 100755 stats_corr.R

diff --git a/all_output_plots.R b/all_output_plots.R
new file mode 100755
index 0000000..15ae894
--- /dev/null
+++ b/all_output_plots.R
@@ -0,0 +1,796 @@
+#!/usr/bin/Rscript   
+getwd()
+setwd('~/git/covid_analysis/')
+getwd()
+############################################################
+# TASK: basic plots
+# boxplots
+# spaghetti plots
+# ggridges
+# pairs.panel
+# heatmap
+
+# useful links:
+# http://www.sthda.com/english/wiki/ggplot2-dot-plot-quick-start-guide-r-software-and-data-visualization
+############################################################
+# source data
+source("read_data.R")
+
+# clear unwanted variables
+rm(my_data)
+############################################################
+#===========================
+# Output: different types 
+# of plots, correlation
+# and heatmaps
+#=============================
+output_plots = paste0(outdir_plots, "output_plots.pdf")
+corr_and_hmap = paste0(outdir_plots, "corr_hmap.pdf")
+
+#%%========================================================
+# read file
+# data assignment for plots
+wf = wf_data
+lf = lf_data
+
+#=====================
+# data for plots: LF
+#=====================
+dim(lf); str(lf)
+levels(factor(lf$mediator)); table(lf$mediator)
+
+nice_mediator_names = c(Angiopoietin2 = "Angiopoietin2"
+                        , PF = "PF"
+                        #, PSELECTIN = "PSelectin"
+                        , PSelectin = "PSelectin"
+                        , sESelectin = "sESelectin"
+                        , sICAM1="sICAM1"
+                        , sRAGE= "sRAGE"
+                        , sVCAM1="sVCAM1")
+# assign nice names 
+lf$mediator <- as.character(nice_mediator_names[lf$mediator])
+
+dim(lf); str(lf)
+levels(factor(lf$mediator)); table(lf$mediator)
+
+lf$outcome_category = as.factor(lf$outcomes)
+levels(lf$outcome_category)
+
+#%%========================================================
+# count na in mediator col
+if (table(is.na(lf$mediator)) == nrow(lf)){
+  print("PASS: No NAs detected, lf data is good for plotting")
+}else{
+  print("FAIL: NAs detected in mediator columna. Check source formatting for lf data")
+  #lf <- lf[!is.na(lf$mediator),]
+  #head(lf); str(lf)
+  quit()
+}
+
+# FIXME : stats on log 10 value
+# ===========
+# stats 
+# ===========
+# with adjustment: also gives unadjusted P-values (BH and fdr are the same)
+my_stat = compare_means(value~timepoint, group.by = "mediator"
+                        , data = lf,  paired = TRUE
+                        , p.adjust.method = "BH")
+                        #, p.adjust.method = "bonferroni")
+
+# !! check: satsified!!
+wilcox.test(wf$sESelectin_ngmL_t1, wf$sESelectin_ngmL_t2, paired = T)
+wilcox.test(wf$sRAGE_pgmL_t1, wf$sRAGE_pgmL_t2, paired = T)
+wilcox.test(wf$sVCAM1_ngmL_t1, wf$sVCAM1_ngmL_t3, paired = T)
+
+my_comparisons <- list( c("t1", "t2"), c("t2", "t3"), c("t1", "t3") )
+
+# FIXME
+#my_stat_minmax = compare_means(minmax_score~timepoint, group.by = "mediator", data = lf, paired = TRUE)
+
+#lf$timepoint <- as.factor(lf$timepoint)
+#lf$mediator <- as.factor(lf$mediator)
+#lf$outcomes <- as.factor(lf$outcomes)
+
+# if you really wanted to custom colour category
+# currently unused
+my_outcome_cols =  c("0" = "red", "1" = "green") 
+
+#====================================
+# Output plots as one pdf
+cat("Output plots will be in:", output_plots)
+pdf(output_plots, width=15, height=12)
+
+# ====================================
+# 1) Boxplot with facet wrap:
+# x = time
+# y = Linear (Levels)
+# coloured: outcome
+# ====================================
+y_value = "value"
+my_title1 = "Boxplots of mediators over time: linear scale"
+
+p1 = ggplot(lf, aes(x = timepoint
+              , y     = eval(parse(text=y_value)) ))  + 
+  #geom_text(aes(label=id)) +
+  facet_wrap(~ mediator, nrow = 2, scales = "free_y") + 
+  geom_boxplot(fill = "white", outlier.colour = NA
+              # position = position_dodge(width = 0.9)
+              , width = 0.5) +
+  geom_point(position = position_jitterdodge(dodge.width=0.01)
+             , aes(colour = factor(outcomes))) +
+  theme(axis.text.x = element_text(size = 15)
+        , axis.text.y = element_text(size = 15
+                                     , angle = 0
+                                     , hjust = 1
+                                     , vjust = 0)
+        , axis.title.x = element_text(size = 15)
+        , axis.title.y = element_text(size = 15)
+        , plot.title = element_text(size = 20, hjust = 0.5)
+        , strip.text.x = element_text(size = 15, colour = "black")
+        , legend.title = element_text(color = "black", size = 20)
+        , legend.text = element_text(size = 15)
+        , legend.direction = "vertical") +
+  labs(title = my_title1
+       , x = ""
+       , y = "Levels") +
+  scale_colour_discrete(name = "Patient outcome", labels = c("Death", "Recovered"))+
+  #scale_colour_manual(values = my_outcome_cols)
+  stat_compare_means(comparisons = my_comparisons
+                     , method = "wilcox.test"
+                     , paired = TRUE
+                     , label = "p.format") 
+
+shift_legend2(p1)
+
+# ====================================
+# 2) Boxplot with facet wrap:
+# x = time
+# y = Log(Levels)
+# coloured: outcome
+# ====================================
+#y_value = "log10(value)" # shows log transformed scale
+y_value = "value"
+my_title2 = "Boxplots of mediators over time: log scale"
+
+p2 = ggplot(lf, aes(x = timepoint
+                  , y = eval(parse(text=y_value)) ))  + 
+  #geom_text(aes(label = id)) +
+  facet_wrap(~ mediator, nrow = 2, scales = "free_y") + 
+  scale_y_log10()+
+  geom_boxplot(fill = "white", outlier.colour = NA 
+               # position = position_dodge(width = 0.9)
+               , width = 0.5) +
+  geom_point(position = position_jitterdodge(dodge.width=0.01)
+             , aes(colour = factor(outcomes))) +
+  theme(axis.text.x = element_text(size = 15)
+        , axis.text.y = element_text(size = 15
+                                     , angle = 0
+                                     , hjust = 1
+                                     , vjust = 0)
+        , axis.title.x = element_text(size = 15)
+        , axis.title.y = element_text(size = 15)
+        , plot.title = element_text(size = 20, hjust = 0.5)
+        , strip.text.x = element_text(size = 15, colour = "black")
+        , legend.title = element_text(color = "black", size = 20)
+        , legend.text = element_text(size = 15)
+        , legend.direction = "vertical") +
+  labs(title = my_title2
+       , x = ""
+       , y = "Levels (Log10)")+
+  scale_colour_discrete(name = "Patient outcome"
+                        , labels = c("Death", "Recovered")) +
+  stat_compare_means(comparisons = my_comparisons
+                     , method = "wilcox.test"
+                     , paired = TRUE
+                     , label = "p.format") 
+
+shift_legend2(p2)
+
+#========================
+# outlier plot: log
+#========================
+# create a separate df
+lf2 = lf
+lf2$value2 = lf2$value
+lf2$value2 = replace_na(lf2$value2,0)
+
+# outlier detection
+is_outlier <- function(x) {
+  return(x < quantile(x, 0.25) - 1.5 * IQR(x) | x > quantile(x, 0.75) + 1.5 * IQR(x))
+}
+
+lf2 = lf2 %>% group_by(timepoint, mediator) %>%
+  mutate(is_outlier = ifelse(is_outlier(value2), value2
+                             , as.numeric(NA) ))
+lf2$outlier_id=lf2$id
+
+lf2$outlier_id[which(is.na(lf2$is_outlier))] <- as.numeric(NA)
+
+#********************
+# labelled outliers
+#********************
+my_outlier_title = "Boxplot with top outliers labelled"
+p_outlier = ggplot(lf2, aes(x = timepoint
+                            , y = value))  + 
+  facet_wrap(~ mediator, nrow = 2, scales = "free_y") + 
+  scale_y_log10() + 
+  geom_boxplot(fill = "white", outlier.colour = NA 
+               , width = 0.5) +
+  geom_point(position = position_jitterdodge(dodge.width=0.01)
+             , aes(colour = factor(outcomes))) +
+  theme(axis.text.x = element_text(size = 15)
+        , axis.text.y = element_text(size = 15
+                                     , angle = 0
+                                     , hjust = 1
+                                     , vjust = 0)
+        , axis.title.x = element_text(size = 15)
+        , axis.title.y = element_text(size = 15)
+        , plot.title = element_text(size = 20, hjust = 0.5)
+        , strip.text.x = element_text(size = 15, colour = "black")
+        , legend.title = element_text(color = "black", size = 15)
+        , legend.text = element_text(size = 15)
+        , legend.direction = "vertical") +
+  labs(title = my_outlier_title
+       , x = ""
+       , y = "Levels (Log10)")+
+  scale_colour_discrete(name = "Patient outcome"
+                        , labels = c("Death", "Recovered")) +
+  geom_text_repel(aes(label = outlier_id), na.rm = TRUE)
+
+shift_legend2(p_outlier)
+
+# ====================================
+# 3) Spaghetti plot with facet wrap:
+# x = time
+# y = Linear(Levels)
+# coloured: outcome
+# ====================================
+y_value = "value"
+my_title3 = "Spaghetti plot of mediators over time: linear scale"
+
+p3 =  ggplot(lf, aes(x = timepoint, 
+                 y = eval(parse(text=y_value)), 
+                 group = id,
+                 colour = factor(outcomes))) +
+    #geom_text(aes(label=id))+
+    facet_wrap(~mediator, scales = "free") + 
+    #ylab("Levels") +
+    geom_line(alpha = 0.8) + 
+    geom_beeswarm(alpha = 0.7,
+               size = 1.5) +
+    
+    theme(axis.text.x = element_text(size = 13)
+          , axis.text.y = element_text(size = 13
+                                       , angle = 0
+                                       , hjust = 1
+                                       , vjust = 0)
+          , axis.title.x = element_text(size = 13)
+          , axis.title.y = element_text(size = 13)
+          , plot.title = element_text(size = 20, hjust = 0.5)
+          , strip.text.x = element_text(size = 13, colour = "black")
+          , legend.title = element_text(color = "black", size = 20)
+          , legend.text = element_text(size = 15)
+          , legend.direction = "vertical") +
+    labs(title = my_title3
+         , x = ""
+         , y = "Levels")+
+    scale_colour_discrete(name = "Patient outcome"
+                          , labels = c("Death", "Recovered")) +
+    stat_compare_means(comparisons = my_comparisons
+                       , method = "wilcox.test"
+                       , paired = TRUE
+                       , label = "p.format") 
+shift_legend2(p3)
+
+# ====================================
+# Spaghetti plot with facet wrap:
+# x = time
+# y = Log(Levels)
+# coloured: outcome
+# ====================================
+#y_value = "log10(value)"
+y_value = "value"
+my_title4 = "Spaghetti plot of mediators over time: log scale"
+
+p4 =  ggplot(lf, aes(x = timepoint, 
+                     y = eval(parse(text=y_value)), 
+                     group = id,
+                     colour = factor(outcomes))) +
+  #geom_text(aes(label=id))+
+  facet_wrap(~mediator, scales = "free") + 
+  #ylab("Levels") +
+  scale_y_log10()+
+  geom_line(alpha = 0.8) + 
+  geom_beeswarm(alpha = 0.7,
+                size = 1.5) +
+  
+  theme(axis.text.x = element_text(size = 13)
+        , axis.text.y = element_text(size = 13
+                                     , angle = 0
+                                     , hjust = 1
+                                     , vjust = 0)
+        , axis.title.x = element_text(size = 13)
+        , axis.title.y = element_text(size = 13)
+        , plot.title = element_text(size = 20, hjust = 0.5)
+        , strip.text.x = element_text(size = 15, colour = "black")
+        , legend.title = element_text(color = "black", size = 20)
+        , legend.text = element_text(size = 15)
+        #, legend.position = "right"
+        #, legend.position = c(1, 0.2)
+        , legend.direction = "vertical") +
+  labs(title = my_title4
+       , x = ""
+       , y = "Levels (Log10)")+
+  scale_colour_discrete(name = "Patient outcome"
+                        , labels = c("Death", "Recovered")) +
+  stat_compare_means(comparisons = my_comparisons
+                     , method = "wilcox.test"
+                     , paired = TRUE
+                     , label = "p.format") 
+shift_legend2(p4)
+
+#===================================================
+# ggridges
+#===================================================
+#*******************************
+#5) ggrides: all timeoints
+#*******************************
+
+my_title5 = "Distributions of mediators: all timepoints"
+
+p5 = ggplot(lf, aes(x = log10(value) , y = mediator)) +
+  geom_density_ridges(aes(fill = factor(outcomes))
+                      , alpha = 0.6) +
+  theme(axis.text.x = element_text(size = 13)
+        , axis.text.y = element_text(size = 13
+                                     , angle = 0
+                                     , hjust = 1
+                                     , vjust = 0)
+        , axis.title.x = element_text(size = 13)
+        , axis.title.y = element_text(size = 13)
+        , plot.title = element_text(size = 15, hjust = 0.5)
+        , strip.text.x = element_text(size = 13, colour = "black")
+        , legend.title = element_text(color = "black", size = 13)
+        , legend.text = element_text(size = 13)
+        , legend.position = "right"
+        , legend.direction = "vertical") +
+  labs(title = my_title5
+       , x = "Log10(Levels)"
+       , y = "") +
+  scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered")) 
+
+p5
+#==================================
+# 6) ggrdiges facet
+# x = levels (Log)
+# y = timepoints
+# colour = outcomes
+#==================================
+
+my_title6 = "Distributions of mediators facet by time: Log"
+
+p6 = ggplot(lf, aes(x = log10(value) , y = mediator)) +
+  facet_wrap (~timepoint) +
+  geom_density_ridges(aes(fill = factor(outcomes))
+                      , alpha = 0.6) +
+  theme(axis.text.x = element_text(size = 13)
+        , axis.text.y = element_text(size = 13
+                                     , angle = 0
+                                     , hjust = 1
+                                     , vjust = 0)
+        , axis.title.x = element_text(size = 13)
+        , axis.title.y = element_text(size = 13)
+        , plot.title = element_text(size = 15, hjust = 0.5)
+        , strip.text.x = element_text(size = 13, colour = "black")
+        , legend.title = element_text(color = "black", size = 13)
+        , legend.text = element_text(size = 13)
+        , legend.position = "right"
+        , legend.direction = "vertical") +
+  labs(title = my_title6
+       , x = "Log10(Levels)"
+       , y = "") +
+  scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered")) 
+
+p6
+
+#*******************************
+#7) ggrides: at T1
+#*******************************
+data_t1 = dplyr::filter(lf, timepoint == "t1")
+
+my_title7 = "Distributions of mediators at T1"
+
+p7 = ggplot(data_t1, aes(x = log10(value) , y = mediator)) +
+  geom_density_ridges(aes(fill = factor(outcomes))
+                      ,alpha = 0.6)  + 
+
+theme(axis.text.x = element_text(size = 13)
+      , axis.text.y = element_text(size = 13
+                                   , angle = 0
+                                   , hjust = 1
+                                   , vjust = 0)
+      , axis.title.x = element_text(size = 13)
+      , axis.title.y = element_text(size = 13)
+      , plot.title = element_text(size = 15, hjust = 0.5)
+      , strip.text.x = element_text(size = 13, colour = "black")
+      , legend.title = element_text(color = "black", size = 13)
+      , legend.text = element_text(size = 13)
+      , legend.position = "right"
+      , legend.direction = "vertical") +
+  labs(title = my_title7
+       , x = "Log10(Levels)"
+       , y = "") +
+  scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered")) 
+p7
+
+#*******************************
+#8) ggrides: at T2
+#*******************************
+data_t2 = dplyr::filter(lf, timepoint == "t2")
+
+my_title8 = "Distributions of mediators at T2"
+
+p8 = ggplot(data_t2, aes(x = log10(value) , y = mediator)) +
+  geom_density_ridges(aes(fill = factor(outcomes))
+                      , alpha = 0.6)  + 
+  
+  theme(axis.text.x = element_text(size = 13)
+        , axis.text.y = element_text(size = 13
+                                     , angle = 0
+                                     , hjust = 1
+                                     , vjust = 0)
+        , axis.title.x = element_text(size = 13)
+        , axis.title.y = element_text(size = 13)
+        , plot.title = element_text(size = 15, hjust = 0.5)
+        , strip.text.x = element_text(size = 13, colour = "black")
+        , legend.title = element_text(color = "black", size = 13)
+        , legend.text = element_text(size = 13)
+        , legend.position = "right"
+        , legend.direction = "vertical") +
+  labs(title = my_title8
+       , x = "Log10(Levels)"
+       , y = "") +
+  scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered")) 
+p8
+
+#*******************************
+#9) ggrides: at T3
+#*******************************
+data_t3 = dplyr::filter(lf, timepoint == "t3")
+
+my_title9 = "Distributions of mediators at T3"
+
+p9 = ggplot(data_t3, aes(x = log10(value) , y = mediator)) +
+  geom_density_ridges(aes(fill = factor(outcomes))
+                      , alpha = 0.6)  + 
+  
+  theme(axis.text.x = element_text(size = 13)
+        , axis.text.y = element_text(size = 13
+                                     , angle = 0
+                                     , hjust = 1
+                                     , vjust = 0)
+        , axis.title.x = element_text(size = 13)
+        , axis.title.y = element_text(size = 13)
+        , plot.title = element_text(size = 15, hjust = 0.5)
+        , strip.text.x = element_text(size = 13, colour = "black")
+        , legend.title = element_text(color = "black", size = 13)
+        , legend.text = element_text(size = 13)
+        , legend.position = "right"
+        , legend.direction = "vertical") +
+  labs(title = my_title9 
+       , x = "Log10(Levels)"
+       , y = "") +
+  scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered")) 
+p9
+
+#==================================
+# 10) ggrdiges facet
+# x = levels (Linear)
+# y = timepoints
+# colour = outcomes
+#==================================
+my_title10 = "Distributions of mediators: linear"
+
+p10 = ggplot(lf, aes(x = value , y = timepoint)) +
+  geom_density_ridges(aes(fill = factor(outcomes))
+                      , alpha = 0.6) +
+  facet_wrap(~mediator, nrow = 2, scales = "free") +
+theme(axis.text.x = element_text(size = 13)
+      , axis.text.y = element_text(size = 13
+                                   , angle = 0
+                                   , hjust = 1
+                                   , vjust = 0)
+      , axis.title.x = element_text(size = 13)
+      , axis.title.y = element_text(size = 13)
+      , plot.title = element_text(size = 20, hjust = 0.5)
+      , strip.text.x = element_text(size = 15, colour = "black")
+      , legend.title = element_text(color = "black", size = 20)
+      , legend.text = element_text(size = 15)
+      #, legend.position = "right"
+      , legend.direction = "vertical") +
+  labs(title = my_title10
+       , x = "Levels"
+       , y = "") +
+  scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered"))
+
+shift_legend2(p10)
+
+#==================================
+# 11) ggrdiges facet
+# x = levels (Log)
+# y = timepoints
+# colour = outcomes
+#==================================
+my_title11 = "Distributions of mediators: Log "
+
+p11 = ggplot(lf, aes(x = value , y = timepoint)) +
+  scale_x_log10()+
+  geom_density_ridges(aes(fill = factor(outcomes))
+                      , alpha = 0.6) +
+  facet_wrap(~mediator, nrow = 2, scales = "free") +
+  theme(axis.text.x = element_text(size = 13)
+        , axis.text.y = element_text(size = 13
+                                     , angle = 0
+                                     , hjust = 1
+                                     , vjust = 0)
+        , axis.title.x = element_text(size = 13)
+        , axis.title.y = element_text(size = 13)
+        , plot.title = element_text(size = 20, hjust = 0.5)
+        , strip.text.x = element_text(size = 15, colour = "black")
+        , legend.title = element_text(color = "black", size = 20)
+        , legend.text = element_text(size = 15)
+        #, legend.position = "right"
+        , legend.direction = "vertical") +
+  labs(title = my_title11
+       , x = "Levels(Log)"
+       , y = "") +
+  scale_fill_discrete(name = "Patient outcome", labels = c("Death", "Recovered"))
+
+shift_legend2(p11)
+
+dev.off()
+########################################################################
+
+
+########################################################################
+#                           Plots with wf data
+#                       clustering k means good example: TO DO?
+########################################################################
+
+cat("Plots will be in:", corr_and_hmap)
+pdf(corr_and_hmap , width = 15, height = 12)
+
+# =======================
+# 5) Correlation plot at T1
+#0.40 to 0.69	A moderate correlation
+#0.70 to 0.89	A strong correlation
+#0.90 to 1.00	A very strong correlation
+# =======================
+#library(ggcorrplot)
+wf = wf_data
+wf <- wf[order(wf$outcomes),]
+
+# add colour based on outcomes column to match colour grouping with ggplots
+my_red = "#F8766D"
+my_green = "#00BA38" 
+my_blue = "#619CFF"
+
+str(wf$outcomes)
+
+wf$idcol <- mapvalues(wf$outcomes
+                      , from = c(0,1)
+                      , to = c(my_red, my_blue))  
+
+n = which(colnames(wf) == "idcol"); print(n)
+
+# subset t1 data
+wf_t1 = cbind(wf[c(1:3,n)],  wf %>% dplyr:: select(grep("_t1", names(wf))))
+
+colnames(wf_t1)
+
+# assign nice column names
+my_colnames = c("id"
+                , "studygroup"
+                , "outcomes"
+                , "idcol"
+                , "Angiopoietin2"
+                , "PF"       
+                , "PSelectin"     
+                , "sESelectin"  
+                , "sICAM1"
+                , "sRAGE"     
+                , "sVCAM1")
+
+if (length(my_colnames) == length(my_colnames)){
+  print("PASS: Reassigning column names...")
+  colnames(wf_t1) = my_colnames 
+} else{
+  cat(paste0("FAIL:Cannot assign column names, length mismatch"
+             , "\nExpected length: ", length(orig_colanmes)
+             , "\nGot:", length(my_colnames)))
+  quit()
+}    
+
+print('formatted colnames are:'); print(colnames(wf_t1))
+
+# inspect
+class(wf_t1); str(wf_t1)
+
+# Compute a matrix of correlation p-values
+start_med = which(colnames(wf_t1) == "Angiopoietin2")
+end_med = which(colnames(wf_t1) == "sVCAM1")
+outcomes_col = which(colnames(wf_t1) == "outcomes")
+
+#=========== 
+# Data for corrrelations
+#===========
+
+### THIS PLOT CORR
+# Data: But this is not needed as this is inside the function
+corr_M = wf_t1[start_med:end_med]
+corr_data = wf_t1[c(outcomes_col, start_med:end_med)]
+
+#===========
+# Corrplot with R and P values
+#===========
+my_corr = psych::corr.test(corr_M
+                           , method = "spearman"
+                           , use =  "pairwise.complete.obs")$r
+
+my_pval =  psych::corr.test(corr_M
+                            , method = "spearman"
+                            , adjust = "none"
+                            , use =  "pairwise.complete.obs")$p
+
+# check: satisfied!
+psych::corr.test(wf_t1$sICAM1, wf_t1$PSelectin
+                 , method = "spearman"
+                 , use =  "pairwise.complete.obs")$r
+psych::corr.test(wf_t1$sICAM1, wf_t1$PSelectin
+                 , method = "spearman"
+                 , use =  "pairwise.complete.obs")$p
+
+#----------------
+# ggcorrplot: Not so good!
+#----------------
+#ggcorr_plot = ggcorrplot(my_corr
+#                         , p.mat = my_pval
+#                         #, method = "circle"
+#                         #, insig = "blank"
+#                         , hc.order = TRUE
+#                         , outline.col = "black"
+#                         , lab = T
+#                         , lab_size = 6
+#                         #, ggtheme = ggplot2::theme_gray
+#                         , colors = c("#6D9EC1", "white", "#E46726")
+#                         , title = "Spearman correlations: T1\n(X: not significant)")    
+
+
+#----------------
+# paris.panel
+#----------------
+
+corr_plot2 = pairs.panels(corr_data[2:length(corr_data)]
+                          , method = "spearman" # correlation method
+                          , hist.col = "grey" ##00AFBB
+                          , density = TRUE  # show density plots
+                          , ellipses = F # show correlation ellipses
+                          , stars = TRUE
+                          , rug = F
+                          , breaks = "Sturges"
+                          , show.points = T
+                          , bg = c(my_red, my_blue)[unclass(factor(corr_data$outcomes))]
+                          , pch = 21
+                          #, alpha = .05
+                          #, points(pch = 19, col = c("#f8766d", "#00bfc4"))
+                          , cex =2
+                          , cex.axis = 1.4
+                          , cex.labels = 2
+                          , cex.cor = 1
+                          , smooth = F
+                          , main = "Pairwise Spearman correlations with signifcance: T1"
+)
+
+corr_plot2
+
+# ===============
+# Heatmaps of mediators at T1
+# ===============
+library("RColorBrewer")
+library(gplots)
+
+#my_palette <- colorRampPalette(brewer.pal(10, "RdYlBu"))(n=50)
+#my_palette <- colorRampPalette(c("red", "white", "blue"))(n = 256)
+my_palette <- cm.colors(n = 256)
+my_palette <- cm.colors(n = 50)
+#my_palette <- topo.colors(n=256)
+
+# data for heatmap
+wf_t1_m = as.matrix(wf_t1[start_med:end_med])
+colnames(wf_t1_m) 
+
+#hmap_colnames = c("Angiopoietin2"
+#,"PF"       
+#,"PSelectin"     
+#,"sESelectin"  
+#,"sICAM1"
+#,"sRAGE"     
+#,"sVCAM1")
+
+#colnames(wf_t1_m) = hmap_colnames
+rownames(wf_t1_m) = wf_t1$id
+
+my_red = "#F8766D"
+my_green = "#00BA38" 
+my_blue = "#619CFF"
+
+# ===============
+# Heatmaps of mediators at T1
+# log scaled values
+# NO subject ordering
+# ===============
+
+hmap_data = "log10(wf_t1_m)"
+my_title_hmap1 = "Heirarchical clustering of mediators at T1: Log scaled values"
+
+heatmap.2(eval(parse(text = hmap_data))
+          , na.rm = T
+          , scale = "column"
+          , Rowv = F
+          , Colv = T
+          , dendrogram = "column"
+          , col =  my_palette
+          , trace = "none"
+          , density.info = "density"
+          #, RowSideColors = c(rep(my_red, 11), rep(my_green, 11), rep(my_blue, 9))
+          , RowSideColors = c(rep(my_red, 11), rep(my_blue, 20)) #version 3
+          , keysize = 0.9
+          #, key.xlab = "Z-score"
+          , cexRow = 1.75
+          , cexCol = 1.75
+          , sepcolor = "black"
+          , main = ""
+          , srtCol = 20)
+par(lend = 1)
+legend("topright"      
+       , legend = c("Death", "Recovered")
+       , col = c(my_red, my_blue)
+       , lty= 1       
+       , lwd = 10)
+title(main = my_title_hmap1, cex.main = 1.3)
+
+# ===============
+# Heatmaps of mediators at T1
+# log scaled values
+# no subject ordering
+# ===============
+#hmap_data = "wf_t1_m"
+hmap_data = "log10(wf_t1_m)"
+my_title_hmap2 = "Heirarchical clustering mediators and subjects at T1: Log scaled values"
+
+heatmap.2(eval(parse(text = hmap_data))
+          , scale = "column"
+          , Rowv = T
+          , Colv = T
+          , dendrogram = "both"
+          , col =  my_palette
+          , trace = "none"
+          , density.info = "density"
+          #, RowSideColors = c(rep(my_red, 11), rep(my_green, 11), rep(my_blue, 9))
+          , RowSideColors = c(rep(my_red, 11), rep(my_blue, 20)) #version 3
+          , keysize = 0.8
+          , key.xlab = "Z-score"
+          , cexRow = 1.75
+          , cexCol = 1.75
+          , sepcolor = "black"
+          , main = ""
+          , srtCol = 20)
+par(lend = 1)
+legend("topright"      
+       , legend = c("Death", "Recovered")
+       , col = c(my_red, my_blue)
+       , lty= 1       
+       , lwd = 10)
+title(main = my_title_hmap2, cex.main = 1.3)
+
+dev.off()
diff --git a/legend_adjustment.R b/legend_adjustment.R
new file mode 100755
index 0000000..b236a72
--- /dev/null
+++ b/legend_adjustment.R
@@ -0,0 +1,37 @@
+
+library(ggplot2)
+library(gtable)
+library(lemon)
+# https://stackoverflow.com/questions/54438495/shift-legend-into-empty-facets-of-a-faceted-plot-in-ggplot2
+
+shift_legend2 <- function(p) {
+  # check if p is a valid object
+  if(!(inherits(p, "gtable"))){
+    if(inherits(p, "ggplot")){
+      gp <- ggplotGrob(p) # convert to grob
+    } else {
+      message("This is neither a ggplot object nor a grob generated from ggplotGrob. Returning original plot.")
+      return(p)
+    }
+  } else {
+    gp <- p
+  }
+  
+  # check for unfilled facet panels
+  facet.panels <- grep("^panel", gp[["layout"]][["name"]])
+  empty.facet.panels <- sapply(facet.panels, function(i) "zeroGrob" %in% class(gp[["grobs"]][[i]]), 
+                               USE.NAMES = F)
+  empty.facet.panels <- facet.panels[empty.facet.panels]
+  
+  if(length(empty.facet.panels) == 0){
+    message("There are no unfilled facet panels to shift legend into. Returning original plot.")
+    return(p)
+  }
+  
+  # establish name of empty panels
+  empty.facet.panels <- gp[["layout"]][empty.facet.panels, ]
+  names <- empty.facet.panels$name
+  
+  # return repositioned legend
+  reposition_legend(p, 'center', panel=names)
+}
diff --git a/stats_corr.R b/stats_corr.R
new file mode 100755
index 0000000..af08849
--- /dev/null
+++ b/stats_corr.R
@@ -0,0 +1,89 @@
+#!/usr/bin/Rscript   
+getwd()
+setwd('~/git/covid_analysis/')
+getwd()
+############################################################
+# TASK: Pairwise correaltion and P-values: T1
+############################################################
+# source data
+source("read_data.R")
+
+# clear unwanted variables
+rm(my_data)
+############################################################
+#=============
+# Output
+#=============
+# corr analysis
+my_spearman_corr = paste0(outdir_stats, "corr_spearman_t1_v3.csv")
+my_pvals = paste0(outdir_stats, "corr_pvalues_t1_v3.csv")
+############################################################
+# data assignment for stats
+wf = wf_data
+lf = lf_data
+########################################################################
+#           Pairwise correaltions and analyses
+########################################################################
+
+head(wf$outcomes)
+wf <- wf[order(wf$outcomes),]
+
+# subset t1 data
+wf_t1 = cbind(wf[c(1:3)],  wf %>% dplyr:: select(grep("_t1", names(wf))))
+colnames(wf_t1)
+
+# assign nice column names
+my_colnames = c("id"
+                , "studygroup"
+                , "outcomes"
+                #, "idcol"
+                , "Angiopoietin2"
+                , "PF"       
+                , "PSelectin"     
+                , "sESelectin"  
+                , "sICAM1"
+                , "sRAGE"     
+                , "sVCAM1")
+
+if (length(my_colnames) == length(my_colnames)){
+  print("PASS: Reassigning column names...")
+  colnames(wf_t1) = my_colnames 
+} else{
+  cat(paste0("FAIL:Cannot assign column names, length mismatch"
+             , "\nExpected length: ", length(orig_colanmes)
+             , "\nGot:", length(my_colnames)))
+  quit()
+}    
+
+print('formatted colnames are:'); print(colnames(wf_t1))
+
+# inspect
+class(wf_t1); str(wf_t1)
+
+# Compute a matrix of correlation p-values
+start_med = which(colnames(wf_t1) == "Angiopoietin2")
+end_med = which(colnames(wf_t1) == "sVCAM1")
+
+#---------------
+# Data for corr analysis
+#---------------
+corr_M = wf_t1[start_med:end_med]
+
+my_corr1 = cor(corr_M
+              , method = "spearman"
+              , use =  "pairwise.complete.obs")
+
+my_corr = psych::corr.test(corr_M
+                           , method = "spearman"
+                           , use =  "pairwise.complete.obs")$r
+
+my_pval =  psych::corr.test(corr_M
+                            , method = "spearman"
+                            , adjust = "none"
+                            , use =  "pairwise.complete.obs")$p
+
+#******************
+# write output file: correlations and pvalues
+#******************
+write.csv(my_corr, my_spearman_corr, row.names = TRUE)
+write.csv(my_pval, my_pvals, row.names = TRUE)