covid_analysis/boxplot_t1.R

160 lines
5.6 KiB
R
Executable file

#!/usr/bin/Rscript
getwd()
setwd('~/git/covid_analysis/')
getwd()
############################################################
# TASK: boxplots at T1
# FIXME: currently not rendering, problem with NAs for stats?
############################################################
# source data
source("read_data.R")
# clear unwanted variables
rm(my_data)
############################################################
#=============
# Output: boxplot at T1 for all mediators
# currently ununsed
#=============
output_boxplot_t1 = paste0(outdir, "boxplot_t1_v3.pdf")
#%%========================================================
# read file
# data assignment for plots
wf = wf_data
lf = lf_data
#=====================
# data for plots: LF@T1
#=====================
table(lf$timepoint)
lf_t1 = lf[lf$timepoint == "t1",]
dim(lf_t1); str(lf_t1)
levels(factor(lf_t1$mediator)); table(lf_t1$mediator)
lf_t1$outcome_category = as.factor(lf_t1$outcomes)
levels(lf_t1$outcome_category)
lf_t1$outcomes = as.factor(lf_t1$outcomes)
str(lf_t1$outcomes)
#%%========================================================
# count na in mediator col
if (table(is.na(lf_t1$mediator)) == nrow(lf_t1)){
print("PASS: No NAs detected, lf data is good for plotting")
}else{
print("FAIL: NAs detected in mediator columna. Check source formatting for lf data")
#lf <- lf[!is.na(lf$mediator),]
#head(lf); str(lf)
quit()
}
# FIXME : stats on log 10 value
# ===========
# stats
# ===========
# with adjustment: also gives unadjusted P-values (BH and fdr are the same)
my_stat = compare_means(value~outcomes, group.by = "mediator"
, data = lf_t1
, paired = FALSE
, p.adjust.method = "BH")
#, p.adjust.method = "bonferroni")
# !! check: satsified!!
wf_death = wf_data[wf_data$outcomes == 0,]; table(wf_death$outcomes)
wf_recovered = wf_data[wf_data$outcomes == 1,]; table(wf_recovered$outcomes)
wilcox.test(wf_death$Angiopoietin2_pgmL_t1, wf_recovered$Angiopoietin2_pgmL_t1, paired = F)
wilcox.test(wf_death$sICAM1_ngmL_t1, wf_recovered$sICAM1_ngmL_t1, paired = F)
my_comparisons <- list( c(0, 1) )
my_comparisons <- list( c("0", "1") )
#====================================
# Output plots as one pdf
cat("Output plots will be in:", output_boxplot_t1)
pdf(output_plots, width=15, height=12)
# ====================================
# 1) Boxplot with facet wrap:
# x = time
# y = Linear (Levels)
# coloured: outcome
# ====================================
y_value = "value"
my_title1 = "Boxplots of mediators at t1: linear scale"
p1 = ggplot(lf_t1, aes(x = factor(outcomes), y = eval(parse(text=y_value)) )) +
facet_wrap(~ mediator, nrow = 2, scales = "free_y") +
geom_boxplot(fill = "white", outlier.colour = NA
, width = 0.5) +
geom_point(position = position_jitterdodge(dodge.width=0.01)
, aes(colour = factor(outcomes))) +
theme(axis.text.x = element_text(size = 15)
, axis.text.y = element_text(size = 15
, angle = 0
, hjust = 1
, vjust = 0)
, axis.title.x = element_text(size = 15)
, axis.title.y = element_text(size = 15)
, plot.title = element_text(size = 20, hjust = 0.5)
, strip.text.x = element_text(size = 15, colour = "black")
, legend.title = element_text(color = "black", size = 20)
, legend.text = element_text(size = 15)
, legend.direction = "vertical") +
labs(title = my_title1
, x = ""
, y = "Levels") +
scale_colour_discrete(name = "Patient outcome", labels = c("Non-survivors", "Survivors"))+
#guides(fill = guide_legend(reverse = T))
#scale_colour_manual(values = my_outcome_cols)
stat_compare_means(comparisons = my_comparisons
, method = "wilcox.test"
, paired = F
, label = "p.format")
shift_legend2(p1)
# ====================================
# 2) Boxplot with facet wrap:
# x = time
# y = Log(Levels)
# coloured: outcome
# ====================================
#y_value = "log10(value)" # shows log transformed scale
y_value = "value"
my_title2 = "Boxplots of mediators at t1: log scale"
p2 = ggplot(lf_t1, aes(x = factor(outcomes), y = eval(parse(text=y_value)) )) +
facet_wrap(~ mediator, nrow = 2, scales = "free_y") +
scale_y_log10()+
geom_boxplot(fill = "white", outlier.colour = NA
, width = 0.5) +
geom_point(position = position_jitterdodge(dodge.width=0.01)
, aes(colour = factor(outcomes))) +
theme(axis.text.x = element_text(size = 15)
, axis.text.y = element_text(size = 15
, angle = 0
, hjust = 1
, vjust = 0)
, axis.title.x = element_text(size = 15)
, axis.title.y = element_text(size = 15)
, plot.title = element_text(size = 20, hjust = 0.5)
, strip.text.x = element_text(size = 15, colour = "black")
, legend.title = element_text(color = "black", size = 20)
, legend.text = element_text(size = 15)
, legend.direction = "vertical") +
labs(title = my_title2
, x = ""
, y = "Levels (Log10)") +
scale_colour_discrete(name = "Patient outcome", labels = c("Non-survivors", "Survivors"))+
#scale_colour_manual(values = my_outcome_cols)
stat_compare_means(comparisons = my_comparisons
, method = "wilcox.test"
, paired = F
, label = "p.format")
shift_legend2(p2)
dev.off()