added boxplots_stat_ob_paper.R and plot_data_ob_paper.R

This commit is contained in:
Tanushree Tunstall 2022-11-18 17:20:07 +00:00
parent dc7e277a72
commit 7011dc75fa
2 changed files with 447 additions and 0 deletions

225
boxplots_stat_ob_paper.R Normal file
View file

@ -0,0 +1,225 @@
#!/usr/bin/Rscript
getwd()
setwd("~/git/mosaic_2020/")
getwd()
###############################################################
# TASK: boxplots with stats for NPA, SAM and SERUM
# for each mediator, at each timepoint by group
# TODO: check boxplot_stat_function.R
# output plots even if stats fail due to data containing only LLODs
# however for a given mediator, if there is error for ANY timepoint
# stats will not be added for the entire plot! To be handled in
# next iteration!
###############################################################
#=============
# Input
#=============
source("boxplot_stat_function.R")
source("plot_data_ob_paper.R")
#=============
# Output
#=============
outdir_plots_paper = "/home/tanu/git/mosaic_2020/output/plots_paper/"
#outfile_bp = paste0("boxplots_stats_paper-%03d.png")
#output_boxplot_stats = paste0(outdir_plots_paper, outfile_bp); output_boxplot_stats
#svg(output_boxplot_stats, width=22, height=16)
#png(output_boxplot_stats, width = 11, height = 8, units = "in", res = 300)
###############################################################
#===============================
# data assignment for plots
#===============================
#-------------
# NPA: VL
#-------------
#linear
cat("plotting for FP adult patients:",
length(unique(vl_lf_plot_df$mosaic)))
# linear : but data is not visible
plots_npa_vl = doMyPlotsStats(vl_lf_plot_df)
npa_plot_vl = ggpubr::ggarrange(plotlist = plots_npa_vl
, align = "hv"
#, legend = c(0.8,1)
#, legend = "bottom"
, common.legend = T)
npa_plot_vl
# log
plots_npa_vl_log = doMyPlotsStats(vl_lf_plot_df_log)
npa_plot_vl_log = ggpubr::ggarrange(plotlist = plots_npa_vl_log
, align = "hv"
, common.legend = T
)
npa_plot_vl_log
############################################################
mediator_order = c("IFN-α2a","IFN-β", "IFN-γ", "IL-1", "IL-6", "CXCL8", "CXCL10", "TNF-α")
#-----------------------------
# NPA: selected mediators
#-----------------------------
# linear
plots_npa = doMyPlotsStats(npa_lf_plot_df, custom_order = mediator_order)
npa_plot_1 = ggpubr::ggarrange(plotlist = plots_npa
, align = "hv"
, ncol = 8
, nrow = 1
, common.legend = T)
npa_plot = annotate_figure(npa_plot_1,
top = text_grob("NPA"
, color = "black"
#, face = "bold"
, size = 14))
# log
plots_npa_log = doMyPlotsStats(npa_lf_plot_df_log, custom_order = mediator_order)
npa_plot_log_1 = ggpubr::ggarrange(plotlist = plots_npa_log
, align = "hv"
, ncol = 8
, nrow = 1
, common.legend = T)
npa_plot_log = annotate_figure(npa_plot_log_1,
top = text_grob("NPA"
, color = "black"
#, face = "bold"
, size = 14))
npa_plot_log
#-------------------------------
# SERUM: selected mediators
#--------------------------------
# linear
plots_serum = doMyPlotsStats(serum_lf_plot_df, custom_order = mediator_order)
serum_plot_1 = ggpubr::ggarrange(plotlist = plots_serum
, align = "hv"
, ncol = 8
, nrow = 1
, common.legend = F
)
serum_plot = annotate_figure(serum_plot_1,
top = text_grob("Serum"
, color = "black"
#, face = "bold"
, size = 14))
serum_plot
# log
plots_serum_log = doMyPlotsStats(serum_lf_plot_log, custom_order = mediator_order)
serum_plot_log_1 = ggpubr::ggarrange(plotlist = plots_serum_log
, align = "hv"
, ncol = 8
, nrow = 1
, common.legend = F)
serum_plot_log = annotate_figure(serum_plot_log_1,
top = text_grob("Serum"
, color = "black"
#, face = "bold"
, size = 14))
serum_plot_log
##############################################################
##############################################################
cat ("Outplots will be in:", outdir_plots_paper)
#======================
# Output: Linear graphs
#======================
# viral load: linear
viral_load_annot = annotate_figure(
npa_plot_vl
, left = text_grob("pg/ml"
, color = "black"
, rot = 90
, size = 18)
)
png(paste0(outdir_plots_paper, "viral_load_linear"),
width = 5,
height = 5,
units = "in",
res = 300)
viral_load_annot
dev.off()
# npa and serum mediators: linear
outplot_linear = cowplot::plot_grid(
npa_plot,
serum_plot,
nrow = 2,
rel_heights = c(1.15,1)
)
outplot_linear_annot = annotate_figure(
outplot_linear
, left = text_grob("pg/ml"
, color = "black"
, rot = 90
, size = 16)
)
png(paste0(outdir_plots_paper, "npa_serum_linear"),
width = 18,
height = 6,
units = "in",
res = 300)
outplot_linear_annot
dev.off()
#======================
# Output: Log graphs
#======================
# viral load: log
viral_load_log_annot = annotate_figure(
npa_plot_vl_log
, left = text_grob("Log10 (pg/ml)"
, color = "black"
, rot = 90
, size = 18)
)
png(paste0(outdir_plots_paper, "viral_load_log" ),
width = 5,
height = 5,
units = "in",
res = 300)
viral_load_log_annot
dev.off()
# npa and serum mediators: log
outplot_log = cowplot::plot_grid(
npa_plot_log,
serum_plot_log,
nrow = 2,
rel_heights = c(1.15,1)
)
outplot_log_annot = annotate_figure(
outplot_log
, left = text_grob("Log10 (pg/ml)"
, color = "black"
, rot = 90
, size = 16)
)
png(paste0(outdir_plots_paper, "npa_serum_log"),
width = 18,
height = 6,
units = "in",
res = 300)
outplot_log_annot
dev.off()
###############################################################
# END OF SCRIPT #
###############################################################

222
plot_data_ob_paper.R Normal file
View file

@ -0,0 +1,222 @@
#!/usr/bin/Rscript
getwd()
setwd("~/git/mosaic_2020/")
getwd()
############################################################
# Graphs for paper?
#============
# Mediators required
#============
#NPA and Serum
#IFNa2a
#IFN-b
#IFN-g
#IL-1
#IL-6
#IL-8
#IP-10
#TNF-a
#vl_pfu_ul
#Figure 1a: Study design
#Figure 1b: Viral load
#Figure 1c: NPA
#Figure 1d: Serum
############################################################
#=============
# Input
#=============
source("data_extraction_mediators.R")
# check: adult variable and age variable discrepancy!
metadata_all$mosaic[metadata_all$adult==1 & metadata_all$age<=18]
#===============================
# data assignment for plots
#================================
#time_letter = "t"
time_letter = "T"
#-----------
# npa
#-----------
wf_fp_npa = npa_wf[npa_wf$flustat == 1,]
lf_fp_npa = npa_lf[npa_lf$flustat == 1,]
lf_fp_npa$timepoint = paste0(time_letter, lf_fp_npa$timepoint)
lf_fp_npa$timepoint = as.factor(lf_fp_npa$timepoint)
lf_fp_npa$obesity = as.factor(lf_fp_npa$obesity)
table(lf_fp_npa$mediator)
head(lf_fp_npa$value[lf_fp_npa$mediator == "vitd"])
lf_fp_npa = lf_fp_npa[!lf_fp_npa$mediator == "vitd",]
table(lf_fp_npa$mediator)
table(lf_fp_npa$sample_type, lf_fp_npa$timepoint)
a = lf_fp_npa %>%
select(matches("^(vl_pfu|timepoint|mediator|sample_type)"))
table(a$sample_type)
#-----------
# serum
#-----------
wf_fp_serum = serum_wf[serum_wf$flustat == 1,]
lf_fp_serum = serum_lf[serum_lf$flustat == 1,]
lf_fp_serum$timepoint = paste0(time_letter, lf_fp_serum$timepoint)
lf_fp_serum$timepoint = as.factor(lf_fp_serum$timepoint)
lf_fp_serum$obesity = as.factor(lf_fp_serum$obesity)
head(lf_fp_serum$value[lf_fp_serum$mediator == "vitd"])
########################################################################
rm(npa_lf, sam_lf, serum_lf)
########################################################################
viral_load_npa = c("vl_pfu_ul")
selected_mediators = c("ifna2a", "ifnb", "ifn", "il1", "il6", "il8", "ip10", "tnf")
cols_to_select = c("mosaic", "obesity", "flustat", "asthma",
"sample_type", "timepoint", "mediator", "value" )
# sanity checks
table(lf_fp_npa$mediator, lf_fp_npa$timepoint)
class(lf_fp_npa$mediator)
str(lf_fp_npa)
table(lf_fp_npa$mediator)
# selected dfs for plotting for paper
#--------
# VL: NPA
#--------
vl_lf_plot = lf_fp_npa[lf_fp_npa$mediator%in%viral_load_npa,]
vl_lf_plot_df = vl_lf_plot[,colnames(vl_lf_plot)%in%cols_to_select]
a6 = vl_lf_plot_df
table(vl_lf_plot $mediator, vl_lf_plot$timepoint, vl_lf_plot$sample_type)
table(vl_lf_plot$sample_type)
table(lf_fp_npa$sample_type)
a5 = npa_wf%>%
select(matches("^(vl_pfu|mosaic)"))
identical(a5$vl_pfu_ul_npa1, a5$vl_pfu_ul_npa1.1)
# Minor fix: somehow upstream introduced this...
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."])
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa"])
vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."] <- "npa"
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."])
table(vl_lf_plot_df$sample_type)
#==========================
# DFs for plotting: VL NPA
#==========================
# linear: raw values
dim(vl_lf_plot_df)
table(vl_lf_plot_df$mediator, vl_lf_plot_df$timepoint)
table(vl_lf_plot_df$sample_type, vl_lf_plot_df$timepoint)
# display names
vl_lf_plot_df$mediator[vl_lf_plot_df$mediator == "vl_pfu_ul"] <- "Viral load" # U+03B1
# log values: log10
vl_lf_plot_df_log = vl_lf_plot_df
table(vl_lf_plot_df_log$mediator=="Viral load")
# vl data juggling with 0!
vl_lf_plot_df_log$value[1:30]
table(vl_lf_plot_df_log$value == 0)
vl_lf_plot_df_log$value[vl_lf_plot_df_log$value == 0] <- 1
vl_lf_plot_df_log$value[1:30]
table(vl_lf_plot_df_log$value == 1)
orig_nrow = nrow(vl_lf_plot_df_log); orig_nrow; table(vl_lf_plot_df_log$mediator)
head(vl_lf_plot_df_log$value)
# log all values and reassign
vl_lf_plot_df_log$value = log10(vl_lf_plot_df_log$value)
vl_lf_plot_df_log$value[1:30]
##############################################################
#--------
# NPA: mediators
#--------
# linear: raw values
npa_lf_plot = lf_fp_npa[lf_fp_npa$mediator%in%selected_mediators,]
npa_lf_plot_df = npa_lf_plot[,colnames(npa_lf_plot)%in%cols_to_select]
table(npa_lf_plot_df$sample_type)
table(npa_lf_plot_df$sample_type,npa_lf_plot_df$timepoint, npa_lf_plot_df$mediator )
# Display names
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il1"] <- "IL-1"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il6"] <- "IL-6"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il8"] <- "CXCL8"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ip10"] <- "CXCL10"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1
# check display names
table(npa_lf_plot_df$mediator)
# log values: log10
npa_lf_plot_df_log = npa_lf_plot_df
table(npa_lf_plot_df_log$value == 1.000000e+04)
table(npa_lf_plot_df_log$value == 0)
# log all values and reassign
npa_lf_plot_df_log$value[1:10]
npa_lf_plot_df_log$value = log10(npa_lf_plot_df_log$value)
npa_lf_plot_df_log$value[1:10]
# check display names
table(npa_lf_plot_df_log$mediator)
#--------
# Serum: mediators
#--------
# linear: raw values
serum_lf_plot = lf_fp_serum[lf_fp_serum$mediator%in%selected_mediators,]
serum_lf_plot_df = serum_lf_plot[,colnames(serum_lf_plot)%in%cols_to_select]
table(serum_lf_plot_df$sample_type)
table(serum_lf_plot_df$sample_type,serum_lf_plot_df$timepoint, serum_lf_plot_df$mediator )
# Display names: Serum
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il1"] <- "IL-1"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il6"] <- "IL-6"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il8"] <- "CXCL8"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ip10"] <- "CXCL10"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1
# check display names
table(serum_lf_plot_df$mediator)
# log values: log10
serum_lf_plot_log = serum_lf_plot_df
table(serum_lf_plot_log$value == 1.000000e+04)
table(serum_lf_plot_log$value == 0)
# log all values and reassign
serum_lf_plot_log$value[1:10]
serum_lf_plot_log$value = log10(serum_lf_plot$value)
serum_lf_plot_log$value[1:10]
# check display names
table(serum_lf_plot_log$mediator)
########################################################################
# remove variables
rm(vl_lf_plot, npa_lf_plot, serum_lf_plot)