added boxplots_stat_ob_paper.R and plot_data_ob_paper.R
This commit is contained in:
parent
dc7e277a72
commit
7011dc75fa
2 changed files with 447 additions and 0 deletions
225
boxplots_stat_ob_paper.R
Normal file
225
boxplots_stat_ob_paper.R
Normal file
|
@ -0,0 +1,225 @@
|
||||||
|
#!/usr/bin/Rscript
|
||||||
|
getwd()
|
||||||
|
setwd("~/git/mosaic_2020/")
|
||||||
|
getwd()
|
||||||
|
###############################################################
|
||||||
|
# TASK: boxplots with stats for NPA, SAM and SERUM
|
||||||
|
# for each mediator, at each timepoint by group
|
||||||
|
|
||||||
|
# TODO: check boxplot_stat_function.R
|
||||||
|
# output plots even if stats fail due to data containing only LLODs
|
||||||
|
# however for a given mediator, if there is error for ANY timepoint
|
||||||
|
# stats will not be added for the entire plot! To be handled in
|
||||||
|
# next iteration!
|
||||||
|
###############################################################
|
||||||
|
|
||||||
|
#=============
|
||||||
|
# Input
|
||||||
|
#=============
|
||||||
|
source("boxplot_stat_function.R")
|
||||||
|
source("plot_data_ob_paper.R")
|
||||||
|
|
||||||
|
#=============
|
||||||
|
# Output
|
||||||
|
#=============
|
||||||
|
outdir_plots_paper = "/home/tanu/git/mosaic_2020/output/plots_paper/"
|
||||||
|
|
||||||
|
#outfile_bp = paste0("boxplots_stats_paper-%03d.png")
|
||||||
|
#output_boxplot_stats = paste0(outdir_plots_paper, outfile_bp); output_boxplot_stats
|
||||||
|
#svg(output_boxplot_stats, width=22, height=16)
|
||||||
|
#png(output_boxplot_stats, width = 11, height = 8, units = "in", res = 300)
|
||||||
|
|
||||||
|
###############################################################
|
||||||
|
#===============================
|
||||||
|
# data assignment for plots
|
||||||
|
#===============================
|
||||||
|
#-------------
|
||||||
|
# NPA: VL
|
||||||
|
#-------------
|
||||||
|
#linear
|
||||||
|
cat("plotting for FP adult patients:",
|
||||||
|
length(unique(vl_lf_plot_df$mosaic)))
|
||||||
|
|
||||||
|
# linear : but data is not visible
|
||||||
|
plots_npa_vl = doMyPlotsStats(vl_lf_plot_df)
|
||||||
|
npa_plot_vl = ggpubr::ggarrange(plotlist = plots_npa_vl
|
||||||
|
, align = "hv"
|
||||||
|
#, legend = c(0.8,1)
|
||||||
|
#, legend = "bottom"
|
||||||
|
, common.legend = T)
|
||||||
|
npa_plot_vl
|
||||||
|
|
||||||
|
# log
|
||||||
|
plots_npa_vl_log = doMyPlotsStats(vl_lf_plot_df_log)
|
||||||
|
npa_plot_vl_log = ggpubr::ggarrange(plotlist = plots_npa_vl_log
|
||||||
|
, align = "hv"
|
||||||
|
, common.legend = T
|
||||||
|
)
|
||||||
|
npa_plot_vl_log
|
||||||
|
|
||||||
|
############################################################
|
||||||
|
mediator_order = c("IFN-α2a","IFN-β", "IFN-γ", "IL-1", "IL-6", "CXCL8", "CXCL10", "TNF-α")
|
||||||
|
|
||||||
|
#-----------------------------
|
||||||
|
# NPA: selected mediators
|
||||||
|
#-----------------------------
|
||||||
|
# linear
|
||||||
|
plots_npa = doMyPlotsStats(npa_lf_plot_df, custom_order = mediator_order)
|
||||||
|
npa_plot_1 = ggpubr::ggarrange(plotlist = plots_npa
|
||||||
|
, align = "hv"
|
||||||
|
, ncol = 8
|
||||||
|
, nrow = 1
|
||||||
|
, common.legend = T)
|
||||||
|
npa_plot = annotate_figure(npa_plot_1,
|
||||||
|
top = text_grob("NPA"
|
||||||
|
, color = "black"
|
||||||
|
#, face = "bold"
|
||||||
|
, size = 14))
|
||||||
|
|
||||||
|
# log
|
||||||
|
plots_npa_log = doMyPlotsStats(npa_lf_plot_df_log, custom_order = mediator_order)
|
||||||
|
npa_plot_log_1 = ggpubr::ggarrange(plotlist = plots_npa_log
|
||||||
|
, align = "hv"
|
||||||
|
, ncol = 8
|
||||||
|
, nrow = 1
|
||||||
|
, common.legend = T)
|
||||||
|
npa_plot_log = annotate_figure(npa_plot_log_1,
|
||||||
|
top = text_grob("NPA"
|
||||||
|
, color = "black"
|
||||||
|
#, face = "bold"
|
||||||
|
, size = 14))
|
||||||
|
npa_plot_log
|
||||||
|
#-------------------------------
|
||||||
|
# SERUM: selected mediators
|
||||||
|
#--------------------------------
|
||||||
|
# linear
|
||||||
|
plots_serum = doMyPlotsStats(serum_lf_plot_df, custom_order = mediator_order)
|
||||||
|
serum_plot_1 = ggpubr::ggarrange(plotlist = plots_serum
|
||||||
|
, align = "hv"
|
||||||
|
, ncol = 8
|
||||||
|
, nrow = 1
|
||||||
|
, common.legend = F
|
||||||
|
)
|
||||||
|
|
||||||
|
serum_plot = annotate_figure(serum_plot_1,
|
||||||
|
top = text_grob("Serum"
|
||||||
|
, color = "black"
|
||||||
|
#, face = "bold"
|
||||||
|
, size = 14))
|
||||||
|
serum_plot
|
||||||
|
|
||||||
|
# log
|
||||||
|
plots_serum_log = doMyPlotsStats(serum_lf_plot_log, custom_order = mediator_order)
|
||||||
|
serum_plot_log_1 = ggpubr::ggarrange(plotlist = plots_serum_log
|
||||||
|
, align = "hv"
|
||||||
|
, ncol = 8
|
||||||
|
, nrow = 1
|
||||||
|
, common.legend = F)
|
||||||
|
|
||||||
|
serum_plot_log = annotate_figure(serum_plot_log_1,
|
||||||
|
top = text_grob("Serum"
|
||||||
|
, color = "black"
|
||||||
|
#, face = "bold"
|
||||||
|
, size = 14))
|
||||||
|
|
||||||
|
serum_plot_log
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
##############################################################
|
||||||
|
cat ("Outplots will be in:", outdir_plots_paper)
|
||||||
|
#======================
|
||||||
|
# Output: Linear graphs
|
||||||
|
#======================
|
||||||
|
# viral load: linear
|
||||||
|
viral_load_annot = annotate_figure(
|
||||||
|
npa_plot_vl
|
||||||
|
, left = text_grob("pg/ml"
|
||||||
|
, color = "black"
|
||||||
|
, rot = 90
|
||||||
|
, size = 18)
|
||||||
|
)
|
||||||
|
|
||||||
|
png(paste0(outdir_plots_paper, "viral_load_linear"),
|
||||||
|
width = 5,
|
||||||
|
height = 5,
|
||||||
|
units = "in",
|
||||||
|
res = 300)
|
||||||
|
|
||||||
|
viral_load_annot
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# npa and serum mediators: linear
|
||||||
|
outplot_linear = cowplot::plot_grid(
|
||||||
|
npa_plot,
|
||||||
|
serum_plot,
|
||||||
|
nrow = 2,
|
||||||
|
rel_heights = c(1.15,1)
|
||||||
|
)
|
||||||
|
|
||||||
|
outplot_linear_annot = annotate_figure(
|
||||||
|
outplot_linear
|
||||||
|
, left = text_grob("pg/ml"
|
||||||
|
, color = "black"
|
||||||
|
, rot = 90
|
||||||
|
, size = 16)
|
||||||
|
)
|
||||||
|
|
||||||
|
png(paste0(outdir_plots_paper, "npa_serum_linear"),
|
||||||
|
width = 18,
|
||||||
|
height = 6,
|
||||||
|
units = "in",
|
||||||
|
res = 300)
|
||||||
|
outplot_linear_annot
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
#======================
|
||||||
|
# Output: Log graphs
|
||||||
|
#======================
|
||||||
|
# viral load: log
|
||||||
|
viral_load_log_annot = annotate_figure(
|
||||||
|
npa_plot_vl_log
|
||||||
|
, left = text_grob("Log10 (pg/ml)"
|
||||||
|
, color = "black"
|
||||||
|
, rot = 90
|
||||||
|
, size = 18)
|
||||||
|
)
|
||||||
|
|
||||||
|
png(paste0(outdir_plots_paper, "viral_load_log" ),
|
||||||
|
width = 5,
|
||||||
|
height = 5,
|
||||||
|
units = "in",
|
||||||
|
res = 300)
|
||||||
|
|
||||||
|
viral_load_log_annot
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# npa and serum mediators: log
|
||||||
|
outplot_log = cowplot::plot_grid(
|
||||||
|
npa_plot_log,
|
||||||
|
serum_plot_log,
|
||||||
|
nrow = 2,
|
||||||
|
rel_heights = c(1.15,1)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
outplot_log_annot = annotate_figure(
|
||||||
|
outplot_log
|
||||||
|
, left = text_grob("Log10 (pg/ml)"
|
||||||
|
, color = "black"
|
||||||
|
, rot = 90
|
||||||
|
, size = 16)
|
||||||
|
)
|
||||||
|
|
||||||
|
png(paste0(outdir_plots_paper, "npa_serum_log"),
|
||||||
|
width = 18,
|
||||||
|
height = 6,
|
||||||
|
units = "in",
|
||||||
|
res = 300)
|
||||||
|
outplot_log_annot
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################
|
||||||
|
# END OF SCRIPT #
|
||||||
|
###############################################################
|
222
plot_data_ob_paper.R
Normal file
222
plot_data_ob_paper.R
Normal file
|
@ -0,0 +1,222 @@
|
||||||
|
#!/usr/bin/Rscript
|
||||||
|
getwd()
|
||||||
|
setwd("~/git/mosaic_2020/")
|
||||||
|
getwd()
|
||||||
|
############################################################
|
||||||
|
# Graphs for paper?
|
||||||
|
#============
|
||||||
|
# Mediators required
|
||||||
|
#============
|
||||||
|
#NPA and Serum
|
||||||
|
|
||||||
|
#IFNa2a
|
||||||
|
#IFN-b
|
||||||
|
#IFN-g
|
||||||
|
#IL-1
|
||||||
|
#IL-6
|
||||||
|
#IL-8
|
||||||
|
#IP-10
|
||||||
|
#TNF-a
|
||||||
|
#vl_pfu_ul
|
||||||
|
|
||||||
|
#Figure 1a: Study design
|
||||||
|
#Figure 1b: Viral load
|
||||||
|
#Figure 1c: NPA
|
||||||
|
#Figure 1d: Serum
|
||||||
|
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
#=============
|
||||||
|
# Input
|
||||||
|
#=============
|
||||||
|
source("data_extraction_mediators.R")
|
||||||
|
|
||||||
|
# check: adult variable and age variable discrepancy!
|
||||||
|
metadata_all$mosaic[metadata_all$adult==1 & metadata_all$age<=18]
|
||||||
|
|
||||||
|
#===============================
|
||||||
|
# data assignment for plots
|
||||||
|
#================================
|
||||||
|
#time_letter = "t"
|
||||||
|
time_letter = "T"
|
||||||
|
|
||||||
|
#-----------
|
||||||
|
# npa
|
||||||
|
#-----------
|
||||||
|
wf_fp_npa = npa_wf[npa_wf$flustat == 1,]
|
||||||
|
lf_fp_npa = npa_lf[npa_lf$flustat == 1,]
|
||||||
|
lf_fp_npa$timepoint = paste0(time_letter, lf_fp_npa$timepoint)
|
||||||
|
lf_fp_npa$timepoint = as.factor(lf_fp_npa$timepoint)
|
||||||
|
lf_fp_npa$obesity = as.factor(lf_fp_npa$obesity)
|
||||||
|
|
||||||
|
table(lf_fp_npa$mediator)
|
||||||
|
head(lf_fp_npa$value[lf_fp_npa$mediator == "vitd"])
|
||||||
|
lf_fp_npa = lf_fp_npa[!lf_fp_npa$mediator == "vitd",]
|
||||||
|
table(lf_fp_npa$mediator)
|
||||||
|
|
||||||
|
table(lf_fp_npa$sample_type, lf_fp_npa$timepoint)
|
||||||
|
|
||||||
|
a = lf_fp_npa %>%
|
||||||
|
select(matches("^(vl_pfu|timepoint|mediator|sample_type)"))
|
||||||
|
table(a$sample_type)
|
||||||
|
|
||||||
|
#-----------
|
||||||
|
# serum
|
||||||
|
#-----------
|
||||||
|
wf_fp_serum = serum_wf[serum_wf$flustat == 1,]
|
||||||
|
lf_fp_serum = serum_lf[serum_lf$flustat == 1,]
|
||||||
|
lf_fp_serum$timepoint = paste0(time_letter, lf_fp_serum$timepoint)
|
||||||
|
lf_fp_serum$timepoint = as.factor(lf_fp_serum$timepoint)
|
||||||
|
lf_fp_serum$obesity = as.factor(lf_fp_serum$obesity)
|
||||||
|
|
||||||
|
head(lf_fp_serum$value[lf_fp_serum$mediator == "vitd"])
|
||||||
|
|
||||||
|
########################################################################
|
||||||
|
rm(npa_lf, sam_lf, serum_lf)
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################
|
||||||
|
viral_load_npa = c("vl_pfu_ul")
|
||||||
|
selected_mediators = c("ifna2a", "ifnb", "ifn", "il1", "il6", "il8", "ip10", "tnf")
|
||||||
|
cols_to_select = c("mosaic", "obesity", "flustat", "asthma",
|
||||||
|
"sample_type", "timepoint", "mediator", "value" )
|
||||||
|
|
||||||
|
# sanity checks
|
||||||
|
table(lf_fp_npa$mediator, lf_fp_npa$timepoint)
|
||||||
|
class(lf_fp_npa$mediator)
|
||||||
|
str(lf_fp_npa)
|
||||||
|
table(lf_fp_npa$mediator)
|
||||||
|
|
||||||
|
# selected dfs for plotting for paper
|
||||||
|
#--------
|
||||||
|
# VL: NPA
|
||||||
|
#--------
|
||||||
|
vl_lf_plot = lf_fp_npa[lf_fp_npa$mediator%in%viral_load_npa,]
|
||||||
|
vl_lf_plot_df = vl_lf_plot[,colnames(vl_lf_plot)%in%cols_to_select]
|
||||||
|
a6 = vl_lf_plot_df
|
||||||
|
|
||||||
|
table(vl_lf_plot $mediator, vl_lf_plot$timepoint, vl_lf_plot$sample_type)
|
||||||
|
table(vl_lf_plot$sample_type)
|
||||||
|
table(lf_fp_npa$sample_type)
|
||||||
|
|
||||||
|
a5 = npa_wf%>%
|
||||||
|
select(matches("^(vl_pfu|mosaic)"))
|
||||||
|
identical(a5$vl_pfu_ul_npa1, a5$vl_pfu_ul_npa1.1)
|
||||||
|
|
||||||
|
# Minor fix: somehow upstream introduced this...
|
||||||
|
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."])
|
||||||
|
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa"])
|
||||||
|
|
||||||
|
vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."] <- "npa"
|
||||||
|
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."])
|
||||||
|
table(vl_lf_plot_df$sample_type)
|
||||||
|
|
||||||
|
#==========================
|
||||||
|
# DFs for plotting: VL NPA
|
||||||
|
#==========================
|
||||||
|
# linear: raw values
|
||||||
|
dim(vl_lf_plot_df)
|
||||||
|
table(vl_lf_plot_df$mediator, vl_lf_plot_df$timepoint)
|
||||||
|
table(vl_lf_plot_df$sample_type, vl_lf_plot_df$timepoint)
|
||||||
|
|
||||||
|
# display names
|
||||||
|
vl_lf_plot_df$mediator[vl_lf_plot_df$mediator == "vl_pfu_ul"] <- "Viral load" # U+03B1
|
||||||
|
|
||||||
|
# log values: log10
|
||||||
|
vl_lf_plot_df_log = vl_lf_plot_df
|
||||||
|
table(vl_lf_plot_df_log$mediator=="Viral load")
|
||||||
|
|
||||||
|
# vl data juggling with 0!
|
||||||
|
vl_lf_plot_df_log$value[1:30]
|
||||||
|
table(vl_lf_plot_df_log$value == 0)
|
||||||
|
|
||||||
|
vl_lf_plot_df_log$value[vl_lf_plot_df_log$value == 0] <- 1
|
||||||
|
vl_lf_plot_df_log$value[1:30]
|
||||||
|
table(vl_lf_plot_df_log$value == 1)
|
||||||
|
|
||||||
|
orig_nrow = nrow(vl_lf_plot_df_log); orig_nrow; table(vl_lf_plot_df_log$mediator)
|
||||||
|
head(vl_lf_plot_df_log$value)
|
||||||
|
|
||||||
|
# log all values and reassign
|
||||||
|
vl_lf_plot_df_log$value = log10(vl_lf_plot_df_log$value)
|
||||||
|
vl_lf_plot_df_log$value[1:30]
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
|
||||||
|
#--------
|
||||||
|
# NPA: mediators
|
||||||
|
#--------
|
||||||
|
# linear: raw values
|
||||||
|
npa_lf_plot = lf_fp_npa[lf_fp_npa$mediator%in%selected_mediators,]
|
||||||
|
npa_lf_plot_df = npa_lf_plot[,colnames(npa_lf_plot)%in%cols_to_select]
|
||||||
|
table(npa_lf_plot_df$sample_type)
|
||||||
|
table(npa_lf_plot_df$sample_type,npa_lf_plot_df$timepoint, npa_lf_plot_df$mediator )
|
||||||
|
|
||||||
|
# Display names
|
||||||
|
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1
|
||||||
|
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2
|
||||||
|
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3
|
||||||
|
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il1"] <- "IL-1"
|
||||||
|
|
||||||
|
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il6"] <- "IL-6"
|
||||||
|
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il8"] <- "CXCL8"
|
||||||
|
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ip10"] <- "CXCL10"
|
||||||
|
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1
|
||||||
|
|
||||||
|
# check display names
|
||||||
|
table(npa_lf_plot_df$mediator)
|
||||||
|
|
||||||
|
# log values: log10
|
||||||
|
npa_lf_plot_df_log = npa_lf_plot_df
|
||||||
|
table(npa_lf_plot_df_log$value == 1.000000e+04)
|
||||||
|
table(npa_lf_plot_df_log$value == 0)
|
||||||
|
|
||||||
|
# log all values and reassign
|
||||||
|
npa_lf_plot_df_log$value[1:10]
|
||||||
|
npa_lf_plot_df_log$value = log10(npa_lf_plot_df_log$value)
|
||||||
|
npa_lf_plot_df_log$value[1:10]
|
||||||
|
|
||||||
|
# check display names
|
||||||
|
table(npa_lf_plot_df_log$mediator)
|
||||||
|
|
||||||
|
#--------
|
||||||
|
# Serum: mediators
|
||||||
|
#--------
|
||||||
|
# linear: raw values
|
||||||
|
serum_lf_plot = lf_fp_serum[lf_fp_serum$mediator%in%selected_mediators,]
|
||||||
|
serum_lf_plot_df = serum_lf_plot[,colnames(serum_lf_plot)%in%cols_to_select]
|
||||||
|
table(serum_lf_plot_df$sample_type)
|
||||||
|
table(serum_lf_plot_df$sample_type,serum_lf_plot_df$timepoint, serum_lf_plot_df$mediator )
|
||||||
|
|
||||||
|
# Display names: Serum
|
||||||
|
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1
|
||||||
|
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2
|
||||||
|
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3
|
||||||
|
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il1"] <- "IL-1"
|
||||||
|
|
||||||
|
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il6"] <- "IL-6"
|
||||||
|
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il8"] <- "CXCL8"
|
||||||
|
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ip10"] <- "CXCL10"
|
||||||
|
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1
|
||||||
|
|
||||||
|
# check display names
|
||||||
|
table(serum_lf_plot_df$mediator)
|
||||||
|
|
||||||
|
# log values: log10
|
||||||
|
serum_lf_plot_log = serum_lf_plot_df
|
||||||
|
table(serum_lf_plot_log$value == 1.000000e+04)
|
||||||
|
table(serum_lf_plot_log$value == 0)
|
||||||
|
|
||||||
|
# log all values and reassign
|
||||||
|
serum_lf_plot_log$value[1:10]
|
||||||
|
serum_lf_plot_log$value = log10(serum_lf_plot$value)
|
||||||
|
serum_lf_plot_log$value[1:10]
|
||||||
|
|
||||||
|
# check display names
|
||||||
|
table(serum_lf_plot_log$mediator)
|
||||||
|
|
||||||
|
########################################################################
|
||||||
|
# remove variables
|
||||||
|
rm(vl_lf_plot, npa_lf_plot, serum_lf_plot)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue