diff --git a/boxplots_stat_ob_paper.R b/boxplots_stat_ob_paper.R new file mode 100644 index 0000000..08f073b --- /dev/null +++ b/boxplots_stat_ob_paper.R @@ -0,0 +1,225 @@ +#!/usr/bin/Rscript +getwd() +setwd("~/git/mosaic_2020/") +getwd() +############################################################### +# TASK: boxplots with stats for NPA, SAM and SERUM +# for each mediator, at each timepoint by group + +# TODO: check boxplot_stat_function.R +# output plots even if stats fail due to data containing only LLODs +# however for a given mediator, if there is error for ANY timepoint +# stats will not be added for the entire plot! To be handled in +# next iteration! +############################################################### + +#============= +# Input +#============= +source("boxplot_stat_function.R") +source("plot_data_ob_paper.R") + +#============= +# Output +#============= +outdir_plots_paper = "/home/tanu/git/mosaic_2020/output/plots_paper/" + +#outfile_bp = paste0("boxplots_stats_paper-%03d.png") +#output_boxplot_stats = paste0(outdir_plots_paper, outfile_bp); output_boxplot_stats +#svg(output_boxplot_stats, width=22, height=16) +#png(output_boxplot_stats, width = 11, height = 8, units = "in", res = 300) + +############################################################### +#=============================== +# data assignment for plots +#=============================== +#------------- +# NPA: VL +#------------- +#linear +cat("plotting for FP adult patients:", + length(unique(vl_lf_plot_df$mosaic))) + +# linear : but data is not visible +plots_npa_vl = doMyPlotsStats(vl_lf_plot_df) +npa_plot_vl = ggpubr::ggarrange(plotlist = plots_npa_vl + , align = "hv" + #, legend = c(0.8,1) + #, legend = "bottom" + , common.legend = T) +npa_plot_vl + +# log +plots_npa_vl_log = doMyPlotsStats(vl_lf_plot_df_log) +npa_plot_vl_log = ggpubr::ggarrange(plotlist = plots_npa_vl_log + , align = "hv" + , common.legend = T + ) +npa_plot_vl_log + +############################################################ +mediator_order = c("IFN-α2a","IFN-β", "IFN-γ", "IL-1", "IL-6", "CXCL8", "CXCL10", "TNF-α") + +#----------------------------- +# NPA: selected mediators +#----------------------------- +# linear +plots_npa = doMyPlotsStats(npa_lf_plot_df, custom_order = mediator_order) +npa_plot_1 = ggpubr::ggarrange(plotlist = plots_npa + , align = "hv" + , ncol = 8 + , nrow = 1 + , common.legend = T) +npa_plot = annotate_figure(npa_plot_1, + top = text_grob("NPA" + , color = "black" + #, face = "bold" + , size = 14)) + +# log +plots_npa_log = doMyPlotsStats(npa_lf_plot_df_log, custom_order = mediator_order) +npa_plot_log_1 = ggpubr::ggarrange(plotlist = plots_npa_log + , align = "hv" + , ncol = 8 + , nrow = 1 + , common.legend = T) +npa_plot_log = annotate_figure(npa_plot_log_1, + top = text_grob("NPA" + , color = "black" + #, face = "bold" + , size = 14)) +npa_plot_log +#------------------------------- +# SERUM: selected mediators +#-------------------------------- +# linear +plots_serum = doMyPlotsStats(serum_lf_plot_df, custom_order = mediator_order) +serum_plot_1 = ggpubr::ggarrange(plotlist = plots_serum + , align = "hv" + , ncol = 8 + , nrow = 1 + , common.legend = F + ) + +serum_plot = annotate_figure(serum_plot_1, + top = text_grob("Serum" + , color = "black" + #, face = "bold" + , size = 14)) +serum_plot + +# log +plots_serum_log = doMyPlotsStats(serum_lf_plot_log, custom_order = mediator_order) +serum_plot_log_1 = ggpubr::ggarrange(plotlist = plots_serum_log + , align = "hv" + , ncol = 8 + , nrow = 1 + , common.legend = F) + +serum_plot_log = annotate_figure(serum_plot_log_1, + top = text_grob("Serum" + , color = "black" + #, face = "bold" + , size = 14)) + +serum_plot_log + +############################################################## +############################################################## +cat ("Outplots will be in:", outdir_plots_paper) +#====================== +# Output: Linear graphs +#====================== +# viral load: linear +viral_load_annot = annotate_figure( + npa_plot_vl + , left = text_grob("pg/ml" + , color = "black" + , rot = 90 + , size = 18) +) + +png(paste0(outdir_plots_paper, "viral_load_linear"), + width = 5, + height = 5, + units = "in", + res = 300) + +viral_load_annot +dev.off() + +# npa and serum mediators: linear +outplot_linear = cowplot::plot_grid( + npa_plot, + serum_plot, + nrow = 2, + rel_heights = c(1.15,1) +) + +outplot_linear_annot = annotate_figure( + outplot_linear + , left = text_grob("pg/ml" + , color = "black" + , rot = 90 + , size = 16) +) + +png(paste0(outdir_plots_paper, "npa_serum_linear"), + width = 18, + height = 6, + units = "in", + res = 300) +outplot_linear_annot +dev.off() + + +#====================== +# Output: Log graphs +#====================== +# viral load: log +viral_load_log_annot = annotate_figure( + npa_plot_vl_log + , left = text_grob("Log10 (pg/ml)" + , color = "black" + , rot = 90 + , size = 18) +) + +png(paste0(outdir_plots_paper, "viral_load_log" ), + width = 5, + height = 5, + units = "in", + res = 300) + +viral_load_log_annot +dev.off() + +# npa and serum mediators: log +outplot_log = cowplot::plot_grid( + npa_plot_log, + serum_plot_log, + nrow = 2, + rel_heights = c(1.15,1) + ) + + +outplot_log_annot = annotate_figure( + outplot_log + , left = text_grob("Log10 (pg/ml)" + , color = "black" + , rot = 90 + , size = 16) +) + +png(paste0(outdir_plots_paper, "npa_serum_log"), + width = 18, + height = 6, + units = "in", + res = 300) +outplot_log_annot +dev.off() + + +############################################################### +# END OF SCRIPT # +############################################################### diff --git a/plot_data_ob_paper.R b/plot_data_ob_paper.R new file mode 100644 index 0000000..ba67820 --- /dev/null +++ b/plot_data_ob_paper.R @@ -0,0 +1,222 @@ +#!/usr/bin/Rscript +getwd() +setwd("~/git/mosaic_2020/") +getwd() +############################################################ +# Graphs for paper? +#============ +# Mediators required +#============ +#NPA and Serum + +#IFNa2a +#IFN-b +#IFN-g +#IL-1 +#IL-6 +#IL-8 +#IP-10 +#TNF-a +#vl_pfu_ul + +#Figure 1a: Study design +#Figure 1b: Viral load +#Figure 1c: NPA +#Figure 1d: Serum + +############################################################ + +#============= +# Input +#============= +source("data_extraction_mediators.R") + +# check: adult variable and age variable discrepancy! +metadata_all$mosaic[metadata_all$adult==1 & metadata_all$age<=18] + +#=============================== +# data assignment for plots +#================================ +#time_letter = "t" +time_letter = "T" + +#----------- +# npa +#----------- +wf_fp_npa = npa_wf[npa_wf$flustat == 1,] +lf_fp_npa = npa_lf[npa_lf$flustat == 1,] +lf_fp_npa$timepoint = paste0(time_letter, lf_fp_npa$timepoint) +lf_fp_npa$timepoint = as.factor(lf_fp_npa$timepoint) +lf_fp_npa$obesity = as.factor(lf_fp_npa$obesity) + +table(lf_fp_npa$mediator) +head(lf_fp_npa$value[lf_fp_npa$mediator == "vitd"]) +lf_fp_npa = lf_fp_npa[!lf_fp_npa$mediator == "vitd",] +table(lf_fp_npa$mediator) + +table(lf_fp_npa$sample_type, lf_fp_npa$timepoint) + +a = lf_fp_npa %>% + select(matches("^(vl_pfu|timepoint|mediator|sample_type)")) +table(a$sample_type) + +#----------- +# serum +#----------- +wf_fp_serum = serum_wf[serum_wf$flustat == 1,] +lf_fp_serum = serum_lf[serum_lf$flustat == 1,] +lf_fp_serum$timepoint = paste0(time_letter, lf_fp_serum$timepoint) +lf_fp_serum$timepoint = as.factor(lf_fp_serum$timepoint) +lf_fp_serum$obesity = as.factor(lf_fp_serum$obesity) + +head(lf_fp_serum$value[lf_fp_serum$mediator == "vitd"]) + +######################################################################## +rm(npa_lf, sam_lf, serum_lf) + + +######################################################################## +viral_load_npa = c("vl_pfu_ul") +selected_mediators = c("ifna2a", "ifnb", "ifn", "il1", "il6", "il8", "ip10", "tnf") +cols_to_select = c("mosaic", "obesity", "flustat", "asthma", + "sample_type", "timepoint", "mediator", "value" ) + +# sanity checks +table(lf_fp_npa$mediator, lf_fp_npa$timepoint) +class(lf_fp_npa$mediator) +str(lf_fp_npa) +table(lf_fp_npa$mediator) + +# selected dfs for plotting for paper +#-------- +# VL: NPA +#-------- +vl_lf_plot = lf_fp_npa[lf_fp_npa$mediator%in%viral_load_npa,] +vl_lf_plot_df = vl_lf_plot[,colnames(vl_lf_plot)%in%cols_to_select] +a6 = vl_lf_plot_df + +table(vl_lf_plot $mediator, vl_lf_plot$timepoint, vl_lf_plot$sample_type) +table(vl_lf_plot$sample_type) +table(lf_fp_npa$sample_type) + +a5 = npa_wf%>% + select(matches("^(vl_pfu|mosaic)")) +identical(a5$vl_pfu_ul_npa1, a5$vl_pfu_ul_npa1.1) + +# Minor fix: somehow upstream introduced this... +table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."]) +table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa"]) + +vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."] <- "npa" +table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."]) +table(vl_lf_plot_df$sample_type) + +#========================== +# DFs for plotting: VL NPA +#========================== +# linear: raw values +dim(vl_lf_plot_df) +table(vl_lf_plot_df$mediator, vl_lf_plot_df$timepoint) +table(vl_lf_plot_df$sample_type, vl_lf_plot_df$timepoint) + +# display names +vl_lf_plot_df$mediator[vl_lf_plot_df$mediator == "vl_pfu_ul"] <- "Viral load" # U+03B1 + +# log values: log10 +vl_lf_plot_df_log = vl_lf_plot_df +table(vl_lf_plot_df_log$mediator=="Viral load") + +# vl data juggling with 0! +vl_lf_plot_df_log$value[1:30] +table(vl_lf_plot_df_log$value == 0) + +vl_lf_plot_df_log$value[vl_lf_plot_df_log$value == 0] <- 1 +vl_lf_plot_df_log$value[1:30] +table(vl_lf_plot_df_log$value == 1) + +orig_nrow = nrow(vl_lf_plot_df_log); orig_nrow; table(vl_lf_plot_df_log$mediator) +head(vl_lf_plot_df_log$value) + +# log all values and reassign +vl_lf_plot_df_log$value = log10(vl_lf_plot_df_log$value) +vl_lf_plot_df_log$value[1:30] + +############################################################## + +#-------- +# NPA: mediators +#-------- +# linear: raw values +npa_lf_plot = lf_fp_npa[lf_fp_npa$mediator%in%selected_mediators,] +npa_lf_plot_df = npa_lf_plot[,colnames(npa_lf_plot)%in%cols_to_select] +table(npa_lf_plot_df$sample_type) +table(npa_lf_plot_df$sample_type,npa_lf_plot_df$timepoint, npa_lf_plot_df$mediator ) + +# Display names +npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1 +npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2 +npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3 +npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il1"] <- "IL-1" + +npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il6"] <- "IL-6" +npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il8"] <- "CXCL8" +npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ip10"] <- "CXCL10" +npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1 + +# check display names +table(npa_lf_plot_df$mediator) + +# log values: log10 +npa_lf_plot_df_log = npa_lf_plot_df +table(npa_lf_plot_df_log$value == 1.000000e+04) +table(npa_lf_plot_df_log$value == 0) + +# log all values and reassign +npa_lf_plot_df_log$value[1:10] +npa_lf_plot_df_log$value = log10(npa_lf_plot_df_log$value) +npa_lf_plot_df_log$value[1:10] + +# check display names +table(npa_lf_plot_df_log$mediator) + +#-------- +# Serum: mediators +#-------- +# linear: raw values +serum_lf_plot = lf_fp_serum[lf_fp_serum$mediator%in%selected_mediators,] +serum_lf_plot_df = serum_lf_plot[,colnames(serum_lf_plot)%in%cols_to_select] +table(serum_lf_plot_df$sample_type) +table(serum_lf_plot_df$sample_type,serum_lf_plot_df$timepoint, serum_lf_plot_df$mediator ) + +# Display names: Serum +serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1 +serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2 +serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3 +serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il1"] <- "IL-1" + +serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il6"] <- "IL-6" +serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il8"] <- "CXCL8" +serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ip10"] <- "CXCL10" +serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1 + +# check display names +table(serum_lf_plot_df$mediator) + +# log values: log10 +serum_lf_plot_log = serum_lf_plot_df +table(serum_lf_plot_log$value == 1.000000e+04) +table(serum_lf_plot_log$value == 0) + +# log all values and reassign +serum_lf_plot_log$value[1:10] +serum_lf_plot_log$value = log10(serum_lf_plot$value) +serum_lf_plot_log$value[1:10] + +# check display names +table(serum_lf_plot_log$mediator) + +######################################################################## +# remove variables +rm(vl_lf_plot, npa_lf_plot, serum_lf_plot) + +