added boxplots_stat_ob_paper.R and plot_data_ob_paper.R

This commit is contained in:
Tanushree Tunstall 2022-11-18 17:20:07 +00:00
parent dc7e277a72
commit 7011dc75fa
2 changed files with 447 additions and 0 deletions

222
plot_data_ob_paper.R Normal file
View file

@ -0,0 +1,222 @@
#!/usr/bin/Rscript
getwd()
setwd("~/git/mosaic_2020/")
getwd()
############################################################
# Graphs for paper?
#============
# Mediators required
#============
#NPA and Serum
#IFNa2a
#IFN-b
#IFN-g
#IL-1
#IL-6
#IL-8
#IP-10
#TNF-a
#vl_pfu_ul
#Figure 1a: Study design
#Figure 1b: Viral load
#Figure 1c: NPA
#Figure 1d: Serum
############################################################
#=============
# Input
#=============
source("data_extraction_mediators.R")
# check: adult variable and age variable discrepancy!
metadata_all$mosaic[metadata_all$adult==1 & metadata_all$age<=18]
#===============================
# data assignment for plots
#================================
#time_letter = "t"
time_letter = "T"
#-----------
# npa
#-----------
wf_fp_npa = npa_wf[npa_wf$flustat == 1,]
lf_fp_npa = npa_lf[npa_lf$flustat == 1,]
lf_fp_npa$timepoint = paste0(time_letter, lf_fp_npa$timepoint)
lf_fp_npa$timepoint = as.factor(lf_fp_npa$timepoint)
lf_fp_npa$obesity = as.factor(lf_fp_npa$obesity)
table(lf_fp_npa$mediator)
head(lf_fp_npa$value[lf_fp_npa$mediator == "vitd"])
lf_fp_npa = lf_fp_npa[!lf_fp_npa$mediator == "vitd",]
table(lf_fp_npa$mediator)
table(lf_fp_npa$sample_type, lf_fp_npa$timepoint)
a = lf_fp_npa %>%
select(matches("^(vl_pfu|timepoint|mediator|sample_type)"))
table(a$sample_type)
#-----------
# serum
#-----------
wf_fp_serum = serum_wf[serum_wf$flustat == 1,]
lf_fp_serum = serum_lf[serum_lf$flustat == 1,]
lf_fp_serum$timepoint = paste0(time_letter, lf_fp_serum$timepoint)
lf_fp_serum$timepoint = as.factor(lf_fp_serum$timepoint)
lf_fp_serum$obesity = as.factor(lf_fp_serum$obesity)
head(lf_fp_serum$value[lf_fp_serum$mediator == "vitd"])
########################################################################
rm(npa_lf, sam_lf, serum_lf)
########################################################################
viral_load_npa = c("vl_pfu_ul")
selected_mediators = c("ifna2a", "ifnb", "ifn", "il1", "il6", "il8", "ip10", "tnf")
cols_to_select = c("mosaic", "obesity", "flustat", "asthma",
"sample_type", "timepoint", "mediator", "value" )
# sanity checks
table(lf_fp_npa$mediator, lf_fp_npa$timepoint)
class(lf_fp_npa$mediator)
str(lf_fp_npa)
table(lf_fp_npa$mediator)
# selected dfs for plotting for paper
#--------
# VL: NPA
#--------
vl_lf_plot = lf_fp_npa[lf_fp_npa$mediator%in%viral_load_npa,]
vl_lf_plot_df = vl_lf_plot[,colnames(vl_lf_plot)%in%cols_to_select]
a6 = vl_lf_plot_df
table(vl_lf_plot $mediator, vl_lf_plot$timepoint, vl_lf_plot$sample_type)
table(vl_lf_plot$sample_type)
table(lf_fp_npa$sample_type)
a5 = npa_wf%>%
select(matches("^(vl_pfu|mosaic)"))
identical(a5$vl_pfu_ul_npa1, a5$vl_pfu_ul_npa1.1)
# Minor fix: somehow upstream introduced this...
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."])
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa"])
vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."] <- "npa"
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."])
table(vl_lf_plot_df$sample_type)
#==========================
# DFs for plotting: VL NPA
#==========================
# linear: raw values
dim(vl_lf_plot_df)
table(vl_lf_plot_df$mediator, vl_lf_plot_df$timepoint)
table(vl_lf_plot_df$sample_type, vl_lf_plot_df$timepoint)
# display names
vl_lf_plot_df$mediator[vl_lf_plot_df$mediator == "vl_pfu_ul"] <- "Viral load" # U+03B1
# log values: log10
vl_lf_plot_df_log = vl_lf_plot_df
table(vl_lf_plot_df_log$mediator=="Viral load")
# vl data juggling with 0!
vl_lf_plot_df_log$value[1:30]
table(vl_lf_plot_df_log$value == 0)
vl_lf_plot_df_log$value[vl_lf_plot_df_log$value == 0] <- 1
vl_lf_plot_df_log$value[1:30]
table(vl_lf_plot_df_log$value == 1)
orig_nrow = nrow(vl_lf_plot_df_log); orig_nrow; table(vl_lf_plot_df_log$mediator)
head(vl_lf_plot_df_log$value)
# log all values and reassign
vl_lf_plot_df_log$value = log10(vl_lf_plot_df_log$value)
vl_lf_plot_df_log$value[1:30]
##############################################################
#--------
# NPA: mediators
#--------
# linear: raw values
npa_lf_plot = lf_fp_npa[lf_fp_npa$mediator%in%selected_mediators,]
npa_lf_plot_df = npa_lf_plot[,colnames(npa_lf_plot)%in%cols_to_select]
table(npa_lf_plot_df$sample_type)
table(npa_lf_plot_df$sample_type,npa_lf_plot_df$timepoint, npa_lf_plot_df$mediator )
# Display names
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il1"] <- "IL-1"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il6"] <- "IL-6"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il8"] <- "CXCL8"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ip10"] <- "CXCL10"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1
# check display names
table(npa_lf_plot_df$mediator)
# log values: log10
npa_lf_plot_df_log = npa_lf_plot_df
table(npa_lf_plot_df_log$value == 1.000000e+04)
table(npa_lf_plot_df_log$value == 0)
# log all values and reassign
npa_lf_plot_df_log$value[1:10]
npa_lf_plot_df_log$value = log10(npa_lf_plot_df_log$value)
npa_lf_plot_df_log$value[1:10]
# check display names
table(npa_lf_plot_df_log$mediator)
#--------
# Serum: mediators
#--------
# linear: raw values
serum_lf_plot = lf_fp_serum[lf_fp_serum$mediator%in%selected_mediators,]
serum_lf_plot_df = serum_lf_plot[,colnames(serum_lf_plot)%in%cols_to_select]
table(serum_lf_plot_df$sample_type)
table(serum_lf_plot_df$sample_type,serum_lf_plot_df$timepoint, serum_lf_plot_df$mediator )
# Display names: Serum
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il1"] <- "IL-1"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il6"] <- "IL-6"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il8"] <- "CXCL8"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ip10"] <- "CXCL10"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1
# check display names
table(serum_lf_plot_df$mediator)
# log values: log10
serum_lf_plot_log = serum_lf_plot_df
table(serum_lf_plot_log$value == 1.000000e+04)
table(serum_lf_plot_log$value == 0)
# log all values and reassign
serum_lf_plot_log$value[1:10]
serum_lf_plot_log$value = log10(serum_lf_plot$value)
serum_lf_plot_log$value[1:10]
# check display names
table(serum_lf_plot_log$mediator)
########################################################################
# remove variables
rm(vl_lf_plot, npa_lf_plot, serum_lf_plot)