mosaic_2020/plot_data_ob_paper.R

222 lines
6.8 KiB
R
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/Rscript
getwd()
setwd("~/git/mosaic_2020/")
getwd()
############################################################
# Graphs for paper?
#============
# Mediators required
#============
#NPA and Serum
#IFNa2a
#IFN-b
#IFN-g
#IL-1
#IL-6
#IL-8
#IP-10
#TNF-a
#vl_pfu_ul
#Figure 1a: Study design
#Figure 1b: Viral load
#Figure 1c: NPA
#Figure 1d: Serum
############################################################
#=============
# Input
#=============
source("data_extraction_mediators.R")
# check: adult variable and age variable discrepancy!
metadata_all$mosaic[metadata_all$adult==1 & metadata_all$age<=18]
#===============================
# data assignment for plots
#================================
#time_letter = "t"
time_letter = "T"
#-----------
# npa
#-----------
wf_fp_npa = npa_wf[npa_wf$flustat == 1,]
lf_fp_npa = npa_lf[npa_lf$flustat == 1,]
lf_fp_npa$timepoint = paste0(time_letter, lf_fp_npa$timepoint)
lf_fp_npa$timepoint = as.factor(lf_fp_npa$timepoint)
lf_fp_npa$obesity = as.factor(lf_fp_npa$obesity)
table(lf_fp_npa$mediator)
head(lf_fp_npa$value[lf_fp_npa$mediator == "vitd"])
lf_fp_npa = lf_fp_npa[!lf_fp_npa$mediator == "vitd",]
table(lf_fp_npa$mediator)
table(lf_fp_npa$sample_type, lf_fp_npa$timepoint)
a = lf_fp_npa %>%
select(matches("^(vl_pfu|timepoint|mediator|sample_type)"))
table(a$sample_type)
#-----------
# serum
#-----------
wf_fp_serum = serum_wf[serum_wf$flustat == 1,]
lf_fp_serum = serum_lf[serum_lf$flustat == 1,]
lf_fp_serum$timepoint = paste0(time_letter, lf_fp_serum$timepoint)
lf_fp_serum$timepoint = as.factor(lf_fp_serum$timepoint)
lf_fp_serum$obesity = as.factor(lf_fp_serum$obesity)
head(lf_fp_serum$value[lf_fp_serum$mediator == "vitd"])
########################################################################
rm(npa_lf, sam_lf, serum_lf)
########################################################################
viral_load_npa = c("vl_pfu_ul")
selected_mediators = c("ifna2a", "ifnb", "ifn", "il1", "il6", "il8", "ip10", "tnf")
cols_to_select = c("mosaic", "obesity", "flustat", "asthma",
"sample_type", "timepoint", "mediator", "value" )
# sanity checks
table(lf_fp_npa$mediator, lf_fp_npa$timepoint)
class(lf_fp_npa$mediator)
str(lf_fp_npa)
table(lf_fp_npa$mediator)
# selected dfs for plotting for paper
#--------
# VL: NPA
#--------
vl_lf_plot = lf_fp_npa[lf_fp_npa$mediator%in%viral_load_npa,]
vl_lf_plot_df = vl_lf_plot[,colnames(vl_lf_plot)%in%cols_to_select]
a6 = vl_lf_plot_df
table(vl_lf_plot $mediator, vl_lf_plot$timepoint, vl_lf_plot$sample_type)
table(vl_lf_plot$sample_type)
table(lf_fp_npa$sample_type)
a5 = npa_wf%>%
select(matches("^(vl_pfu|mosaic)"))
identical(a5$vl_pfu_ul_npa1, a5$vl_pfu_ul_npa1.1)
# Minor fix: somehow upstream introduced this...
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."])
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa"])
vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."] <- "npa"
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."])
table(vl_lf_plot_df$sample_type)
#==========================
# DFs for plotting: VL NPA
#==========================
# linear: raw values
dim(vl_lf_plot_df)
table(vl_lf_plot_df$mediator, vl_lf_plot_df$timepoint)
table(vl_lf_plot_df$sample_type, vl_lf_plot_df$timepoint)
# display names
vl_lf_plot_df$mediator[vl_lf_plot_df$mediator == "vl_pfu_ul"] <- "Viral load" # U+03B1
# log values: log10
vl_lf_plot_df_log = vl_lf_plot_df
table(vl_lf_plot_df_log$mediator=="Viral load")
# vl data juggling with 0!
vl_lf_plot_df_log$value[1:30]
table(vl_lf_plot_df_log$value == 0)
vl_lf_plot_df_log$value[vl_lf_plot_df_log$value == 0] <- 1
vl_lf_plot_df_log$value[1:30]
table(vl_lf_plot_df_log$value == 1)
orig_nrow = nrow(vl_lf_plot_df_log); orig_nrow; table(vl_lf_plot_df_log$mediator)
head(vl_lf_plot_df_log$value)
# log all values and reassign
vl_lf_plot_df_log$value = log10(vl_lf_plot_df_log$value)
vl_lf_plot_df_log$value[1:30]
##############################################################
#--------
# NPA: mediators
#--------
# linear: raw values
npa_lf_plot = lf_fp_npa[lf_fp_npa$mediator%in%selected_mediators,]
npa_lf_plot_df = npa_lf_plot[,colnames(npa_lf_plot)%in%cols_to_select]
table(npa_lf_plot_df$sample_type)
table(npa_lf_plot_df$sample_type,npa_lf_plot_df$timepoint, npa_lf_plot_df$mediator )
# Display names
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il1"] <- "IL-1"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il6"] <- "IL-6"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il8"] <- "CXCL8"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ip10"] <- "CXCL10"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1
# check display names
table(npa_lf_plot_df$mediator)
# log values: log10
npa_lf_plot_df_log = npa_lf_plot_df
table(npa_lf_plot_df_log$value == 1.000000e+04)
table(npa_lf_plot_df_log$value == 0)
# log all values and reassign
npa_lf_plot_df_log$value[1:10]
npa_lf_plot_df_log$value = log10(npa_lf_plot_df_log$value)
npa_lf_plot_df_log$value[1:10]
# check display names
table(npa_lf_plot_df_log$mediator)
#--------
# Serum: mediators
#--------
# linear: raw values
serum_lf_plot = lf_fp_serum[lf_fp_serum$mediator%in%selected_mediators,]
serum_lf_plot_df = serum_lf_plot[,colnames(serum_lf_plot)%in%cols_to_select]
table(serum_lf_plot_df$sample_type)
table(serum_lf_plot_df$sample_type,serum_lf_plot_df$timepoint, serum_lf_plot_df$mediator )
# Display names: Serum
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il1"] <- "IL-1"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il6"] <- "IL-6"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il8"] <- "CXCL8"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ip10"] <- "CXCL10"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1
# check display names
table(serum_lf_plot_df$mediator)
# log values: log10
serum_lf_plot_log = serum_lf_plot_df
table(serum_lf_plot_log$value == 1.000000e+04)
table(serum_lf_plot_log$value == 0)
# log all values and reassign
serum_lf_plot_log$value[1:10]
serum_lf_plot_log$value = log10(serum_lf_plot$value)
serum_lf_plot_log$value[1:10]
# check display names
table(serum_lf_plot_log$mediator)
########################################################################
# remove variables
rm(vl_lf_plot, npa_lf_plot, serum_lf_plot)