222 lines
6.8 KiB
R
222 lines
6.8 KiB
R
#!/usr/bin/Rscript

# Report the working directory, switch to the project checkout, and report
# it again. The relative source() call further down is resolved against
# this directory.
getwd()
setwd("~/git/mosaic_2020/")
getwd()
|
||
############################################################
# Graphs for paper?
#============
# Mediators required
#============
# NPA and Serum

# IFNa2a
# IFN-b
# IFN-g
# IL-1
# IL-6
# IL-8
# IP-10
# TNF-a
# vl_pfu_ul

# Figure 1a: Study design
# Figure 1b: Viral load
# Figure 1c: NPA
# Figure 1d: Serum

############################################################
|
||
|
||
#=============
# Input
#=============
# Run the extraction script. The objects used below (metadata_all, npa_wf,
# npa_lf, serum_wf, serum_lf) are not created in this file, so presumably
# they come from here -- TODO confirm.
source("data_extraction_mediators.R")

# Check: adult variable and age variable discrepancy!
# Prints the mosaic values of rows flagged adult == 1 whose age is <= 18.
metadata_all$mosaic[
  metadata_all$adult == 1 & metadata_all$age <= 18
]
|
||
|
||
#===============================
# data assignment for plots
#================================
# Prefix pasted in front of every timepoint value
# (lower-case "t" is the commented-out alternative).
#time_letter <- "t"
time_letter <- "T"

#-----------
# npa
#-----------
# Keep only the rows with flustat == 1 (wf / lf presumably stand for the
# wide and long layouts of the same data -- confirm against the sourced file).
wf_fp_npa <- npa_wf[npa_wf$flustat == 1, ]
lf_fp_npa <- npa_lf[npa_lf$flustat == 1, ]

# Prefix the timepoints and store timepoint and obesity as factors.
lf_fp_npa$timepoint <- as.factor(paste0(time_letter, lf_fp_npa$timepoint))
lf_fp_npa$obesity <- as.factor(lf_fp_npa$obesity)

# Inspect the mediators, peek at the "vitd" values, then drop those rows
# and confirm they are gone.
table(lf_fp_npa$mediator)
head(lf_fp_npa$value[lf_fp_npa$mediator == "vitd"])
lf_fp_npa <- lf_fp_npa[lf_fp_npa$mediator != "vitd", ]
table(lf_fp_npa$mediator)

table(lf_fp_npa$sample_type, lf_fp_npa$timepoint)

# Scratch view: only the columns whose names start with vl_pfu, timepoint,
# mediator or sample_type.
a <- select(
  lf_fp_npa,
  matches("^(vl_pfu|timepoint|mediator|sample_type)")
)
table(a$sample_type)
|
||
|
||
#-----------
# serum
#-----------
# Same preparation as for NPA: keep flustat == 1 rows, prefix the
# timepoints, and store timepoint and obesity as factors.
wf_fp_serum <- serum_wf[serum_wf$flustat == 1, ]
lf_fp_serum <- serum_lf[serum_lf$flustat == 1, ]
lf_fp_serum$timepoint <- as.factor(paste0(time_letter, lf_fp_serum$timepoint))
lf_fp_serum$obesity <- as.factor(lf_fp_serum$obesity)

# Peek at the "vitd" values; unlike the NPA table, these rows are kept here.
head(lf_fp_serum$value[lf_fp_serum$mediator == "vitd"])
|
||
|
||
########################################################################
# Remove the unfiltered tables; they are not referenced again below.
# NOTE(review): sam_lf is never created in this script -- presumably it is
# defined by the sourced file. rm() only warns if it does not exist.
rm(list = c("npa_lf", "sam_lf", "serum_lf"))

########################################################################
# Mediator codes to plot and the columns carried into the plotting tables.
viral_load_npa <- "vl_pfu_ul"
selected_mediators <- c(
  "ifna2a", "ifnb", "ifn", "il1", "il6", "il8", "ip10", "tnf"
)
cols_to_select <- c(
  "mosaic", "obesity", "flustat", "asthma",
  "sample_type", "timepoint", "mediator", "value"
)

# sanity checks
table(lf_fp_npa$mediator, lf_fp_npa$timepoint)
class(lf_fp_npa$mediator)
str(lf_fp_npa)
table(lf_fp_npa$mediator)
|
||
|
||
# selected dfs for plotting for paper
#--------
# VL: NPA
#--------
# Viral-load rows of the NPA table, restricted to the plotting columns.
vl_lf_plot <- lf_fp_npa[lf_fp_npa$mediator %in% viral_load_npa, ]
vl_lf_plot_df <- vl_lf_plot[, colnames(vl_lf_plot) %in% cols_to_select]
a6 <- vl_lf_plot_df  # copy taken before sample_type is recoded below

table(vl_lf_plot$mediator, vl_lf_plot$timepoint, vl_lf_plot$sample_type)
table(vl_lf_plot$sample_type)
table(lf_fp_npa$sample_type)

# Compare the two viral-load columns of the wide NPA table.
a5 <- select(npa_wf, matches("^(vl_pfu|mosaic)"))
identical(a5$vl_pfu_ul_npa1, a5$vl_pfu_ul_npa1.1)

# Minor fix: somehow upstream introduced the sample_type value "npa1.".
# Count both spellings, recode "npa1." to "npa", then count again.
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."])
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa"])

vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."] <- "npa"
table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."])
table(vl_lf_plot_df$sample_type)
|
||
|
||
#==========================
# DFs for plotting: VL NPA
#==========================
# linear: raw values
dim(vl_lf_plot_df)
table(vl_lf_plot_df$mediator, vl_lf_plot_df$timepoint)
table(vl_lf_plot_df$sample_type, vl_lf_plot_df$timepoint)

# display names
vl_lf_plot_df$mediator[vl_lf_plot_df$mediator == "vl_pfu_ul"] <- "Viral load"

# log values: log10
# Work on a copy so the raw-value table above stays untouched.
vl_lf_plot_df_log <- vl_lf_plot_df
table(vl_lf_plot_df_log$mediator == "Viral load")

# vl data juggling with 0!
# Zeros are replaced by 1 so that log10() below maps them to 0, not -Inf.
vl_lf_plot_df_log$value[1:30]
table(vl_lf_plot_df_log$value == 0)

vl_lf_plot_df_log$value[vl_lf_plot_df_log$value == 0] <- 1
vl_lf_plot_df_log$value[1:30]
table(vl_lf_plot_df_log$value == 1)

orig_nrow <- nrow(vl_lf_plot_df_log)
orig_nrow
table(vl_lf_plot_df_log$mediator)
head(vl_lf_plot_df_log$value)

# log all values and reassign
vl_lf_plot_df_log$value <- log10(vl_lf_plot_df_log$value)
vl_lf_plot_df_log$value[1:30]
|
||
|
||
##############################################################
|
||
|
||
#--------
# NPA: mediators
#--------
# linear: raw values
# Rows for the selected mediators, restricted to the plotting columns.
npa_lf_plot <- lf_fp_npa[lf_fp_npa$mediator %in% selected_mediators, ]
npa_lf_plot_df <- npa_lf_plot[, colnames(npa_lf_plot) %in% cols_to_select]
table(npa_lf_plot_df$sample_type)
table(
  npa_lf_plot_df$sample_type,
  npa_lf_plot_df$timepoint,
  npa_lf_plot_df$mediator
)

# Display names: replace each mediator code with its plot label.
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il1"] <- "IL-1"

npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il6"] <- "IL-6"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il8"] <- "CXCL8"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ip10"] <- "CXCL10"
npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1

# check display names
table(npa_lf_plot_df$mediator)

# log values: log10
# Work on a copy so the raw-value table stays available.
npa_lf_plot_df_log <- npa_lf_plot_df
table(npa_lf_plot_df_log$value == 1.000000e+04)
table(npa_lf_plot_df_log$value == 0)

# log all values and reassign
# NOTE(review): zeros are counted above but not replaced before log10(),
# so any zero becomes -Inf here -- confirm that is intended.
npa_lf_plot_df_log$value[1:10]
npa_lf_plot_df_log$value <- log10(npa_lf_plot_df_log$value)
npa_lf_plot_df_log$value[1:10]

# check display names
table(npa_lf_plot_df_log$mediator)
|
||
|
||
#--------
# Serum: mediators
#--------
# linear: raw values
# Rows for the selected mediators, restricted to the plotting columns.
serum_lf_plot <- lf_fp_serum[lf_fp_serum$mediator %in% selected_mediators, ]
serum_lf_plot_df <- serum_lf_plot[, colnames(serum_lf_plot) %in% cols_to_select]
table(serum_lf_plot_df$sample_type)
table(
  serum_lf_plot_df$sample_type,
  serum_lf_plot_df$timepoint,
  serum_lf_plot_df$mediator
)

# Display names: Serum -- replace each mediator code with its plot label.
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il1"] <- "IL-1"

serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il6"] <- "IL-6"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il8"] <- "CXCL8"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ip10"] <- "CXCL10"
serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1

# check display names
table(serum_lf_plot_df$mediator)

# log values: log10
# Work on a copy so the raw-value table stays available.
serum_lf_plot_log <- serum_lf_plot_df
table(serum_lf_plot_log$value == 1.000000e+04)
table(serum_lf_plot_log$value == 0)

# log all values and reassign
# The log is taken of this table's own value column, so the result cannot
# drift from the rows it is stored in if an intermediate table is changed.
# NOTE(review): zeros are counted above but not replaced before log10(),
# so any zero becomes -Inf here -- confirm that is intended.
serum_lf_plot_log$value[1:10]
serum_lf_plot_log$value <- log10(serum_lf_plot_log$value)
serum_lf_plot_log$value[1:10]

# check display names
table(serum_lf_plot_log$mediator)
|
||
|
||
########################################################################
# remove variables
# Drop the intermediate row subsets; the *_df and *_log tables are kept.
rm(list = c("vl_lf_plot", "npa_lf_plot", "serum_lf_plot"))
|
||
|
||
|