#!/usr/bin/Rscript getwd() setwd("~/git/mosaic_2020/") getwd() ############################################################ # Graphs for paper? #============ # Mediators required #============ #NPA and Serum #IFNa2a #IFN-b #IFN-g #IL-1 #IL-6 #IL-8 #IP-10 #TNF-a #vl_pfu_ul #Figure 1a: Study design #Figure 1b: Viral load #Figure 1c: NPA #Figure 1d: Serum ############################################################ #============= # Input #============= source("data_extraction_mediators.R") # check: adult variable and age variable discrepancy! metadata_all$mosaic[metadata_all$adult==1 & metadata_all$age<=18] #=============================== # data assignment for plots #================================ #time_letter = "t" time_letter = "T" #----------- # npa #----------- wf_fp_npa = npa_wf[npa_wf$flustat == 1,] lf_fp_npa = npa_lf[npa_lf$flustat == 1,] lf_fp_npa$timepoint = paste0(time_letter, lf_fp_npa$timepoint) lf_fp_npa$timepoint = as.factor(lf_fp_npa$timepoint) lf_fp_npa$obesity = as.factor(lf_fp_npa$obesity) table(lf_fp_npa$mediator) head(lf_fp_npa$value[lf_fp_npa$mediator == "vitd"]) lf_fp_npa = lf_fp_npa[!lf_fp_npa$mediator == "vitd",] table(lf_fp_npa$mediator) table(lf_fp_npa$sample_type, lf_fp_npa$timepoint) a = lf_fp_npa %>% select(matches("^(vl_pfu|timepoint|mediator|sample_type)")) table(a$sample_type) #----------- # serum #----------- wf_fp_serum = serum_wf[serum_wf$flustat == 1,] lf_fp_serum = serum_lf[serum_lf$flustat == 1,] lf_fp_serum$timepoint = paste0(time_letter, lf_fp_serum$timepoint) lf_fp_serum$timepoint = as.factor(lf_fp_serum$timepoint) lf_fp_serum$obesity = as.factor(lf_fp_serum$obesity) head(lf_fp_serum$value[lf_fp_serum$mediator == "vitd"]) ######################################################################## rm(npa_lf, sam_lf, serum_lf) ######################################################################## viral_load_npa = c("vl_pfu_ul") selected_mediators = c("ifna2a", "ifnb", "ifn", "il1", "il6", "il8", "ip10", "tnf") cols_to_select = c("mosaic", "obesity", "flustat", "asthma", "sample_type", "timepoint", "mediator", "value" ) # sanity checks table(lf_fp_npa$mediator, lf_fp_npa$timepoint) class(lf_fp_npa$mediator) str(lf_fp_npa) table(lf_fp_npa$mediator) # selected dfs for plotting for paper #-------- # VL: NPA #-------- vl_lf_plot = lf_fp_npa[lf_fp_npa$mediator%in%viral_load_npa,] vl_lf_plot_df = vl_lf_plot[,colnames(vl_lf_plot)%in%cols_to_select] a6 = vl_lf_plot_df table(vl_lf_plot $mediator, vl_lf_plot$timepoint, vl_lf_plot$sample_type) table(vl_lf_plot$sample_type) table(lf_fp_npa$sample_type) a5 = npa_wf%>% select(matches("^(vl_pfu|mosaic)")) identical(a5$vl_pfu_ul_npa1, a5$vl_pfu_ul_npa1.1) # Minor fix: somehow upstream introduced this... table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."]) table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa"]) vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."] <- "npa" table(vl_lf_plot_df$sample_type[vl_lf_plot_df$sample_type == "npa1."]) table(vl_lf_plot_df$sample_type) #========================== # DFs for plotting: VL NPA #========================== # linear: raw values dim(vl_lf_plot_df) table(vl_lf_plot_df$mediator, vl_lf_plot_df$timepoint) table(vl_lf_plot_df$sample_type, vl_lf_plot_df$timepoint) # display names vl_lf_plot_df$mediator[vl_lf_plot_df$mediator == "vl_pfu_ul"] <- "Viral load" # U+03B1 # log values: log10 vl_lf_plot_df_log = vl_lf_plot_df table(vl_lf_plot_df_log$mediator=="Viral load") # vl data juggling with 0! vl_lf_plot_df_log$value[1:30] table(vl_lf_plot_df_log$value == 0) vl_lf_plot_df_log$value[vl_lf_plot_df_log$value == 0] <- 1 vl_lf_plot_df_log$value[1:30] table(vl_lf_plot_df_log$value == 1) orig_nrow = nrow(vl_lf_plot_df_log); orig_nrow; table(vl_lf_plot_df_log$mediator) head(vl_lf_plot_df_log$value) # log all values and reassign vl_lf_plot_df_log$value = log10(vl_lf_plot_df_log$value) vl_lf_plot_df_log$value[1:30] ############################################################## #-------- # NPA: mediators #-------- # linear: raw values npa_lf_plot = lf_fp_npa[lf_fp_npa$mediator%in%selected_mediators,] npa_lf_plot_df = npa_lf_plot[,colnames(npa_lf_plot)%in%cols_to_select] table(npa_lf_plot_df$sample_type) table(npa_lf_plot_df$sample_type,npa_lf_plot_df$timepoint, npa_lf_plot_df$mediator ) # Display names npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1 npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2 npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3 npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il1"] <- "IL-1" npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il6"] <- "IL-6" npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "il8"] <- "CXCL8" npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "ip10"] <- "CXCL10" npa_lf_plot_df$mediator[npa_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1 # check display names table(npa_lf_plot_df$mediator) # log values: log10 npa_lf_plot_df_log = npa_lf_plot_df table(npa_lf_plot_df_log$value == 1.000000e+04) table(npa_lf_plot_df_log$value == 0) # log all values and reassign npa_lf_plot_df_log$value[1:10] npa_lf_plot_df_log$value = log10(npa_lf_plot_df_log$value) npa_lf_plot_df_log$value[1:10] # check display names table(npa_lf_plot_df_log$mediator) #-------- # Serum: mediators #-------- # linear: raw values serum_lf_plot = lf_fp_serum[lf_fp_serum$mediator%in%selected_mediators,] serum_lf_plot_df = serum_lf_plot[,colnames(serum_lf_plot)%in%cols_to_select] table(serum_lf_plot_df$sample_type) table(serum_lf_plot_df$sample_type,serum_lf_plot_df$timepoint, serum_lf_plot_df$mediator ) # Display names: Serum serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifna2a"] <- "IFN-α2a" # U+03B1 serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifnb"] <- "IFN-β" # U+03B2 serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ifn"] <- "IFN-γ" # U+03B3 serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il1"] <- "IL-1" serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il6"] <- "IL-6" serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "il8"] <- "CXCL8" serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "ip10"] <- "CXCL10" serum_lf_plot_df$mediator[serum_lf_plot_df$mediator == "tnf"] <- "TNF-α" # U+03B1 # check display names table(serum_lf_plot_df$mediator) # log values: log10 serum_lf_plot_log = serum_lf_plot_df table(serum_lf_plot_log$value == 1.000000e+04) table(serum_lf_plot_log$value == 0) # log all values and reassign serum_lf_plot_log$value[1:10] serum_lf_plot_log$value = log10(serum_lf_plot$value) serum_lf_plot_log$value[1:10] # check display names table(serum_lf_plot_log$mediator) ######################################################################## # remove variables rm(vl_lf_plot, npa_lf_plot, serum_lf_plot)