From afd649d504bd8083ac583a7127537a7d9c07532c Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 23 Oct 2020 14:27:48 +0100 Subject: [PATCH] added loess data in read_data.R and plotting scripts for loess --- all_output_plots.R | 4 +- data_formatting_loess.R | 247 ++++++++++++++++++++++++ loess_plots_days_into_hospitalisation.R | 226 ++++++++++++++++++++++ loess_plots_symptom_onset.R | 227 ++++++++++++++++++++++ read_data.R | 20 +- 5 files changed, 716 insertions(+), 8 deletions(-) create mode 100755 data_formatting_loess.R create mode 100755 loess_plots_days_into_hospitalisation.R create mode 100755 loess_plots_symptom_onset.R diff --git a/all_output_plots.R b/all_output_plots.R index 15ae894..6a937fb 100755 --- a/all_output_plots.R +++ b/all_output_plots.R @@ -27,12 +27,10 @@ rm(my_data) output_plots = paste0(outdir_plots, "output_plots.pdf") corr_and_hmap = paste0(outdir_plots, "corr_hmap.pdf") -#%%======================================================== -# read file +############################################################ # data assignment for plots wf = wf_data lf = lf_data - #===================== # data for plots: LF #===================== diff --git a/data_formatting_loess.R b/data_formatting_loess.R new file mode 100755 index 0000000..2338bcd --- /dev/null +++ b/data_formatting_loess.R @@ -0,0 +1,247 @@ +#!/usr/bin/Rscript +getwd() +setwd('~/git/covid_analysis/') +getwd() +############################################################ +# TASK: data cleaning and extraction: for loess plots +############################################################ +# source data +source("read_data.R") + +# clear unwanted variables +rm(lf_data, wf_data) + +#================================== +# output: formatted and clean data: loess +#================================== +outfile_icu_wf = paste0(datadir,"/icu_covid_wf_v3_loess.csv") +outfile_icu_lf = paste0(datadir,"/icu_covid_lf_v3_loess.csv") +outfile_colnames = paste0(datadir, "/colnames_check_v3_loess.csv") 
+#%%========================================================
+
+colnames_loess = as.data.frame(colnames(my_data))
+
+# some numerical cols are characters, change these
+#my_data <- as.data.frame(sapply(my_data, function(x) as.numeric(as.character((x)))))
+#str(my_data)
+
+# colnames
+all_cols_loess = as.data.frame(colnames(my_data))
+#write.csv(all_cols_loess, "colnames_loess.csv")
+
+#==================================
+# select the mediators to analyse
+#==================================
+cols_to_select = c("id",
+                   "sRAGEpgmLt1",
+                   "sICAM1ngmLt1",
+                   "PSELECTINngmLt1",
+                   "sVCAM1ngmLt1",
+                   "Angiopoietin2pgmLt1",
+                   "sESelectinngmLt1",
+                   "sESelectinngmLt2",
+                   "Angiopoietin2pgmLt2",
+                   "sRAGEpgmLt2",
+                   "sICAM1ngmLt2",
+                   "PSELECTINngmLt2",
+                   "sVCAM1ngmLt2",
+                   "sESelectinngmLt3",
+                   "Angiopoietin2pgmLt3",
+                   "sRAGEpgmLt3",
+                   "sICAM1ngmLt3",
+                   "PSELECTINngmLt3",
+                   "sVCAM1ngmLt3",
+                   "days_from_symptons_onset_t1",
+                   "days_from_hospitalization_t1",
+                   "outcomes0death1recovery2other",
+                   "PF_t1",
+                   "PF_t2",
+                   "PF_t3",
+                   "studygroup0coorteA1coorteB2coorteC")
+
+# check if these columns to select are present in the data;
+# fail early and name the missing ones instead of letting the subset
+# below die with "undefined columns selected"
+print(cols_to_select %in% colnames(my_data))
+missing_cols = setdiff(cols_to_select, colnames(my_data))
+if (length(missing_cols) > 0) {
+  stop(paste0("FAIL: columns missing from input data: "
+              , paste(missing_cols, collapse = ", ")))
+}
+
+table(my_data$studygroup0coorteA1coorteB2coorteC)
+
+# subset
+my_df = my_data[, cols_to_select]
+dim(my_df)
+
+# some numerical cols are characters, change these.
+# lapply + `[]<-` keeps the data.frame shape for any number of rows
+# (sapply would collapse a one-row input to a plain vector).
+str(my_df)
+my_df[] = lapply(my_df, function(x) as.numeric(as.character(x)))
+str(my_df)
+
+# add column name subject_id with "S" prefix to id or simply add it to id
+my_df$id = paste0("S", my_df$id)
+
+# assign nicer colnames (positionally matched to cols_to_select)
+original_colnames = colnames(my_df)
+orig_cols = as.data.frame(colnames(my_df))
+
+my_colnames = c("id",
+                "sRAGE_pgmL_t1",
+                "sICAM1_ngmL_t1",
+                "PSelectin_ngmL_t1",
+                "sVCAM1_ngmL_t1",
+                "Angiopoietin2_pgmL_t1",
+                "sESelectin_ngmL_t1",
+                "sESelectin_ngmL_t2",
+                "Angiopoietin2_pgmL_t2",
+                "sRAGE_pgmL_t2",
+                "sICAM1_ngmL_t2",
+                "PSelectin_ngmL_t2",
+                "sVCAM1_ngmL_t2",
+                "sESelectin_ngmL_t3",
+                "Angiopoietin2_pgmL_t3",
+                "sRAGE_pgmL_t3",
+                "sICAM1_ngmL_t3",
+                "PSelectin_ngmL_t3",
+                "sVCAM1_ngmL_t3",
+                "days_from_symptons_onset_t1",
+                "days_from_hospitalization_t1",
+                "outcomes",
+                "PF_units_t1",
+                "PF_units_t2",
+                "PF_units_t3",
+                "studygroup")
+
+# stop() rather than quit(): quit() exits with status 0 (so a failed run
+# looks successful to the shell) and closes an interactive session.
+if (length(original_colnames) != length(my_colnames)) {
+  stop(paste0("FAIL:length mismatch when assigning colnames"
+              , "\nExpected length of colnames: ", length(original_colnames)
+              , "\nGot: ", length(my_colnames)))
+}
+print("PASS: length of colnames match. Assigning clean colnames")
+colnames(my_df) = my_colnames
+revised_colnames = colnames(my_df)
+colnames_check = as.data.frame(cbind(original_colnames, revised_colnames))
+
+print(colnames(my_df))
+n_patients = length(unique(my_df$id))
+print(paste0("Total no. of patients:", n_patients))
+
+table(my_df$studygroup)
+table(my_df$outcomes)
+table(my_df$studygroup, my_df$outcomes)
+
+#%% subset only icu patients (studygroup == 0)
+# which() drops rows whose studygroup is NA instead of returning all-NA rows
+my_df_icu = my_df[which(my_df$studygroup == 0), ]
+n_icupatients = length(unique(my_df_icu$id))
+
+cat(paste0("Total no. of rows in original df:", nrow(my_df)
+           , "\nTotal no. of unique patients:", n_patients
+           , "\nTotal no. of ICU patients:", n_icupatients))
+
+table(my_df_icu$studygroup, my_df_icu$outcomes)
+tab1 = table(my_df_icu$studygroup, my_df_icu$outcomes)
+
+# count by outcome code (0 = death, 1 = recovery, per the source column name)
+# rather than by table position, which shifts if a level is absent
+n_died = sum(my_df_icu$outcomes == 0, na.rm = TRUE)
+n_recovered = sum(my_df_icu$outcomes == 1, na.rm = TRUE)
+cat(paste0("no. of icu patients who"
+           , "\ndied:", n_died, " ~ ", n_died/n_icupatients*100, "%"
+           , "\nrecovered:", n_recovered, " ~ ", n_recovered/n_icupatients*100, "%"))
+           #, "\nother:", tab1[3], " ~ ", tab1[3]/n_icupatients*100, "%"))
+
+table(my_df$studygroup)
+#=====================================================
+#=========
+# lf data
+#=========
+# identifier columns kept as-is; every other column is pivoted
+pivot_cols = c("id"
+               , "studygroup"
+               , "days_from_symptons_onset_t1"
+               , "days_from_hospitalization_t1"
+               , "outcomes")
+
+expected_rows_lf = nrow(my_df_icu) * (length(my_df_icu) - length(pivot_cols))
+
+# using regex: <mediator>_<units>_<timepoint>
+df_lf = my_df_icu %>%
+  tidyr::pivot_longer(-all_of(pivot_cols), names_to = c("mediator", "units", "timepoint"),
+                      names_pattern = "(.*)_(.*)_(.*)",
+                      values_to = "value")
+
+# The previous check summed table(is.na(...)), which always equals nrow()
+# and therefore never detected NAs; test for them directly.
+if (nrow(df_lf) != expected_rows_lf || anyNA(df_lf$mediator)) {
+  stop(paste0("FAIL:long format data has unexpected no. of rows or NAs in mediator"
+              , "\nExpected no. of rows: ", expected_rows_lf
+              , "\nGot: ", nrow(df_lf)
+              , "\nNAs in mediator: ", sum(is.na(df_lf$mediator))
+              , "\ncheck expected rows calculation!"))
+}
+cat(paste0("PASS: long format data has correct no. of rows and NA in mediator:"
+           , "\nNo. of rows: ", nrow(df_lf)
+           , "\nNo. of cols: ", ncol(df_lf)))
+
+class(df_lf) # hmmm
+str(df_lf)
+
+class(as.data.frame(df_lf))
+str(as.data.frame(df_lf))
+
+# COMMENT: slight difference in class and structure b/w the output from pivot and when you convert to df
+# I will use the df as I am familiar with it!
+lf_df = as.data.frame(df_lf)
+class(lf_df)
+str(lf_df)
+
+# sort by mediator and timepoint
+lf_df = lf_df[order(lf_df$mediator, lf_df$timepoint), ]
+
+table(is.na(lf_df$mediator))
+
+#=========
+# wf data
+#=========
+# icu data is your wf data
+# sort icu data by columnames
+auto_col_order = order(names(my_df_icu))
+#my_col_order = c(1,25, 24, 20, 6, 9, 15, 21, 22, 23, 4, 12, 18, 7, 8, 14, 3, 11, 17, 2, 10, 16, 5, 13, 19)
+
+# hand-picked column order: id, studygroup, days/outcome, PF, then each
+# mediator grouped as t1, t2, t3
+my_col_order = c(1, 26, 20, 21, 22
+                 , 23, 24, 25
+                 , 6, 9, 15
+                 , 2, 10, 16
+                 , 4, 12, 18
+                 , 5, 13, 19
+                 , 7, 8, 14
+                 , 3, 11, 17)
+
+# stop() rather than quit(): quit() exits with status 0 and would end an
+# interactive session.
+if (length(auto_col_order) != length(my_col_order)) {
+  stop(paste0("FAIL:length mismatch of column orders"
+              , "\nExpected column order for: ", length(auto_col_order)
+              , "\nGot:", length(my_col_order)))
+}
+# equal length alone lets duplicated or out-of-range indices through, which
+# would silently drop or repeat columns; require a true permutation
+if (anyDuplicated(my_col_order) > 0 ||
+    !all(my_col_order %in% seq_along(my_df_icu))) {
+  stop("FAIL:column order is not a permutation of the column indices")
+}
+print("PASS: column order successfully generated. Reordering column in wf data")
+wf_df = my_df_icu[, my_col_order]
+#all.equal(my_df_icu, wf_df)
+#===========================================================
+#%% write icu files
+
+# lf_data
+write.csv(lf_df, outfile_icu_lf, row.names = FALSE)
+cat(paste0("Finsihed wrting lf data:"
+           , "\nNo. of rows: ", nrow(lf_df)
+           , "\nNo. of cols: ", ncol(lf_df)))
+
+# column names to check
+write.csv(colnames_check, outfile_colnames, row.names = FALSE)
+cat(paste0("Finsihed wrting colnames original and revised:"
+           , "\nNo. of rows: ", nrow(colnames_check)
+           , "\nNo. of cols: ", ncol(colnames_check)))
+
+# wf_data: only original
+write.csv(wf_df, outfile_icu_wf, row.names = FALSE)
+cat(paste0("\nFinsihed wrting wf data:"
+           , "\nNo. of rows: ", nrow(wf_df)
+           , "\nNo. of cols: ", ncol(wf_df)))
+
+# COMMENT: wf_data for scaled values not written out!
+#=======================================================
+# end of script
diff --git a/loess_plots_days_into_hospitalisation.R b/loess_plots_days_into_hospitalisation.R
new file mode 100755
index 0000000..35f4447
--- /dev/null
+++ b/loess_plots_days_into_hospitalisation.R
@@ -0,0 +1,226 @@
+#!/usr/bin/Rscript
+getwd()
+setwd('~/git/covid_analysis/')
+getwd()
+############################################################
+# TASK: Loess plots for days into hospital till T1
+############################################################
+
+#==========================================================
+#=============
+# input:source data
+#=============
+source("read_data.R")
+
+# clear unwanted variables
+rm(my_data, wf_data, lf_data)
+
+if (!exists("lf_data_loess")) {
+  stop("lf_data_loess not found: read_data.R did not load the loess long format data")
+}
+
+#=============
+# Output
+#=============
+output_plots_loess_hosp = paste0(outdir_plots, "output_plots_v3_loess_hosp.pdf")
+
+#%%========================================================
+#=====================
+# data for plots
+#=====================
+# remove PF
+table(lf_data_loess$mediator)
+lf_loess = lf_data_loess[lf_data_loess$mediator!= "PF",]
+table(lf_loess$mediator)
+
+#%=======================================================
+# x-axis range; na.rm so one missing value does not turn the range into NA
+# and break seq() below
+hosp_days_min = min(lf_loess$days_from_hospitalization_t1, na.rm = TRUE); hosp_days_min
+hosp_days_max = max(lf_loess$days_from_hospitalization_t1, na.rm = TRUE); hosp_days_max
+stopifnot(is.finite(hosp_days_min), is.finite(hosp_days_max))
+
+my_xscale_hosp = seq(hosp_days_min, hosp_days_max, 5)
+my_xscale_hosp
+
+lf_loess_t1 = lf_loess[lf_loess$timepoint == "t1",]
+
+#=====================
+# plotting helper
+#=====================
+# Builds one faceted loess plot of value vs days_from_hospitalization_t1,
+# coloured by outcome, one panel per mediator.
+#   plot_df    : long format data with columns days_from_hospitalization_t1,
+#                value, outcomes, mediator
+#   plot_title : plot title
+#   y_label    : y-axis label
+#   ci_level   : confidence level of the band drawn by geom_smooth
+#   log_y      : if TRUE, y-axis is log10 scaled
+# Reads my_xscale_hosp, hosp_days_min and hosp_days_max from the script scope.
+# Returns the ggplot object (not printed).
+hosp_loess_plot = function(plot_df, plot_title, y_label
+                           , ci_level = 0.95, log_y = FALSE) {
+  p = ggplot(plot_df, aes(x = days_from_hospitalization_t1
+                          , y = value
+                          , colour = factor(outcomes))) +
+    #geom_point() +
+    geom_smooth(method = "loess", size = 1.5, na.rm = TRUE, level = ci_level) +
+    facet_wrap(~mediator, nrow = 2, scales = "free_y") +
+    labs(title = plot_title
+         , x = "Days from hospitalisation to t1"
+         , y = y_label) +
+    scale_x_continuous(breaks = my_xscale_hosp
+                       , limits = c(hosp_days_min, hosp_days_max)) +
+    # NOTE(review): labels assume exactly two outcome levels (0, 1) in the data
+    scale_colour_discrete(name = "Patient outcome"
+                          , labels = c("Death", "Recovered")) +
+    theme(axis.text.x = element_text(size = 13)
+          , axis.text.y = element_text(size = 13
+                                       , angle = 0
+                                       , hjust = 1
+                                       , vjust = 0)
+          , axis.title.x = element_text(size = 13)
+          , axis.title.y = element_text(size = 13)
+          , plot.title = element_text(size = 15, hjust = 0.5)
+          , strip.text.x = element_text(size = 13, colour = "black")
+          , legend.title = element_text(color = "black", size = 13)
+          , legend.text = element_text(size = 13)
+          , legend.position = "right"
+          , legend.direction = "vertical")
+  if (log_y) {
+    p = p + scale_y_log10()
+  }
+  p
+}
+
+# Output plots as one pdf
+cat("Output plots will be in:", output_plots_loess_hosp)
+pdf(output_plots_loess_hosp, width = 15, height = 8)
+
+# print() explicitly so the pages are drawn when this script is source()d,
+# and close the device even if a plot fails
+tryCatch({
+  #####################################################################
+  # 95% CI: t1_data
+  # days_from_hospitalization_t1
+  #####################################################################
+  p1_hosp = hosp_loess_plot(lf_loess_t1
+                            , "Hospital days into t1: Linear scale and 95% CI"
+                            , "T1 Levels")
+  print(p1_hosp)
+
+  p2_hosp = hosp_loess_plot(lf_loess_t1
+                            , "Hospital days into t1: Log scale and 95% CI"
+                            , "T1 Levels (Log10)"
+                            , log_y = TRUE)
+  print(p2_hosp)
+
+  #####################################################################
+  # 50% CI: t1_data
+  # days_from_hospitalization_t1
+  #####################################################################
+  p3_hosp = hosp_loess_plot(lf_loess_t1
+                            , "Hospital days into t1: Linear scale and 50% CI"
+                            , "T1 Levels"
+                            , ci_level = 0.50)
+  print(p3_hosp)
+
+  p4_hosp = hosp_loess_plot(lf_loess_t1
+                            , "Hospital days into t1: Log scale and 50% CI"
+                            , "T1 Levels (Log10)"
+                            , ci_level = 0.50
+                            , log_y = TRUE)
+  print(p4_hosp)
+}, finally = dev.off())
+#####################################################################
+# 95% CI: Combined data *** only if required
+# days_from_hospitalization_t1
+#####################################################################
+
+#-----------
+# linear
+#-----------
+# NOTE(review): the original draft used level = 0.90 under a "95% CI" title
+#p1_all_hosp = hosp_loess_plot(lf_loess
+#                              , "Hospital days into t1: linear scale and 95% CI"
+#                              , "Combined Levels"
+#                              , ci_level = 0.90)
+#p1_all_hosp
+
+#-----------
+# log
+#-----------
+#p2_all_hosp = hosp_loess_plot(lf_loess
+#                              , "Hospital days into t1: Log scale and 95% CI"
+#                              , "Combined Levels (Log10)"
+#                              , log_y = TRUE)
+#p2_all_hosp
diff --git a/loess_plots_symptom_onset.R b/loess_plots_symptom_onset.R
new file mode 100755
index 0000000..ea69559
--- /dev/null
+++ b/loess_plots_symptom_onset.R
@@ -0,0 +1,227 @@
+#!/usr/bin/Rscript
+getwd()
+setwd('~/git/covid_analysis/')
+getwd()
+############################################################
+# TASK: Loess plots: symptom onset
+############################################################
+#=============
+# input:source data
+#=============
+source("read_data.R")
+
+# clear unwanted variables
+rm(my_data, wf_data, lf_data)
+
+if (!exists("lf_data_loess")) {
+  stop("lf_data_loess not found: read_data.R did not load the loess long format data")
+}
+
+#=============
+# Output
+#=============
+output_plots_loess_symp = paste0(outdir_plots, "output_plots_v3_loess_symp.pdf")
+
+############################################################
+#=====================
+# data for plots
+#=====================
+# remove PF
+table(lf_data_loess$mediator)
+lf_loess = lf_data_loess[lf_data_loess$mediator!= "PF",]
+table(lf_loess$mediator)
+
+#%%=================================================================
+# x-axis range; na.rm so one missing value does not turn the range into NA
+# and break seq() below
+symp_days_min = min(lf_loess$days_from_symptons_onset_t1, na.rm = TRUE); symp_days_min
+symp_days_max = max(lf_loess$days_from_symptons_onset_t1, na.rm = TRUE); symp_days_max
+stopifnot(is.finite(symp_days_min), is.finite(symp_days_max))
+
+my_xscale = seq(symp_days_min, symp_days_max, 5)
+my_xscale
+
+lf_loess_t1 = lf_loess[lf_loess$timepoint == "t1",]
+
+#=====================
+# plotting helper
+#=====================
+# Builds one faceted loess plot of value vs days_from_symptons_onset_t1,
+# coloured by outcome, one panel per mediator.
+#   plot_df    : long format data with columns days_from_symptons_onset_t1,
+#                value, outcomes, mediator
+#   plot_title : plot title
+#   y_label    : y-axis label
+#   ci_level   : confidence level of the band drawn by geom_smooth
+#   log_y      : if TRUE, y-axis is log10 scaled
+# Reads my_xscale, symp_days_min and symp_days_max from the script scope.
+# Returns the ggplot object (not printed).
+symp_loess_plot = function(plot_df, plot_title, y_label
+                           , ci_level = 0.95, log_y = FALSE) {
+  p = ggplot(plot_df, aes(x = days_from_symptons_onset_t1
+                          , y = value
+                          , colour = factor(outcomes))) +
+    #geom_point() +
+    geom_smooth(method = "loess", size = 1.5, na.rm = TRUE, level = ci_level) +
+    facet_wrap(~mediator, nrow = 2, scales = "free_y") +
+    labs(title = plot_title
+         , x = "Days from symptom onset to t1"
+         , y = y_label) +
+    scale_x_continuous(breaks = my_xscale
+                       , limits = c(symp_days_min, symp_days_max)) +
+    # NOTE(review): labels assume exactly two outcome levels (0, 1) in the data
+    scale_colour_discrete(name = "Patient outcome"
+                          , labels = c("Death", "Recovered")) +
+    theme(axis.text.x = element_text(size = 13)
+          , axis.text.y = element_text(size = 13
+                                       , angle = 0
+                                       , hjust = 1
+                                       , vjust = 0)
+          , axis.title.x = element_text(size = 13)
+          , axis.title.y = element_text(size = 13)
+          , plot.title = element_text(size = 15, hjust = 0.5)
+          , strip.text.x = element_text(size = 13, colour = "black")
+          , legend.title = element_text(color = "black", size = 13)
+          , legend.text = element_text(size = 13)
+          , legend.position = "right"
+          , legend.direction = "vertical")
+  if (log_y) {
+    p = p + scale_y_log10()
+  }
+  p
+}
+
+# Output plots as one pdf
+cat("Output plots will be in:", output_plots_loess_symp)
+pdf(output_plots_loess_symp, width = 15, height = 8)
+
+# print() explicitly so the pages are drawn when this script is source()d,
+# and close the device even if a plot fails
+tryCatch({
+  #####################################################################
+  # 95% CI: t1_data
+  # days_from_symptons_onset_t1
+  #####################################################################
+  p1 = symp_loess_plot(lf_loess_t1
+                       , "Days into t1: Linear scale and 95% CI"
+                       , "T1 Levels")
+  print(p1)
+
+  p2 = symp_loess_plot(lf_loess_t1
+                       , "Days into t1: Log scale and 95% CI"
+                       , "T1 Levels (Log10)"
+                       , log_y = TRUE)
+  print(p2)
+
+  #####################################################################
+  # 50% CI: t1_data
+  # days_from_symptons_onset_t1
+  #####################################################################
+  p3 = symp_loess_plot(lf_loess_t1
+                       , "Days into t1: Linear scale and 50% CI"
+                       , "T1 Levels"
+                       , ci_level = 0.50)
+  print(p3)
+
+  p4 = symp_loess_plot(lf_loess_t1
+                       , "Days into t1: Log scale and 50% CI"
+                       , "T1 Levels (Log10)"
+                       , ci_level = 0.5
+                       , log_y = TRUE)
+  print(p4)
+}, finally = dev.off())
+#####################################################################
+# 95% CI: Combined data *** only if required
+# days_from_symptons_onset_t1
+#####################################################################
+
+#-----------
+# linear
+#-----------
+#p1_all = symp_loess_plot(lf_loess
+#                         , "Days into t1: linear scale and 95% CI"
+#                         , "Combined Levels")
+#p1_all
+
+#-----------
+# log
+#-----------
+#p2_all = symp_loess_plot(lf_loess
+#                         , "Days into t1: Log scale and 95% CI"
+#                         , "Combined Levels (Log10)"
+#                         , log_y = TRUE)
+#p2_all
+
+
+
diff --git a/read_data.R b/read_data.R
index f53c9be..102bb64 100755
--- a/read_data.R
+++ b/read_data.R
@@ -17,17 +17,27 @@ outdir_stats = "~/git/covid_data/output/stats/"
 #==========================================================
 # input data files
-infile_covid = paste0(datadir,"/covid19_v3.csv")
+infile_covid = paste0(datadir, "/covid19_v3.csv")
+
+infile_icu_wf = paste0(datadir, "/icu_covid_wf_v3.csv")
+infile_icu_lf = paste0(datadir, "/icu_covid_lf_v3.csv")
+
+infile_icu_wf_loess = paste0(datadir, "/icu_covid_wf_v3_loess.csv")
+infile_icu_lf_loess = paste0(datadir, "/icu_covid_lf_v3_loess.csv")
-infile_icu_wf = paste0(datadir,"/icu_covid_wf_v3.csv")
-infile_icu_lf = paste0(datadir,"/icu_covid_lf_v3.csv")
 #%%========================================================
 # read files
 my_data = read.csv(infile_covid, stringsAsFactors = F)
-wf_data = read.csv(infile_icu_wf , stringsAsFactors = F)
+wf_data = read.csv(infile_icu_wf, stringsAsFactors = F)
 dim(wf_data)
-lf_data = read.csv(infile_icu_lf , stringsAsFactors = F)
+lf_data = read.csv(infile_icu_lf, stringsAsFactors = F)
 dim(lf_data)
+
+# The loess files are written by data_formatting_loess.R, which itself
+# sources this script. Read them only once they exist, otherwise the very
+# first run of data_formatting_loess.R fails here before it can create them.
+if (file.exists(infile_icu_wf_loess) && file.exists(infile_icu_lf_loess)) {
+  wf_data_loess = read.csv(infile_icu_wf_loess, stringsAsFactors = FALSE)
+  print(dim(wf_data_loess))
+
+  lf_data_loess = read.csv(infile_icu_lf_loess, stringsAsFactors = FALSE)
+  print(dim(lf_data_loess))
+} else {
+  message("loess input files not found; run data_formatting_loess.R to create them")
+}
 #%%========================================================