261 lines
9.2 KiB
R
261 lines
9.2 KiB
R
#!/usr/bin/Rscript
|
|
getwd()
|
|
setwd('~/git/covid_analysis/')
|
|
getwd()
|
|
############################################################
|
|
# TASK: KS test b/w outcome groups at each timepoint
|
|
############################################################
|
|
# source data
|
|
source("read_data.R")
|
|
|
|
# clear unwanted variables
|
|
rm(my_data)
|
|
|
|
############################################################
|
|
#===================================
|
|
# output: KS test at each timepoint
|
|
#===================================
|
|
ks_each_time = paste0(outdir_stats, "KS_test_by_time_v3.csv")
|
|
|
|
############################################################
|
|
# data assignment for stats
|
|
wf = wf_data
|
|
lf = lf_data
|
|
#========================================================
|
|
sum(is.na(lf$value))
|
|
lf$log10_value = log10(lf$value)
|
|
|
|
#lf = lf[!is.na(lf$value),]
|
|
#sum(is.na(lf$value))
|
|
#str(lf$value)
|
|
|
|
########################################################################
|
|
#==============
|
|
# timepoint: t1
|
|
#==============
|
|
lf_t1 = lf[lf$timepoint=="t1",]
|
|
|
|
table(lf_t1$outcomes)
|
|
|
|
lf_death_t1 = lf_t1[lf_t1$outcomes == 0,]
|
|
lf_recovered_t1 = lf_t1[lf_t1$outcomes == 1,]
|
|
|
|
#=======================================================================
|
|
# individual mediator: t1
|
|
ks.test(lf_death_t1$value[lf_death_t1$mediator == "PF"]
|
|
, lf_recovered_t1$value[lf_recovered_t1$mediator == "PF"])
|
|
|
|
ks.test(lf_death_t1$value[lf_death_t1$mediator == "sRAGE"]
|
|
, lf_recovered_t1$value[lf_recovered_t1$mediator == "sRAGE"])
|
|
|
|
#=======================================================================
|
|
# loop: t1
|
|
ks_df_t1 = data.frame()
|
|
mediator_names = levels(factor(lf$mediator))
|
|
|
|
for (i in mediator_names) {
|
|
df_t1 = data.frame(mediator = NA, test_name = NA, ks_statistic = NA, ks_pvalue = NA)
|
|
#print(i)
|
|
ks.test(lf_death_t1$value[lf_death_t1$mediator == i]
|
|
, lf_recovered_t1$value[lf_recovered_t1$mediator == i])
|
|
|
|
med = i
|
|
ks_method = ks.test(lf_death_t1$value[lf_death_t1$mediator == i]
|
|
, lf_recovered_t1$value[lf_recovered_t1$mediator == i])$method
|
|
|
|
ks_statistic = ks.test(lf_death_t1$value[lf_death_t1$mediator == i]
|
|
, lf_recovered_t1$value[lf_recovered_t1$mediator == i])$statistic
|
|
|
|
ks_pval = ks.test(lf_death_t1$value[lf_death_t1$mediator == i]
|
|
, lf_recovered_t1$value[lf_recovered_t1$mediator == i])$p.value
|
|
|
|
#print(c(med, ks_method, ks_statistic[[1]], ks_pval))
|
|
|
|
df_t1$mediator = med
|
|
df_t1$test_name = ks_method
|
|
df_t1$ks_statistic = ks_statistic[[1]]
|
|
df_t1$ks_pvalue = ks_pval
|
|
|
|
print(df)
|
|
ks_df_t1 = rbind(ks_df_t1, df_t1)
|
|
}
|
|
|
|
# formatting
|
|
ks_df_t1$group = "0 vs 1"
|
|
ks_df_t1$timepoint = "t1"
|
|
|
|
str(ks_df_t1)
|
|
ks_df_t1$pvalue_signif = ks_df_t1$ks_pvalue
|
|
str(ks_df_t1$pvalue_signif)
|
|
|
|
ks_df_t1 = dplyr::mutate(ks_df_t1
|
|
, pvalue_signif = case_when(pvalue_signif == 0.05 ~ "."
|
|
, pvalue_signif <=0.0001 ~ '****'
|
|
, pvalue_signif <=0.001 ~ '***'
|
|
, pvalue_signif <=0.01 ~ '**'
|
|
, pvalue_signif <0.05 ~ '*'
|
|
, TRUE ~ 'ns'))
|
|
|
|
##########################################################################
|
|
#==============
|
|
# timepoint: t2
|
|
#==============
|
|
lf_t2 = lf[lf$timepoint=="t2",]
|
|
|
|
table(lf_t2$outcomes)
|
|
|
|
lf_death_t2 = lf_t2[lf_t2$outcomes == 0,]
|
|
lf_recovered_t2 = lf_t2[lf_t2$outcomes == 1,]
|
|
#=======================================================================
|
|
# individual mediator: t2
|
|
ks.test(lf_death_t2$value[lf_death_t2$mediator == "PF"]
|
|
, lf_recovered_t2$value[lf_recovered_t2$mediator == "PF"])
|
|
|
|
ks.test(lf_death_t2$value[lf_death_t2$mediator == "sRAGE"]
|
|
, lf_recovered_t2$value[lf_recovered_t2$mediator == "sRAGE"])
|
|
|
|
#=======================================================================
|
|
# loop: t2
|
|
ks_df_t2 = data.frame()
|
|
mediator_names = levels(factor(lf$mediator))
|
|
|
|
for (i in mediator_names) {
|
|
df_t2 = data.frame(mediator = NA, test_name = NA, ks_statistic = NA, ks_pvalue = NA)
|
|
#print(i)
|
|
ks.test(lf_death_t2$value[lf_death_t2$mediator == i]
|
|
, lf_recovered_t2$value[lf_recovered_t2$mediator == i])
|
|
|
|
med = i
|
|
ks_method = ks.test(lf_death_t2$value[lf_death_t2$mediator == i]
|
|
, lf_recovered_t2$value[lf_recovered_t2$mediator == i])$method
|
|
|
|
ks_statistic = ks.test(lf_death_t2$value[lf_death_t2$mediator == i]
|
|
, lf_recovered_t2$value[lf_recovered_t2$mediator == i])$statistic
|
|
|
|
ks_pval = ks.test(lf_death_t2$value[lf_death_t2$mediator == i]
|
|
, lf_recovered_t2$value[lf_recovered_t2$mediator == i])$p.value
|
|
|
|
#print(c(med, ks_method, ks_statistic[[1]], ks_pval))
|
|
|
|
df_t2$mediator = med
|
|
df_t2$test_name = ks_method
|
|
df_t2$ks_statistic = ks_statistic[[1]]
|
|
df_t2$ks_pvalue = ks_pval
|
|
|
|
print(df)
|
|
ks_df_t2 = rbind(ks_df_t2, df_t2)
|
|
}
|
|
|
|
# formatting
|
|
ks_df_t2$group = "0 vs 1"
|
|
ks_df_t2$timepoint = "t2"
|
|
|
|
str(ks_df_t2)
|
|
ks_df_t2$pvalue_signif = ks_df_t2$ks_pvalue
|
|
str(ks_df_t2$pvalue_signif)
|
|
|
|
ks_df_t2 = dplyr::mutate(ks_df_t2
|
|
, pvalue_signif = case_when(pvalue_signif == 0.05 ~ "."
|
|
, pvalue_signif <=0.0001 ~ '****'
|
|
, pvalue_signif <=0.001 ~ '***'
|
|
, pvalue_signif <=0.01 ~ '**'
|
|
, pvalue_signif <0.05 ~ '*'
|
|
, TRUE ~ 'ns'))
|
|
#=======================================================================
|
|
#==============
|
|
# timepoint: t3
|
|
#==============
|
|
lf_t3 = lf[lf$timepoint=="t3",]
|
|
|
|
table(lf_t3$outcomes)
|
|
|
|
lf_death_t3 = lf_t3[lf_t3$outcomes == 0,]
|
|
lf_recovered_t3 = lf_t3[lf_t3$outcomes == 1,]
|
|
#=======================================================================
|
|
# individual mediator: t3
|
|
ks.test(lf_death_t3$value[lf_death_t3$mediator == "PF"]
|
|
, lf_recovered_t3$value[lf_recovered_t3$mediator == "PF"])
|
|
|
|
ks.test(lf_death_t3$value[lf_death_t3$mediator == "sRAGE"]
|
|
, lf_recovered_t3$value[lf_recovered_t3$mediator == "sRAGE"])
|
|
|
|
#=======================================================================
|
|
# loop: t3
|
|
ks_df_t3 = data.frame()
|
|
mediator_names = levels(factor(lf$mediator))
|
|
|
|
for (i in mediator_names) {
|
|
df_t3 = data.frame(mediator = NA, test_name = NA, ks_statistic = NA, ks_pvalue = NA)
|
|
#print(i)
|
|
ks.test(lf_death_t3$value[lf_death_t3$mediator == i]
|
|
, lf_recovered_t3$value[lf_recovered_t3$mediator == i])
|
|
|
|
med = i
|
|
ks_method = ks.test(lf_death_t3$value[lf_death_t3$mediator == i]
|
|
, lf_recovered_t3$value[lf_recovered_t3$mediator == i])$method
|
|
|
|
ks_statistic = ks.test(lf_death_t3$value[lf_death_t3$mediator == i]
|
|
, lf_recovered_t3$value[lf_recovered_t3$mediator == i])$statistic
|
|
|
|
ks_pval = ks.test(lf_death_t3$value[lf_death_t3$mediator == i]
|
|
, lf_recovered_t3$value[lf_recovered_t3$mediator == i])$p.value
|
|
|
|
#print(c(med, ks_method, ks_statistic[[1]], ks_pval))
|
|
|
|
df_t3$mediator = med
|
|
df_t3$test_name = ks_method
|
|
df_t3$ks_statistic = ks_statistic[[1]]
|
|
df_t3$ks_pvalue = ks_pval
|
|
|
|
print(df)
|
|
ks_df_t3 = rbind(ks_df_t3, df_t3)
|
|
}
|
|
|
|
# formatting
|
|
ks_df_t3$group = "0 vs 1"
|
|
ks_df_t3$timepoint = "t3"
|
|
|
|
str(ks_df_t3)
|
|
ks_df_t3$pvalue_signif = ks_df_t3$ks_pvalue
|
|
str(ks_df_t3$pvalue_signif)
|
|
|
|
ks_df_t3 = dplyr::mutate(ks_df_t3
|
|
, pvalue_signif = case_when(pvalue_signif == 0.05 ~ "."
|
|
, pvalue_signif <=0.0001 ~ '****'
|
|
, pvalue_signif <=0.001 ~ '***'
|
|
, pvalue_signif <=0.01 ~ '**'
|
|
, pvalue_signif <0.05 ~ '*'
|
|
, TRUE ~ 'ns'))
|
|
|
|
##########################################################################
|
|
# combine all three ks_dfs
|
|
n_dfs = 3
|
|
if ( all.equal(nrow(ks_df_t1), nrow(ks_df_t2), nrow(ks_df_t3)) &&
|
|
all.equal(ncol(ks_df_t1), ncol(ks_df_t2), ncol(ks_df_t3)) ){
|
|
cat("\nPASS: Calculating expected rows and cols for sanity checks on combined_dfs")
|
|
expected_rows = nrow(ks_df_t1) * n_dfs
|
|
expected_cols = ncol(ks_df_t1)
|
|
ks_df_combined = rbind(ks_df_t1, ks_df_t2, ks_df_t3)
|
|
if ( nrow(ks_df_combined) == expected_rows && ncol(ks_df_combined) == expected_cols ){
|
|
cat("\nPASS: combined df successfully created"
|
|
, "\nnrow combined_df:", nrow(ks_df_combined)
|
|
, "\nncol combined_df:", ncol(ks_df_combined))
|
|
}
|
|
else{
|
|
cat("\nFAIL: Dim mismatch"
|
|
, "\nExpected rows:", expected_rows
|
|
, "\nGot:", nrow(ks_df_combined)
|
|
, "\nExpected cols:", expected_cols
|
|
, "\nGot:", ncol(ks_df_combined))
|
|
}
|
|
}else{
|
|
cat("\nFAIL: Could not generate expected_rows and/or expected_cols"
|
|
, "\nCheck hardcoded value of n_dfs")
|
|
}
|
|
|
|
#=======================================================================
|
|
#******************
|
|
# write output file: KS test
|
|
#******************
|
|
cat("Output of KS test by time will be in:", ks_each_time)
|
|
write.csv(ks_df_combined, ks_each_time , row.names = FALSE)
|