updated unpaired stats with n_obs_complete and removed mediator vitd from sam and npa

This commit is contained in:
Tanushree Tunstall 2020-10-30 13:04:45 +00:00
parent 1b5d353060
commit a2dbecd03d
5 changed files with 165 additions and 65 deletions

View file

@ -30,7 +30,7 @@ flu_stats_time_unpaired = paste0(outdir_stats, outfile_name)
# Keep only the rows with flustat == 1 from each of the two NPA tables
wf = npa_wf[npa_wf$flustat == 1,]
lf = npa_lf[npa_lf$flustat == 1,]
# Prefix the timepoint values with "t" (the later subsets match on "t1" etc.)
lf$timepoint = paste0("t", lf$timepoint)
# Drop the rows whose mediator is "vitd".
# `!` binds more loosely than `==` in R, so this reads as !(mediator == "vitd").
lf = lf[!lf$mediator == "vitd",]
########################################################################
# clear variables
rm(sam_lf, sam_wf
@ -43,12 +43,12 @@ rm(pivot_cols)
# sanity checks
# Print the number of rows under each timepoint label
table(lf$timepoint)
# Compare the flustat counts in the subset (lf) with the second entry of the
# flustat table of the full data (npa_lf); report and quit on a mismatch.
# NOTE(review): the second table entry is presumably the flustat == 1 count -- confirm.
# NOTE(review): if rows are dropped from lf after the flustat subset (e.g. by a
# mediator filter), the two counts can no longer match and this will quit --
# confirm the check is still wanted in this form.
if (table(lf$flustat) == table(npa_lf$flustat)[[2]]){
cat("Analysing Flu positive patients for:", my_sample_type)
}else{
cat("FAIL: problem with subsetting data for:", my_sample_type)
quit()
}
#if (table(lf$flustat) == table(npa_lf$flustat)[[2]]){
# cat("Analysing Flu positive patients for:", my_sample_type)
#}else{
# cat("FAIL: problem with subsetting data for:", my_sample_type)
# quit()
#}
########################################################################
# Unpaired stats at each timepoint b/w groups: wilcoxon UNpaired analysis
# with correction
@ -58,6 +58,7 @@ my_adjust_method = "BH"
#==============
# unpaired: t1
#==============
# Rows of lf recorded at timepoint "t1"
lf_t1 = lf[lf$timepoint == "t1",]
# Count of missing entries in the value column at t1 (printed for inspection)
sum(is.na(lf_t1$value))
@ -80,10 +81,20 @@ stats_un_t1$timepoint = "t1"
stats_un_t1 = as.data.frame(stats_un_t1)
class(stats_un_t1)
# calculate n_obs for each mediator
n_t1 = data.frame(table(lf_t1_comp$mediator))
colnames(n_t1) = c("mediator", "n_obs")
n_t1$mediator = as.character(n_t1$mediator)
#----------------------------------------
# calculate n_obs for each mediator: t1
#----------------------------------------
# n_obs          : rows per mediator in lf_t1 (every row at timepoint t1)
# n_obs_complete : rows per mediator in lf_t1_comp
#                  (presumably lf_t1 with incomplete rows removed -- confirm
#                  where lf_t1_comp is created)
n_t1_all = data.frame(table(lf_t1$mediator))
colnames(n_t1_all) = c("mediator", "n_obs")
n_t1_all$mediator = as.character(n_t1_all$mediator)

n_t1_comp = data.frame(table(lf_t1_comp$mediator))
colnames(n_t1_comp) = c("mediator", "n_obs_complete")
n_t1_comp$mediator = as.character(n_t1_comp$mediator)

# Join on the column(s) the two count tables share; printed as a visual check
merge_cols = intersect(names(n_t1_all), names(n_t1_comp)); merge_cols
# all = TRUE (spelled out, since T is an ordinary reassignable variable) gives
# an outer join: a mediator missing from one table is kept, with NA in that
# table's count column
n_t1 = merge(n_t1_all, n_t1_comp, by = merge_cols, all = TRUE)
#==================================
# Merge: merge stats + n_obs df
@ -130,10 +141,20 @@ stats_un_t2$timepoint = "t2"
stats_un_t2 = as.data.frame(stats_un_t2)
class(stats_un_t2)
# calculate n_obs for each mediator
n_t2 = data.frame(table(lf_t2_comp$mediator))
colnames(n_t2) = c("mediator", "n_obs")
n_t2$mediator = as.character(n_t2$mediator)
#----------------------------------------
# calculate n_obs for each mediator: t2
#----------------------------------------
# n_obs          : rows per mediator in lf_t2
# n_obs_complete : rows per mediator in lf_t2_comp
#                  (presumably lf_t2 with incomplete rows removed -- confirm
#                  where lf_t2_comp is created)
n_t2_all = data.frame(table(lf_t2$mediator))
colnames(n_t2_all) = c("mediator", "n_obs")
n_t2_all$mediator = as.character(n_t2_all$mediator)

n_t2_comp = data.frame(table(lf_t2_comp$mediator))
colnames(n_t2_comp) = c("mediator", "n_obs_complete")
n_t2_comp$mediator = as.character(n_t2_comp$mediator)

# Join on the column(s) the two count tables share; printed as a visual check
merge_cols = intersect(names(n_t2_all), names(n_t2_comp)); merge_cols
# all = TRUE (spelled out, since T is an ordinary reassignable variable) gives
# an outer join: a mediator missing from one table is kept, with NA in that
# table's count column
n_t2 = merge(n_t2_all, n_t2_comp, by = merge_cols, all = TRUE)
#==================================
# Merge: merge stats + n_obs df
@ -180,10 +201,20 @@ stats_un_t3$timepoint = "t3"
stats_un_t3 = as.data.frame(stats_un_t3)
class(stats_un_t3)
# calculate n_obs for each mediator
n_t3 = data.frame(table(lf_t3_comp$mediator))
colnames(n_t3) = c("mediator", "n_obs")
n_t3$mediator = as.character(n_t3$mediator)
#----------------------------------------
# calculate n_obs for each mediator: t3
#----------------------------------------
# n_obs          : rows per mediator in lf_t3
# n_obs_complete : rows per mediator in lf_t3_comp
#                  (presumably lf_t3 with incomplete rows removed -- confirm
#                  where lf_t3_comp is created)
n_t3_all = data.frame(table(lf_t3$mediator))
colnames(n_t3_all) = c("mediator", "n_obs")
n_t3_all$mediator = as.character(n_t3_all$mediator)

n_t3_comp = data.frame(table(lf_t3_comp$mediator))
colnames(n_t3_comp) = c("mediator", "n_obs_complete")
n_t3_comp$mediator = as.character(n_t3_comp$mediator)

# Join on the column(s) the two count tables share; printed as a visual check
merge_cols = intersect(names(n_t3_all), names(n_t3_comp)); merge_cols
# all = TRUE (spelled out, since T is an ordinary reassignable variable) gives
# an outer join: a mediator missing from one table is kept, with NA in that
# table's count column
n_t3 = merge(n_t3_all, n_t3_comp, by = merge_cols, all = TRUE)
#==================================
# Merge: merge stats + n_obs df
@ -291,6 +322,7 @@ my_col_order2 = c("mediator"
, "timepoint"
, "sample_type"
, "n_obs"
, "n_obs_complete"
, "group1"
, "group2"
, "method"
@ -320,6 +352,7 @@ colnames(combined_unpaired_stats_f) = c("mediator"
, "timepoint"
, "sample_type"
, "n_obs"
, "n_obs_complete"
, "group1"
, "group2"
, "method"