From 61fbb2f8d6066f2c2152e2b2c660c330787816c9 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 26 Oct 2020 14:33:20 +0000 Subject: [PATCH] formatted non-flu stats in the same way as the flu ones --- Header_TT.R | 0 data_extraction_formatting.R | 0 flu_stats_unpaired_npa.R | 10 +++- flu_stats_unpaired_sam.R | 107 ++++++++++++++++++++-------------- flu_stats_unpaired_serum.R | 109 ++++++++++++++++++++--------------- read_data.R | 0 stats_unpaired_npa.R | 104 +++++++++++++++++++-------------- stats_unpaired_sam.R | 107 ++++++++++++++++++++-------------- stats_unpaired_serum.R | 108 +++++++++++++++++++--------------- test_flu_counts.R | 0 10 files changed, 319 insertions(+), 226 deletions(-) mode change 100644 => 100755 Header_TT.R mode change 100644 => 100755 data_extraction_formatting.R mode change 100644 => 100755 flu_stats_unpaired_npa.R mode change 100644 => 100755 flu_stats_unpaired_sam.R mode change 100644 => 100755 flu_stats_unpaired_serum.R mode change 100644 => 100755 read_data.R mode change 100644 => 100755 stats_unpaired_npa.R mode change 100644 => 100755 stats_unpaired_sam.R mode change 100644 => 100755 stats_unpaired_serum.R mode change 100644 => 100755 test_flu_counts.R diff --git a/Header_TT.R b/Header_TT.R old mode 100644 new mode 100755 diff --git a/data_extraction_formatting.R b/data_extraction_formatting.R old mode 100644 new mode 100755 diff --git a/flu_stats_unpaired_npa.R b/flu_stats_unpaired_npa.R old mode 100644 new mode 100755 index 4810d92..f9ac727 --- a/flu_stats_unpaired_npa.R +++ b/flu_stats_unpaired_npa.R @@ -33,8 +33,11 @@ lf = npa_adults_lf[npa_adults_lf$flustat == 1,] table(lf$timepoint) lf$timepoint = paste0("t", lf$timepoint) +my_sample_type = "npa" + ######################################################################## -# Unpaired stats at each timepoint b/w groups: wilcoxon UNpaired analysis with correction +# Unpaired stats at each timepoint b/w groups: wilcoxon UNpaired analysis +# with correction ####################################################################### # with adjustment: fdr and BH are identical my_adjust_method = "BH" @@ -234,7 +237,8 @@ if ( nrow(combined_unpaired_stats) == expected_rows && ncol(combined_unpaired_st combined_unpaired_stats = subset(combined_unpaired_stats, select = -c(.y.)) # add sample_type -combined_unpaired_stats$sample_type = "npa" +cat("Adding sample type info as a column", my_sample_type, "...") +combined_unpaired_stats$sample_type = my_sample_type # add: reflect stats method correctly i.e paired or unpaired # incase there are NA due to LLODs, the gsub won't work! @@ -313,4 +317,4 @@ colnames(combined_unpaired_stats_f) # write output file #****************** cat("UNpaired stats for groups will be:", stats_time_unpaired_flu_npa) -write.csv(combined_unpaired_stats_f, stats_time_unpaired_flu_npa, row.names = FALSE) \ No newline at end of file +write.csv(combined_unpaired_stats_f, stats_time_unpaired_flu_npa, row.names = FALSE) diff --git a/flu_stats_unpaired_sam.R b/flu_stats_unpaired_sam.R old mode 100644 new mode 100755 index f7dd0a1..4af022e --- a/flu_stats_unpaired_sam.R +++ b/flu_stats_unpaired_sam.R @@ -32,9 +32,11 @@ lf = sam_adults_lf[sam_adults_lf$flustat == 1,] table(lf$timepoint) length(unique(lf$mosaic)) lf$timepoint = paste0("t", lf$timepoint) +my_sample_type = "sam" ######################################################################## -# Unpaired stats at each timepoint b/w groups: wilcoxon UNpaired analysis with correction +# Unpaired stats at each timepoint b/w groups: wilcoxon UNpaired analysis +# with correction ####################################################################### # with adjustment: fdr and BH are identical my_adjust_method = "BH" @@ -88,6 +90,9 @@ if (all(n_t1$mediator%in%stats_un_t1$mediator)) { , "\nncol:", ncol(stats_un_t1)) } +# add bonferroni adjustment as well +stats_un_t1$p_adj_bonferroni = p.adjust(stats_un_t1$p, method = "bonferroni") + rm(n_t1) rm(lf_t1_comp) @@ -132,8 +137,8 @@ if (all(n_t2$mediator%in%stats_un_t2$mediator)) { , "\nncol:", ncol(stats_un_t2)) } -# check: satisfied!!!! -wilcox.test() +# add bonferroni adjustment as well +stats_un_t2$p_adj_bonferroni = p.adjust(stats_un_t2$p, method = "bonferroni") rm(n_t2) rm(lf_t2_comp) @@ -184,6 +189,9 @@ if (all(n_t3$mediator%in%stats_un_t3$mediator)) { # FIXME: supply the col name automatically? wilcox.test(wf$ifna2a_sam3[wf$obesity == 1], wf$ifna2a_sam3[wf$obesity == 0]) +# add bonferroni adjustment as well +stats_un_t3$p_adj_bonferroni = p.adjust(stats_un_t3$p, method = "bonferroni") + rm(n_t3) rm(lf_t3_comp) @@ -223,46 +231,41 @@ if ( nrow(combined_unpaired_stats) == expected_rows && ncol(combined_unpaired_st quit() } -#=============================================================== +####################################################################### +#================= # formatting df -# delete unnecessary column +#================= +# delete: unnecessary column combined_unpaired_stats = subset(combined_unpaired_stats, select = -c(.y.)) -# reflect stats method correctly -combined_unpaired_stats$method +# add sample_type +cat("Adding sample type info as a column", my_sample_type, "...") +combined_unpaired_stats$sample_type = my_sample_type + +# add: reflect stats method correctly i.e paired or unpaired +# incase there are NA due to LLODs, the gsub won't work! #combined_unpaired_stats$method = gsub("Wilcoxon", "Wilcoxon_unpaired", combined_unpaired_stats$method) combined_unpaired_stats$method = "wilcoxon unpaired" combined_unpaired_stats$method -# replace "." in colnames with "_" -colnames(combined_unpaired_stats) -#names(combined_unpaired_stats) = gsub("\.", "_", names(combined_unpaired_stats)) # weird!!!! - -colnames(combined_unpaired_stats) = c("mediator" - , "group1" - , "group2" - , "p" - , "p_adj" - , "p_format" - , "p_signif" - , "method" - , "timepoint" - , "n_obs") - -colnames(combined_unpaired_stats) -combined_unpaired_stats$sample_type = "sam" - -# add an extra column for padjust_signif -combined_unpaired_stats$padjust_signif = round(combined_unpaired_stats$p_adj, digits = 2) - -# add appropriate symbols for padjust_signif +# add an extra column for padjust_signif: my_adjust_method +combined_unpaired_stats$padjust_signif = combined_unpaired_stats$p.adj +# add appropriate symbols for padjust_signif: my_adjust_method combined_unpaired_stats = dplyr::mutate(combined_unpaired_stats, padjust_signif = case_when(padjust_signif == 0.05 ~ "." - , padjust_signif <=0.0001 ~ '****' - , padjust_signif <=0.001 ~ '***' - , padjust_signif <=0.01 ~ '**' - , padjust_signif <0.05 ~ '*' - , TRUE ~ 'ns')) - + , padjust_signif <=0.0001 ~ '****' + , padjust_signif <=0.001 ~ '***' + , padjust_signif <=0.01 ~ '**' + , padjust_signif <0.05 ~ '*' + , TRUE ~ 'ns')) +# add an extra column for p_bon_signif +combined_unpaired_stats$p_bon_signif = combined_unpaired_stats$p_adj_bonferroni +# add appropriate symbols for p_bon_signif +combined_unpaired_stats = dplyr::mutate(combined_unpaired_stats, p_bon_signif = case_when(p_bon_signif == 0.05 ~ "." + , p_bon_signif <=0.0001 ~ '****' + , p_bon_signif <=0.001 ~ '***' + , p_bon_signif <=0.01 ~ '**' + , p_bon_signif <0.05 ~ '*' + , TRUE ~ 'ns')) # reorder columns print("preparing to reorder columns...") colnames(combined_unpaired_stats) @@ -274,16 +277,18 @@ my_col_order2 = c("mediator" , "group2" , "method" , "p" - , "p_format" - , "p_signif" - , "p_adj" - , "padjust_signif") + , "p.format" + , "p.signif" + , "p.adj" + , "padjust_signif" + , "p_adj_bonferroni" + , "p_bon_signif") -if( length(my_col_order2) == ncol(combined_unpaired_stats) && all(my_col_order2%in%colnames(combined_unpaired_stats)) ){ +if( length(my_col_order2) == ncol(combined_unpaired_stats) && (all(my_col_order2%in%colnames(combined_unpaired_stats))) ){ print("PASS: Reordering columns...") combined_unpaired_stats_f = combined_unpaired_stats[, my_col_order2] print("Successful: column reordering") - print("formatted df called:'combined_unpaired_stats_f'") + print("formatted df called:'combined_unpaired_stats_f'") cat('\nformatted df has the following dimensions\n') print(dim(combined_unpaired_stats_f )) } else{ @@ -291,10 +296,26 @@ if( length(my_col_order2) == ncol(combined_unpaired_stats) && all(my_col_order2% , "\nExpected column order for: ", ncol(combined_unpaired_stats) , "\nGot:", length(my_col_order2))) quit() -} +} +# assign nice column names like replace "." with "_" +colnames(combined_unpaired_stats_f) = c("mediator" + , "timepoint" + , "sample_type" + , "n_obs" + , "group1" + , "group2" + , "method" + , "p" + , "p_format" + , "p_signif" + , paste0("p_adj_fdr_", my_adjust_method) + , paste0("p_", my_adjust_method, "_signif") + , "p_adj_bonferroni" + , "p_bon_signif") +colnames(combined_unpaired_stats_f) #****************** # write output file #****************** cat("UNpaired stats for groups will be:", stats_time_unpaired_flu_sam) -write.csv(combined_unpaired_stats_f, stats_time_unpaired_flu_sam, row.names = FALSE) \ No newline at end of file +write.csv(combined_unpaired_stats_f, stats_time_unpaired_flu_sam, row.names = FALSE) diff --git a/flu_stats_unpaired_serum.R b/flu_stats_unpaired_serum.R old mode 100644 new mode 100755 index 480886d..296a4c2 --- a/flu_stats_unpaired_serum.R +++ b/flu_stats_unpaired_serum.R @@ -32,6 +32,8 @@ lf = serum_adults_lf[serum_adults_lf$flustat == 1,] table(lf$timepoint) lf$timepoint = paste0("t", lf$timepoint) +my_sample_type = "serum" + ######################################################################## # Unpaired stats at each timepoint b/w groups: wilcoxon UNpaired analysis with correction ####################################################################### @@ -87,8 +89,8 @@ if (all(n_t1$mediator%in%stats_un_t1$mediator)) { , "\nncol:", ncol(stats_un_t1)) } -# check: satisfied!!!! -wilcox.test() +# add bonferroni adjustment as well +stats_un_t1$p_adj_bonferroni = p.adjust(stats_un_t1$p, method = "bonferroni") rm(n_t1) rm(lf_t1_comp) @@ -134,8 +136,8 @@ if (all(n_t2$mediator%in%stats_un_t2$mediator)) { , "\nncol:", ncol(stats_un_t2)) } -# check: satisfied!!!! -wilcox.test() +# add bonferroni adjustment as well +stats_un_t2$p_adj_bonferroni = p.adjust(stats_un_t2$p, method = "bonferroni") rm(n_t2) rm(lf_t2_comp) @@ -183,8 +185,8 @@ if (all(n_t3$mediator%in%stats_un_t3$mediator)) { , "\nncol:", ncol(stats_un_t3)) } -# check: satisfied!!!! -wilcox.test() +# add bonferroni adjustment as well +stats_un_t3$p_adj_bonferroni = p.adjust(stats_un_t3$p, method = "bonferroni") rm(n_t3) rm(lf_t3_comp) @@ -225,46 +227,41 @@ if ( nrow(combined_unpaired_stats) == expected_rows && ncol(combined_unpaired_st quit() } -#=============================================================== +####################################################################### +#================= # formatting df -# delete unnecessary column +#================= +# delete: unnecessary column combined_unpaired_stats = subset(combined_unpaired_stats, select = -c(.y.)) -# reflect stats method correctly -combined_unpaired_stats$method +# add sample_type +cat("Adding sample type info as a column", my_sample_type, "...") +combined_unpaired_stats$sample_type = my_sample_type + +# add: reflect stats method correctly i.e paired or unpaired +# incase there are NA due to LLODs, the gsub won't work! #combined_unpaired_stats$method = gsub("Wilcoxon", "Wilcoxon_unpaired", combined_unpaired_stats$method) combined_unpaired_stats$method = "wilcoxon unpaired" combined_unpaired_stats$method -# replace "." in colnames with "_" -colnames(combined_unpaired_stats) -#names(combined_unpaired_stats) = gsub("\.", "_", names(combined_unpaired_stats)) # weird!!!! - -colnames(combined_unpaired_stats) = c("mediator" - , "group1" - , "group2" - , "p" - , "p_adj" - , "p_format" - , "p_signif" - , "method" - , "timepoint" - , "n_obs") - -colnames(combined_unpaired_stats) -combined_unpaired_stats$sample_type = "serum" - -# add an extra column for padjust_signif -combined_unpaired_stats$padjust_signif = round(combined_unpaired_stats$p_adj, digits = 2) - -# add appropriate symbols for padjust_signif +# add an extra column for padjust_signif: my_adjust_method +combined_unpaired_stats$padjust_signif = combined_unpaired_stats$p.adj +# add appropriate symbols for padjust_signif: my_adjust_method combined_unpaired_stats = dplyr::mutate(combined_unpaired_stats, padjust_signif = case_when(padjust_signif == 0.05 ~ "." - , padjust_signif <=0.0001 ~ '****' - , padjust_signif <=0.001 ~ '***' - , padjust_signif <=0.01 ~ '**' - , padjust_signif <0.05 ~ '*' - , TRUE ~ 'ns')) - + , padjust_signif <=0.0001 ~ '****' + , padjust_signif <=0.001 ~ '***' + , padjust_signif <=0.01 ~ '**' + , padjust_signif <0.05 ~ '*' + , TRUE ~ 'ns')) +# add an extra column for p_bon_signif +combined_unpaired_stats$p_bon_signif = combined_unpaired_stats$p_adj_bonferroni +# add appropriate symbols for p_bon_signif +combined_unpaired_stats = dplyr::mutate(combined_unpaired_stats, p_bon_signif = case_when(p_bon_signif == 0.05 ~ "." + , p_bon_signif <=0.0001 ~ '****' + , p_bon_signif <=0.001 ~ '***' + , p_bon_signif <=0.01 ~ '**' + , p_bon_signif <0.05 ~ '*' + , TRUE ~ 'ns')) # reorder columns print("preparing to reorder columns...") colnames(combined_unpaired_stats) @@ -276,27 +273,47 @@ my_col_order2 = c("mediator" , "group2" , "method" , "p" - , "p_format" - , "p_signif" - , "p_adj" - , "padjust_signif") + , "p.format" + , "p.signif" + , "p.adj" + , "padjust_signif" + , "p_adj_bonferroni" + , "p_bon_signif") -if( length(my_col_order2) == ncol(combined_unpaired_stats) && all(my_col_order2%in%colnames(combined_unpaired_stats)) ){ +if( length(my_col_order2) == ncol(combined_unpaired_stats) && (all(my_col_order2%in%colnames(combined_unpaired_stats))) ){ print("PASS: Reordering columns...") combined_unpaired_stats_f = combined_unpaired_stats[, my_col_order2] print("Successful: column reordering") print("formatted df called:'combined_unpaired_stats_f'") - cat("\nformatted df has the following dimensions\n") + cat('\nformatted df has the following dimensions\n') print(dim(combined_unpaired_stats_f )) } else{ cat(paste0("FAIL:Cannot reorder columns, length mismatch" , "\nExpected column order for: ", ncol(combined_unpaired_stats) , "\nGot:", length(my_col_order2))) quit() -} +} +# assign nice column names like replace "." with "_" +colnames(combined_unpaired_stats_f) = c("mediator" + , "timepoint" + , "sample_type" + , "n_obs" + , "group1" + , "group2" + , "method" + , "p" + , "p_format" + , "p_signif" + , paste0("p_adj_fdr_", my_adjust_method) + , paste0("p_", my_adjust_method, "_signif") + , "p_adj_bonferroni" + , "p_bon_signif") + +colnames(combined_unpaired_stats_f) + #****************** # write output file #****************** cat("UNpaired stats for groups will be:", stats_time_unpaired_flu_serum) -write.csv(combined_unpaired_stats_f, stats_time_unpaired_flu_serum, row.names = FALSE) \ No newline at end of file +write.csv(combined_unpaired_stats_f, stats_time_unpaired_flu_serum, row.names = FALSE) diff --git a/read_data.R b/read_data.R old mode 100644 new mode 100755 diff --git a/stats_unpaired_npa.R b/stats_unpaired_npa.R old mode 100644 new mode 100755 index f84f94a..54016bf --- a/stats_unpaired_npa.R +++ b/stats_unpaired_npa.R @@ -30,6 +30,8 @@ lf = npa_adults_lf table(lf$timepoint) lf$timepoint = paste0("t", lf$timepoint) +my_sample_type = "npa" + ######################################################################## # Unpaired stats at each timepoint b/w groups: wilcoxon UNpaired analysis with correction ####################################################################### @@ -85,8 +87,8 @@ if (all(n_t1$mediator%in%stats_un_t1$mediator)) { , "\nncol:", ncol(stats_un_t1)) } -# check: satisfied!!!! -wilcox.test() +# add bonferroni adjustment as well +stats_un_t1$p_adj_bonferroni = p.adjust(stats_un_t1$p, method = "bonferroni") rm(n_t1) rm(lf_t1_comp) @@ -132,8 +134,8 @@ if (all(n_t2$mediator%in%stats_un_t2$mediator)) { , "\nncol:", ncol(stats_un_t2)) } -# check: satisfied!!!! -wilcox.test() +# add bonferroni adjustment as well +stats_un_t2$p_adj_bonferroni = p.adjust(stats_un_t2$p, method = "bonferroni") rm(n_t2) rm(lf_t2_comp) @@ -180,8 +182,8 @@ if (all(n_t3$mediator%in%stats_un_t3$mediator)) { , "\nncol:", ncol(stats_un_t3)) } -# check: satisfied!!!! -wilcox.test() +# add bonferroni adjustment as well +stats_un_t3$p_adj_bonferroni = p.adjust(stats_un_t3$p, method = "bonferroni") rm(n_t3) rm(lf_t3_comp) @@ -222,46 +224,41 @@ if ( nrow(combined_unpaired_stats) == expected_rows && ncol(combined_unpaired_st quit() } -#=============================================================== +####################################################################### +#================= # formatting df -# delete unnecessary column +#================= +# delete: unnecessary column combined_unpaired_stats = subset(combined_unpaired_stats, select = -c(.y.)) -# reflect stats method correctly -combined_unpaired_stats$method +# add sample_type +cat("Adding sample type info as a column", my_sample_type, "...") +combined_unpaired_stats$sample_type = my_sample_type + +# add: reflect stats method correctly i.e paired or unpaired +# incase there are NA due to LLODs, the gsub won't work! #combined_unpaired_stats$method = gsub("Wilcoxon", "Wilcoxon_unpaired", combined_unpaired_stats$method) combined_unpaired_stats$method = "wilcoxon unpaired" combined_unpaired_stats$method -# replace "." in colnames with "_" -colnames(combined_unpaired_stats) -#names(combined_unpaired_stats) = gsub("\.", "_", names(combined_unpaired_stats)) # weird!!!! - -colnames(combined_unpaired_stats) = c("mediator" - , "group1" - , "group2" - , "p" - , "p_adj" - , "p_format" - , "p_signif" - , "method" - , "timepoint" - , "n_obs") - -colnames(combined_unpaired_stats) -combined_unpaired_stats$sample_type = "npa" - -# add an extra column for padjust_signif -combined_unpaired_stats$padjust_signif = round(combined_unpaired_stats$p_adj, digits = 2) - -# add appropriate symbols for padjust_signif +# add an extra column for padjust_signif: my_adjust_method +combined_unpaired_stats$padjust_signif = combined_unpaired_stats$p.adj +# add appropriate symbols for padjust_signif: my_adjust_method combined_unpaired_stats = dplyr::mutate(combined_unpaired_stats, padjust_signif = case_when(padjust_signif == 0.05 ~ "." - , padjust_signif <=0.0001 ~ '****' - , padjust_signif <=0.001 ~ '***' - , padjust_signif <=0.01 ~ '**' - , padjust_signif <0.05 ~ '*' - , TRUE ~ 'ns')) - + , padjust_signif <=0.0001 ~ '****' + , padjust_signif <=0.001 ~ '***' + , padjust_signif <=0.01 ~ '**' + , padjust_signif <0.05 ~ '*' + , TRUE ~ 'ns')) +# add an extra column for p_bon_signif +combined_unpaired_stats$p_bon_signif = combined_unpaired_stats$p_adj_bonferroni +# add appropriate symbols for p_bon_signif +combined_unpaired_stats = dplyr::mutate(combined_unpaired_stats, p_bon_signif = case_when(p_bon_signif == 0.05 ~ "." + , p_bon_signif <=0.0001 ~ '****' + , p_bon_signif <=0.001 ~ '***' + , p_bon_signif <=0.01 ~ '**' + , p_bon_signif <0.05 ~ '*' + , TRUE ~ 'ns')) # reorder columns print("preparing to reorder columns...") colnames(combined_unpaired_stats) @@ -273,10 +270,12 @@ my_col_order2 = c("mediator" , "group2" , "method" , "p" - , "p_format" - , "p_signif" - , "p_adj" - , "padjust_signif") + , "p.format" + , "p.signif" + , "p.adj" + , "padjust_signif" + , "p_adj_bonferroni" + , "p_bon_signif") if( length(my_col_order2) == ncol(combined_unpaired_stats) && (all(my_col_order2%in%colnames(combined_unpaired_stats))) ){ print("PASS: Reordering columns...") @@ -291,9 +290,26 @@ if( length(my_col_order2) == ncol(combined_unpaired_stats) && (all(my_col_order2 , "\nGot:", length(my_col_order2))) quit() } - +# assign nice column names like replace "." with "_" +colnames(combined_unpaired_stats_f) = c("mediator" + , "timepoint" + , "sample_type" + , "n_obs" + , "group1" + , "group2" + , "method" + , "p" + , "p_format" + , "p_signif" + , paste0("p_adj_fdr_", my_adjust_method) + , paste0("p_", my_adjust_method, "_signif") + , "p_adj_bonferroni" + , "p_bon_signif") + +colnames(combined_unpaired_stats_f) + #****************** # write output file #****************** cat("UNpaired stats for groups will be:", stats_time_unpaired_npa) -write.csv(combined_unpaired_stats_f, stats_time_unpaired_npa, row.names = FALSE) \ No newline at end of file +write.csv(combined_unpaired_stats_f, stats_time_unpaired_npa, row.names = FALSE) diff --git a/stats_unpaired_sam.R b/stats_unpaired_sam.R old mode 100644 new mode 100755 index 0894ef1..8808fe9 --- a/stats_unpaired_sam.R +++ b/stats_unpaired_sam.R @@ -30,6 +30,8 @@ lf = sam_adults_lf table(lf$timepoint) lf$timepoint = paste0("t", lf$timepoint) +my_sample_type = "sam" + ######################################################################## # Unpaired stats at each timepoint b/w groups: wilcoxon UNpaired analysis with correction ####################################################################### @@ -85,8 +87,8 @@ if (all(n_t1$mediator%in%stats_un_t1$mediator)) { , "\nncol:", ncol(stats_un_t1)) } -# check: satisfied!!!! -wilcox.test() +# add bonferroni adjustment as well +stats_un_t1$p_adj_bonferroni = p.adjust(stats_un_t1$p, method = "bonferroni") rm(n_t1) rm(lf_t1_comp) @@ -132,8 +134,8 @@ if (all(n_t2$mediator%in%stats_un_t2$mediator)) { , "\nncol:", ncol(stats_un_t2)) } -# check: satisfied!!!! -wilcox.test() +# add bonferroni adjustment as well +stats_un_t2$p_adj_bonferroni = p.adjust(stats_un_t2$p, method = "bonferroni") rm(n_t2) rm(lf_t2_comp) @@ -184,6 +186,9 @@ if (all(n_t3$mediator%in%stats_un_t3$mediator)) { # FIXME: supply the col name automatically? wilcox.test(wf$ifna2a_sam3[wf$obesity == 1], wf$ifna2a_sam3[wf$obesity == 0]) +# add bonferroni adjustment as well +stats_un_t3$p_adj_bonferroni = p.adjust(stats_un_t3$p, method = "bonferroni") + rm(n_t3) rm(lf_t3_comp) @@ -223,46 +228,41 @@ if ( nrow(combined_unpaired_stats) == expected_rows && ncol(combined_unpaired_st quit() } -#=============================================================== +####################################################################### +#================= # formatting df -# delete unnecessary column +#================= +# delete: unnecessary column combined_unpaired_stats = subset(combined_unpaired_stats, select = -c(.y.)) -# reflect stats method correctly -combined_unpaired_stats$method +# add sample_type +cat("Adding sample type info as a column", my_sample_type, "...") +combined_unpaired_stats$sample_type = my_sample_type + +# add: reflect stats method correctly i.e paired or unpaired +# incase there are NA due to LLODs, the gsub won't work! #combined_unpaired_stats$method = gsub("Wilcoxon", "Wilcoxon_unpaired", combined_unpaired_stats$method) combined_unpaired_stats$method = "wilcoxon unpaired" combined_unpaired_stats$method -# replace "." in colnames with "_" -colnames(combined_unpaired_stats) -#names(combined_unpaired_stats) = gsub("\.", "_", names(combined_unpaired_stats)) # weird!!!! - -colnames(combined_unpaired_stats) = c("mediator" - , "group1" - , "group2" - , "p" - , "p_adj" - , "p_format" - , "p_signif" - , "method" - , "timepoint" - , "n_obs") - -colnames(combined_unpaired_stats) -combined_unpaired_stats$sample_type = "sam" - -# add an extra column for padjust_signif -combined_unpaired_stats$padjust_signif = round(combined_unpaired_stats$p_adj, digits = 2) - -# add appropriate symbols for padjust_signif +# add an extra column for padjust_signif: my_adjust_method +combined_unpaired_stats$padjust_signif = combined_unpaired_stats$p.adj +# add appropriate symbols for padjust_signif: my_adjust_method combined_unpaired_stats = dplyr::mutate(combined_unpaired_stats, padjust_signif = case_when(padjust_signif == 0.05 ~ "." - , padjust_signif <=0.0001 ~ '****' - , padjust_signif <=0.001 ~ '***' - , padjust_signif <=0.01 ~ '**' - , padjust_signif <0.05 ~ '*' - , TRUE ~ 'ns')) - + , padjust_signif <=0.0001 ~ '****' + , padjust_signif <=0.001 ~ '***' + , padjust_signif <=0.01 ~ '**' + , padjust_signif <0.05 ~ '*' + , TRUE ~ 'ns')) +# add an extra column for p_bon_signif +combined_unpaired_stats$p_bon_signif = combined_unpaired_stats$p_adj_bonferroni +# add appropriate symbols for p_bon_signif +combined_unpaired_stats = dplyr::mutate(combined_unpaired_stats, p_bon_signif = case_when(p_bon_signif == 0.05 ~ "." + , p_bon_signif <=0.0001 ~ '****' + , p_bon_signif <=0.001 ~ '***' + , p_bon_signif <=0.01 ~ '**' + , p_bon_signif <0.05 ~ '*' + , TRUE ~ 'ns')) # reorder columns print("preparing to reorder columns...") colnames(combined_unpaired_stats) @@ -274,16 +274,18 @@ my_col_order2 = c("mediator" , "group2" , "method" , "p" - , "p_format" - , "p_signif" - , "p_adj" - , "padjust_signif") + , "p.format" + , "p.signif" + , "p.adj" + , "padjust_signif" + , "p_adj_bonferroni" + , "p_bon_signif") -if( length(my_col_order2) == ncol(combined_unpaired_stats) && all(my_col_order2%in%colnames(combined_unpaired_stats)) ){ +if( length(my_col_order2) == ncol(combined_unpaired_stats) && (all(my_col_order2%in%colnames(combined_unpaired_stats))) ){ print("PASS: Reordering columns...") combined_unpaired_stats_f = combined_unpaired_stats[, my_col_order2] print("Successful: column reordering") - print("formatted df called:'combined_unpaired_stats_f'") + print("formatted df called:'combined_unpaired_stats_f'") cat('\nformatted df has the following dimensions\n') print(dim(combined_unpaired_stats_f )) } else{ @@ -291,10 +293,27 @@ if( length(my_col_order2) == ncol(combined_unpaired_stats) && all(my_col_order2% , "\nExpected column order for: ", ncol(combined_unpaired_stats) , "\nGot:", length(my_col_order2))) quit() -} +} +# assign nice column names like replace "." with "_" +colnames(combined_unpaired_stats_f) = c("mediator" + , "timepoint" + , "sample_type" + , "n_obs" + , "group1" + , "group2" + , "method" + , "p" + , "p_format" + , "p_signif" + , paste0("p_adj_fdr_", my_adjust_method) + , paste0("p_", my_adjust_method, "_signif") + , "p_adj_bonferroni" + , "p_bon_signif") + +colnames(combined_unpaired_stats_f) #****************** # write output file #****************** cat("UNpaired stats for groups will be:", stats_time_unpaired_sam) -write.csv(combined_unpaired_stats_f, stats_time_unpaired_sam, row.names = FALSE) \ No newline at end of file +write.csv(combined_unpaired_stats_f, stats_time_unpaired_sam, row.names = FALSE) diff --git a/stats_unpaired_serum.R b/stats_unpaired_serum.R old mode 100644 new mode 100755 index dd78831..857668e --- a/stats_unpaired_serum.R +++ b/stats_unpaired_serum.R @@ -30,6 +30,8 @@ lf = serum_adults_lf table(lf$timepoint) lf$timepoint = paste0("t", lf$timepoint) +my_sample_type = "serum" + ######################################################################## # Unpaired stats at each timepoint b/w groups: wilcoxon UNpaired analysis with correction ####################################################################### @@ -85,8 +87,8 @@ if (all(n_t1$mediator%in%stats_un_t1$mediator)) { , "\nncol:", ncol(stats_un_t1)) } -# check: satisfied!!!! -wilcox.test() +# add bonferroni adjustment as well +stats_un_t1$p_adj_bonferroni = p.adjust(stats_un_t1$p, method = "bonferroni") rm(n_t1) rm(lf_t1_comp) @@ -132,8 +134,8 @@ if (all(n_t2$mediator%in%stats_un_t2$mediator)) { , "\nncol:", ncol(stats_un_t2)) } -# check: satisfied!!!! -wilcox.test() +# add bonferroni adjustment as well +stats_un_t2$p_adj_bonferroni = p.adjust(stats_un_t2$p, method = "bonferroni") rm(n_t2) rm(lf_t2_comp) @@ -181,8 +183,8 @@ if (all(n_t3$mediator%in%stats_un_t3$mediator)) { , "\nncol:", ncol(stats_un_t3)) } -# check: satisfied!!!! -wilcox.test() +# add bonferroni adjustment as well +stats_un_t3$p_adj_bonferroni = p.adjust(stats_un_t3$p, method = "bonferroni") rm(n_t3) rm(lf_t3_comp) @@ -223,46 +225,41 @@ if ( nrow(combined_unpaired_stats) == expected_rows && ncol(combined_unpaired_st quit() } -#=============================================================== +####################################################################### +#================= # formatting df -# delete unnecessary column +#================= +# delete: unnecessary column combined_unpaired_stats = subset(combined_unpaired_stats, select = -c(.y.)) -# reflect stats method correctly -combined_unpaired_stats$method +# add sample_type +cat("Adding sample type info as a column", my_sample_type, "...") +combined_unpaired_stats$sample_type = my_sample_type + +# add: reflect stats method correctly i.e paired or unpaired +# incase there are NA due to LLODs, the gsub won't work! #combined_unpaired_stats$method = gsub("Wilcoxon", "Wilcoxon_unpaired", combined_unpaired_stats$method) combined_unpaired_stats$method = "wilcoxon unpaired" combined_unpaired_stats$method -# replace "." in colnames with "_" -colnames(combined_unpaired_stats) -#names(combined_unpaired_stats) = gsub("\.", "_", names(combined_unpaired_stats)) # weird!!!! - -colnames(combined_unpaired_stats) = c("mediator" - , "group1" - , "group2" - , "p" - , "p_adj" - , "p_format" - , "p_signif" - , "method" - , "timepoint" - , "n_obs") - -colnames(combined_unpaired_stats) -combined_unpaired_stats$sample_type = "serum" - -# add an extra column for padjust_signif -combined_unpaired_stats$padjust_signif = round(combined_unpaired_stats$p_adj, digits = 2) - -# add appropriate symbols for padjust_signif +# add an extra column for padjust_signif: my_adjust_method +combined_unpaired_stats$padjust_signif = combined_unpaired_stats$p.adj +# add appropriate symbols for padjust_signif: my_adjust_method combined_unpaired_stats = dplyr::mutate(combined_unpaired_stats, padjust_signif = case_when(padjust_signif == 0.05 ~ "." - , padjust_signif <=0.0001 ~ '****' - , padjust_signif <=0.001 ~ '***' - , padjust_signif <=0.01 ~ '**' - , padjust_signif <0.05 ~ '*' - , TRUE ~ 'ns')) - + , padjust_signif <=0.0001 ~ '****' + , padjust_signif <=0.001 ~ '***' + , padjust_signif <=0.01 ~ '**' + , padjust_signif <0.05 ~ '*' + , TRUE ~ 'ns')) +# add an extra column for p_bon_signif +combined_unpaired_stats$p_bon_signif = combined_unpaired_stats$p_adj_bonferroni +# add appropriate symbols for p_bon_signif +combined_unpaired_stats = dplyr::mutate(combined_unpaired_stats, p_bon_signif = case_when(p_bon_signif == 0.05 ~ "." + , p_bon_signif <=0.0001 ~ '****' + , p_bon_signif <=0.001 ~ '***' + , p_bon_signif <=0.01 ~ '**' + , p_bon_signif <0.05 ~ '*' + , TRUE ~ 'ns')) # reorder columns print("preparing to reorder columns...") colnames(combined_unpaired_stats) @@ -274,27 +271,46 @@ my_col_order2 = c("mediator" , "group2" , "method" , "p" - , "p_format" - , "p_signif" - , "p_adj" - , "padjust_signif") + , "p.format" + , "p.signif" + , "p.adj" + , "padjust_signif" + , "p_adj_bonferroni" + , "p_bon_signif") -if( length(my_col_order2) == ncol(combined_unpaired_stats) && all(my_col_order2%in%colnames(combined_unpaired_stats)) ){ +if( length(my_col_order2) == ncol(combined_unpaired_stats) && (all(my_col_order2%in%colnames(combined_unpaired_stats))) ){ print("PASS: Reordering columns...") combined_unpaired_stats_f = combined_unpaired_stats[, my_col_order2] print("Successful: column reordering") print("formatted df called:'combined_unpaired_stats_f'") - cat("\nformatted df has the following dimensions\n") + cat('\nformatted df has the following dimensions\n') print(dim(combined_unpaired_stats_f )) } else{ cat(paste0("FAIL:Cannot reorder columns, length mismatch" , "\nExpected column order for: ", ncol(combined_unpaired_stats) , "\nGot:", length(my_col_order2))) quit() -} +} +# assign nice column names like replace "." with "_" +colnames(combined_unpaired_stats_f) = c("mediator" + , "timepoint" + , "sample_type" + , "n_obs" + , "group1" + , "group2" + , "method" + , "p" + , "p_format" + , "p_signif" + , paste0("p_adj_fdr_", my_adjust_method) + , paste0("p_", my_adjust_method, "_signif") + , "p_adj_bonferroni" + , "p_bon_signif") + +colnames(combined_unpaired_stats_f) #****************** # write output file #****************** cat("UNpaired stats for groups will be:", stats_time_unpaired_serum) -write.csv(combined_unpaired_stats_f, stats_time_unpaired_serum, row.names = FALSE) \ No newline at end of file +write.csv(combined_unpaired_stats_f, stats_time_unpaired_serum, row.names = FALSE) diff --git a/test_flu_counts.R b/test_flu_counts.R old mode 100644 new mode 100755