added function for stats from lf data

2021-08-27 13:01:52 +01:00 · 2021-08-27 13:01:52 +01:00 · da9bb67706
commit da9bb67706
parent 6e01ef22c0
4 changed files with 137 additions and 50 deletions
--- a/scripts/functions/lf_unpaired_stats.R
+++ b/scripts/functions/lf_unpaired_stats.R
@ -0,0 +1,21 @@
+library(ggpubr)
+###################################################################
+
+# Thin wrapper around compare_means() (ggpubr is loaded at the top of this file).
+# Builds the formula <lf_stat_value> ~ <lf_stat_group> from the two column-name
+# strings and forwards lf_col_statvars as group.by, my_paired as paired and
+# stat_adj as p.adjust.method. The compare_means() result is returned as is.
+lf_unpaired_stats <- function(lf_data,
+                              lf_stat_value = "param_value",
+                              lf_stat_group = "mutation_info",
+                              lf_col_statvars = "param_type",
+                              my_paired = FALSE,
+                              stat_adj = "none") {
+  # no `method` is passed, so compare_means() uses its own default test
+  group_formula <- as.formula(paste0(lf_stat_value, "~", lf_stat_group))
+  compare_means(formula = group_formula,
+                data = lf_data,
+                group.by = lf_col_statvars,
+                paired = my_paired,
+                p.adjust.method = stat_adj)
+}
--- a/scripts/functions/stat_bp_stability.R
+++ b/scripts/functions/stat_bp_stability.R
@ -1,8 +1,49 @@
-my_stat_ps = compare_means(param_value~mutation_info, group.by = "param_type"
-                           , data = df_lf_ps,  paired = FALSE, p.adjust.method = "BH")
+library(ggpubr)
+###################################################################
+
+# my_unpaired_stats(): same signature and body as lf_unpaired_stats() in
+# lf_unpaired_stats.R. Runs compare_means() on <lf_stat_value> ~ <lf_stat_group>,
+# passing lf_col_statvars as group.by, my_paired as paired and stat_adj as
+# p.adjust.method, and hands back whatever compare_means() returns.
+my_unpaired_stats <- function(lf_data,
+                              lf_stat_value = "param_value",
+                              lf_stat_group = "mutation_info",
+                              lf_col_statvars = "param_type",
+                              my_paired = FALSE,
+                              stat_adj = "none") {
+  # formula text is "<value column>~<group column>"
+  value_by_group <- as.formula(paste0(lf_stat_value, "~", lf_stat_group))
+  compare_means(formula = value_by_group,
+                data = lf_data,
+                group.by = lf_col_statvars,
+                paired = my_paired,
+                p.adjust.method = stat_adj)
+}
+  
+#####################
+# call stat function
+#####################
+stat_results_df <- my_unpaired_stats(
+  lf_data = lf_duet,  # not created above in this file; must already be in the session
+  lf_stat_value = "param_value",
+  lf_stat_group = "mutation_info",
+  lf_col_statvars = "param_type",
+  my_paired = FALSE,
+  stat_adj = "none")

 y_value = "param_value"

+#################################
+my_comparisons <- list(c("DM", "OM"))  # NOTE(review): presumably the group pair compared in the plot - confirm
+
+my_ats <- 22  # axis text size
+my_als <- 20  # axis label size
+my_fls <- 20  # facet label size
+my_pts <- 22  # plot title size
+
+####################################
+
+
 stat_bp_mut <- function(df
                        , x_bp_cateog = "mutation_info"
                        , y_var = "param_value"
@ -16,7 +57,12 @@ stat_bp_mut <- function(df
                        #, stat_label = "p.format")
                        , stat_label = "p.signif" )

-p1 = ggplot(df_lf_ps, aes(x = mutation_info
+
+#############################
+y_value = "param_value"
+
+
+p1 = ggplot(lf_duet, aes(x = mutation_info
                          , y = eval(parse(text = y_value)) ))  + 
  facet_wrap(~ param_type
             , nrow = 1
@ -48,4 +94,7 @@ p1 = ggplot(df_lf_ps, aes(x = mutation_info
                     , method = "wilcox.test"
                     , paired = FALSE
                     #, label = "p.format")
-                     , label = "p.signif")
+                     , label = "p.signif")
+
+p1
+
--- a/scripts/functions/test_lf_unpaired_stats.R
+++ b/scripts/functions/test_lf_unpaired_stats.R
@ -0,0 +1,17 @@
+setwd("~/git/LSHTM_analysis/scripts/functions")  # hard-coded path; the source() below is resolved relative to it
+source("lf_unpaired_stats.R")
+
+# Prerequisite: run other_plots_data.R first to get the data frame to test
+# this function on (lf_duet is used below and is not created in this script).
+
+
+#####################
+# call stat function
+#####################
+stat_results_df <- lf_unpaired_stats(
+  lf_data = lf_duet,
+  lf_stat_value = "param_value",
+  lf_stat_group = "mutation_info",
+  lf_col_statvars = "param_type",
+  my_paired = FALSE,
+  stat_adj = "none")
--- a/scripts/plotting/other_plots_data.R
+++ b/scripts/plotting/other_plots_data.R
@ -6,7 +6,7 @@
 #=======================================================================
 # working dir and loading libraries
 # getwd()
-# setwd("~/git/LSHTM_analysis/scripts/plotting")
+setwd("~/git/LSHTM_analysis/scripts/plotting")
 # getwd()

 # make cmd
@ -14,7 +14,7 @@
 # drug = "streptomycin"
 # gene = "gid"

-#source("get_plotting_dfs.R")
+source("get_plotting_dfs.R")
 #=======================================================================
 # MOVE TO COMBINE or singular file for deepddg

@ -492,47 +492,47 @@ if (nrow(lf_mcsm) == expected_rows_lf){
  quit()
 }
 ############################################################################
-# # clear excess variables
-# rm(all_plot_dfs
-#    , cols_dynamut2_df
-#    , cols_mcsm_df
-#    , cols_mcsm_na_df
-#    , comb_df
-#    , corr_data_ps
-#    , corr_ps_df3
-#    , df_lf_ps
-#    , foo
-#    , foo_cnames
-#    , gene_metadata
-#    , logo_data
-#    , logo_data_or_mult
-#    , logo_data_plot
-#    , logo_data_plot_logor
-#    , logo_data_plot_or
-#    , my_data_snp
-#    , my_df
-#    , my_df_u
-#    , ols_mcsm_df
-#    , other_muts
-#    , pd_df
-#    , subcols_df_ps
-#    , tab_mt
-#    , wide_df_logor
-#    , wide_df_logor_m
-#    , wide_df_or
-#    , wide_df_or_mult
-#    , wt)
-# 
-# 
-# rm(c3, c4, check1
-#    , cols_check
-#    , cols_to_select
-#    , cols_to_select_deepddg
-#    , cols_to_select_duet
-#    , cols_to_select_dynamut
-#    , cols_to_select_dynamut2
-#    , cols_to_select_encomddg
-#    , cols_to_select_encomdds
-#    , cols_to_select_mcsm
-#    , cols_to_select_mcsm_na
-#    , cols_to_select_sdm)
+# clear excess variables
+rm(all_plot_dfs
+   , cols_dynamut2_df
+   , cols_mcsm_df
+   , cols_mcsm_na_df
+   , comb_df
+   , corr_data_ps
+   , corr_ps_df3
+   , df_lf_ps  # removed here; stat_bp_stability.R in this commit switches from df_lf_ps to lf_duet
+   , foo
+   , foo_cnames
+   , gene_metadata
+   , logo_data
+   , logo_data_or_mult
+   , logo_data_plot
+   , logo_data_plot_logor
+   , logo_data_plot_or
+   , my_data_snp
+   , my_df
+   , my_df_u
+   , ols_mcsm_df  # NOTE(review): possibly a typo for cols_mcsm_df (already listed above) - confirm this object exists
+   , other_muts
+   , pd_df
+   , subcols_df_ps
+   , tab_mt
+   , wide_df_logor
+   , wide_df_logor_m
+   , wide_df_or
+   , wide_df_or_mult
+   , wt)
+
+
+rm(c3, c4, check1  # second clean-up batch: check variables and the cols_to_select_* objects
+   , cols_check
+   , cols_to_select
+   , cols_to_select_deepddg
+   , cols_to_select_duet
+   , cols_to_select_dynamut
+   , cols_to_select_dynamut2
+   , cols_to_select_encomddg
+   , cols_to_select_encomdds
+   , cols_to_select_mcsm
+   , cols_to_select_mcsm_na
+   , cols_to_select_sdm)