added function for stats from lf data

This commit is contained in:
Tanushree Tunstall 2021-08-27 13:01:52 +01:00
parent 6e01ef22c0
commit da9bb67706
4 changed files with 137 additions and 50 deletions

View file

@ -0,0 +1,21 @@
library(ggpubr)
###################################################################
#' Mean-comparison statistics for long-format data
#'
#' Builds the formula `lf_stat_value ~ lf_stat_group` from the supplied
#' strings and forwards it to `compare_means()` (ggpubr is attached at the
#' top of this file), computing the comparison separately for every level
#' of `lf_col_statvars`.
#'
#' @param lf_data Data frame containing the columns named by the other
#'   arguments.
#' @param lf_stat_value String used as the left-hand side of the formula
#'   (default "param_value").
#' @param lf_stat_group String used as the right-hand side of the formula
#'   (default "mutation_info").
#' @param lf_col_statvars Column name(s) passed to `compare_means()` as
#'   `group.by` (default "param_type"); may be `NULL`.
#' @param my_paired Passed to `compare_means()` as `paired` (default FALSE).
#' @param stat_adj Passed to `compare_means()` as `p.adjust.method`
#'   (default "none").
#' @return The object returned by `compare_means()`, unchanged.
lf_unpaired_stats <- function(lf_data
                              , lf_stat_value = "param_value"
                              , lf_stat_group = "mutation_info"
                              , lf_col_statvars = "param_type"
                              , my_paired = FALSE
                              , stat_adj = "none") {

  # Fail early with a readable message instead of an opaque error raised
  # from inside compare_means().
  if (!is.data.frame(lf_data)) {
    stop("`lf_data` must be a data frame", call. = FALSE)
  }

  stat_formula <- as.formula(paste0(lf_stat_value, "~", lf_stat_group))

  # all.vars() is used rather than the raw strings so that formula sides
  # such as "c(a, b)" or "1" are still accepted; "." is not a column name.
  needed_cols <- setdiff(c(all.vars(stat_formula), lf_col_statvars), ".")
  missing_cols <- setdiff(needed_cols, names(lf_data))
  if (length(missing_cols) > 0) {
    stop("column(s) not found in `lf_data`: "
         , paste(missing_cols, collapse = ", ")
         , call. = FALSE)
  }

  compare_means(stat_formula
                , group.by = lf_col_statvars
                , data = lf_data
                , paired = my_paired
                , p.adjust.method = stat_adj)
}

View file

@ -1,8 +1,49 @@
my_stat_ps = compare_means(param_value~mutation_info, group.by = "param_type"
, data = df_lf_ps, paired = FALSE, p.adjust.method = "BH")
library(ggpubr)
###################################################################
#' Mean-comparison statistics for long-format data
#'
#' Builds the formula `lf_stat_value ~ lf_stat_group` from the supplied
#' strings and forwards it to `compare_means()` (ggpubr is attached at the
#' top of this file), computing the comparison separately for every level
#' of `lf_col_statvars`.
#'
#' @param lf_data Data frame containing the columns named by the other
#'   arguments.
#' @param lf_stat_value String used as the left-hand side of the formula
#'   (default "param_value").
#' @param lf_stat_group String used as the right-hand side of the formula
#'   (default "mutation_info").
#' @param lf_col_statvars Column name(s) passed to `compare_means()` as
#'   `group.by` (default "param_type"); may be `NULL`.
#' @param my_paired Passed to `compare_means()` as `paired` (default FALSE).
#' @param stat_adj Passed to `compare_means()` as `p.adjust.method`
#'   (default "none").
#' @return The object returned by `compare_means()`, unchanged.
my_unpaired_stats <- function(lf_data
                              , lf_stat_value = "param_value"
                              , lf_stat_group = "mutation_info"
                              , lf_col_statvars = "param_type"
                              , my_paired = FALSE
                              , stat_adj = "none") {

  # Fail early with a readable message instead of an opaque error raised
  # from inside compare_means().
  if (!is.data.frame(lf_data)) {
    stop("`lf_data` must be a data frame", call. = FALSE)
  }

  stat_formula <- as.formula(paste0(lf_stat_value, "~", lf_stat_group))

  # all.vars() is used rather than the raw strings so that formula sides
  # such as "c(a, b)" or "1" are still accepted; "." is not a column name.
  needed_cols <- setdiff(c(all.vars(stat_formula), lf_col_statvars), ".")
  missing_cols <- setdiff(needed_cols, names(lf_data))
  if (length(missing_cols) > 0) {
    stop("column(s) not found in `lf_data`: "
         , paste(missing_cols, collapse = ", ")
         , call. = FALSE)
  }

  compare_means(stat_formula
                , group.by = lf_col_statvars
                , data = lf_data
                , paired = my_paired
                , p.adjust.method = stat_adj)
}
# ------------------------------------------------------------------
# Compute the comparison statistics for lf_duet
# (lf_duet is not created in this file; it must already exist)
# ------------------------------------------------------------------
stat_results_df <- my_unpaired_stats(
  lf_data = lf_duet,
  lf_stat_value = "param_value",
  lf_stat_group = "mutation_info",
  lf_col_statvars = "param_type",
  my_paired = FALSE,
  stat_adj = "none"
)

# column name that the plotting code further down maps to the y aesthetic
y_value <- "param_value"

# ------------------------------------------------------------------
# Plot settings
# ------------------------------------------------------------------
# group pair(s) to compare
# NOTE(review): where this list is consumed is not visible here; confirm
my_comparisons <- list(c("DM", "OM"))

my_ats <- 22 # axis text size
my_als <- 20 # axis label size
my_fls <- 20 # facet label size
my_pts <- 22 # plot title size
####################################
stat_bp_mut <- function(df
, x_bp_cateog = "mutation_info"
, y_var = "param_value"
@ -16,7 +57,12 @@ stat_bp_mut <- function(df
#, stat_label = "p.format")
, stat_label = "p.signif" )
p1 = ggplot(df_lf_ps, aes(x = mutation_info
#############################
y_value = "param_value"
p1 = ggplot(lf_duet, aes(x = mutation_info
, y = eval(parse(text = y_value)) )) +
facet_wrap(~ param_type
, nrow = 1
@ -48,4 +94,7 @@ p1 = ggplot(df_lf_ps, aes(x = mutation_info
, method = "wilcox.test"
, paired = FALSE
#, label = "p.format")
, label = "p.signif")
, label = "p.signif")
p1

View file

@ -0,0 +1,17 @@
# ------------------------------------------------------------------
# Manual check of lf_unpaired_stats()
#
# Prerequisite: run other_plots_data.R beforehand so that the data
# frame used below (lf_duet) is available in the session.
# ------------------------------------------------------------------
setwd("~/git/LSHTM_analysis/scripts/functions")
source("lf_unpaired_stats.R")

# ------------------------------------------------------------------
# Call the stats function
# ------------------------------------------------------------------
stat_results_df <- lf_unpaired_stats(
  lf_data = lf_duet,
  lf_stat_value = "param_value",
  lf_stat_group = "mutation_info",
  lf_col_statvars = "param_type",
  my_paired = FALSE,
  stat_adj = "none"
)

View file

@ -6,7 +6,7 @@
#=======================================================================
# working dir and loading libraries
# getwd()
# setwd("~/git/LSHTM_analysis/scripts/plotting")
setwd("~/git/LSHTM_analysis/scripts/plotting")
# getwd()
# make cmd
@ -14,7 +14,7 @@
# drug = "streptomycin"
# gene = "gid"
#source("get_plotting_dfs.R")
source("get_plotting_dfs.R")
#=======================================================================
# MOVE TO COMBINE or singular file for deepddg
@ -492,47 +492,47 @@ if (nrow(lf_mcsm) == expected_rows_lf){
quit()
}
############################################################################
# # clear excess variables
# rm(all_plot_dfs
# , cols_dynamut2_df
# , cols_mcsm_df
# , cols_mcsm_na_df
# , comb_df
# , corr_data_ps
# , corr_ps_df3
# , df_lf_ps
# , foo
# , foo_cnames
# , gene_metadata
# , logo_data
# , logo_data_or_mult
# , logo_data_plot
# , logo_data_plot_logor
# , logo_data_plot_or
# , my_data_snp
# , my_df
# , my_df_u
# , ols_mcsm_df
# , other_muts
# , pd_df
# , subcols_df_ps
# , tab_mt
# , wide_df_logor
# , wide_df_logor_m
# , wide_df_or
# , wide_df_or_mult
# , wt)
#
#
# rm(c3, c4, check1
# , cols_check
# , cols_to_select
# , cols_to_select_deepddg
# , cols_to_select_duet
# , cols_to_select_dynamut
# , cols_to_select_dynamut2
# , cols_to_select_encomddg
# , cols_to_select_encomdds
# , cols_to_select_mcsm
# , cols_to_select_mcsm_na
# , cols_to_select_sdm)
# Remove objects that are no longer needed from the workspace.
# The names are given as strings via `list =`; rm() only warns (it does
# not stop) for any name that does not exist.
# NOTE(review): "ols_mcsm_df" may be a typo of "cols_mcsm_df" (already
# listed) - kept exactly as written; confirm.
rm(list = c(
  "all_plot_dfs",
  "cols_dynamut2_df",
  "cols_mcsm_df",
  "cols_mcsm_na_df",
  "comb_df",
  "corr_data_ps",
  "corr_ps_df3",
  "df_lf_ps",
  "foo",
  "foo_cnames",
  "gene_metadata",
  "logo_data",
  "logo_data_or_mult",
  "logo_data_plot",
  "logo_data_plot_logor",
  "logo_data_plot_or",
  "my_data_snp",
  "my_df",
  "my_df_u",
  "ols_mcsm_df",
  "other_muts",
  "pd_df",
  "subcols_df_ps",
  "tab_mt",
  "wide_df_logor",
  "wide_df_logor_m",
  "wide_df_or",
  "wide_df_or_mult",
  "wt"
))

# Second batch: check variables and column-selection vectors.
rm(list = c(
  "c3",
  "c4",
  "check1",
  "cols_check",
  "cols_to_select",
  "cols_to_select_deepddg",
  "cols_to_select_duet",
  "cols_to_select_dynamut",
  "cols_to_select_dynamut2",
  "cols_to_select_encomddg",
  "cols_to_select_encomdds",
  "cols_to_select_mcsm",
  "cols_to_select_mcsm_na",
  "cols_to_select_sdm"
))