diff --git a/scripts/functions/dm_om_data.R b/scripts/functions/dm_om_data.R index 21e4245..a8914be 100644 --- a/scripts/functions/dm_om_data.R +++ b/scripts/functions/dm_om_data.R @@ -26,7 +26,8 @@ dm_om_wf_lf_data <- function(df , categ_cols_to_factor){ df = as.data.frame(df) - + df$maf = log10(df$maf) # can't see otherwise + # Initialise the required dfs based on gene name geneL_normal = c("pnca") geneL_na = c("gid", "rpob") @@ -93,7 +94,9 @@ dm_om_wf_lf_data <- function(df , "mcsm_ppi2_affinity" , "mcsm_ppi2_scaled" , "mcsm_ppi2_outcome" , "consurf_score" , "consurf_scaled" , "consurf_outcome" # exists now , "snap2_score" , "snap2_scaled" , "snap2_outcome" - , "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome") + , "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome" + , "provean_score" , "provean_scaled" , "provean_outcome") + }else{ colnames_to_extract = c(mut_colname, mut_info_colname, mut_info_label_colname , aa_pos_colname, LigDist_colname @@ -143,29 +146,31 @@ dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn -consurf_dn = paste0("Consurf"); consurf_dn +consurf_dn = paste0("ConSurf"); consurf_dn snap2_dn = paste0("SNAP2"); snap2_dn - +provean_dn = paste0("PROVEAN"); provean_dn # change column names: plyr new_colnames = c(asa = "ASA" , rsa = "RSA" , rd_values = "RD" , kd_values = "KD" - , log10_or_mychisq = "Log10 (OR)" - , neglog_pval_fisher = "-Log (P)" - #, af = "MAF" - , maf = "MAF" - #, ligand_dist_colname = lig_dn # cannot handle variable name 'ligand_dist_colname' + #, log10_or_mychisq = "Log10(OR)" + #, neglog_pval_fisher = "-Log(P)" + #, af = "MAF" + , maf = "Log10(MAF)" + #, ligand_dist_colname= lig_dn # cannot handle variable name 'ligand_dist_colname' , affinity_scaled = mcsm_lig_dn , duet_scaled = duet_dn , foldx_scaled = foldx_dn , deepddg_scaled = deepddg_dn , ddg_dynamut2_scaled = dynamut2_dn , mcsm_na_scaled = mcsm_na_dn - , mcsm_ppi2_affinity = mcsm_ppi2_dn - , consurf_score = consurf_dn - , snap2_score = snap2_dn) + , mcsm_ppi2_scaled = mcsm_ppi2_dn + , consurf_scaled = consurf_dn + , snap2_scaled = snap2_dn + , provean_scaled = provean_dn) + comb_df_sl1 = plyr::rename(comb_df_s , replace = new_colnames @@ -173,7 +178,8 @@ comb_df_sl1 = plyr::rename(comb_df_s , warn_duplicated = T) # renaming colname using variable i.e ligand_dist_colname: dplyr -comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname)) +#comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname)) +comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(LigDist_colname)) # NEW names(comb_df_sl) #======================= @@ -206,9 +212,9 @@ static_cols_end = c(lig_dn , "RSA" , "RD" , "KD" - , "MAF" - , "Log10 (OR)" - #, "-Log (P)" + , "Log10(MAF)" + #, "Log10(OR)" + #, "-Log(P)" ) ######################################################################### @@ -344,19 +350,22 @@ wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2 #1--> "most_variable", 2--> "", 3-->"", 4-->"" #5-->"", 6-->"", 7-->"", 8-->"", 9-->"most_conserved" #==================== -# FIXME: if you add category column to consurf +# WF data: consurf cols_to_select_consurf = c(static_cols_start, c("consurf_outcome", consurf_dn), static_cols_end) wf_consurf = comb_df_sl[, cols_to_select_consurf] + pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start) + 1)]; pivot_cols_consurf - -# WF data: consurf -cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end) -wf_consurf = comb_df_sl[, cols_to_select_consurf] - -pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start))]; pivot_cols_consurf expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf)) expected_rows_lf +# when outcome didn't exist +#cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end) +#wf_consurf = comb_df_sl[, cols_to_select_consurf] +# +# pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start))]; pivot_cols_consurf +# expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf)) +# expected_rows_lf + # LF data: consurf lf_consurf = gather(wf_consurf , key = param_type @@ -403,6 +412,37 @@ if (nrow(lf_snap2) == expected_rows_lf){ # Assign them to the output list wf_lf_dataL[['wf_snap2']] = wf_snap2 wf_lf_dataL[['lf_snap2']] = lf_snap2 + +#============== +# Provean2: LF +#============== +# WF data: provean +cols_to_select_provean = c(static_cols_start, c("provean_outcome", provean_dn), static_cols_end) +wf_provean = comb_df_sl[, cols_to_select_provean] + +pivot_cols_provean = cols_to_select_provean[1: (length(static_cols_start) + 1)]; pivot_cols_provean +expected_rows_lf = nrow(wf_provean) * (length(wf_provean) - length(pivot_cols_provean)) +expected_rows_lf + +# LF data: provean +lf_provean = gather(wf_provean + , key = param_type + , value = param_value + , all_of(provean_dn):tail(static_cols_end,1) + , factor_key = TRUE) + +if (nrow(lf_provean) == expected_rows_lf){ + cat("\nPASS: long format data created for ", provean_dn) +}else{ + cat("\nFAIL: long format data could not be created for duet") + quit() +} + +# Assign them to the output list +wf_lf_dataL[['wf_provean']] = wf_provean +wf_lf_dataL[['lf_provean']] = lf_provean + + ########################################################################### # AFFINITY cols ########################################################################### diff --git a/scripts/functions/lf_bp.R b/scripts/functions/lf_bp.R index 33d378c..d5f3616 100644 --- a/scripts/functions/lf_bp.R +++ b/scripts/functions/lf_bp.R @@ -25,7 +25,7 @@ lf_bp <- function(lf_df = lf_duet , make_boxplot = FALSE , bp_width = c("auto", 0.5) , add_stats = TRUE - , stat_grp_comp = c("DM", "OM") + , stat_grp_comp = c("R", "S") , stat_method = "wilcox.test" , my_paired = FALSE , stat_label = c("p.format", "p.signif") ){ @@ -129,77 +129,77 @@ lf_bp <- function(lf_df = lf_duet # TODO: plot_ly() ############################# -lf_bp_plotly <- function(lf_df - , p_title = "" - , colour_categ = "" - , x_grp = mutation_info - , y_var = param_value - , facet_var = param_type - , n_facet_row = 1 - , y_scales = "free_y" - , colour_bp_strip = "khaki2" - , dot_size = 3 - , dot_transparency = 0.3 - , violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL - , my_ats = 20 # axis text size - , my_als = 18 # axis label size - , my_fls = 18 # facet label size - , my_pts = 22 # plot title size) - #, make_boxplot = FALSE - , bp_width = c("auto", 0.5) - #, add_stats = FALSE - #, stat_grp_comp = c("DM", "OM") - #, stat_method = "wilcox.test" - #, my_paired = FALSE - #, stat_label = c("p.format", "p.signif") - ){ - - OutPlotly = ggplot(lf_df, aes(x = eval(parse(text = x_grp)) - , y = eval(parse(text = y_var)) - , label1 = x_grp - , label2 = y_var - , lable3 = colour_categ) ) + - - facet_wrap(~ eval(parse(text = facet_var)) - , nrow = n_facet_row - , scales = y_scales) + - - geom_violin(trim = T - , scale = "width" - , draw_quantiles = violin_quantiles) + - - geom_beeswarm(priority = "density" - , size = dot_size - , alpha = dot_transparency - , show.legend = FALSE - , cex = 0.8 - , aes(colour = factor(eval(parse(text = colour_categ) ) ) ) ) + - theme(axis.text.x = element_text(size = my_ats) - , axis.text.y = element_text(size = my_ats - , angle = 0 - , hjust = 1 - , vjust = 0) - , axis.title.x = element_text(size = my_ats) - , axis.title.y = element_text(size = my_ats) - , plot.title = element_text(size = my_pts - , hjust = 0.5 - , colour = "black" - , face = "bold") - , strip.background = element_rect(fill = colour_bp_strip) - , strip.text.x = element_text(size = my_fls - , colour = "black") - , legend.title = element_text(color = "black" - , size = my_als) - , legend.text = element_text(size = my_ats) - , legend.position = "none")+ - - labs(title = p_title - , x = "" - , y = "") - - OutPlotly = ggplotly(OutPlotly - #, tooltip = c("label") - ) - return(OutPlotly) - -} +# lf_bp_plotly <- function(lf_df +# , p_title = "" +# , colour_categ = "" +# , x_grp = mutation_info +# , y_var = param_value +# , facet_var = param_type +# , n_facet_row = 1 +# , y_scales = "free_y" +# , colour_bp_strip = "khaki2" +# , dot_size = 3 +# , dot_transparency = 0.3 +# , violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL +# , my_ats = 20 # axis text size +# , my_als = 18 # axis label size +# , my_fls = 18 # facet label size +# , my_pts = 22 # plot title size) +# #, make_boxplot = FALSE +# , bp_width = c("auto", 0.5) +# #, add_stats = FALSE +# #, stat_grp_comp = c("DM", "OM") +# #, stat_method = "wilcox.test" +# #, my_paired = FALSE +# #, stat_label = c("p.format", "p.signif") +# ){ +# +# OutPlotly = ggplot(lf_df, aes(x = eval(parse(text = x_grp)) +# , y = eval(parse(text = y_var)) +# , label1 = x_grp +# , label2 = y_var +# , lable3 = colour_categ) ) + +# +# facet_wrap(~ eval(parse(text = facet_var)) +# , nrow = n_facet_row +# , scales = y_scales) + +# +# geom_violin(trim = T +# , scale = "width" +# , draw_quantiles = violin_quantiles) + +# +# geom_beeswarm(priority = "density" +# , size = dot_size +# , alpha = dot_transparency +# , show.legend = FALSE +# , cex = 0.8 +# , aes(colour = factor(eval(parse(text = colour_categ) ) ) ) ) + +# theme(axis.text.x = element_text(size = my_ats) +# , axis.text.y = element_text(size = my_ats +# , angle = 0 +# , hjust = 1 +# , vjust = 0) +# , axis.title.x = element_text(size = my_ats) +# , axis.title.y = element_text(size = my_ats) +# , plot.title = element_text(size = my_pts +# , hjust = 0.5 +# , colour = "black" +# , face = "bold") +# , strip.background = element_rect(fill = colour_bp_strip) +# , strip.text.x = element_text(size = my_fls +# , colour = "black") +# , legend.title = element_text(color = "black" +# , size = my_als) +# , legend.text = element_text(size = my_ats) +# , legend.position = "none")+ +# +# labs(title = p_title +# , x = "" +# , y = "") +# +# OutPlotly = ggplotly(OutPlotly +# #, tooltip = c("label") +# ) +# return(OutPlotly) +# +# } diff --git a/scripts/functions/lf_unpaired_stats.R b/scripts/functions/lf_unpaired_stats.R index 28a8ad0..55473ff 100644 --- a/scripts/functions/lf_unpaired_stats.R +++ b/scripts/functions/lf_unpaired_stats.R @@ -3,7 +3,7 @@ library(ggpubr) lf_unpaired_stats <- function(lf_data , lf_stat_value = "param_value" - , lf_stat_group = "mutation_info" + , lf_stat_group = "mutation_info_labels" , lf_col_statvars = "param_type" , my_paired = FALSE , stat_adj = "none"){ diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index 0810d62..0f7d6d4 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -140,6 +140,9 @@ lf_consurf = all_dm_om_df[['lf_consurf']] wf_snap2 = all_dm_om_df[['wf_snap2']] lf_snap2 = all_dm_om_df[['lf_snap2']] +wf_provean = all_dm_om_df[['wf_provean']] +lf_provean = all_dm_om_df[['lf_provean']] + if (tolower(gene)%in%geneL_na){ wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']] lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]