things

2022-08-05 16:13:57 +01:00 · 2022-08-05 16:13:57 +01:00 · 33925dafe9
commit 33925dafe9
parent 6cb9998c4c
4 changed files with 142 additions and 99 deletions
--- a/scripts/functions/dm_om_data.R
+++ b/scripts/functions/dm_om_data.R
@ -26,7 +26,8 @@ dm_om_wf_lf_data <- function(df
                          , categ_cols_to_factor){
  
  df = as.data.frame(df)
-
+  df$maf = log10(df$maf) # can't see otherwise
+  
  # Initialise the required dfs based on gene name
  geneL_normal  = c("pnca")
  geneL_na      = c("gid", "rpob")
@ -93,7 +94,9 @@ dm_om_wf_lf_data <- function(df
        , "mcsm_ppi2_affinity"    , "mcsm_ppi2_scaled"   , "mcsm_ppi2_outcome"
        , "consurf_score"         , "consurf_scaled"     , "consurf_outcome" # exists now
        , "snap2_score"           , "snap2_scaled"       , "snap2_outcome"
-        , "mcsm_na_affinity"      , "mcsm_na_scaled"     , "mcsm_na_outcome")
+        , "mcsm_na_affinity"      , "mcsm_na_scaled"     , "mcsm_na_outcome"
+        , "provean_score"         , "provean_scaled"     , "provean_outcome")
+ 
  }else{
    colnames_to_extract = c(mut_colname, mut_info_colname, mut_info_label_colname
                            , aa_pos_colname, LigDist_colname
@ -143,29 +146,31 @@ dynamut2_dn  = paste0("Dynamut2 " , stability_suffix); dynamut2_dn

 mcsm_na_dn   = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn
 mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn
-consurf_dn   = paste0("Consurf"); consurf_dn
+consurf_dn   = paste0("ConSurf"); consurf_dn
 snap2_dn     = paste0("SNAP2"); snap2_dn
-
+provean_dn   = paste0("PROVEAN"); provean_dn

 # change column names: plyr
 new_colnames = c(asa  = "ASA"
                , rsa                 = "RSA"
                , rd_values           = "RD"
                , kd_values           = "KD"
-                , log10_or_mychisq    = "Log10 (OR)"
-                , neglog_pval_fisher  = "-Log (P)"
-                #, af                  = "MAF"
-                , maf                  = "MAF"
-                #, ligand_dist_colname     = lig_dn # cannot handle variable name 'ligand_dist_colname'
+                #, log10_or_mychisq    = "Log10(OR)"
+                #, neglog_pval_fisher  = "-Log(P)"
+                #, af                 = "MAF"
+                , maf                 = "Log10(MAF)"
+                #, ligand_dist_colname= lig_dn # cannot handle variable name 'ligand_dist_colname'
                , affinity_scaled     = mcsm_lig_dn
                , duet_scaled         = duet_dn
                , foldx_scaled        = foldx_dn
                , deepddg_scaled      = deepddg_dn
                , ddg_dynamut2_scaled = dynamut2_dn
                , mcsm_na_scaled      = mcsm_na_dn
-                , mcsm_ppi2_affinity  = mcsm_ppi2_dn
-                , consurf_score       = consurf_dn
-                , snap2_score         = snap2_dn)
+                , mcsm_ppi2_scaled    = mcsm_ppi2_dn
+                , consurf_scaled      = consurf_dn
+                , snap2_scaled        = snap2_dn
+                , provean_scaled      = provean_dn)
+

 comb_df_sl1 = plyr::rename(comb_df_s
                          , replace = new_colnames
@ -173,7 +178,8 @@ comb_df_sl1 = plyr::rename(comb_df_s
                          , warn_duplicated = T)

 # renaming colname using variable i.e ligand_dist_colname: dplyr
-comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname))
+#comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname))
+comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(LigDist_colname)) # NEW
 names(comb_df_sl)

 #=======================
@ -206,9 +212,9 @@ static_cols_end = c(lig_dn
                    , "RSA"
                    , "RD"
                    , "KD"
-                    , "MAF"
-                    , "Log10 (OR)"
-                    #, "-Log (P)"
+                    , "Log10(MAF)"
+                    #, "Log10(OR)"
+                    #, "-Log(P)"
                    )

 #########################################################################
@ -344,19 +350,22 @@ wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2
 #1--> "most_variable", 2--> "", 3-->"",  4-->""
 #5-->"", 6-->"", 7-->"", 8-->"", 9-->"most_conserved"
 #====================
-# FIXME: if you add category column to consurf
+# WF data: consurf
 cols_to_select_consurf = c(static_cols_start, c("consurf_outcome", consurf_dn), static_cols_end)
 wf_consurf = comb_df_sl[, cols_to_select_consurf]
+
 pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start) + 1)]; pivot_cols_consurf
-
-# WF data: consurf
-cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end)
-wf_consurf = comb_df_sl[, cols_to_select_consurf]
-
-pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start))]; pivot_cols_consurf
 expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf))
 expected_rows_lf

+# Previous version, used before the "consurf_outcome" column existed (kept for reference)
+#cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end)
+#wf_consurf = comb_df_sl[, cols_to_select_consurf]
+# 
+# pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start))]; pivot_cols_consurf
+# expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf))
+# expected_rows_lf
+
 # LF data: consurf
 lf_consurf = gather(wf_consurf
                    , key = param_type
@ -403,6 +412,37 @@ if (nrow(lf_snap2) == expected_rows_lf){
 # Assign them to the output list
 wf_lf_dataL[['wf_snap2']] = wf_snap2
 wf_lf_dataL[['lf_snap2']] = lf_snap2
+
+#==============
+# PROVEAN: LF
+#==============
+# WF data: provean
+cols_to_select_provean = c(static_cols_start, c("provean_outcome", provean_dn), static_cols_end)
+wf_provean = comb_df_sl[, cols_to_select_provean]
+
+pivot_cols_provean = cols_to_select_provean[1: (length(static_cols_start) + 1)]; pivot_cols_provean
+expected_rows_lf = nrow(wf_provean) * (length(wf_provean) - length(pivot_cols_provean))
+expected_rows_lf
+
+# LF data: provean
+lf_provean = gather(wf_provean
+                    , key = param_type
+                    , value = param_value
+                    , all_of(provean_dn):tail(static_cols_end,1)
+                    , factor_key = TRUE)
+
+if (nrow(lf_provean) == expected_rows_lf){ # sanity check: LF row count must match the count expected from wf_provean
+  cat("\nPASS: long format data created for ", provean_dn)
+}else{
+  cat("\nFAIL: long format data could not be created for ", provean_dn) # previously mislabelled as duet
+  quit() # terminates the R session on failure
+}
+
+# Assign them to the output list
+wf_lf_dataL[['wf_provean']] = wf_provean
+wf_lf_dataL[['lf_provean']] = lf_provean
+
+
 ###########################################################################
 # AFFINITY cols
 ###########################################################################
--- a/scripts/functions/lf_bp.R
+++ b/scripts/functions/lf_bp.R
@ -25,7 +25,7 @@ lf_bp <- function(lf_df = lf_duet
                  , make_boxplot = FALSE
                  , bp_width = c("auto", 0.5)
                  , add_stats = TRUE
-                  , stat_grp_comp = c("DM", "OM")
+                  , stat_grp_comp = c("R", "S")
                  , stat_method = "wilcox.test"
                  , my_paired = FALSE
                  , stat_label = c("p.format", "p.signif") ){
@ -129,77 +129,77 @@ lf_bp <- function(lf_df = lf_duet

 # TODO: plot_ly()
 #############################
-lf_bp_plotly <- function(lf_df
-                         , p_title = ""
-                         , colour_categ = ""
-                         , x_grp = mutation_info
-                         , y_var = param_value
-                         , facet_var = param_type
-                         , n_facet_row = 1
-                         , y_scales = "free_y"
-                         , colour_bp_strip = "khaki2"
-                         , dot_size = 3
-                         , dot_transparency = 0.3
-                         , violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL
-                         , my_ats = 20 # axis text size
-                         , my_als = 18 # axis label size
-                         , my_fls = 18 # facet label size
-                         , my_pts = 22 # plot title size)
-                         #, make_boxplot = FALSE
-                         , bp_width = c("auto", 0.5)
-                         #, add_stats = FALSE
-                         #, stat_grp_comp = c("DM", "OM")
-                         #, stat_method = "wilcox.test"
-                         #, my_paired = FALSE
-                         #, stat_label = c("p.format", "p.signif") 
-                         ){
-  
-  OutPlotly = ggplot(lf_df, aes(x = eval(parse(text = x_grp))
-                              , y = eval(parse(text = y_var))
-                              , label1 = x_grp
-                              , label2 = y_var
-                              , lable3 = colour_categ) )  +
-
-      facet_wrap(~ eval(parse(text = facet_var))
-               , nrow = n_facet_row
-               , scales = y_scales) +
-    
-    geom_violin(trim = T
-                , scale = "width"
-                , draw_quantiles = violin_quantiles) +
-    
-   geom_beeswarm(priority = "density"
-                  , size = dot_size
-                  , alpha = dot_transparency
-                  , show.legend = FALSE
-                  , cex = 0.8
-                  , aes(colour = factor(eval(parse(text = colour_categ) ) ) ) ) +
-    theme(axis.text.x = element_text(size = my_ats)
-          , axis.text.y = element_text(size = my_ats
-                                       , angle = 0
-                                       , hjust = 1
-                                       , vjust = 0)
-          , axis.title.x = element_text(size = my_ats)
-          , axis.title.y = element_text(size = my_ats)
-          , plot.title = element_text(size = my_pts
-                             , hjust = 0.5
-                             , colour = "black"
-                             , face = "bold")
-          , strip.background = element_rect(fill = colour_bp_strip)
-          , strip.text.x = element_text(size = my_fls
-                                         , colour = "black")
-          , legend.title = element_text(color = "black"
-                                         , size = my_als)
-          , legend.text = element_text(size = my_ats)
-          , legend.position = "none")+
-    
-    labs(title = p_title
-         , x = ""
-         , y = "") 
-  
-  OutPlotly = ggplotly(OutPlotly
-                       #, tooltip = c("label")
-                       )
-  return(OutPlotly)
-  
-}
+# lf_bp_plotly <- function(lf_df
+#                          , p_title = ""
+#                          , colour_categ = ""
+#                          , x_grp = mutation_info
+#                          , y_var = param_value
+#                          , facet_var = param_type
+#                          , n_facet_row = 1
+#                          , y_scales = "free_y"
+#                          , colour_bp_strip = "khaki2"
+#                          , dot_size = 3
+#                          , dot_transparency = 0.3
+#                          , violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL
+#                          , my_ats = 20 # axis text size
+#                          , my_als = 18 # axis label size
+#                          , my_fls = 18 # facet label size
+#                          , my_pts = 22 # plot title size)
+#                          #, make_boxplot = FALSE
+#                          , bp_width = c("auto", 0.5)
+#                          #, add_stats = FALSE
+#                          #, stat_grp_comp = c("DM", "OM")
+#                          #, stat_method = "wilcox.test"
+#                          #, my_paired = FALSE
+#                          #, stat_label = c("p.format", "p.signif") 
+#                          ){
+#   
+#   OutPlotly = ggplot(lf_df, aes(x = eval(parse(text = x_grp))
+#                               , y = eval(parse(text = y_var))
+#                               , label1 = x_grp
+#                               , label2 = y_var
+#                               , lable3 = colour_categ) )  +
+# 
+#       facet_wrap(~ eval(parse(text = facet_var))
+#                , nrow = n_facet_row
+#                , scales = y_scales) +
+#     
+#     geom_violin(trim = T
+#                 , scale = "width"
+#                 , draw_quantiles = violin_quantiles) +
+#     
+#    geom_beeswarm(priority = "density"
+#                   , size = dot_size
+#                   , alpha = dot_transparency
+#                   , show.legend = FALSE
+#                   , cex = 0.8
+#                   , aes(colour = factor(eval(parse(text = colour_categ) ) ) ) ) +
+#     theme(axis.text.x = element_text(size = my_ats)
+#           , axis.text.y = element_text(size = my_ats
+#                                        , angle = 0
+#                                        , hjust = 1
+#                                        , vjust = 0)
+#           , axis.title.x = element_text(size = my_ats)
+#           , axis.title.y = element_text(size = my_ats)
+#           , plot.title = element_text(size = my_pts
+#                              , hjust = 0.5
+#                              , colour = "black"
+#                              , face = "bold")
+#           , strip.background = element_rect(fill = colour_bp_strip)
+#           , strip.text.x = element_text(size = my_fls
+#                                          , colour = "black")
+#           , legend.title = element_text(color = "black"
+#                                          , size = my_als)
+#           , legend.text = element_text(size = my_ats)
+#           , legend.position = "none")+
+#     
+#     labs(title = p_title
+#          , x = ""
+#          , y = "") 
+#   
+#   OutPlotly = ggplotly(OutPlotly
+#                        #, tooltip = c("label")
+#                        )
+#   return(OutPlotly)
+#   
+# }
--- a/scripts/functions/lf_unpaired_stats.R
+++ b/scripts/functions/lf_unpaired_stats.R
@ -3,7 +3,7 @@ library(ggpubr)

 lf_unpaired_stats <- function(lf_data
                              , lf_stat_value = "param_value"
-                              , lf_stat_group = "mutation_info"
+                              , lf_stat_group = "mutation_info_labels"
                              , lf_col_statvars = "param_type"
                              , my_paired = FALSE
                              , stat_adj = "none"){
--- a/scripts/plotting/get_plotting_dfs.R
+++ b/scripts/plotting/get_plotting_dfs.R
@ -140,6 +140,9 @@ lf_consurf   = all_dm_om_df[['lf_consurf']]
 wf_snap2     = all_dm_om_df[['wf_snap2']]
 lf_snap2     = all_dm_om_df[['lf_snap2']]

+wf_provean   = all_dm_om_df[['wf_provean']]
+lf_provean   = all_dm_om_df[['lf_provean']]
+
 if (tolower(gene)%in%geneL_na){
  wf_mcsm_na   = all_dm_om_df[['wf_mcsm_na']]
  lf_mcsm_na   = all_dm_om_df[['lf_mcsm_na']]