From 8d6c148fff809824d8501339fe163fbe0d541ef0 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Mon, 22 Aug 2022 10:53:25 +0100
Subject: [PATCH] renamed 2 to _v2

---
 scripts/functions/combining_dfs_plotting.R    |  31 +
 scripts/functions/corr_plot_data.R            |   4 +
 scripts/functions/plotting_data.R             |  33 +
 scripts/plotting/get_plotting_dfs.R           |   2 +
 scripts/plotting/plotting_colnames.R          |   4 +-
 .../plotting/plotting_thesis/basic_barplots.R |   4 +-
 .../plotting_thesis/basic_barplots_v2.R       | 584 ------------------
 7 files changed, 74 insertions(+), 588 deletions(-)
 delete mode 100644 scripts/plotting/plotting_thesis/basic_barplots_v2.R

diff --git a/scripts/functions/combining_dfs_plotting.R b/scripts/functions/combining_dfs_plotting.R
index 7285f11..3441c3a 100644
--- a/scripts/functions/combining_dfs_plotting.R
+++ b/scripts/functions/combining_dfs_plotting.R
@@ -33,9 +33,15 @@
 #==========================================================
 #lig_dist_colname = 'ligand_distance' or global var LigDist_colname
 #lig_dist_cutoff  =  10 or global var LigDist_cutoff
+geneL_normal  = c("pnca")
+geneL_na      = c("gid", "rpob")
+geneL_ppi2    = c("alr", "embb", "katg", "rpob")
+
+
 
 combining_dfs_plotting <- function(  my_df_u
                                    , gene_metadata
+                                   , gene # ADDED
                                    , lig_dist_colname = ''
                                    , lig_dist_cutoff = ''){
 
@@ -679,6 +685,31 @@ combining_dfs_plotting <- function(  my_df_u
   
   min( merged_df3['avg_lig_affinity_scaled']); max( merged_df3['avg_lig_affinity_scaled'])
   
+  ###################################################################
+  # Rectify pos_count column in merged_df3
+  # The one in merged_df2 is correct
+  #
+  # Step 1: keep the existing count under the name df2_pos_count_all.
+  # The rename itself removes "pos_count" from the column names, so no
+  # separate drop step is needed. If the column is absent, which()
+  # returns integer(0) and the assignment below changes nothing.
+  nc_pc_CHANGE = which(colnames(merged_df3) == "pos_count")
+  colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all"
+
+  # Step 2: count the rows of merged_df3 per position.
+  # name = "pos_count" writes the count straight into the target column.
+  # Taking the default name "n" and renaming it afterwards would rename
+  # the wrong column if merged_df3 already had a column called "n"
+  # (add_count() then stores the new count as "nn").
+  merged_df3 = dplyr::add_count(merged_df3, position, name = "pos_count")
+
+  # Step 3: coerce the result to a plain data.frame, as this code did
+  # before, so that later steps see the same class.
+  merged_df3 = as.data.frame(merged_df3)
+
+  ####################################################################
+  # ADD: distance to Nucleic acid column for na genes
+  
   
   ####################################################################
   #TODO
diff --git a/scripts/functions/corr_plot_data.R b/scripts/functions/corr_plot_data.R
index cd242cc..ab479c2 100644
--- a/scripts/functions/corr_plot_data.R
+++ b/scripts/functions/corr_plot_data.R
@@ -7,6 +7,10 @@
 # LigDist_colname   #from globals: plotting_globals.R
 # ppi2Dist_colname  #from globals: plotting_globals.R
 # naDist_colname    #from globals: plotting_globals.R
+geneL_normal  = c("pnca")
+geneL_na      = c("gid", "rpob")
+geneL_ppi2    = c("alr", "embb", "katg", "rpob")
+
 corr_data_extract <- function(df
                               , gene
                               , drug
diff --git a/scripts/functions/plotting_data.R b/scripts/functions/plotting_data.R
index 67a3f2c..47c707d 100755
--- a/scripts/functions/plotting_data.R
+++ b/scripts/functions/plotting_data.R
@@ -5,6 +5,17 @@
 # load libraries and functions
 library(data.table)
 library(dplyr)
+
+# Gene name lists (lower case) used to switch on gene-specific steps
+geneL_normal  = c("pnca")
+geneL_na      = c("gid", "rpob")
+geneL_ppi2    = c("alr", "embb", "katg", "rpob")
+# exists() guard: do not fail at source() time when 'gene' is not set yet
+if (exists("gene") && tolower(gene)%in%geneL_na){
+  # per-gene CSV with the nucleic acid distances read by plotting_data()
+  infilename_nca = paste0("/home/tanu/git/Misc/mcsm_na_dist/"
+                        , tolower(gene), "_nca_distances.csv")
+}
 #========================================================
 # plotting_data(): formatting data for plots
 # input args: 
@@ -20,6 +31,7 @@ library(dplyr)
 #lig_dist_cutoff  =  10 or global var LigDist_cutoff
 
 plotting_data <- function(df
+                          , gene # ADDED
                           , lig_dist_colname 
                           , lig_dist_cutoff) {
 my_df       = data.frame()
@@ -57,7 +69,28 @@ if ( length(unique(df$mutationinformation)) != length(df$mutationinformation)){
 upos = unique(my_df_u$position)
 cat("\nDim of clean df:"); cat(dim(my_df_u), "\n")
 cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n")
+#===============================================
+# ADD : na distance column for genes with nucleic acid affinity
+#===============================================
+# Runs only for genes in geneL_na; adds "nca_distance" to my_df_u.
+if (tolower(gene)%in%geneL_na){
+
+  # Build the path from the 'gene' argument so the file read always matches
+  # the gene being processed, not whichever global 'gene' was set earlier.
+  infilename_nca = paste0("/home/tanu/git/Misc/mcsm_na_dist/"
+                        , tolower(gene), "_nca_distances.csv")
+
+  # The file is read without a header row; its two columns are named here.
+  distcol_nca_name = read.csv(infilename_nca, header = FALSE)
+  colnames(distcol_nca_name) <- c("mutationinformation", "nca_distance")
+
+  # NOTE(review): all = TRUE keeps unmatched rows from both sides - confirm.
   
+  my_df_u = merge(my_df_u, distcol_nca_name,
+                     by = "mutationinformation",
+                     all = TRUE)
+
+}
 #===============================================
 # extract mutations <10 Angstroms and symbol
 #===============================================
diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R
index ec44d90..f06f5d7 100644
--- a/scripts/plotting/get_plotting_dfs.R
+++ b/scripts/plotting/get_plotting_dfs.R
@@ -53,6 +53,7 @@ if (!exists("infile_params") && exists("gene")){
 cat("\nReading mcsm combined data file: ", infile_params)
 mcsm_df = read.csv(infile_params, header = T)
 pd_df = plotting_data(mcsm_df
+                      , gene = gene # ADDED
                       , lig_dist_colname = LigDist_colname
                       , lig_dist_cutoff = LigDist_cutoff)
 
@@ -87,6 +88,7 @@ cat("\nDim of meta data file: ", dim(gene_metadata))
 
 all_plot_dfs = combining_dfs_plotting(my_df_u
                                       , gene_metadata
+                                      , gene = gene # ADDED
                                       , lig_dist_colname = LigDist_colname
                                       , lig_dist_cutoff = LigDist_cutoff)
 
diff --git a/scripts/plotting/plotting_colnames.R b/scripts/plotting/plotting_colnames.R
index d417de1..282bc35 100644
--- a/scripts/plotting/plotting_colnames.R
+++ b/scripts/plotting/plotting_colnames.R
@@ -92,8 +92,8 @@ if (tolower(gene)%in%geneL_na){
                     naDist_colname,
                     "mcsm_na_affinity", "mcsm_na_scaled", "mcsm_na_outcome")
 
-  raw_affinity_cols     = c(common_raw_affinity_cols         , "mcsm_na_affinity")
-  scaled_affinity_cols  = c(common_scaled_affinity_cols   , "mcsm_na_scaled")
+  raw_affinity_cols     = c(common_raw_affinity_cols     , "mcsm_na_affinity")
+  scaled_affinity_cols  = c(common_scaled_affinity_cols  , "mcsm_na_scaled")
   outcome_affinity_cols = c(common_outcome_affinity_cols , "mcsm_na_outcome")
   affinity_dist_colnames      = c(LigDist_colname, ppi2Dist_colname, naDist_colname)
   
diff --git a/scripts/plotting/plotting_thesis/basic_barplots.R b/scripts/plotting/plotting_thesis/basic_barplots.R
index 4d4a520..7eb2030 100644
--- a/scripts/plotting/plotting_thesis/basic_barplots.R
+++ b/scripts/plotting/plotting_thesis/basic_barplots.R
@@ -30,8 +30,8 @@
 #source("~/git/LSHTM_analysis/config/gid.R")
 
 #source("~/git/LSHTM_analysis/config/alr.R")
-source("~/git/LSHTM_analysis/config/katg.R")
-#source("~/git/LSHTM_analysis/config/rpob.R")
+#source("~/git/LSHTM_analysis/config/katg.R")
+source("~/git/LSHTM_analysis/config/rpob.R")
 
 source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
 #source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above
diff --git a/scripts/plotting/plotting_thesis/basic_barplots_v2.R b/scripts/plotting/plotting_thesis/basic_barplots_v2.R
deleted file mode 100644
index b6fcfea..0000000
--- a/scripts/plotting/plotting_thesis/basic_barplots_v2.R
+++ /dev/null
@@ -1,584 +0,0 @@
-#!/usr/bin/env Rscript   
-#########################################################
-# TASK: Barplots for mCSM DUET, ligand affinity, and foldX
-# basic barplots with count of mutations
-# basic barplots with frequency of count of mutations
-
-# , df_colname = ""
-# , leg_title = ""
-# , ats = 25     # axis text size
-# , als = 22     # axis label size
-# , lts = 20     # legend text size
-# , ltis = 22    # label title size
-# , geom_ls = 10 # geom_label size
-# , yaxis_title = "Number of nsSNPs"
-# , bp_plot_title = ""
-# , label_categories = c("Destabilising", "Stabilising")
-# , title_colour = "chocolate4"
-# , subtitle_text = NULL
-# , sts = 20
-# , subtitle_colour = "pink"
-# #, leg_position = c(0.73,0.8) # within plot area
-# , leg_position = "top"
-# , bar_fill_values = c("#F8766D", "#00BFC4")
-#########################################################
-#=============
-# Data: Input
-#==============
-#source("~/git/LSHTM_analysis/config/pnca.R")
-#source("~/git/LSHTM_analysis/config/embb.R")
-#source("~/git/LSHTM_analysis/config/gid.R")
-
-source("~/git/LSHTM_analysis/config/alr.R")
-#source("~/git/LSHTM_analysis/config/katg.R")
-#source("~/git/LSHTM_analysis/config/rpob.R")
-
-source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
-#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above
-# sanity check
-
-cat("\nSourced plotting cols as well:", length(plotting_cols))
-
-####################################################
-class(merged_df3)
-merged_df3 = as.data.frame(merged_df3)
-
-class(merged_df3)
-head(merged_df3$pos_count)
-
-nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE
-colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all"
-head(merged_df3$pos_count)
-head(merged_df3$df2_pos_count_all)
-
-# DROP pos_count column
-# merged_df3$pos_count <-NULL
-merged_df3 = merged_df3[, !colnames(merged_df3)%in%c("pos_count")]
-head(merged_df3$pos_count)
-
-df3 = merged_df3[, colnames(merged_df3)%in%plotting_cols]
-"nca_distance"%in%colnames(df3)
-
-#=======
-# output
-#=======
-outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
-cat("plots will output to:", outdir_images)
-
-###########################################################
-#------------------------------
-# plot default sizes
-#------------------------------
-#=========================
-# Affinity outcome
-# check this var: outcome_cols_affinity
-# get from preformatting or put in globals
-#==========================
-DistCutOff
-LigDist_colname  # = "ligand_distance" # from globals 
-ppi2Dist_colname
-naDist_colname
-
-###########################################################
-# get plotting data within the distance
-df3_lig  = df3[df3[[LigDist_colname]]<DistCutOff,]
-df3_ppi2 = df3[df3[[ppi2Dist_colname]]<DistCutOff,]
-df3_na   = df3[df3[[naDist_colname]]<DistCutOff,]
-common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
-
-#------------------------------
-# barplot for ligand affinity:
-# <10 Ang of ligand
-#------------------------------
-mLigP = stability_count_bp(plotdf = df3_lig
-               , df_colname = "ligand_outcome"
-               #, leg_title  = "mCSM-lig"
-               #, bp_plot_title = paste(common_bp_title, "ligand")
-               , yaxis_title = "Number of nsSNPs"
-               , leg_position = "none"
-               , subtitle_text = "mCSM-lig"
-               , bar_fill_values = c("#F8766D", "#00BFC4")
-               , subtitle_colour= "black"
-               , sts = 10
-               , lts = 8
-               , ats = 12
-               , als = 11
-               , ltis = 11
-               , geom_ls = 2.5)
-mLigP
-#------------------------------
-# barplot for ligand affinity:
-# <10 Ang of ligand
-# mmCSM-lig: will be the same no. of sites but the effect will be different
-#------------------------------
-mmLigP = stability_count_bp(plotdf = df3_lig
-                   , df_colname = "mmcsm_lig_outcome"
-                   #, leg_title  = "mmCSM-lig"
-                   #, label_categories = labels_mmlig
-                   #, bp_plot_title = paste(common_bp_title, "ligand")
-                   
-                   , yaxis_title = ""
-                   , leg_position = "none"
-                   , subtitle_text = "mmCSM-lig"
-                   , bar_fill_values = c("#F8766D", "#00BFC4")
-                   , subtitle_colour= "black"
-                   , sts = 10
-                   , lts = 8
-                   , ats = 12
-                   , als = 11
-                   , ltis = 11
-                   , geom_ls = 2.5
-                   )
-mmLigP
-#------------------------------
-# barplot for ppi2 affinity
-#  <10 Ang of interface
-#------------------------------
-if (tolower(gene)%in%geneL_ppi2){
-    ppi2P = stability_count_bp(plotdf = df3_ppi2
-                     , df_colname = "mcsm_ppi2_outcome"
-                     #, leg_title  = "mCSM-ppi2"
-                     #, label_categories = labels_ppi2
-                     #, bp_plot_title = paste(common_bp_title, "PP-interface")
-                     
-                     , yaxis_title = "Number of nsSNPs"
-                     , leg_position = "none"
-                     , subtitle_text = "mCSM-ppi2"
-                     , bar_fill_values = c("#F8766D", "#00BFC4")
-                     , subtitle_colour= "black"
-                     , sts = 10
-                     , lts = 8
-                     , ats = 12
-                     , als = 11
-                     , ltis = 11
-                     , geom_ls = 2.5
-                     )
-  ppi2P
-}
-#----------------------------
-# barplot for ppi2 affinity
-#  <10 Ang of interface
-#------------------------------
-if (tolower(gene)%in%geneL_na){
-    
-  nca_distP = stability_count_bp(plotdf = df3_na
-                             , df_colname = "mcsm_na_outcome"
-                             #, leg_title  = "mCSM-NA"
-                             #, label_categories = 
-                             #, bp_plot_title = paste(common_bp_title, "Dist to NA")
-                             
-                             , yaxis_title = "Number of nsSNPs"
-                             , leg_position = "none"
-                             , subtitle_text = "mCSM-NA"
-                             , bar_fill_values = c("#F8766D", "#00BFC4")
-                             , subtitle_colour= "black"
-                             , sts = 10
-                             , lts = 8
-                             , ats = 12
-                             , als = 11
-                             , ltis = 11
-                             , geom_ls = 2.5
-  )
-  nca_distP
-}
-
-#####################################################################
-
-# ------------------------------
-# bp site site count: mCSM-lig
-# < 10 Ang ligand
-# ------------------------------
-common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
-
-posC_lig = site_snp_count_bp(plotdf = df3_lig
-                  , df_colname = "position"
-                  , xaxis_title = "Number of nsSNPs"
-                  , yaxis_title = "Number of Sites"
-                  , subtitle_colour = "chocolate4"
-                  , subtitle_text = ""
-                  , subtitle_size = 8
-                  , geom_ls = 2.6
-                  , leg_text_size = 10
-                  , axis_text_size = 10
-                  , axis_label_size = 10)
-
-posC_lig
-# ------------------------------
-# bp site site count: ppi2
-# < 10 Ang interface
-# ------------------------------
-if (tolower(gene)%in%geneL_ppi2){
-  
-  posC_ppi2 = site_snp_count_bp(plotdf = df3_ppi2
-                    , df_colname = "position"
-                    , xaxis_title = "Number of nsSNPs"
-                    , yaxis_title = "Number of Sites"
-                    , subtitle_colour = "chocolate4"
-                    , subtitle_text = ""
-                    , subtitle_size = 8
-                    , geom_ls = 2.6
-                    , leg_text_size = 10
-                    , axis_text_size = 10
-                    , axis_label_size = 10)
-  posC_ppi2
-}
-
-# ------------------------------
-# bp site site count: NCA dist
-# < 10 Ang nca
-# ------------------------------
-if (tolower(gene)%in%geneL_na){
-  
-  posC_nca = site_snp_count_bp(plotdf = df3_na
-                                , df_colname = "position"
-                                , xaxis_title = "Number of nsSNPs"
-                                , yaxis_title = "Number of Sites"
-                                , subtitle_colour = "chocolate4"
-                                , subtitle_text = ""
-                                , subtitle_size = 8
-                                , geom_ls = 2.6
-                                , leg_text_size = 10
-                                , axis_text_size = 10
-                                , axis_label_size = 10)
-  posC_nca
-}
-
-
-#===============================================================
-# PE count
-rects <- data.frame(x = 1:6,
-                    colors = c("#ffd700" #gold
-                               , "#f0e68c" #khaki
-                               , "#da70d6"# orchid
-                               , "#ff1493"# deeppink
-                               , "#00BFC4" #, "#007d85" #blue
-                               , "#F8766D" )# red,
-)
-rects
-
-rects$text =  c("-ve Lig"
-                , "+ve Lig"
-                , "+ve PPI2"
-                , "-ve PPI2"
-                , "+ve stability"
-                , "-ve stability")
-
-# FOR EMBB ONLY
-rects$numbers = c(38, 0, 22, 9, 108, 681)
-rects$num_labels = paste0("n=", rects$numbers)
-
-rects
-
-#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
-
-peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
-  geom_tile(width = 1, height = 1) + # make square tiles
-  geom_text(color = "black", size = 1.7) + # add white text in the middle
-  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
-  coord_fixed() + # make sure tiles are square
-  coord_flip()+ scale_x_reverse() +
-  # theme_void() # remove any axis markings
-  theme_nothing() # remove any axis markings
-peP
-
-peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
-  geom_tile() + # make square tiles
-  geom_text(color = "black", size = 1.6) + # add white text in the middle
-  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
-  coord_fixed() + # make sure tiles are square
-  theme_nothing() # remove any axis markings
-peP2
-
-# ------------------------------
-# bp site site count: ALL
-# <10 Ang ligand
-# ------------------------------
-posC_all = site_snp_count_bp(plotdf = df3
-                             , df_colname = "position"
-                             , xaxis_title = "Number of nsSNPs"
-                             , yaxis_title = "Number of Sites"
-                             , subtitle_colour = "chocolate4"
-                             , subtitle_text = "All mutations sites"
-                             , subtitle_size = 8
-                             , geom_ls = 2.6
-                             , leg_text_size = 10
-                             , axis_text_size = 10
-                             , axis_label_size = 10)
-posC_all
-##################################################################
-
-#------------------------------
-# barplot for sensitivity:
-#------------------------------
-# sensP = stability_count_bp(plotdf = df3
-#                           , df_colname = "sensitivity"
-#                           #, leg_title  = "mCSM-ppi2"
-#                           #, label_categories = labels_ppi2
-#                           #, bp_plot_title = paste(common_bp_title, "PP-interface")
-#                           
-#                           , yaxis_title = "Number of nsSNPs"
-#                           , leg_position = "none"
-#                           , subtitle_text = "Sensitivity"
-#                           , bar_fill_values = c("red", "blue")
-#                           , subtitle_colour= "black"
-#                           , sts = 10
-#                           , lts = 8
-#                           , ats = 8
-#                           , als =8
-#                           , ltis = 11
-#                           , geom_ls =2
-# )
-
-
-consurfP = stability_count_bp(plotdf = df3
-                              , df_colname = "consurf_outcome"
-                              #, leg_title = "ConSurf"
-                              #, label_categories = labels_consurf
-                              , yaxis_title = "Number of nsSNPs"
-                              , leg_position = "top"
-                              , subtitle_text = "ConSurf"
-                              , bar_fill_values = consurf_colours # from globals
-                              , subtitle_colour= "black"
-                              , sts = 10
-                              , lts = 8
-                              , ats = 8
-                              , als = 8
-                              , ltis = 11
-                              , geom_ls = 2)
-
-consurfP
-
-####################
-# Sensitivity count: Mutations
-####################
-table(df3$sensitivity)
-
-rect_sens=data.frame(mutation_class=c("Resistant","Sensitive")
-                    , tile_colour =c("red","blue")
-                    , numbers = c(table(df3$sensitivity)[[1]], table(df3$sensitivity)[[2]]))
-
-
-
-sensP = ggplot(rect_sens, aes(mutation_class, y = 0
-                              , fill = tile_colour
-                              , label = paste0("n=", numbers)
-                              )) +
-  geom_tile(width = 1, height = 1) + # make square tiles
-  geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + # add white text in the middle
-  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
-  coord_fixed() + # make sure tiles are square
-  #coord_flip()+ scale_x_reverse() +
-  # theme_void() # remove any axis markings
-  theme_nothing() # remove any axis markings
-sensP
-
-
-# sensP2 = sensP + 
-#   coord_flip() + scale_x_reverse()
-# sensP2
-#===============================
-# Sensitivity count: Site
-#==============================
-table(df3$sensitivity)
-#--------
-# embb
-#--------
-#rsc = 54
-#ccc = 46
-#ssc = 470
-
-
-rect_rs_siteC =data.frame(mutation_class=c("A_Resistant sites"
-                                           , "B_Common sites"
-                                           , "C_Sensitive sites"), 
-                          tile_colour =c("red",
-                                         "purple",
-                                         "blue"),
-                          numbers = c(rsc, ccc, ssc), 
-                          order  = c(1, 2, 3))
-
-rect_rs_siteC$labels = paste0(rect_rs_siteC$mutation_class, "\nn=", rect_rs_siteC$ numbers)
-
-sens_siteP = ggplot(rect_rs_siteC, aes(mutation_class, y = 0,
-                                       fill = tile_colour,
-                                       label = paste0("n=", numbers))) +
-  geom_tile(width = 1, height = 1) + 
-  geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + 
-  theme_nothing() 
-sens_siteP
-
-##############################################################
-#===================
-# Stability
-#===================
-# duetP
-duetP = stability_count_bp(plotdf = df3
-                           , df_colname = "duet_outcome"
-                           , leg_title = "mCSM-DUET"
-                           #, label_categories = labels_duet
-                           , yaxis_title = "Number of nsSNPs"
-                           , leg_position = "none"
-                           , subtitle_text = "mCSM-DUET"
-                           , bar_fill_values = c("#F8766D", "#00BFC4")
-                           , subtitle_colour= "black"
-                           , sts = 10
-                           , lts = 8
-                           , ats = 12
-                           , als = 11
-                           , ltis = 11
-                           , geom_ls = 2.5
-)
-duetP
-
-# foldx
-foldxP = stability_count_bp(plotdf = df3
-                            , df_colname = "foldx_outcome"
-                            #, leg_title = "FoldX"
-                            #, label_categories = labels_foldx
-                            , yaxis_title = ""
-                            , leg_position = "none"
-                            , subtitle_text = "FoldX"
-                            , bar_fill_values = c("#F8766D", "#00BFC4")
-                            , sts = 10
-                            , lts = 8
-                            , ats = 12
-                            , als = 11
-                            , ltis = 11
-                            , geom_ls = 2.5
-)
-foldxP
-
-# deepddg
-deepddgP = stability_count_bp(plotdf = df3
-                              , df_colname = "deepddg_outcome"
-                              #, leg_title = "DeepDDG"
-                              #, label_categories = labels_deepddg
-                              , yaxis_title = ""
-                              , leg_position = "none"
-                              , subtitle_text = "DeepDDG"
-                              , bar_fill_values = c("#F8766D", "#00BFC4")
-                              , sts = 10
-                              , lts = 8
-                              , ats = 12
-                              , als = 11
-                              , ltis = 11
-                              , geom_ls = 2.5
-)
-deepddgP
-
-# deepddg
-dynamut2P = stability_count_bp(plotdf = df3
-                               , df_colname = "ddg_dynamut2_outcome"
-                               #, leg_title = "Dynamut2"
-                               #, label_categories = labels_ddg_dynamut2_outcome
-                               , yaxis_title = ""
-                               , leg_position = "none"
-                               , subtitle_text = "Dynamut2"
-                               , bar_fill_values = c("#F8766D", "#00BFC4")
-                               , sts = 10
-                               , lts = 8
-                               , ats = 12
-                               , als = 11
-                               , ltis = 11
-                               , geom_ls = 2.5
-)
-dynamut2P
-
-# provean
-proveanP = stability_count_bp(plotdf = df3
-                              , df_colname = "provean_outcome"
-                              #, leg_title = "PROVEAN"
-                              #, label_categories = labels_provean
-                              , yaxis_title = "Number of nsSNPs"
-                              , leg_position = "none" # top
-                              , subtitle_text = "PROVEAN"
-                              , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
-                              , sts = 10
-                              , lts = 8
-                              , ats = 12
-                              , als = 11
-                              , ltis = 11
-                              , geom_ls = 2.5
-)
-proveanP
-
-# snap2
-snap2P = stability_count_bp(plotdf = df3
-                            , df_colname = "snap2_outcome"
-                            #, leg_title = "SNAP2"
-                            #, label_categories = labels_snap2
-                            , yaxis_title = ""
-                            , leg_position = "none" # top
-                            , subtitle_text = "SNAP2"
-                            , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
-                            , sts = 10
-                            , lts = 8
-                            , ats = 12
-                            , als = 11
-                            , ltis = 11
-                            , geom_ls = 2.5)
-snap2P
-
-##############################################################
-
-##############################
-# FIXME for other genes: ATTEMPTED to derive numbers
-##############################
-# 
-# table(str_df_short$pe_effect_outcome)
-# # extract the numbers
-# DD_lig_n       = table(str_df_short$pe_effect_outcome)[[1]]
-# SS_lig_n       = 0
-# DD_ppi2_n      = table(str_df_short$pe_effect_outcome)[[2]]
-# SS_ppi2_n      = table(str_df_short$pe_effect_outcome)[[4]]
-# DD_stability_n = table(str_df_short$pe_effect_outcome)[[3]]
-# SS_stability_n = table(str_df_short$pe_effect_outcome)[[5]]
-# 
-# nums = c(DD_lig_n, SS_lig_n,DD_ppi2_n,SS_ppi2_n, DD_stability_n, SS_stability_n )
-# 
-# rect_pe = data.frame(x = 1:6
-#                      , pe_effect_type=c("-ve Lig aff"
-#                                       , "+ve Lig aff"
-#                                       , "-ve PPI2 aff"
-#                                       , " +ve PPI2 aff"
-#                                       , "-ve stability"
-#                                       , "+ve stability")
-#                      
-#                      , tile_colour =c("#ffd700" #gold
-#                                       ,"#f0e68c" # khaki
-#                                       , "#ff1493" #deeppink
-#                                       , "#da70d6" #orchid
-#                                       , "#F8766D" # Sred
-#                                       , "#00BFC4") #Sblue
-#                      # , numbers = c(DD_lig_n
-#                      #               , SS_lig_n
-#                      #               , DD_ppi2_n
-#                      #               , SS_ppi2_n
-#                      #               , DD_stability_n
-#                      #               , SS_stability_n )
-#                      , numbers = nums
-#                      )
-# 
-# rect_pe$num_labels = paste0("n=", rect_pe$numbers)
-# rect_pe
-# 
-# # create plot
-# peP = ggplot(rect_pe, aes(x=pe_effect_type , y = 0, fill = tile_colour
-#                           , label = paste0(pe_effect_type,"\n", num_labels))) +
-#   geom_tile(width = 1, height = 1) + # make square tiles
-#   geom_text(color = "black", size = 1.7) + # add white text in the middle
-#   scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
-#   coord_fixed() + # make sure tiles are square
-#   coord_flip()+ scale_x_reverse() +
-#   # theme_void() # remove any axis markings
-#   theme_nothing() # remove any axis markings
-# peP
-# 
-# peP2 = ggplot(rect_pe, aes(x=pe_effect_type, y = 0, fill = tile_colour
-#                            , label = paste0(pe_effect_type,"\n", num_labels))) +
-#   geom_tile() + 
-#   geom_text(color = "black", size = 1.6) + 
-#   scale_fill_identity(guide = "none") + 
-#   coord_fixed() +
-#   theme_nothing() 
-# peP2