From 04253b961f82df6b253d8ed374e424ca9b2d5bc9 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Mon, 22 Aug 2022 21:56:13 +0100
Subject: [PATCH] lots of per-plot configs

---
 scripts/functions/bp_lineage.R                |  15 +-
 scripts/functions/bp_lineage_diversity.R      |  13 +-
 scripts/functions/corr_plot_data.R            |  18 +-
 scripts/functions/dm_om_data.R                |   6 +-
 scripts/functions/lineage_dist.R              |  11 +-
 scripts/functions/position_count_bp.R         |   3 +-
 scripts/plotting/get_plotting_dfs.R           |   6 +-
 .../alr/dm_om_plots_layout_alr.R              | 164 +++++++
 .../plotting/plotting_thesis/basic_barplots.R |  46 +-
 .../plotting_thesis/basic_barplots_UPDATED.R  | 357 +++++++++++++++
 .../plotting/plotting_thesis/dm_om_plots.R    |  48 ++-
 .../gid/basic_barplots_layout_gid.R           | 271 ++++++++++++
 .../plotting_thesis/gid/dm_om_plots_gid.R     | 320 ++++++++++++++
 .../plotting_thesis/gid/gg_pairs_all_gid.R    | 178 ++++++++
 scripts/plotting/plotting_thesis/just.R       |  38 ++
 .../katg/basic_barplots_katg.R                | 364 ++++++++++++++++
 .../katg/basic_barplots_layput_katg.R         | 309 +++++++++++++
 .../katg/dm_om_plots_layout_katg.R            | 179 ++++++++
 .../plotting_thesis/katg/gg_pairs_all_katg.R  | 203 +++++++++
 .../katg/pe_sens_site_count_katg.R            | 179 ++++++++
 .../katg/prominent_effects_katg.R             | 331 ++++++++++++++
 .../katg/sensitivity_count_katg.R             |  65 +++
 .../plotting/plotting_thesis/linage_bp_dist.R |  69 ++-
 .../plotting_thesis/linage_bp_dist_layout.R   |  36 +-
 .../plotting_thesis/pe_sens_site_count.R      |  78 ++++
 .../plotting_thesis/prominent_effects.R       | 405 ------------------
 .../rpob/basic_barplots_layout_rpob.R         | 348 +++++++++++++++
 .../rpob/basic_barplots_rpob.R                | 372 ++++++++++++++++
 .../rpob/dm_om_plots_layout_rpob.R            | 179 ++++++++
 .../plotting_thesis/rpob/gg_pairs_all_rpob.R  | 203 +++++++++
 .../plotting_thesis/rpob/katg_other_plots.R   |   2 +
 .../plotting_thesis/rpob/linage_bp_dist.R     | 172 ++++++++
 .../rpob/linage_bp_dist_layout.R              |  30 ++
 .../rpob/pe_sens_site_count_rpob.R            | 179 ++++++++
 .../rpob/prominent_effects_rpob.R             | 398 +++++++++++++++++
 .../rpob/sensitivity_count_rpob.R             |  66 +++
 36 files changed, 5121 insertions(+), 540 deletions(-)
 create mode 100644 scripts/plotting/plotting_thesis/alr/dm_om_plots_layout_alr.R
 create mode 100644 scripts/plotting/plotting_thesis/basic_barplots_UPDATED.R
 create mode 100644 scripts/plotting/plotting_thesis/gid/basic_barplots_layout_gid.R
 create mode 100644 scripts/plotting/plotting_thesis/gid/dm_om_plots_gid.R
 create mode 100644 scripts/plotting/plotting_thesis/gid/gg_pairs_all_gid.R
 create mode 100644 scripts/plotting/plotting_thesis/just.R
 create mode 100644 scripts/plotting/plotting_thesis/katg/basic_barplots_katg.R
 create mode 100644 scripts/plotting/plotting_thesis/katg/basic_barplots_layput_katg.R
 create mode 100644 scripts/plotting/plotting_thesis/katg/dm_om_plots_layout_katg.R
 create mode 100644 scripts/plotting/plotting_thesis/katg/gg_pairs_all_katg.R
 create mode 100644 scripts/plotting/plotting_thesis/katg/pe_sens_site_count_katg.R
 create mode 100644 scripts/plotting/plotting_thesis/katg/prominent_effects_katg.R
 create mode 100644 scripts/plotting/plotting_thesis/katg/sensitivity_count_katg.R
 create mode 100644 scripts/plotting/plotting_thesis/pe_sens_site_count.R
 delete mode 100644 scripts/plotting/plotting_thesis/prominent_effects.R
 create mode 100644 scripts/plotting/plotting_thesis/rpob/basic_barplots_layout_rpob.R
 create mode 100644 scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R
 create mode 100644 scripts/plotting/plotting_thesis/rpob/dm_om_plots_layout_rpob.R
 create mode 100644 scripts/plotting/plotting_thesis/rpob/gg_pairs_all_rpob.R
 create mode 100644 scripts/plotting/plotting_thesis/rpob/katg_other_plots.R
 create mode 100644 scripts/plotting/plotting_thesis/rpob/linage_bp_dist.R
 create mode 100644 scripts/plotting/plotting_thesis/rpob/linage_bp_dist_layout.R
 create mode 100644 scripts/plotting/plotting_thesis/rpob/pe_sens_site_count_rpob.R
 create mode 100644 scripts/plotting/plotting_thesis/rpob/prominent_effects_rpob.R
 create mode 100644 scripts/plotting/plotting_thesis/rpob/sensitivity_count_rpob.R

diff --git a/scripts/functions/bp_lineage.R b/scripts/functions/bp_lineage.R
index 643d0a5..cc8103f 100644
--- a/scripts/functions/bp_lineage.R
+++ b/scripts/functions/bp_lineage.R
@@ -13,15 +13,15 @@ lin_count_bp <- function( lf_data = lin_lf
                           , display_label_col = "p_count"
                           , bar_stat_stype = "identity"
                           , x_lab_angle = 90
-                          , d_lab_size = 5
+                          , d_lab_size = 2.3
                           , d_lab_hjust = 0.5
                           , d_lab_vjust = 0.5
                           , d_lab_col = "black"
-                          , my_xats = 20 # x axis text size
-                          , my_yats = 20 # y axis text size
-                          , my_xals = 22 # x axis label size
-                          , my_yals = 22 # y axis label size
-                          , my_lls   = 22 # legend label size
+                          , my_xats = 8 # x axis text size
+                          , my_yats = 8 # y axis text size
+                          , my_xals = 10 # x axis label size
+                          , my_yals = 10 # y axis label size
+                          , my_lls   = 10 # legend label size
                           , bar_col_labels =  c("Mutations", "Total Samples")
                           , bar_col_values = c("grey50", "gray75")
                           , bar_leg_name = ""
@@ -56,7 +56,8 @@ lin_count_bp <- function( lf_data = lin_lf
           , axis.title.y = element_text(size     = my_yals
                                         , colour = "black")
           , legend.position = leg_location
-          , legend.text = element_text(size = my_lls)) + 
+          , legend.text = element_text(size = my_lls)
+          , legend.key.size = unit(my_lls, 'pt')) + 
     
     geom_label(aes(label = eval(parse(text = display_label_col)))
                , size    = d_lab_size
diff --git a/scripts/functions/bp_lineage_diversity.R b/scripts/functions/bp_lineage_diversity.R
index 68617f2..2a5d6a3 100644
--- a/scripts/functions/bp_lineage_diversity.R
+++ b/scripts/functions/bp_lineage_diversity.R
@@ -14,15 +14,15 @@ lin_count_bp_diversity <- function( lf_data = lin_wf
                           , display_label_col = "snp_diversity_f"
                           , bar_stat_stype = "identity"
                           , x_lab_angle = 90
-                          , d_lab_size = 5
+                          , d_lab_size = 2.3
                           , d_lab_hjust = 0.5
                           , d_lab_vjust = 0.5
                           , d_lab_col = "black"
-                          , my_xats = 20 # x axis text size
-                          , my_yats = 20 # y axis text size
-                          , my_xals = 22 # x axis label size
-                          , my_yals = 22 # y axis label size
-                          , my_lls   = 22 # legend label size
+                          , my_xats = 8 # x axis text size
+                          , my_yats = 8 # y axis text size
+                          , my_xals = 10 # x axis label size
+                          , my_yals = 10 # y axis label size
+                          , my_lls   = 10 # legend label size
                           , bar_col_labels =  "" #c("Mutations", "Total Samples")
                           , bar_col_values = c("gray50", "gray75")
                           , bar_leg_name = ""
@@ -64,6 +64,7 @@ lin_count_bp_diversity <- function( lf_data = lin_wf
                                         , colour = "black")
           , legend.position = leg_location
           , legend.text = element_text(size = my_lls)
+          , legend.key.size = unit(my_lls, 'pt')
           , plot.title = element_text(size =  my_lls
                                       , colour = title_colour
                                       , hjust = 0.5)
diff --git a/scripts/functions/corr_plot_data.R b/scripts/functions/corr_plot_data.R
index ab479c2..0384df9 100644
--- a/scripts/functions/corr_plot_data.R
+++ b/scripts/functions/corr_plot_data.R
@@ -7,9 +7,6 @@
 # LigDist_colname   #from globals: plotting_globals.R
 # ppi2Dist_colname  #from globals: plotting_globals.R
 # naDist_colname    #from globals: plotting_globals.R
-geneL_normal  = c("pnca")
-geneL_na      = c("gid", "rpob")
-geneL_ppi2    = c("alr", "embb", "katg", "rpob")
 
 corr_data_extract <- function(df
                               , gene
@@ -75,11 +72,24 @@ corr_data_extract <- function(df
     
     if (tolower(gene)%in%geneL_na){
       colnames_to_extract = c(common_colnames,"mcsm_na_affinity", naDist_colname)
-      display_colnames    = c(display_common_colnames, "mCSM-NA", "NCA-Dist")
+      display_colnames    = c(display_common_colnames, "mCSM-NA", "NA-Dist")
       corr_df             = df[,colnames_to_extract]
       colnames(corr_df)   = display_colnames
     }
     
+    # SPECIAL case for rpob as it exists in both ppi and na
+    if (tolower(gene)%in%c("rpob")){
+      colnames_to_extract = c(common_colnames
+                              , "mcsm_ppi2_affinity", ppi2Dist_colname
+                              , "mcsm_na_affinity", naDist_colname)
+      
+      display_colnames    = c(display_common_colnames
+                              ,"mCSM-PPI2", "PPI-Dist"
+                              ,"mCSM-NA", "NA-Dist" )
+      
+      corr_df = df[,colnames_to_extract]
+      colnames(corr_df)   = display_colnames
+    }
     # [optional] arg: extract_scaled_cols
     if (extract_scaled_cols){
       cat("\nExtracting scaled columns as well...\n")
diff --git a/scripts/functions/dm_om_data.R b/scripts/functions/dm_om_data.R
index 48c290b..8fd8470 100644
--- a/scripts/functions/dm_om_data.R
+++ b/scripts/functions/dm_om_data.R
@@ -734,7 +734,7 @@ dm_om_wf_lf_data <- function(df
   # mcsm-ppi2 affinity
   # data filtered by cut off
   #========================
-  if (tolower(gene)%in%geneL_ppi2){
+  if (tolower(gene)%in%geneL_ppi2 || tolower(gene)%in%geneL_both){
     #-----------------
     # mCSM-PPI2: WF and lF
     #-----------------
@@ -776,13 +776,11 @@ dm_om_wf_lf_data <- function(df
     
   }
   
-  
-  
   #====================
   # mcsm-NA affinity
   # data filtered by cut off
   #====================
-  if (tolower(gene)%in%geneL_na){
+  if (tolower(gene)%in%geneL_na|| tolower(gene)%in%geneL_both){
     #---------------
     # mCSM-NA: WF and lF
     #-----------------
diff --git a/scripts/functions/lineage_dist.R b/scripts/functions/lineage_dist.R
index 4e3689a..e19f2ba 100644
--- a/scripts/functions/lineage_dist.R
+++ b/scripts/functions/lineage_dist.R
@@ -20,11 +20,11 @@ lineage_distP <- function(plotdf
                           , fill_categ = "mutation_info_labels"
                           , fill_categ_cols = c("#E69F00", "#999999")
                           , label_categories = c("LABEL1", "LABEL2")
-                          , my_ats = 15 # axis text size
-                          , my_als = 20 # axis label size
-                          , my_leg_ts = 16
-                          , my_leg_title = 16
-                          , my_strip_ts = 20
+                          , my_ats = 15 # 15 axis text size
+                          , my_als = 20 # 20 axis label size
+                          , my_leg_ts = 16 #16
+                          , my_leg_title = 16 #16
+                          , my_strip_ts = 20 #20
                           , leg_pos = c(0.8, 0.9)
                           , leg_pos_wf = c("top", "left", "bottom", "right")
                           , leg_dir_wf = c("horizontal", "vertical")
@@ -57,6 +57,7 @@ lineage_distP <- function(plotdf
           , axis.title.y = element_blank()
           , strip.text = element_text(size = my_strip_ts)
           , legend.text = element_text(size = my_leg_ts)
+          , legend.key.size = unit(my_leg_ts, 'pt')
           , legend.title = element_text(size = my_leg_title)
           , legend.position = c(0.8, 0.9)) +
     labs(x = x_lab
diff --git a/scripts/functions/position_count_bp.R b/scripts/functions/position_count_bp.R
index d81932d..d334ae4 100755
--- a/scripts/functions/position_count_bp.R
+++ b/scripts/functions/position_count_bp.R
@@ -132,7 +132,8 @@ site_snp_count_bp <- function (plotdf
     scale_x_continuous(breaks = unique(snpsBYpos_df$snpsBYpos)) +
     geom_label(stat = "count", aes(label = ..count..)
                , color = "black"
-               , size = geom_ls) +
+               , size = geom_ls
+               , position = position_dodge2(width = 1)) +
     theme(axis.text.x = element_text(size = axis_text_size
                                      , angle = 0)
           , axis.text.y = element_text(size = axis_text_size
diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R
index aae338e..da0687a 100644
--- a/scripts/plotting/get_plotting_dfs.R
+++ b/scripts/plotting/get_plotting_dfs.R
@@ -49,9 +49,13 @@ if (!exists("infile_params") && exists("gene")){
   cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n")
 }
 
-# Input 1: read <gene>_comb_afor.csv
+# Input 1:
 cat("\nReading mcsm combined data file: ", infile_params)
 mcsm_df = read.csv(infile_params, header = T)
+if (tolower(gene)%in%c("rpob")){
+  mcsm_df =  mcsm_df[mcsm_df$position!=1148,]
+}
+
 pd_df = plotting_data(mcsm_df
                       , gene = gene # ADDED
                       , lig_dist_colname = LigDist_colname
diff --git a/scripts/plotting/plotting_thesis/alr/dm_om_plots_layout_alr.R b/scripts/plotting/plotting_thesis/alr/dm_om_plots_layout_alr.R
new file mode 100644
index 0000000..64d17a0
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/alr/dm_om_plots_layout_alr.R
@@ -0,0 +1,164 @@
+# source dm_om_plots.R
+#============
+# Plot labels
+#============
+tit1 = "Stability changes"
+tit2 = "Genomic measure"
+tit3 = "Distance to partners"
+tit4 = "Evolutionary Conservation"
+tit5 = "Affinity changes"
+pt_size = 30
+
+theme_georgia <- function(...) {
+  theme_gray(base_family = "sans", ...) + 
+    theme(plot.title = element_text(face = "bold"))
+}
+
+
+title_theme <- calc_element("plot.title", theme_georgia())
+
+pt1 = ggdraw() + 
+  draw_label(
+    tit1,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = pt_size
+  )
+
+pt2 = ggdraw() + 
+  draw_label(
+    tit2,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = pt_size
+  )
+
+pt3 = ggdraw() + 
+  draw_label(
+    tit3,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = pt_size
+  )
+
+pt4 = ggdraw() + 
+  draw_label(
+    tit4,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = pt_size
+  )
+
+
+pt5 = ggdraw() + 
+  draw_label(
+    tit5,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = pt_size
+  )
+
+#======================
+# Output plot function
+#======================
+OutPlot_dm_om = function(x){
+  
+  # dist b/w plot title and plot
+  relH_tp = c(0.08, 0.92)
+  
+  my_label_size = 25
+  #----------------
+  # Top panel
+  #----------------
+  top_panel = cowplot::plot_grid(
+    cowplot::plot_grid(pt1, 
+                       cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P
+                                          , nrow = 1
+                                          , labels = c("A", "B", "C", "D")
+                                          , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights = relH_tp
+    ),
+    NULL,
+    cowplot::plot_grid(pt2, 
+                       cowplot::plot_grid(genomicsP
+                                          , nrow = 1
+                                          , labels = c("E")
+                                          , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights = relH_tp
+    ),
+    NULL,
+    cowplot::plot_grid(pt3, 
+                       cowplot::plot_grid( #distanceP
+                         distanceP_lig
+                         , distanceP_ppi2
+                         #, distanceP_na
+                         , nrow = 1
+                         , labels = c("F", "G")
+                         , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights = relH_tp
+    ),
+    nrow = 1,
+    rel_widths = c(2/7, 0.1/7, 0.5/7, 0.1/7,  1/7)
+  )
+  
+  #----------------
+  # Bottom panel
+  #----------------
+  bottom_panel = cowplot::plot_grid(
+    cowplot::plot_grid(pt4, 
+                       cowplot::plot_grid(consurfP, proveanP, snap2P
+                                          , nrow = 1
+                                          , labels = c("H", "I", "J")
+                                          , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights =relH_tp
+    ),NULL,
+    cowplot::plot_grid(pt5, 
+                       cowplot::plot_grid(mcsmligP, mcsmlig2P
+                                          , mcsmppi2P
+                                          #, mcsmnaP
+                                          , nrow = 1
+                                          , labels = c("K", "L", "M")
+                                          , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights = relH_tp
+    ),NULL,
+    nrow = 1, 
+    rel_widths = c(3/6,0.1/6,3/6, 0.1/6 )
+  )
+  
+  #-------------------------------
+  # combine: Top and Bottom panel
+  #-------------------------------
+  cowplot::plot_grid (top_panel, bottom_panel
+                      , nrow =2
+                      , rel_widths = c(1, 1)
+                      , align = "hv")
+}
+
+#=====================
+# OutPlot: svg and png
+#======================
+dm_om_combinedP =  paste0(outdir_images
+                          ,tolower(gene)
+                          ,"_dm_om_all.svg")
+
+cat("DM OM plots with stats:", dm_om_combinedP)
+svg(dm_om_combinedP, width = 32, height = 18)
+
+OutPlot_dm_om()
+dev.off()
+
+
+dm_om_combinedP_png =  paste0(outdir_images
+                              ,tolower(gene)
+                              ,"_dm_om_all.png")
+cat("DM OM plots with stats:", dm_om_combinedP_png)
+png(dm_om_combinedP_png, width = 32, height = 18, units = "in", res = 300)
+
+OutPlot_dm_om()
+dev.off()
diff --git a/scripts/plotting/plotting_thesis/basic_barplots.R b/scripts/plotting/plotting_thesis/basic_barplots.R
index 61b3540..10f1dc4 100644
--- a/scripts/plotting/plotting_thesis/basic_barplots.R
+++ b/scripts/plotting/plotting_thesis/basic_barplots.R
@@ -183,7 +183,6 @@ if (tolower(gene)%in%geneL_na){
 }
 
 #####################################################################
-
 # ------------------------------
 # bp site site count: mCSM-lig
 # < 10 Ang ligand
@@ -203,10 +202,10 @@ posC_lig = site_snp_count_bp(plotdf = df3_lig
                   , axis_label_size = 10)
 
 posC_lig
-# ------------------------------
+#------------------------------
 # bp site site count: ppi2
 # < 10 Ang interface
-# ------------------------------
+#------------------------------
 if (tolower(gene)%in%geneL_ppi2){
   
   posC_ppi2 = site_snp_count_bp(plotdf = df3_ppi2
@@ -223,10 +222,10 @@ if (tolower(gene)%in%geneL_ppi2){
   posC_ppi2
 }
 
-# ------------------------------
+#------------------------------
 # bp site site count: NCA dist
 # < 10 Ang nca
-# ------------------------------
+#------------------------------
 if (tolower(gene)%in%geneL_na){
   
   posC_nca = site_snp_count_bp(plotdf = df3_na
@@ -242,15 +241,11 @@ if (tolower(gene)%in%geneL_na){
                                 , axis_label_size = 10)
   posC_nca
 }
-
-
 #===============================================================
-
-
-# ------------------------------
+#------------------------------
 # bp site site count: ALL
 # <10 Ang ligand
-# ------------------------------
+#------------------------------
 posC_all = site_snp_count_bp(plotdf = df3
                              , df_colname = "position"
                              , xaxis_title = "Number of nsSNPs"
@@ -282,34 +277,6 @@ consurfP = stability_count_bp(plotdf = df3
 
 consurfP
 
-####################
-# Sensitivity count: Mutations
-####################
-table(df3$sensitivity)
-
-rect_sens=data.frame(mutation_class=c("Resistant","Sensitive")
-                    , tile_colour =c("red","blue")
-                    , numbers = c(table(df3$sensitivity)[[1]], table(df3$sensitivity)[[2]]))
-
-
-
-sensP = ggplot(rect_sens, aes(mutation_class, y = 0
-                              , fill = tile_colour
-                              , label = paste0("n=", numbers)
-                              )) +
-  geom_tile(width = 1, height = 1) + # make square tiles
-  geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + # add white text in the middle
-  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
-  coord_fixed() + # make sure tiles are square
-  #coord_flip()+ scale_x_reverse() +
-  # theme_void() # remove any axis markings
-  theme_nothing() # remove any axis markings
-sensP
-
-# sensP2 = sensP + 
-#   coord_flip() + scale_x_reverse()
-# sensP2
-
 ##############################################################
 #===================
 # Stability
@@ -421,5 +388,4 @@ snap2P = stability_count_bp(plotdf = df3
                             , ltis = 11
                             , geom_ls = 2.5)
 snap2P
-
 #####################################################################################
diff --git a/scripts/plotting/plotting_thesis/basic_barplots_UPDATED.R b/scripts/plotting/plotting_thesis/basic_barplots_UPDATED.R
new file mode 100644
index 0000000..96572e0
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/basic_barplots_UPDATED.R
@@ -0,0 +1,357 @@
+#!/usr/bin/env Rscript   
+#########################################################
+# TASK: Barplots
+# basic barplots with outcome
+# basic barplots with frequency of count of mutations
+#########################################################
+#=============
+# Data: Input
+#==============
+#source("~/git/LSHTM_analysis/config/pnca.R")
+#source("~/git/LSHTM_analysis/config/embb.R")
+#source("~/git/LSHTM_analysis/config/gid.R")
+
+#source("~/git/LSHTM_analysis/config/alr.R")
+#source("~/git/LSHTM_analysis/config/katg.R")
+source("~/git/LSHTM_analysis/config/rpob.R")
+
+source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above
+
+cat("\nSourced plotting cols as well:", length(plotting_cols))
+
+####################################################
+class(merged_df3)
+
+df3 = subset(merged_df3, select = -c(pos_count))
+
+#=======
+# output
+#=======
+outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
+cat("plots will output to:", outdir_images)
+
+###########################################################
+#------------------------------
+# plot default sizes
+#------------------------------
+#=========================
+# Affinity outcome
+# check this var: outcome_cols_affinity
+# get from preformatting or put in globals
+#==========================
+DistCutOff
+LigDist_colname  # = "ligand_distance" # from globals 
+ppi2Dist_colname
+naDist_colname
+
+###########################################################
+# get plotting data within the distance
+df3_lig  = df3[df3[[LigDist_colname]]<DistCutOff,]
+df3_ppi2 = df3[df3[[ppi2Dist_colname]]<DistCutOff,]
+df3_na   = df3[df3[[naDist_colname]]<DistCutOff,]
+common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
+
+#------------------------------
+# barplot for ligand affinity:
+# <10 Ang of ligand
+#------------------------------
+mLigP = stability_count_bp(plotdf = df3_lig
+               , df_colname = "ligand_outcome"
+               #, leg_title  = "mCSM-lig"
+               #, bp_plot_title = paste(common_bp_title, "ligand")
+               , yaxis_title = "Number of nsSNPs"
+               , leg_position = "none"
+               , subtitle_text = "mCSM-lig"
+               , bar_fill_values = c("#F8766D", "#00BFC4")
+               , subtitle_colour= "black"
+               , sts = 10
+               , lts = 8
+               , ats = 12
+               , als = 11
+               , ltis = 11
+               , geom_ls = 2.5)
+mLigP
+#------------------------------
+# barplot for ligand affinity:
+# <10 Ang of ligand
+# mmCSM-lig: will be the same no. of sites but the effect will be different
+#------------------------------
+mmLigP = stability_count_bp(plotdf = df3_lig
+                   , df_colname = "mmcsm_lig_outcome"
+                   #, leg_title  = "mmCSM-lig"
+                   #, label_categories = labels_mmlig
+                   #, bp_plot_title = paste(common_bp_title, "ligand")
+                   
+                   , yaxis_title = ""
+                   , leg_position = "none"
+                   , subtitle_text = "mmCSM-lig"
+                   , bar_fill_values = c("#F8766D", "#00BFC4")
+                   , subtitle_colour= "black"
+                   , sts = 10
+                   , lts = 8
+                   , ats = 12
+                   , als = 11
+                   , ltis = 11
+                   , geom_ls = 2.5
+                   )
+mmLigP
+#------------------------------
+# barplot for ppi2 affinity
+#  <10 Ang of interface
+#------------------------------
+if (tolower(gene)%in%geneL_ppi2){
+    ppi2P = stability_count_bp(plotdf = df3_ppi2
+                     , df_colname = "mcsm_ppi2_outcome"
+                     #, leg_title  = "mCSM-ppi2"
+                     #, label_categories = labels_ppi2
+                     #, bp_plot_title = paste(common_bp_title, "PP-interface")
+                     
+                     , yaxis_title = "Number of nsSNPs"
+                     , leg_position = "none"
+                     , subtitle_text = "mCSM-ppi2"
+                     , bar_fill_values = c("#F8766D", "#00BFC4")
+                     , subtitle_colour= "black"
+                     , sts = 10
+                     , lts = 8
+                     , ats = 12
+                     , als = 11
+                     , ltis = 11
+                     , geom_ls = 2.5
+                     )
+  ppi2P
+}
+#----------------------------
+# barplot for ppi2 affinity
+#  <10 Ang of interface
+#------------------------------
+if (tolower(gene)%in%geneL_na){
+    
+  nca_distP = stability_count_bp(plotdf = df3_na
+                             , df_colname = "mcsm_na_outcome"
+                             #, leg_title  = "mCSM-NA"
+                             #, label_categories = 
+                             #, bp_plot_title = paste(common_bp_title, "Dist to NA")
+                             
+                             , yaxis_title = "Number of nsSNPs"
+                             , leg_position = "none"
+                             , subtitle_text = "mCSM-NA"
+                             , bar_fill_values = c("#F8766D", "#00BFC4")
+                             , subtitle_colour= "black"
+                             , sts = 10
+                             , lts = 8
+                             , ats = 12
+                             , als = 11
+                             , ltis = 11
+                             , geom_ls = 2.5
+  )
+  nca_distP
+}
+
+#####################################################################
+# ------------------------------
+# bp site site count: mCSM-lig
+# < 10 Ang ligand
+# ------------------------------
+common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
+
+posC_lig = site_snp_count_bp(plotdf = df3_lig
+                  , df_colname = "position"
+                  , xaxis_title = "Number of nsSNPs"
+                  , yaxis_title = "Number of Sites"
+                  , subtitle_colour = "chocolate4"
+                  , subtitle_text = ""
+                  , subtitle_size = 8
+                  , geom_ls = 2.6
+                  , leg_text_size = 10
+                  , axis_text_size = 10
+                  , axis_label_size = 10)
+
+posC_lig
+#------------------------------
+# bp site site count: ppi2
+# < 10 Ang interface
+#------------------------------
+if (tolower(gene)%in%geneL_ppi2){
+  
+  posC_ppi2 = site_snp_count_bp(plotdf = df3_ppi2
+                    , df_colname = "position"
+                    , xaxis_title = "Number of nsSNPs"
+                    , yaxis_title = "Number of Sites"
+                    , subtitle_colour = "chocolate4"
+                    , subtitle_text = ""
+                    , subtitle_size = 8
+                    , geom_ls = 2.6
+                    , leg_text_size = 10
+                    , axis_text_size = 10
+                    , axis_label_size = 10)
+  posC_ppi2
+}
+
+#------------------------------
+# bp site site count: NCA dist
+# < 10 Ang nca
+#------------------------------
+if (tolower(gene)%in%geneL_na){
+  
+  posC_nca = site_snp_count_bp(plotdf = df3_na
+                                , df_colname = "position"
+                                , xaxis_title = "Number of nsSNPs"
+                                , yaxis_title = "Number of Sites"
+                                , subtitle_colour = "chocolate4"
+                                , subtitle_text = ""
+                                , subtitle_size = 8
+                                , geom_ls = 2.6
+                                , leg_text_size = 10
+                                , axis_text_size = 10
+                                , axis_label_size = 10)
+  posC_nca
+}
+#===============================================================
+#------------------------------
+# bp site site count: ALL
+# <10 Ang ligand
+#------------------------------
+posC_all = site_snp_count_bp(plotdf = df3
+                             , df_colname = "position"
+                             , xaxis_title = "Number of nsSNPs"
+                             , yaxis_title = "Number of Sites"
+                             , subtitle_colour = "chocolate4"
+                             , subtitle_text = "All mutations sites"
+                             , subtitle_size = 8
+                             , geom_ls = 2.6
+                             , leg_text_size = 10
+                             , axis_text_size = 10
+                             , axis_label_size = 10)
+posC_all
+##################################################################
+consurfP = stability_count_bp(plotdf = df3
+                              , df_colname = "consurf_outcome"
+                              #, leg_title = "ConSurf"
+                              #, label_categories = labels_consurf
+                              , yaxis_title = "Number of nsSNPs"
+                              , leg_position = "top"
+                              , subtitle_text = "ConSurf"
+                              , bar_fill_values = consurf_colours # from globals
+                              , subtitle_colour= "black"
+                              , sts = 10
+                              , lts = 8
+                              , ats = 8
+                              , als = 8
+                              , ltis = 11
+                              , geom_ls = 2)
+
+consurfP
+
+##############################################################
+#===================
+# Stability
+#===================
+# duetP
+duetP = stability_count_bp(plotdf = df3
+                           , df_colname = "duet_outcome"
+                           , leg_title = "mCSM-DUET"
+                           #, label_categories = labels_duet
+                           , yaxis_title = "Number of nsSNPs"
+                           , leg_position = "none"
+                           , subtitle_text = "mCSM-DUET"
+                           , bar_fill_values = c("#F8766D", "#00BFC4")
+                           , subtitle_colour= "black"
+                           , sts = 10
+                           , lts = 8
+                           , ats = 12
+                           , als = 11
+                           , ltis = 11
+                           , geom_ls = 2.5
+)
+duetP
+
+# foldx
+foldxP = stability_count_bp(plotdf = df3
+                            , df_colname = "foldx_outcome"
+                            #, leg_title = "FoldX"
+                            #, label_categories = labels_foldx
+                            , yaxis_title = ""
+                            , leg_position = "none"
+                            , subtitle_text = "FoldX"
+                            , bar_fill_values = c("#F8766D", "#00BFC4")
+                            , sts = 10
+                            , lts = 8
+                            , ats = 12
+                            , als = 11
+                            , ltis = 11
+                            , geom_ls = 2.5
+)
+foldxP
+
+# deepddg
+deepddgP = stability_count_bp(plotdf = df3
+                              , df_colname = "deepddg_outcome"
+                              #, leg_title = "DeepDDG"
+                              #, label_categories = labels_deepddg
+                              , yaxis_title = ""
+                              , leg_position = "none"
+                              , subtitle_text = "DeepDDG"
+                              , bar_fill_values = c("#F8766D", "#00BFC4")
+                              , sts = 10
+                              , lts = 8
+                              , ats = 12
+                              , als = 11
+                              , ltis = 11
+                              , geom_ls = 2.5
+)
+deepddgP
+
+# deepddg
+dynamut2P = stability_count_bp(plotdf = df3
+                               , df_colname = "ddg_dynamut2_outcome"
+                               #, leg_title = "Dynamut2"
+                               #, label_categories = labels_ddg_dynamut2_outcome
+                               , yaxis_title = ""
+                               , leg_position = "none"
+                               , subtitle_text = "Dynamut2"
+                               , bar_fill_values = c("#F8766D", "#00BFC4")
+                               , sts = 10
+                               , lts = 8
+                               , ats = 12
+                               , als = 11
+                               , ltis = 11
+                               , geom_ls = 2.5
+)
+dynamut2P
+
+# provean
+proveanP = stability_count_bp(plotdf = df3
+                              , df_colname = "provean_outcome"
+                              #, leg_title = "PROVEAN"
+                              #, label_categories = labels_provean
+                              , yaxis_title = "Number of nsSNPs"
+                              , leg_position = "none" # top
+                              , subtitle_text = "PROVEAN"
+                              , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
+                              , sts = 10
+                              , lts = 8
+                              , ats = 12
+                              , als = 11
+                              , ltis = 11
+                              , geom_ls = 2.5
+)
+proveanP
+
+# snap2
+snap2P = stability_count_bp(plotdf = df3
+                            , df_colname = "snap2_outcome"
+                            #, leg_title = "SNAP2"
+                            #, label_categories = labels_snap2
+                            , yaxis_title = ""
+                            , leg_position = "none" # top
+                            , subtitle_text = "SNAP2"
+                            , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
+                            , sts = 10
+                            , lts = 8
+                            , ats = 12
+                            , als = 11
+                            , ltis = 11
+                            , geom_ls = 2.5)
+snap2P
+#####################################################################################
diff --git a/scripts/plotting/plotting_thesis/dm_om_plots.R b/scripts/plotting/plotting_thesis/dm_om_plots.R
index d588f6d..962c9ff 100644
--- a/scripts/plotting/plotting_thesis/dm_om_plots.R
+++ b/scripts/plotting/plotting_thesis/dm_om_plots.R
@@ -36,11 +36,11 @@ genomicsP = lf_bp2(genomics_dataP
 
 genomicsP
 
-#check
-wilcox.test(wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="R"]
-            , wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
-
-tapply(wf_dist_genP$`Log10(MAF)`, wf_dist_genP$mutation_info_labels, summary)
+# #check
+# wilcox.test(wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="R"]
+#             , wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
+# 
+# tapply(wf_dist_genP$`Log10(MAF)`, wf_dist_genP$mutation_info_labels, summary)
 
 #-------------------
 # Distance data plot: 
@@ -61,16 +61,16 @@ distanceP = lf_bp2(dist_dataP
 
 distanceP
 
-# check
-wilcox.test(wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
-            , wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
-
-wilcox.test(wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
-            , wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
-
-tapply(wf_dist_genP$`PPI Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
-
-tapply(wf_dist_genP$`Lig Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
+# # check
+# wilcox.test(wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
+#             , wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
+# 
+# wilcox.test(wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
+#             , wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
+# 
+# tapply(wf_dist_genP$`PPI Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
+# 
+# tapply(wf_dist_genP$`Lig Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
 
 
 #-------------------
@@ -100,7 +100,9 @@ if (tolower(gene)%in%geneL_ppi2){
   levels(dist_data_ppi2$param_type)
   distanceP_ppi2 = lf_bp2(dist_data_ppi2
                           #, p_title = ""
-                          , violin_quantiles = c(0.5), monochrome = F)
+                          , violin_quantiles = c(0.5)
+                          , dot_transparency = 0.2
+                          , monochrome = F)
   
   distanceP_ppi2
 }
@@ -109,9 +111,9 @@ if (tolower(gene)%in%geneL_na){
   #-------------------
   # Distance data plot: NADist
   #--------------------
-  levels(dist_dataP$param_type)[[3]]
+  levels(dist_dataP$param_type)[[2]]
   #Lig Dist(Å), PPI Dist(Å)
-  dist_data_na = dist_dataP[dist_dataP$param_type%in%c(levels(dist_dataP$param_type)[[3]]),]
+  dist_data_na = dist_dataP[dist_dataP$param_type%in%c(levels(dist_dataP$param_type)[[2]]),]
   dist_data_na$param_type = factor(dist_data_na$param_type)
   table(dist_data_na$param_type)
   levels(dist_data_na$param_type)
@@ -233,9 +235,9 @@ table(lf_mcsm_ligP$param_type)
 mcsmligP = lf_bp2(lf_mcsm_ligP
                   #, p_title = ""
                   , violin_quantiles = c(0.5), monochrome = F
-                  , dot_transparency = 1)
-
+                  , dot_transparency = 0.8)
 
+mcsmligP
 #=================
 # Plot: mmCSM-lig2
 #=================
@@ -248,7 +250,7 @@ table(lf_mmcsm_lig2P$param_type)
 mcsmlig2P = lf_bp2(lf_mmcsm_lig2P
                    #, p_title = ""
                    , violin_quantiles = c(0.5), monochrome = F
-                   , dot_transparency = 1)
+                   , dot_transparency = 0.8)
 
 mcsmlig2P
 
@@ -265,7 +267,7 @@ if (tolower(gene)%in%geneL_ppi2){
   mcsmppi2P = lf_bp2(lf_mcsm_ppi2P
                      #, p_title = ""
                      , violin_quantiles = c(0.5), monochrome = F
-                     , dot_transparency = 1)
+                     , dot_transparency = 0.3)
   
 }
 
@@ -282,7 +284,7 @@ if (tolower(gene)%in%geneL_na){
   mcsmnaP = lf_bp2(lf_mcsm_naP
                    #, p_title = ""
                    , violin_quantiles = c(0.5), monochrome = F
-                   , dot_transparency = 1)
+                   , dot_transparency = 0.4)
   
 }
 
diff --git a/scripts/plotting/plotting_thesis/gid/basic_barplots_layout_gid.R b/scripts/plotting/plotting_thesis/gid/basic_barplots_layout_gid.R
new file mode 100644
index 0000000..bdaa2a8
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/gid/basic_barplots_layout_gid.R
@@ -0,0 +1,271 @@
+duetP
+foldxP
+deepddgP
+dynamut2P
+proveanP
+snap2P
+
+mLigP
+mmLigP
+posC_lig
+ppi2P
+posC_nca
+peP
+sensP
+#========================
+# Common title settings
+#=========================
+theme_georgia <- function(...) {
+  theme_gray(base_family = "sans", ...) + 
+    theme(plot.title = element_text(face = "bold"))
+}
+title_theme <- calc_element("plot.title", theme_georgia())
+
+###############################################################
+common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
+
+# extract common legends
+# lig affinity
+common_legend_outcome = get_legend(mLigP +
+                                     guides(color = guide_legend(nrow = 1)) +
+                                     theme(legend.position = "top"))
+
+# stability
+common_legend_outcome = get_legend(duetP +
+                                     guides(color = guide_legend(nrow = 1)) +
+                                     theme(legend.position = "top"))
+# conservation
+cons_common_legend_outcome = get_legend(snap2P +
+                                          guides(color = guide_legend(nrow = 1)) +
+                                          theme(legend.position = "top"))
+###################################################################
+#==================================
+# Stability+Conservation: COMBINE
+#==================================
+tt_size = 10
+#----------------------------
+# stability and consv title
+#----------------------------
+tt_stab = ggdraw() + 
+  draw_label(
+    paste0("Stability outcome"),
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = tt_size
+  )
+
+tt_cons = ggdraw() + 
+  draw_label(
+    paste0("Conservation outcome"),
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = tt_size
+  )
+
+#----------------------
+# Output plot
+#-----------------------
+stab_cons_CLP =  paste0(outdir_images
+                        ,tolower(gene)
+                        ,"_stab_cons_BP_CLP.png")
+
+print(paste0("plot filename:", stab_cons_CLP))
+png(stab_cons_CLP, units = "in", width = 10, height = 5, res = 300 )
+
+cowplot::plot_grid(
+  cowplot::plot_grid(
+    cowplot::plot_grid(
+      tt_stab,
+      common_legend_outcome,
+      nrow = 2
+    ),
+    cowplot::plot_grid(
+      duetP, 
+      foldxP, 
+      deepddgP, 
+      dynamut2P,
+      nrow = 1,
+      labels = c("A", "B", "C", "D"),
+      label_size = 12),
+    nrow = 2,
+    rel_heights=c(1,10)
+  ),
+  NULL,
+  cowplot::plot_grid(
+    cowplot::plot_grid(
+      cowplot::plot_grid(
+        tt_cons,
+        cons_common_legend_outcome,
+        nrow = 2
+      ),
+      cowplot::plot_grid(
+        proveanP,
+        snap2P,
+        nrow=1, 
+        labels = c("E", "F"),
+        align = "hv"),
+      nrow = 2,
+      rel_heights = c(1, 10),
+      label_size = 12),
+    nrow=1
+  ),
+  rel_widths = c(2,0.15,1),
+  nrow=1
+)
+
+dev.off()
+
+#################################################################
+#=======================================
+# Affinity barplots: COMBINE ALL three
+#========================================
+
+ligT = paste0(common_bp_title, " ligand")
+lig_affT = ggdraw() +
+  draw_label(
+    ligT,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = 8
+  )
+
+p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT
+                                           , common_legend_outcome
+                                           , nrow=2),
+                        cowplot::plot_grid(mLigP, mmLigP, posC_lig
+                                           , nrow = 1
+                                           , rel_widths = c(1,1,1.8)
+                                           , align = "h"),
+                        nrow = 2,
+                        rel_heights = c(1,8)
+                        
+)
+p1
+###########################################################
+ncaT = paste0(common_bp_title, " Dist-NA")
+nca_affT = ggdraw() +
+  draw_label(
+    ncaT,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = 8
+  )
+
+p2 = cowplot::plot_grid(cowplot::plot_grid(nca_affT
+                                           , common_legend_outcome
+                                           , nrow=2),
+                        cowplot::plot_grid(nca_distP, posC_nca
+                                             , nrow = 1
+                                             , rel_widths = c(1.2,1.8)
+                                             , align = "h"),
+                        nrow = 2,
+                        rel_heights = c(1,8)
+)
+p2
+###########################################################
+# PE + All position count
+peT_allT = ggdraw() +
+  draw_label(
+    paste0("All mutation sites"),
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = 8
+  )
+
+p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
+                                           , rel_widths = c(1,3),axis = "lr"),
+                        cowplot::plot_grid(
+                          peP2, posC_all,
+                          nrow = 2,
+                          rel_widths = c(1,1),
+                          align = "v",
+                          axis = "lr",
+                          rel_heights = c(1,8)
+                          ),
+                        rel_heights = c(1,18),
+                        nrow = 2,axis = "lr")
+p3
+#===============
+# Final combine
+#===============
+w = 11.79
+h = 3.5
+mut_impact_CLP =  paste0(outdir_images
+                         ,tolower(gene)
+                         ,"_mut_impactCLP.png")
+
+#svg(affP, width = 20, height = 5.5)
+print(paste0("plot filename:", mut_impact_CLP))
+png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 )
+
+cowplot::plot_grid(p1, p2
+                   #, p3
+                   , nrow = 1
+                   , labels = "AUTO"
+                   , label_size = 12
+                   , rel_widths = c(3,2,2)
+                   #, rel_heights = c(1)
+                   )
+
+dev.off()
+##################################################
+sensP
+consurfP
+#=================
+# Combine sensitivity + ConSurf
+# or ConSurf
+#=================
+w = 3
+h = 3
+# sens_conP =  paste0(outdir_images
+#                          ,tolower(gene)
+#                          ,"_sens_cons_CLP.png")
+# 
+# print(paste0("plot filename:", sens_conP))
+# png(sens_conP, units = "in", width = w, height = h, res = 300 )
+# 
+# cowplot::plot_grid(sensP, consurfP,
+#                    nrow = 2,
+#                    rel_heights = c(1, 1.5)
+#                    )
+# 
+# dev.off()
+
+conCLP =  paste0(outdir_images
+                    ,tolower(gene)
+                    ,"_consurf_BP.png")
+
+print(paste0("plot filename:", conCLP))
+png(conCLP, units = "in", width = w, height = h, res = 300 )
+consurfP
+
+dev.off()
+#================================
+# Sensitivity mutation numbers: geom_tile
+#================================
+sensCLP =  paste0(outdir_images
+                 ,tolower(gene)
+                 ,"_sensN_tile.png")
+
+print(paste0("plot filename:", sensCLP))
+png(sensCLP, units = "in", width = 1, height = 1, res = 300 )
+sensP
+dev.off()
+#================================
+# Sensitivity SITE numbers: geom_tile
+#================================
+sens_siteCLP =  paste0(outdir_images
+                  ,tolower(gene)
+                  ,"_sens_siteC_tile.png")
+
+print(paste0("plot filename:", sens_siteCLP))
+png(sens_siteCLP, units = "in", width = 1, height = 1, res = 300 )
+sens_siteP
+dev.off()
+
+###########################################################
+
diff --git a/scripts/plotting/plotting_thesis/gid/dm_om_plots_gid.R b/scripts/plotting/plotting_thesis/gid/dm_om_plots_gid.R
new file mode 100644
index 0000000..0f57b2a
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/gid/dm_om_plots_gid.R
@@ -0,0 +1,320 @@
+#################
+# Numbers
+##################
+#all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
+all_dm_om_df = dm_om_wf_lf_data(df = df3, gene = gene)
+
+# lf_duet   = all_dm_om_df[['lf_duet']]
+# table(lf_duet$param_type)
+################################################################
+
+#======================
+# Data: Dist+Genomics
+#======================
+lf_dist_genP   = all_dm_om_df[['lf_dist_gen']]
+wf_dist_genP   = all_dm_om_df[['wf_dist_gen']]
+
+levels(lf_dist_genP$param_type)
+#lf_dist_genP$param_type <- factor(lf_dist_genP$param_type, levels=c("Log10(MAF)", "Lig Dist(Å)", "PPI Dist(Å)"))
+table(lf_dist_genP$param_type)
+
+genomics_param = c("Log10(MAF)")
+
+dist_genP = lf_bp2(lf_dist_genP
+               #, p_title
+               , violin_quantiles = c(0.5), monochrome = F)
+dist_genP
+#-------------------
+# Genomics data plot
+#-------------------
+genomics_dataP = lf_dist_genP[lf_dist_genP$param_type%in%genomics_param,]
+genomics_dataP$param_type = factor(genomics_dataP$param_type)
+table(genomics_dataP$param_type)
+
+genomicsP = lf_bp2(genomics_dataP
+                   #, p_title = ""
+                   , violin_quantiles = c(0.5), monochrome = F)
+
+genomicsP
+
+#check
+wilcox.test(wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="R"]
+            , wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
+
+tapply(wf_dist_genP$`Log10(MAF)`, wf_dist_genP$mutation_info_labels, summary)
+
+#-------------------
+# Distance data plot: 
+#--------------------
+# not genomics
+dist_dataP = lf_dist_genP[!lf_dist_genP$param_type%in%genomics_param,]
+dist_dataP$param_type = factor(dist_dataP$param_type)
+table(dist_dataP$param_type)
+levels(dist_dataP$param_type)
+# relevel factor to control ordering of appearance of plot
+dist_dataP$param_type <-relevel(dist_dataP$param_type, "Lig Dist(Å)" )
+table(dist_dataP$param_type)
+levels(dist_dataP$param_type)
+
+distanceP = lf_bp2(dist_dataP
+                   #, p_title = ""
+                   , violin_quantiles = c(0.5), monochrome = F)
+
+distanceP
+
+# check
+wilcox.test(wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
+, wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
+
+wilcox.test(wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
+            , wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
+
+
+wilcox.test(wf_dist_genP$`NA Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
+            , wf_dist_genP$`NA Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
+
+tapply(wf_dist_genP$`PPI Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
+
+tapply(wf_dist_genP$`Lig Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
+
+
+#-------------------
+# Distance data plot: LigDist
+#--------------------
+levels(dist_dataP$param_type)[[1]]
+#Lig Dist(Å), PPI Dist(Å)
+dist_data_lig = dist_dataP[dist_dataP$param_type%in%c(levels(dist_dataP$param_type)[[1]]),]
+dist_data_lig$param_type = factor(dist_data_lig$param_type)
+table(dist_data_lig$param_type)
+levels(dist_data_lig$param_type)
+distanceP_lig = lf_bp2(dist_data_lig
+                   #, p_title = ""
+                   , violin_quantiles = c(0.5), monochrome = F)
+
+distanceP_lig
+
+if (tolower(gene)%in%geneL_ppi2){
+  #-------------------
+  # Distance data plot: LigDist
+  #--------------------
+  levels(dist_dataP$param_type)[[2]]
+  #Lig Dist(Å), PPI Dist(Å)
+  dist_data_ppi2 = dist_dataP[dist_dataP$param_type%in%c(levels(dist_dataP$param_type)[[2]]),]
+  dist_data_ppi2$param_type = factor(dist_data_ppi2$param_type)
+  table(dist_data_ppi2$param_type)
+  levels(dist_data_ppi2$param_type)
+  distanceP_ppi2 = lf_bp2(dist_data_ppi2
+                          #, p_title = ""
+                          , violin_quantiles = c(0.5), monochrome = F)
+  
+  distanceP_ppi2
+}
+
+if (tolower(gene)%in%geneL_na){
+  #-------------------
+  # Distance data plot: NADist
+  #--------------------
+  levels(dist_dataP$param_type)[[2]]
+  #Lig Dist(Å), PPI Dist(Å)
+  dist_data_na = dist_dataP[dist_dataP$param_type%in%c(levels(dist_dataP$param_type)[[2]]),]
+  dist_data_na$param_type = factor(dist_data_na$param_type)
+  table(dist_data_na$param_type)
+  levels(dist_data_na$param_type)
+  distanceP_na = lf_bp2(dist_data_na
+                        #, p_title = ""
+                        , violin_quantiles = c(0.5), monochrome = F)
+  
+  distanceP_na
+}
+#==============
+# Plot:DUET
+#==============
+lf_duetP   = all_dm_om_df[['lf_duet']]
+#lf_duetP  = lf_duet[!lf_duet$param_type%in%c(static_colsP),]
+table(lf_duetP$param_type)
+lf_duetP$param_type = factor(lf_duetP$param_type)
+table(lf_duetP$param_type)
+
+duetP = lf_bp2(lf_duetP
+               #, p_title = ""
+               , violin_quantiles = c(0.5), monochrome = F
+               , dot_transparency = 0.2)
+
+#==============
+# Plot:FoldX
+#==============
+lf_foldxP   = all_dm_om_df[['lf_foldx']]
+#lf_foldxP  = lf_foldx[!lf_foldx$param_type%in%c(static_colsP),]
+table(lf_foldxP$param_type)
+lf_foldxP$param_type = factor(lf_foldxP$param_type)
+table(lf_foldxP$param_type)
+
+foldxP = lf_bp2(lf_foldxP
+                #, p_title = ""
+                , violin_quantiles = c(0.5), monochrome = F
+                , dot_transparency = 0.1)
+
+#==============
+# Plot:DeepDDG
+#==============
+lf_deepddgP   = all_dm_om_df[['lf_deepddg']]
+#lf_deepddgP  = lf_deepddg[!lf_deepddg$param_type%in%c(static_colsP),]
+table(lf_deepddgP$param_type)
+lf_deepddgP$param_type = factor(lf_deepddgP$param_type)
+table(lf_deepddgP$param_type)
+
+deepddgP = lf_bp2(lf_deepddgP
+                  #, p_title = ""
+                  , violin_quantiles = c(0.5), monochrome = F
+                  , dot_transparency = 0.2)
+
+deepddgP
+
+#==============
+# Plot: Dynamut2
+#==============
+lf_dynamut2P   = all_dm_om_df[['lf_dynamut2']]
+#lf_dynamut2P  = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_colsP),]
+table(lf_dynamut2P$param_type)
+lf_dynamut2P$param_type = factor(lf_dynamut2P$param_type)
+table(lf_dynamut2P$param_type)
+
+dynamut2P = lf_bp2(lf_dynamut2P
+                   #, p_title = ""
+                   , violin_quantiles = c(0.5), monochrome = F
+                   , dot_transparency = 0.2)
+
+
+#==============
+# Plot:ConSurf
+#==============
+lf_consurfP   = all_dm_om_df[['lf_consurf']]
+#lf_consurfP  = lf_consurf[!lf_consurf$param_type%in%c(static_colsP),]
+table(lf_consurfP$param_type)
+lf_consurfP$param_type = factor(lf_consurfP$param_type)
+table(lf_consurfP$param_type)
+
+consurfP = lf_bp2(lf_consurfP
+                  #, p_title = ""
+                  , violin_quantiles = c(0.5), monochrome = F)
+
+#==============
+# Plot:PROVEAN
+#==============
+lf_proveanP   = all_dm_om_df[['lf_provean']]
+#lf_proveanP  = lf_provean[!lf_provean$param_type%in%c(static_colsP),]
+table(lf_proveanP$param_type)
+lf_proveanP$param_type = factor(lf_proveanP$param_type)
+table(lf_proveanP$param_type)
+
+proveanP = lf_bp2(lf_proveanP
+                  #, p_title = ""
+                  , violin_quantiles = c(0.5), monochrome = F)
+
+#==============
+# Plot:SNAP2
+#==============
+lf_snap2P  = all_dm_om_df[['lf_snap2']]
+#lf_snap2P  = lf_snap2[!lf_snap2$param_type%in%c(static_colsP),]
+table(lf_snap2P$param_type)
+lf_snap2P$param_type = factor(lf_snap2P$param_type)
+table(lf_snap2P$param_type)
+
+snap2P = lf_bp2(lf_snap2P
+                #, p_title = ""
+                , violin_quantiles = c(0.5), monochrome = F)
+
+
+############################################################################
+#================
+# Plot: mCSM-lig
+#================
+lf_mcsm_ligP   = all_dm_om_df[['lf_mcsm_lig']]
+#lf_mcsm_ligP  = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_colsP),]
+table(lf_mcsm_ligP$param_type)
+lf_mcsm_ligP$param_type = factor(lf_mcsm_ligP$param_type)
+table(lf_mcsm_ligP$param_type)
+
+mcsmligP = lf_bp2(lf_mcsm_ligP
+                  #, p_title = ""
+                  , violin_quantiles = c(0.5), monochrome = F
+                  , dot_transparency = 1)
+
+
+#=================
+# Plot: mmCSM-lig2
+#=================
+lf_mmcsm_lig2P   = all_dm_om_df[['lf_mmcsm_lig2']]
+#lf_mmcsm_lig2P  = lf_mmcsm_lig2P[!lf_mmcsm_lig2P$param_type%in%c(static_colsP),]
+table(lf_mmcsm_lig2P$param_type)
+lf_mmcsm_lig2P$param_type = factor(lf_mmcsm_lig2P$param_type)
+table(lf_mmcsm_lig2P$param_type)
+
+mcsmlig2P = lf_bp2(lf_mmcsm_lig2P
+                   #, p_title = ""
+                   , violin_quantiles = c(0.5), monochrome = F
+                   , dot_transparency = 1)
+
+mcsmlig2P
+
+#================
+# Plot: mCSM-ppi2
+#================
+if (tolower(gene)%in%geneL_ppi2){
+  lf_mcsm_ppi2P   = all_dm_om_df[['lf_mcsm_ppi2']]
+  #lf_mcsm_ppi2P  = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_colsP),]
+  table(lf_mcsm_ppi2P$param_type)
+  lf_mcsm_ppi2P$param_type = factor(lf_mcsm_ppi2P$param_type)
+  table(lf_mcsm_ppi2P$param_type)
+  
+  mcsmppi2P = lf_bp2(lf_mcsm_ppi2P
+                     #, p_title = ""
+                     , violin_quantiles = c(0.5), monochrome = F
+                     , dot_transparency = 1)
+  
+}
+
+#==============
+# Plot: mCSM-NA
+#==============
+if (tolower(gene)%in%geneL_na){
+  lf_mcsm_naP   = all_dm_om_df[['lf_mcsm_na']]
+  #lf_mcsm_naP  = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_colsP),]
+  table(lf_mcsm_naP$param_type)
+  lf_mcsm_naP$param_type = factor(lf_mcsm_naP$param_type)
+  table(lf_mcsm_naP$param_type)
+  
+  mcsmnaP = lf_bp2(lf_mcsm_naP
+                   #, p_title = ""
+                   , violin_quantiles = c(0.5), monochrome = F
+                   , dot_transparency = 1)
+  
+}
+
+######################################
+# Outplot  with stats
+######################################
+# outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
+# 
+# dm_om_combinedP =  paste0(outdir_images
+#                    ,tolower(gene)
+#                    ,"_dm_om_all.svg" )
+# 
+# cat("DM OM plots with stats:", dm_om_combinedP)
+# svg(dm_om_combinedP, width = 32, height = 18)
+# cowplot::plot_grid(
+#   cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P, genomicsP, distanceP
+#                    , nrow=1
+#                    , rel_widths = c(1/7, 1/7,1/7,1/7, 1/7, 1.75/7)),
+#                    #, rel_widths = c(1/8, 1/8,1/8,1/8, 1/8, 2.75/8)), # for 3 distances
+#   cowplot::plot_grid(consurfP, proveanP, snap2P
+#                    , mcsmligP
+#                    , mcsmlig2P
+#                    , mcsmppi2P
+#                    #, mcsmnaP
+#                    , nrow=1),
+#   nrow=2)
+# 
+# dev.off()
+
+
diff --git a/scripts/plotting/plotting_thesis/gid/gg_pairs_all_gid.R b/scripts/plotting/plotting_thesis/gid/gg_pairs_all_gid.R
new file mode 100644
index 0000000..5fc389a
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/gid/gg_pairs_all_gid.R
@@ -0,0 +1,178 @@
+#source("~/git/LSHTM_analysis/config/embb.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+
+my_gg_pairs=function(plot_df, plot_title
+                     , tt_args_size = 2.5
+                     , gp_args_size = 2.5){
+  ggpairs(plot_df,
+          columns = 1:(ncol(plot_df)-1),
+          upper = list(
+            continuous = wrap('cor', # ggally_cor()
+                              method = "spearman",
+                              use = "pairwise.complete.obs",
+                              title="ρ",
+                              digits=2,
+                              justify_labels = "centre",
+                              #title_args=c(colour="black"),
+                              title_args=c(size=tt_args_size),#2.5
+                              group_args=c(size=gp_args_size)#2.5
+            )
+          ),
+          lower = list(
+            continuous = wrap("points",
+                              alpha = 0.7,
+                              size=0.125),
+            combo     = wrap("dot",
+                             alpha = 0.7,
+                             size=0.125)
+          ),
+          aes(colour = factor(ifelse(dst_mode==0,
+                                     "S",
+                                     "R") ),
+              alpha = 0.5),
+          title=plot_title) +
+    
+    scale_colour_manual(values = c("red", "blue")) +
+    scale_fill_manual(values = c("red", "blue")) #+
+  # theme(text = element_text(size=7,
+  #                           face="bold"))
+}
+
+DistCutOff = 10
+###########################################################################
+geneL_normal  = c("pnca")
+geneL_na      = c("gid", "rpob")
+geneL_ppi2    = c("alr", "embb", "katg", "rpob")
+
+merged_df3 = as.data.frame(merged_df3)
+
+corr_plotdf = corr_data_extract(merged_df3
+                                , gene = gene
+                                , drug = drug
+                                , extract_scaled_cols = F)
+
+aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
+static_cols =  c("Log10(MAF)"
+                 , "Log10(OR)"
+)
+############################################################
+#=============================================
+# Creating masked df for affinity data
+#=============================================
+corr_affinity_df = corr_plotdf
+
+#----------------------
+# Mask affinity columns
+#-----------------------
+corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mCSM-lig"]=0
+corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0
+
+if (tolower(gene)%in%geneL_ppi2){
+  corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0
+}
+
+if (tolower(gene)%in%geneL_na){
+  corr_affinity_df[corr_affinity_df["NCA-Dist"]>DistCutOff,"mCSM-NA"]=0
+}
+
+# count 0
+#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100
+unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0) 
+unmasked_vals
+
+##########################################################
+#================
+# Stability
+#================
+corr_ps_colnames = c(static_cols
+                     , "DUET"
+                     , "FoldX"
+                     , "DeepDDG"
+                     , "Dynamut2"
+                     , aff_dist_cols
+                     , "dst_mode")
+
+corr_df_ps = corr_plotdf[, corr_ps_colnames]
+
+# Plot #1
+plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates")
+##########################################################
+#================
+# Conservation
+#================
+corr_conservation_cols = c( static_cols
+                            , "ConSurf"
+                            , "SNAP2"
+                            , "PROVEAN"
+                            #, aff_dist_cols
+                            , "dst_mode"
+)
+
+corr_df_cons = corr_plotdf[, corr_conservation_cols]
+
+# Plot #2
+plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation estimates")
+
+##########################################################
+#================
+# Affinity: lig, ppi and na as applicable
+#================
+#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
+common_aff_colnames = c("mCSM-lig"
+                        , "mmCSM-lig")
+
+if (tolower(gene)%in%geneL_normal){
+  aff_colnames = common_aff_colnames
+}
+if (tolower(gene)%in%geneL_ppi2){
+  aff_colnames = c(common_aff_colnames, "mCSM-PPI2")
+}
+
+if (tolower(gene)%in%geneL_na){
+  aff_colnames = c(common_aff_colnames, "mCSM-NA")
+}
+
+# building ffinal affinity colnames for correlation
+corr_aff_colnames = c(static_cols
+                      , aff_colnames
+                      , "dst_mode") # imp
+
+corr_df_aff = corr_affinity_df[, corr_aff_colnames]
+colnames(corr_df_aff)
+
+# Plot #3
+plot_corr_df_aff = my_gg_pairs(corr_df_aff,
+                               plot_title="Affinity estimates",
+                               tt_args_size = 4,
+                               gp_args_size = 4)
+
+#=============
+# combine
+#=============
+
+#png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
+png(paste0(outdir_images
+           ,tolower(gene)
+           ,"_CorrAB.png"), height = 7, width=11.75, unit="in",res=300)
+
+cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
+                   ggmatrix_gtable(plot_corr_df_cons),
+                   #                  ggmatrix_gtable(plot_corr_df_aff),
+                   #                   nrow=1, ncol=3, rel_heights = 7,7,3
+                   nrow=1,
+                   rel_widths = c(1.5,1),
+                   labels = "AUTO",
+                   label_size = 12)
+dev.off()
+
+# affinity corr
+#png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
+png(paste0(outdir_images
+           ,tolower(gene)
+           ,"_CorrC.png"), height =7, width=7, unit="in",res=300)
+
+cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
+                   labels = "C",
+                   label_size = 12)
+dev.off()
diff --git a/scripts/plotting/plotting_thesis/just.R b/scripts/plotting/plotting_thesis/just.R
new file mode 100644
index 0000000..3524c08
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/just.R
@@ -0,0 +1,38 @@
+bar =  merged_df3[, c("mutationinformation", "position", "pos_count", "df2_pos_count_all")]
+
+foo = merged_df3[, c("mutationinformation", "position"
+                     , "ligand_distance"
+                     , "interface_dist"
+                     , "or_mychisq"
+                     , "maf")]
+
+foo[foo$position==445,]
+
+foo1 = na.omit(foo)
+foo2 = foo1[order(-foo1$or_mychisq),]
+head(foo1, 10)
+head(foo2, 20)
+
+foo2[foo2$position==497,]
+
+foo2[foo2$position==328,]
+foo2[foo2$position==347,]
+
+ggplot(foo, aes(x= position), reorder(x)) + geom_bar()
+foo2[foo2$position%in%c(405, 406, 407, 408,409),]
+
+dr = seq(319,334, 1)
+dr = seq(303, 306, 1)
+
+foo2[foo2$position%in%dr,]
+foo2[foo2$position%in%aa_pos_dsl,]
+foo2[foo2$position%in%aa_pos_ca,]
+foo2[foo2$position%in%c(644,645, 690),]
+# maf
+
+foo3 = foo[order(-foo$maf),]
+head(foo3)
+E378A, M306V, M306I, G406A, R24P, Q497R
+
+foo3_nona = foo1[order(-foo1$maf),]
+head(foo3_nona)
diff --git a/scripts/plotting/plotting_thesis/katg/basic_barplots_katg.R b/scripts/plotting/plotting_thesis/katg/basic_barplots_katg.R
new file mode 100644
index 0000000..50001a5
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/katg/basic_barplots_katg.R
@@ -0,0 +1,364 @@
+#!/usr/bin/env Rscript   
+#########################################################
+# TASK: Barplots
+# basic barplots with outcome
+# basic barplots with frequency of count of mutations
+#########################################################
+#=============
+# Data: Input
+#==============
+#source("~/git/LSHTM_analysis/config/katg.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+
+#cat("\nSourced plotting cols as well:", length(plotting_cols))
+
+####################################################
+class(merged_df3)
+
+df3 = subset(merged_df3, select = -c(pos_count))
+
+#=======
+# output
+#=======
+outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
+cat("plots will output to:", outdir_images)
+
+##########################################################
+# blue, red bp
+sts = 8
+lts = 8
+ats = 8
+als = 8
+ltis = 8
+geom_ls = 2.2
+
+#pos_count
+subtitle_size = 8
+geom_ls_pc = 2.2
+leg_text_size = 8
+axis_text_size = 8
+axis_label_size = 8
+
+###########################################################
+#------------------------------
+# plot default sizes
+#------------------------------
+#=========================
+# Affinity outcome
+# check this var: outcome_cols_affinity
+# get from preformatting or put in globals
+#==========================
+DistCutOff
+LigDist_colname  # = "ligand_distance" # from globals 
+ppi2Dist_colname
+naDist_colname
+
+###########################################################
+# get plotting data within the distance
+df3_lig  = df3[df3[[LigDist_colname]]<DistCutOff,]
+df3_ppi2 = df3[df3[[ppi2Dist_colname]]<DistCutOff,]
+df3_na   = df3[df3[[naDist_colname]]<DistCutOff,]
+common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
+
+#------------------------------
+# barplot for ligand affinity:
+# <10 Ang of ligand
+#------------------------------
+mLigP = stability_count_bp(plotdf = df3_lig
+               , df_colname = "ligand_outcome"
+               #, leg_title  = "mCSM-lig"
+               #, bp_plot_title = paste(common_bp_title, "ligand")
+               , yaxis_title = "Number of nsSNPs"
+               , leg_position = "none"
+               , subtitle_text = "mCSM\nLig"
+               , bar_fill_values = c("#F8766D", "#00BFC4")
+               , subtitle_colour= "black"
+               , sts = sts
+               , lts = lts
+               , ats = ats
+               , als = als
+               , ltis = ltis
+               , geom_ls = geom_ls
+               )
+mLigP
+#------------------------------
+# barplot for ligand affinity:
+# <10 Ang of ligand
+# mmCSM-lig: will be the same no. of sites but the effect will be different
+#------------------------------
+mmLigP = stability_count_bp(plotdf = df3_lig
+                   , df_colname = "mmcsm_lig_outcome"
+                   #, leg_title  = "mmCSM-lig"
+                   #, label_categories = labels_mmlig
+                   #, bp_plot_title = paste(common_bp_title, "ligand")
+                   
+                   , yaxis_title = ""
+                   , leg_position = "none"
+                   , subtitle_text = "mmCSM\nLig"
+                   , bar_fill_values = c("#F8766D", "#00BFC4")
+                   , subtitle_colour= "black"
+                   , sts = sts
+                   , lts = lts
+                   , ats = ats
+                   , als = als
+                   , ltis = ltis
+                   , geom_ls = geom_ls
+                   )
+mmLigP
+#------------------------------
+# barplot for ppi2 affinity
+#  <10 Ang of interface
+#------------------------------
+if (tolower(gene)%in%geneL_ppi2){
+    ppi2P = stability_count_bp(plotdf = df3_ppi2
+                     , df_colname = "mcsm_ppi2_outcome"
+                     #, leg_title  = "mCSM-ppi2"
+                     #, label_categories = labels_ppi2
+                     #, bp_plot_title = paste(common_bp_title, "PP-interface")
+                     
+                     , yaxis_title = "Number of nsSNPs"
+                     , leg_position = "none"
+                     , subtitle_text = "mCSM\nPPI2"
+                     , bar_fill_values = c("#F8766D", "#00BFC4")
+                     , subtitle_colour= "black"
+                     , sts = sts
+                     , lts = lts
+                     , ats = ats
+                     , als = als
+                     , ltis = ltis
+                     , geom_ls = geom_ls
+                     )
+  ppi2P
+}
+#----------------------------
+# barplot for ppi2 affinity
+#  <10 Ang of interface
+#------------------------------
+if (tolower(gene)%in%geneL_na){
+  nca_distP = stability_count_bp(plotdf = df3_na
+                             , df_colname = "mcsm_na_outcome"
+                             #, leg_title  = "mCSM-NA"
+                             #, label_categories = 
+                             #, bp_plot_title = paste(common_bp_title, "Dist to NA")
+                             
+                             , yaxis_title = "Number of nsSNPs"
+                             , leg_position = "none"
+                             , subtitle_text = "mCSM\nNA"
+                             , bar_fill_values = c("#F8766D", "#00BFC4")
+                             , subtitle_colour= "black"
+                             , sts = sts
+                             , lts = lts
+                             , ats = ats
+                             , als = als
+                             , ltis = ltis
+                             , geom_ls = geom_ls
+  )
+  nca_distP
+}
+
+#####################################################################
+# ------------------------------
+# bp site site count: mCSM-lig
+# < 10 Ang ligand
+# ------------------------------
+common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
+
+posC_lig = site_snp_count_bp(plotdf = df3_lig
+                  , df_colname = "position"
+                  , xaxis_title = "Number of nsSNPs"
+                  , yaxis_title = "Number of Sites"
+                  , subtitle_colour = "chocolate4"
+                  , subtitle_text = ""
+                  , subtitle_size = subtitle_size
+                  , geom_ls = geom_ls_pc
+                  , leg_text_size = leg_text_size
+                  , axis_text_size = axis_text_size
+                  , axis_label_size = axis_label_size)
+
+posC_lig
+#------------------------------
+# bp site site count: ppi2
+# < 10 Ang interface
+#------------------------------
+if (tolower(gene)%in%geneL_ppi2){
+  posC_ppi2 = site_snp_count_bp(plotdf = df3_ppi2
+                    , df_colname = "position"
+                    , xaxis_title = "Number of nsSNPs"
+                    , yaxis_title = "Number of Sites"
+                    , subtitle_colour = "chocolate4"
+                    , subtitle_text = ""
+                    , subtitle_size = subtitle_size
+                    , geom_ls = geom_ls_pc
+                    , leg_text_size = leg_text_size
+                    , axis_text_size = axis_text_size
+                    , axis_label_size = axis_label_size)
+  posC_ppi2
+}
+
+#------------------------------
+# bp site site count: NCA dist
+# < 10 Ang nca
+#------------------------------
+if (tolower(gene)%in%geneL_na){
+  posC_nca = site_snp_count_bp(plotdf = df3_na
+                                , df_colname = "position"
+                                , xaxis_title = "Number of nsSNPs"
+                                , yaxis_title = "Number of Sites"
+                                , subtitle_colour = "chocolate4"
+                                , subtitle_text = ""
+                               , subtitle_size = subtitle_size
+                               , geom_ls = geom_ls_pc
+                               , leg_text_size = leg_text_size
+                               , axis_text_size = axis_text_size
+                               , axis_label_size = axis_label_size)
+  posC_nca
+}
+#===============================================================
+#------------------------------
+# bp site site count: ALL
+# <10 Ang ligand
+#------------------------------
+posC_all = site_snp_count_bp(plotdf = df3
+                             , df_colname = "position"
+                             , xaxis_title = "Number of nsSNPs"
+                             , yaxis_title = "Number of Sites"
+                             , subtitle_colour = "chocolate4"
+                             , subtitle_text = "All mutations sites"
+                             , subtitle_size = subtitle_size
+                             , geom_ls = geom_ls_pc
+                             , leg_text_size = leg_text_size
+                             , axis_text_size = axis_text_size
+                             , axis_label_size = axis_label_size)
+posC_all
+##################################################################
+consurfP = stability_count_bp(plotdf = df3
+                              , df_colname = "consurf_outcome"
+                              #, leg_title = "ConSurf"
+                              #, label_categories = labels_consurf
+                              , yaxis_title = "Number of nsSNPs"
+                              , leg_position = "top"
+                              , subtitle_text = "ConSurf"
+                              , bar_fill_values = consurf_colours # from globals
+                              , subtitle_colour= "black"
+                              , sts = sts
+                              , lts = lts
+                              , ats = ats
+                              , als = als
+                              , ltis = ltis
+                              , geom_ls = geom_ls)
+
+consurfP
+
+##############################################################
+sts_so = 10
+lts_so = 10
+ats_so = 10
+als_so = 10
+ltis_so = 10
+geom_ls_so = 2.5
+#===================
+# Stability
+#===================
+# duetP
+duetP = stability_count_bp(plotdf = df3
+                           , df_colname = "duet_outcome"
+                           , leg_title = "mCSM-DUET"
+                           #, label_categories = labels_duet
+                           , yaxis_title = "Number of nsSNPs"
+                           , leg_position = "none"
+                           , subtitle_text = "mCSM-DUET"
+                           , bar_fill_values = c("#F8766D", "#00BFC4")
+                           , subtitle_colour= "black"
+                           , sts = sts_so
+                           , lts = lts_so
+                           , ats = ats_so
+                           , als = als_so
+                           , ltis = ltis_so
+                           , geom_ls = geom_ls_so)
+duetP
+
+# foldx
+foldxP = stability_count_bp(plotdf = df3
+                            , df_colname = "foldx_outcome"
+                            #, leg_title = "FoldX"
+                            #, label_categories = labels_foldx
+                            , yaxis_title = ""
+                            , leg_position = "none"
+                            , subtitle_text = "FoldX"
+                            , bar_fill_values = c("#F8766D", "#00BFC4")
+                            , sts = sts_so
+                            , lts = lts_so
+                            , ats = ats_so
+                            , als = als_so
+                            , ltis = ltis_so
+                            , geom_ls = geom_ls_so)
+foldxP
+
+# deepddg
+deepddgP = stability_count_bp(plotdf = df3
+                              , df_colname = "deepddg_outcome"
+                              #, leg_title = "DeepDDG"
+                              #, label_categories = labels_deepddg
+                              , yaxis_title = ""
+                              , leg_position = "none"
+                              , subtitle_text = "DeepDDG"
+                              , bar_fill_values = c("#F8766D", "#00BFC4")
+                              , sts = sts_so
+                              , lts = lts_so
+                              , ats = ats_so
+                              , als = als_so
+                              , ltis = ltis_so
+                              , geom_ls = geom_ls_so)
+deepddgP
+
+# deepddg
+dynamut2P = stability_count_bp(plotdf = df3
+                               , df_colname = "ddg_dynamut2_outcome"
+                               #, leg_title = "Dynamut2"
+                               #, label_categories = labels_ddg_dynamut2_outcome
+                               , yaxis_title = ""
+                               , leg_position = "none"
+                               , subtitle_text = "Dynamut2"
+                               , bar_fill_values = c("#F8766D", "#00BFC4")
+                               , sts = sts_so
+                               , lts = lts_so
+                               , ats = ats_so
+                               , als = als_so
+                               , ltis = ltis_so
+                               , geom_ls = geom_ls_so)
+dynamut2P
+
+# provean
+proveanP = stability_count_bp(plotdf = df3
+                              , df_colname = "provean_outcome"
+                              #, leg_title = "PROVEAN"
+                              #, label_categories = labels_provean
+                              , yaxis_title = "Number of nsSNPs"
+                              , leg_position = "none" # top
+                              , subtitle_text = "PROVEAN"
+                              , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
+                              , sts = sts_so
+                              , lts = lts_so
+                              , ats = ats_so
+                              , als = als_so
+                              , ltis = ltis_so
+                              , geom_ls = geom_ls_so)
+proveanP
+
+# snap2
+snap2P = stability_count_bp(plotdf = df3
+                            , df_colname = "snap2_outcome"
+                            #, leg_title = "SNAP2"
+                            #, label_categories = labels_snap2
+                            , yaxis_title = ""
+                            , leg_position = "none" # top
+                            , subtitle_text = "SNAP2"
+                            , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
+                            , sts = sts_so
+                            , lts = lts_so
+                            , ats = ats_so
+                            , als = als_so
+                            , ltis = ltis_so
+                            , geom_ls = geom_ls_so)
+snap2P
+#####################################################################################
diff --git a/scripts/plotting/plotting_thesis/katg/basic_barplots_layput_katg.R b/scripts/plotting/plotting_thesis/katg/basic_barplots_layput_katg.R
new file mode 100644
index 0000000..6ea7d0e
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/katg/basic_barplots_layput_katg.R
@@ -0,0 +1,309 @@
+#=============
+# Data: Input
+#==============
+#source("~/git/LSHTM_analysis/config/katg.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/katg/basic_barplots_katg.R")
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/katg/pe_sens_site_count_katg.R")
+
+if ( tolower(gene)%in%c("katg") ){
+  cat("\nPlots available for layout are:")
+  
+  duetP
+  foldxP
+  deepddgP
+  dynamut2P
+  proveanP
+  snap2P
+  
+  mLigP
+  mmLigP
+  posC_lig
+  
+  ppi2P
+  posC_ppi2
+  
+  peP2
+  sens_siteP
+  peP   # not used
+  sensP # not used
+}
+
+
+#========================
+# Common title settings
+#=========================
+theme_georgia <- function(...) {
+  theme_gray(base_family = "sans", ...) + 
+    theme(plot.title = element_text(face = "bold"))
+}
+title_theme <- calc_element("plot.title", theme_georgia())
+
+###############################################################
+common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
+
+# extract common legends
+# lig affinity
+common_legend_outcome = get_legend(mLigP +
+                                     guides(color = guide_legend(nrow = 1)) +
+                                     theme(legend.position = "top"))
+
+# stability
+common_legend_outcome = get_legend(duetP +
+                                     guides(color = guide_legend(nrow = 1)) +
+                                     theme(legend.position = "top"))
+# conservation
+cons_common_legend_outcome = get_legend(snap2P +
+                                          guides(color = guide_legend(nrow = 1)) +
+                                          theme(legend.position = "top"))
+###################################################################
+#==================================
+# Stability+Conservation: COMBINE
+#==================================
+tt_size = 10
+#----------------------------
+# stability and consv title
+#----------------------------
+tt_stab = ggdraw() + 
+  draw_label(
+    paste0("Stability outcome"),
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = tt_size
+  )
+
+tt_cons = ggdraw() + 
+  draw_label(
+    paste0("Conservation outcome"),
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = tt_size
+  )
+
+#----------------------
+# Output plot
+#-----------------------
+stab_cons_CLP =  paste0(outdir_images
+                        ,tolower(gene)
+                        ,"_stab_cons_BP_CLP.png")
+
+print(paste0("plot filename:", stab_cons_CLP))
+png(stab_cons_CLP, units = "in", width = 10, height = 5, res = 300 )
+
+cowplot::plot_grid(
+  cowplot::plot_grid(
+    cowplot::plot_grid(
+      tt_stab,
+      common_legend_outcome,
+      nrow = 2
+    ),
+    cowplot::plot_grid(
+      duetP, 
+      foldxP, 
+      deepddgP, 
+      dynamut2P,
+      nrow = 1,
+      labels = c("A", "B", "C", "D"),
+      label_size = 12),
+    nrow = 2,
+    rel_heights=c(1,10)
+  ),
+  NULL,
+  cowplot::plot_grid(
+    cowplot::plot_grid(
+      cowplot::plot_grid(
+        tt_cons,
+        cons_common_legend_outcome,
+        nrow = 2
+      ),
+      cowplot::plot_grid(
+        proveanP,
+        snap2P,
+        nrow=1, 
+        labels = c("E", "F"),
+        align = "hv"),
+      nrow = 2,
+      rel_heights = c(1, 10),
+      label_size = 12),
+    nrow=1
+  ),
+  rel_widths = c(2,0.15,1),
+  nrow=1
+)
+
+dev.off()
+
+#################################################################
+#=======================================
+# Affinity barplots: COMBINE ALL four
+#========================================
+ligT = paste0(common_bp_title, " ligand")
+lig_affT = ggdraw() +
+  draw_label(
+    ligT,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = 8
+  )
+
+p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT
+                                           , common_legend_outcome
+                                           , nrow=2),
+                        cowplot::plot_grid(mLigP, mmLigP, posC_lig
+                                           , nrow = 1
+                                           , rel_widths = c(1,0.65,1.8)
+                                           , align = "h"),
+                        nrow = 2,
+                        rel_heights = c(1,8)
+                        
+)
+p1
+
+###########################################################
+ppi2T = paste0(common_bp_title, " PP-interface")
+ppi2_affT = ggdraw() +
+  draw_label(
+    ppi2T,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = 8
+  )
+
+p3 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nrow=2),
+                        cowplot::plot_grid(ppi2P, posC_ppi2
+                                           , nrow = 1
+                                           , rel_widths = c(1,1.9)
+                                           , align = "h"),
+                        nrow = 2,
+                        rel_heights = c(1,8)
+)
+p3
+
+# PE + All position count
+peT_allT = ggdraw() +
+  draw_label(
+    paste0("All mutation sites"),
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = 8
+  )
+
+p4 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
+                                           , rel_widths = c(1,3),axis = "lr"),
+                        cowplot::plot_grid(
+                          peP2, posC_all,
+                          nrow = 2,
+                          rel_widths = c(1,1),
+                          align = "v",
+                          axis = "lr",
+                          rel_heights = c(1,8)
+                        ),
+                        rel_heights = c(1,18),
+                        nrow = 2,axis = "lr")
+p4
+
+
+#### Combine p1+p3+p4 ####
+w = 11.79
+h = 3.5
+mut_impact_CLP =  paste0(outdir_images
+                         ,tolower(gene)
+                         ,"_mut_impactCLP.png")
+
+#svg(affP, width = 20, height = 5.5)
+print(paste0("plot filename:", mut_impact_CLP))
+png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 )
+
+cowplot::plot_grid(p1,
+                   p3,
+                   p4,
+                   nrow = 1,
+                   labels = "AUTO",
+                   label_size = 12,
+                   rel_widths = c(2.5,2,2)
+                   #, rel_heights = c(1)
+)
+
+dev.off()
+w = 11.79
+h = 3.5
+mut_impact_CLP =  paste0(outdir_images
+                         ,tolower(gene)
+                         ,"_mut_impactCLP.png")
+
+#svg(affP, width = 20, height = 5.5)
+print(paste0("plot filename:", mut_impact_CLP))
+png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 )
+
+cowplot::plot_grid(p1,
+                   p3,
+                   p4,
+                   nrow = 1,
+                   labels = "AUTO",
+                   label_size = 12,
+                   rel_widths = c(2.5,2,2)
+                   #, rel_heights = c(1)
+)
+
+dev.off()
+
+##################################################
+sensP
+consurfP
+#=================
+#### Combine sensitivity + ConSurf ####
+# or ConSurf
+#=================
+w = 3
+h = 3
+# sens_conP =  paste0(outdir_images
+#                          ,tolower(gene)
+#                          ,"_sens_cons_CLP.png")
+# 
+# print(paste0("plot filename:", sens_conP))
+# png(sens_conP, units = "in", width = w, height = h, res = 300 )
+# 
+# cowplot::plot_grid(sensP, consurfP,
+#                    nrow = 2,
+#                    rel_heights = c(1, 1.5)
+#                    )
+# 
+# dev.off()
+
+conCLP =  paste0(outdir_images
+                    ,tolower(gene)
+                    ,"_consurf_BP.png")
+
+print(paste0("plot filename:", conCLP))
+png(conCLP, units = "in", width = w, height = h, res = 300 )
+consurfP
+
+dev.off()
+#================================
+# Sensitivity mutation numbers: geom_tile
+#================================
+sensCLP =  paste0(outdir_images
+                 ,tolower(gene)
+                 ,"_sensN_tile.png")
+
+print(paste0("plot filename:", sensCLP))
+png(sensCLP, units = "in", width = 1, height = 1, res = 300 )
+sensP
+dev.off()
+#================================
+# Sensitivity SITE numbers: geom_tile
+#================================
+sens_siteCLP =  paste0(outdir_images
+                  ,tolower(gene)
+                  ,"_sens_siteC_tile.png")
+
+print(paste0("plot filename:", sens_siteCLP))
+png(sens_siteCLP, units = "in", width = 1.2, height = 1, res = 300 )
+sens_siteP
+dev.off()
+
+###########################################################
+
diff --git a/scripts/plotting/plotting_thesis/katg/dm_om_plots_layout_katg.R b/scripts/plotting/plotting_thesis/katg/dm_om_plots_layout_katg.R
new file mode 100644
index 0000000..dd9c554
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/katg/dm_om_plots_layout_katg.R
@@ -0,0 +1,179 @@
+# source dm_om_plots.R
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/dm_om_plots.R")
+
+##### plots to combine ####
+duetP
+foldxP
+deepddgP
+dynamut2P
+genomicsP
+consurfP
+proveanP
+snap2P
+mcsmligP
+mcsmlig2P
+mcsmppi2P
+mcsmnaP
+
+# Plot labels
+tit1 = "Stability changes"
+tit2 = "Genomic measure"
+tit3 = "Distance to partners"
+tit4 = "Evolutionary Conservation"
+tit5 = "Affinity changes"
+pt_size = 30
+
+theme_georgia <- function(...) {
+  theme_gray(base_family = "sans", ...) + 
+    theme(plot.title = element_text(face = "bold"))
+}
+
+
+title_theme <- calc_element("plot.title", theme_georgia())
+
+pt1 = ggdraw() + 
+  draw_label(
+    tit1,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = pt_size
+  )
+
+pt2 = ggdraw() + 
+  draw_label(
+    tit2,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = pt_size
+  )
+
+pt3 = ggdraw() + 
+  draw_label(
+    tit3,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = pt_size
+  )
+
+pt4 = ggdraw() + 
+  draw_label(
+    tit4,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = pt_size
+  )
+
+
+pt5 = ggdraw() + 
+  draw_label(
+    tit5,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = pt_size
+  )
+
+#======================
+# Output plot function
+#======================
+OutPlot_dm_om = function(x){
+  
+  # dist b/w plot title and plot
+  relH_tp = c(0.08, 0.92)
+  
+  my_label_size = 25
+  #----------------
+  # Top panel
+  #----------------
+  top_panel = cowplot::plot_grid(
+    cowplot::plot_grid(pt1, 
+                       cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P
+                                          , nrow = 1
+                                          , labels = c("A", "B", "C", "D")
+                                          , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights = relH_tp
+    ),
+    NULL,
+    cowplot::plot_grid(pt2, 
+                       cowplot::plot_grid(genomicsP
+                                          , nrow = 1
+                                          , labels = c("E")
+                                          , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights = relH_tp
+    ),
+    NULL,
+    cowplot::plot_grid(pt3, 
+                       cowplot::plot_grid( #distanceP
+                         distanceP_lig
+                         #, distanceP_ppi2
+                         , distanceP_na
+                         , nrow = 1
+                         , labels = c("F", "G")
+                         , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights = relH_tp
+    ),
+    nrow = 1,
+    rel_widths = c(2/7, 0.1/7, 0.5/7, 0.1/7,  1/7)
+  )
+  
+  #----------------
+  # Bottom panel
+  #----------------
+  bottom_panel = cowplot::plot_grid(
+    cowplot::plot_grid(pt4, 
+                       cowplot::plot_grid(consurfP, proveanP, snap2P
+                                          , nrow = 1
+                                          , labels = c("H", "I", "J")
+                                          , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights =relH_tp
+    ),NULL,
+    cowplot::plot_grid(pt5, 
+                       cowplot::plot_grid(mcsmligP
+                                          , mcsmlig2P
+                                          , mcsmppi2P
+                                          , mcsmnaP
+                                          , nrow = 1
+                                          , labels = c("K", "L", "M")
+                                          , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights = relH_tp
+    ),NULL,
+    nrow = 1, 
+    rel_widths = c(3/6,0.1/6,3/6, 0.1/6 )
+  )
+  
+  #-------------------------------
+  # combine: Top and Bottom panel
+  #-------------------------------
+  cowplot::plot_grid (top_panel, bottom_panel
+                      , nrow =2
+                      , rel_widths = c(1, 1)
+                      , align = "hv")
+}
+
+#=====================
+# OutPlot: svg and png
+#======================
+dm_om_combinedP =  paste0(outdir_images
+                          ,tolower(gene)
+                          ,"_dm_om_all.svg")
+
+cat("DM OM plots with stats:", dm_om_combinedP)
+svg(dm_om_combinedP, width = 32, height = 18)
+
+OutPlot_dm_om()
+dev.off()
+
+
+dm_om_combinedP_png =  paste0(outdir_images
+                              ,tolower(gene)
+                              ,"_dm_om_all.png")
+cat("DM OM plots with stats:", dm_om_combinedP_png)
+png(dm_om_combinedP_png, width = 32, height = 18, units = "in", res = 300)
+
+OutPlot_dm_om()
+dev.off()
diff --git a/scripts/plotting/plotting_thesis/katg/gg_pairs_all_katg.R b/scripts/plotting/plotting_thesis/katg/gg_pairs_all_katg.R
new file mode 100644
index 0000000..2bc7b89
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/katg/gg_pairs_all_katg.R
@@ -0,0 +1,203 @@
+#source("~/git/LSHTM_analysis/config/embb.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+
+my_gg_pairs=function(plot_df, plot_title
+                     , tt_args_size = 2.5
+                     , gp_args_size = 2.5){
+  ggpairs(plot_df,
+          columns = 1:(ncol(plot_df)-1),
+          upper = list(
+            continuous = wrap('cor', # ggally_cor()
+                              method = "spearman",
+                              use = "pairwise.complete.obs",
+                              title="ρ",
+                              digits=2,
+                              justify_labels = "centre",
+                              title_args=list(size=tt_args_size, colour="black"),#2.5
+                              group_args=list(size=gp_args_size)#2.5
+            )
+          ),
+          lower = list(
+            continuous = wrap("points",
+                              alpha = 0.7,
+                              size=0.125),
+            combo     = wrap("dot",
+                             alpha = 0.7,
+                             size=0.125)
+          ),
+          aes(colour = factor(ifelse(dst_mode==0,
+                                     "S",
+                                     "R") ),
+              alpha = 0.5),
+          title=plot_title) +
+    
+    scale_colour_manual(values = c("red", "blue")) +
+    scale_fill_manual(values = c("red", "blue")) #+
+  # theme(text = element_text(size=7,
+  #                           face="bold"))
+}
+
+DistCutOff = 10
+###########################################################################
+geneL_normal  = c("pnca")
+geneL_na      = c("gid", "rpob")
+geneL_ppi2    = c("alr", "embb", "katg", "rpob")
+
+merged_df3 = as.data.frame(merged_df3)
+
+corr_plotdf = corr_data_extract(merged_df3
+                                , gene = gene
+                                , drug = drug
+                                , extract_scaled_cols = F)
+
+aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
+static_cols =  c("Log10(MAF)"
+                 , "Log10(OR)"
+)
+############################################################
+#=============================================
+# Creating masked df for affinity data
+#=============================================
+corr_affinity_df = corr_plotdf
+
+#----------------------
+# Mask affinity columns
+#-----------------------
+corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mCSM-lig"]=0
+corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0
+
+if (tolower(gene)%in%geneL_ppi2){
+  corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0
+}
+
+if (tolower(gene)%in%geneL_na){
+  corr_affinity_df[corr_affinity_df["NCA-Dist"]>DistCutOff,"mCSM-NA"]=0
+}
+
+# count 0
+#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100
+unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0) 
+unmasked_vals
+
+##########################################################
+#================
+# Stability
+#================
+corr_ps_colnames = c(static_cols
+                     , "DUET"
+                     , "FoldX"
+                     , "DeepDDG"
+                     , "Dynamut2"
+                     , aff_dist_cols
+                     , "dst_mode")
+
+corr_df_ps = corr_plotdf[, corr_ps_colnames]
+
+# Plot #1
+plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates")
+
+##########################################################
+#================
+# Conservation
+#================
+corr_conservation_cols = c( static_cols
+                            , "ConSurf"
+                            , "SNAP2"
+                            , "PROVEAN"
+                            #, aff_dist_cols
+                            , "dst_mode"
+)
+
+corr_df_cons = corr_plotdf[, corr_conservation_cols]
+
+# Plot #2
+plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation estimates")
+
+##########################################################
+#================
+# Affinity: lig, ppi and na as applicable
+#================
+#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
+common_aff_colnames = c("mCSM-lig"
+                        , "mmCSM-lig")
+
+if (tolower(gene)%in%geneL_normal){
+  aff_colnames = common_aff_colnames
+}
+if (tolower(gene)%in%geneL_ppi2){
+  aff_colnames = c(common_aff_colnames, "mCSM-PPI2")
+}
+
+if (tolower(gene)%in%geneL_na){
+  aff_colnames = c(common_aff_colnames, "mCSM-NA")
+}
+
+# building ffinal affinity colnames for correlation
+corr_aff_colnames = c(static_cols
+                      , aff_colnames
+                      , "dst_mode") # imp
+
+corr_df_aff = corr_affinity_df[, corr_aff_colnames]
+colnames(corr_df_aff)
+
+# Plot #3
+plot_corr_df_aff = my_gg_pairs(corr_df_aff
+                               , plot_title="Affinity estimates"
+                               #, tt_args_size = 4
+                               #, gp_args_size = 4
+                               )
+
+#### Combine plots #####
+# #png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
+# png(paste0(outdir_images
+#            ,tolower(gene)
+#            ,"_CorrAB.png"), height = 6, width=11.75, unit="in",res=300)
+# 
+# cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
+#                    ggmatrix_gtable(plot_corr_df_cons),
+#                    #                  ggmatrix_gtable(plot_corr_df_aff),
+#                    #                   nrow=1, ncol=3, rel_heights = 7,7,3
+#                    nrow=1,
+#                    #rel_heights = 1,1
+#                    labels = "AUTO",
+#                    label_size = 12)
+# dev.off()
+# 
+# # affinity corr
+# #png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
+# png(paste0(outdir_images
+#            ,tolower(gene)
+#            ,"_CorrC.png"), height =7, width=7, unit="in",res=300)
+# 
+# cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
+#                    labels = "C",
+#                    label_size = 12)
+# dev.off()
+
+#### Combine A ####
+png(paste0(outdir_images
+           ,tolower(gene)
+           ,"_CorrA.png"), height =8, width=8, unit="in",res=300)
+
+cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
+                   labels = "A",
+                   label_size = 12)
+dev.off()
+
+#### Combine B+C ####
+# B + C
+png(paste0(outdir_images
+           ,tolower(gene)
+           ,"_CorrBC.png"), height = 6, width=11.75, unit="in",res=300)
+
+cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_cons),
+                   ggmatrix_gtable(plot_corr_df_aff),
+                   #                  ggmatrix_gtable(plot_corr_df_aff),
+                   #                   nrow=1, ncol=3, rel_heights = 7,7,3
+                   nrow=1,
+                   #rel_heights = 1,1
+                   labels = c("B", "C"),
+                   label_size = 12)
+dev.off()
+
diff --git a/scripts/plotting/plotting_thesis/katg/pe_sens_site_count_katg.R b/scripts/plotting/plotting_thesis/katg/pe_sens_site_count_katg.R
new file mode 100644
index 0000000..5757108
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/katg/pe_sens_site_count_katg.R
@@ -0,0 +1,179 @@
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/katg/prominent_effects_katg.R")
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/katg/sensitivity_count_katg.R")
+
+##############################################################
+#                         PE count
+#lig-- na--ppi2--stab
+# pe_colour_map = c("DD_lig"         = "#ffd700"   # gold
+#                   , "SS_lig"         = "#f0e68c"  # khaki
+#                   
+#                   , "DD_nucleic_acid"= "#a0522d"  # sienna
+#                   , "SS_nucleic_acid"= "#d2b48c"  # tan
+#                   
+#                   , "DD_ppi2"        = "#da70d6"  # orchid
+#                   , "SS_ppi2"        = "#ff1493"  # deeppink
+#                   
+#                   , "DD_stability"   = "#f8766d"  # red
+#                   , "SS_stability"   = "#00BFC4") # blue
+table(str_df_plot_cols$pe_effect_outcome)
+##############################################################
+#===========
+#PE count
+#===========
+rects <- data.frame(x=1:6, 
+                     colors = c("#ffd700" ,
+                                "#f0e68c" ,
+                                
+                                #"#a0522d" ,
+                                #"#d2b48c" ,
+                                
+                                "#da70d6" ,
+                                "#ff1493" ,
+                                
+                                "#f8766d" ,
+                                "#00BFC4") 
+                     )
+
+rects$text =  c("-ve Lig"
+                , "+ve Lig"
+                
+                #, "-ve\nNuc.Acid"
+                #, "+ve\nNuc.Acid"
+                
+                , "-ve PPI2"
+                , "+ve PPI2"
+
+                , "-ve stability"
+                , "+ve stability"
+)
+
+cell1 = table(str_df_plot_cols$pe_effect_outcome)[["DD_lig"]]
+cell2 = 0
+
+#cell3 = table(str_df_plot_cols$pe_effect_outcome)[["DD_nucleic_acid"]]
+#cell4 = table(str_df_plot_cols$pe_effect_outcome)[["SS_nucleic_acid"]]
+
+cell5 = table(str_df_plot_cols$pe_effect_outcome)[["DD_ppi2"]]
+cell6 = table(str_df_plot_cols$pe_effect_outcome)[["SS_ppi2"]]
+
+cell7 = table(str_df_plot_cols$pe_effect_outcome)[["DD_stability"]]
+cell8 = table(str_df_plot_cols$pe_effect_outcome)[["SS_stability"]]
+
+
+#rects$numbers = c(38, 0, 22, 9, 108, 681) #for embb
+rects$numbers = c(cell1, cell2,
+                  #cell3, cell4,
+                  cell5, cell6,
+                  cell7, cell8)
+
+rects$num_labels = paste0("n=", rects$numbers)
+
+rects
+#------
+# Plot
+#------
+#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
+peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
+  geom_tile(width = 1, height = 1) + # make square tiles
+  geom_text(color = "black", size = 1.7) + # add white text in the middle
+  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
+  coord_fixed() + # make sure tiles are square
+  coord_flip()+ scale_x_reverse() +
+  # theme_void() # remove any axis markings
+  theme_nothing() # remove any axis markings
+peP
+
+#------
+# Plot: this one is better
+#------
+peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
+  geom_tile() + # make square tiles
+  geom_text(color = "black", size = 1.6) + # add white text in the middle
+  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
+  coord_fixed() + # make sure tiles are square
+  theme_nothing() # remove any axis markings
+peP2
+
+########################################################
+# From: script sensitivity_count per gene
+#===============================
+# Sensitivity count: SITE
+#===============================
+#--------
+# embb
+#--------
+#rsc = 54
+#ccc = 46
+#ssc = 470
+
+rsc = site_Rc; rsc
+ccc = site_Cc; ccc
+ssc = site_Sc; ssc
+
+rect_rs_siteC <- data.frame(x=1:3,
+                    colors = c("red",
+                               "purple",
+                               "blue")
+                    )
+                               
+rect_rs_siteC                          
+rect_rs_siteC$text = c("Resistant",
+                       "Common",
+                       "Sensitive")
+
+rect_rs_siteC$numbers = c(rsc,ccc,ssc)
+rect_rs_siteC$num_labels = paste0("n=", rect_rs_siteC$numbers)
+rect_rs_siteC
+
+#------
+# Plot
+#------
+sens_siteP = ggplot(rect_rs_siteC, aes(x, y = 0,
+                                       fill = colors,
+                                       label = num_labels
+                                       #,label = paste0(text,"\n", num_labels)
+                                       )) +
+  geom_tile(width = 1, height = 1) +
+  #geom_text(color = "black", size = 1.7) +
+  geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + 
+  scale_fill_identity(guide = "none") + 
+  coord_fixed()+
+  theme_nothing() # remove any axis markings
+sens_siteP
+
+################################################################
+#===============================
+# Sensitivity count: Mutations
+#===============================
+table(sensP_df$sensitivity)
+muts_Rc = table(sensP_df$sensitivity)[["R"]]
+muts_Sc = table(sensP_df$sensitivity)[["S"]]
+rect_sens <- data.frame(x=1:2,
+                              colors = c("red",
+                                         "blue")
+                        )
+
+rect_sens$text = c("Resistant",
+                       "Sensitive")
+rect_sens$numbers = c(muts_Rc,muts_Sc)
+rect_sens$num_labels = paste0("n=", rect_sens$numbers)
+rect_sens
+#------
+# Plot
+#------
+sensP = ggplot(rect_sens, aes(x, y = 0,
+                              fill = colors,
+                              label = paste0(text,"\n", num_labels))) +
+  geom_tile(width = 1, height = 1) +
+  #geom_text(color = "black", size = 1.7) +
+  geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + 
+  scale_fill_identity(guide = "none") + 
+  coord_fixed()+
+  theme_nothing() # remove any axis markings
+sensP
+
+sensP2 = sensP + 
+   coord_flip() + scale_x_reverse()
+sensP2
+
+
diff --git a/scripts/plotting/plotting_thesis/katg/prominent_effects_katg.R b/scripts/plotting/plotting_thesis/katg/prominent_effects_katg.R
new file mode 100644
index 0000000..0bd5e92
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/katg/prominent_effects_katg.R
@@ -0,0 +1,331 @@
+########################################################
+pos_colname = "position"
+
+#-------------
+# from ~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R
+#-------------
+length(all_stability_cols); length(raw_stability_cols)
+length(scaled_stability_cols); length(outcome_stability_cols)
+length(affinity_dist_colnames)
+
+
+static_cols =  c("mutationinformation",
+                 #"position",
+                 pos_colname,
+                 "sensitivity")
+
+other_cols_all = c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames)
+
+#omit avg cols and foldx_scaled_signC cols
+other_cols =  other_cols_all[grep("avg", other_cols_all, invert = T)]
+other_cols = other_cols[grep("foldx_scaled_signC",other_cols, invert = T )]
+other_cols
+
+cols_to_extract =  c(static_cols, other_cols)
+cat("\nExtracting cols:", cols_to_extract)
+expected_ncols = length(static_cols) + length(other_cols)
+expected_ncols
+
+str_df = merged_df3[, cols_to_extract]
+
+if (ncol(str_df) == expected_ncols){
+  cat("\nPASS: successfully extracted cols for calculating prominent effects")
+}else{
+  stop("\nAbort: Could not extract cols for calculating prominent effects")
+}
+
+#=========================
+# Masking affinity columns
+#=========================
+# First make values for affinity cols 0 when their corresponding dist >10
+head(str_df)
+
+# replace in place affinity values >10
+str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0
+str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0
+
+#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10
+if (tolower(gene)%in%geneL_ppi2){
+  str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0
+} 
+  
+# na gene: replace in place na affinity values where na dist >10
+if (tolower(gene)%in%geneL_na){
+  str_df[str_df["nca_distance"]>10,"mcsm_na_scaled"]=0
+}
+
+colnames(str_df)
+head(str_df)
+
+scaled_cols_tc = other_cols[grep("scaled", other_cols)]
+
+
+################################################
+#===============
+# whole df
+#===============
+give_col=function(x,y,df=str_df){
+  df[df[[pos_colname]]==x,y]
+}
+
+for (i in unique(str_df[[pos_colname]]) ){
+  print(i)
+  #cat(length(unique(str_df[[pos_colname]])))
+  
+  biggest     = max(abs(give_col(i,scaled_cols_tc)))
+  
+  str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest
+  str_df[str_df[[pos_colname]]==i,'effect_type']= names(
+    give_col(i,scaled_cols_tc)[which(
+      abs(
+        give_col(i,scaled_cols_tc)
+      ) == biggest, arr.ind=T
+    )[, "col"]])[1]
+  
+  effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values
+  
+  # get index/rowname for value of max effect, and then use it to get the original sign
+  # here
+  #ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T))
+  ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T))
+  
+  str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1]
+}
+
+# ends with suffix 2 if dups
+str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
+colnames(str_df)
+table(str_df$effect_type)
+
+# check
+str_df_check = str_df[str_df[[pos_colname]]%in%c(24, 32, 160, 303, 334),]
+
+#================
+# for Plots
+#================
+str_df_short = str_df[, c("mutationinformation",
+                          #"position",
+                          pos_colname,
+                          "sensitivity"
+                          , "effect_type"
+                          , "effect_sign")]
+
+table(str_df_short$effect_type)
+table(str_df_short$effect_sign)
+str(str_df_short)
+
+# assign pe outcome
+str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
+table(str_df_short$pe_outcome )
+table(str_df_short$effect_sign)
+
+#==============
+# group effect type:
+# lig, ppi2, nuc. acid, stability
+#==============
+affcols = c("affinity_scaled",  "mmcsm_lig_scaled")
+ppi2_cols = c("mcsm_ppi2_scaled")
+
+#lig 
+table(str_df_short$effect_type)
+str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
+                                     , "lig"
+                                     , str_df_short$effect_type)
+table(str_df_short$effect_grouped)
+
+#ppi2
+str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols
+                                     , "ppi2"
+                                     , str_df_short$effect_grouped)
+table(str_df_short$effect_grouped)
+
+
+#stability
+str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig",
+                                                                       "ppi2" 
+                                                                       )
+                                     , "stability"
+                                     , str_df_short$effect_grouped)
+
+table(str_df_short$effect_grouped)
+
+# create a sign as well
+str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_"
+                                     , str_df_short$effect_grouped)
+
+table(str_df_short$pe_effect_outcome)
+
+#####################################################################
+# Chimera: for colouring
+####################################################################
+
+#-------------------------------------
+# get df with unique position
+#--------------------------------------
+#data[!duplicated(data$x), ]   
+str_df_plot = str_df_short[!duplicated(str_df[[pos_colname]]),]
+
+if (nrow(str_df_plot) == length(unique(str_df[[pos_colname]]))){
+  cat("\nPASS: successfully extracted df with unique positions")
+}else{
+  stop("\nAbort: Could not extract df with unique positions")
+}
+
+#-------------------------------------
+# generate colours for effect types
+#--------------------------------------
+str_df_plot_cols = str_df_plot[, c(pos_colname,
+                                   "sensitivity",
+                                   "pe_outcome",
+                                   "effect_grouped",
+                                   "pe_effect_outcome")]
+head(str_df_plot_cols)
+
+# colour intensity based on sign
+#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light")
+str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$pe_outcome=="DD", "bright", "light")
+
+table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome)
+head(str_df_plot_cols)
+
+# colour based on effect
+table(str_df_plot_cols$pe_effect_outcome)
+
+# colors = c("#ffd700" #gold
+#            , "#f0e68c" #khaki
+#            , "#da70d6"# orchid
+#            , "#ff1493"# deeppink
+#            , "#a0522d" #sienna
+#            , "#d2b48c" # tan
+#            , "#00BFC4" #, "#007d85" #blue
+#            , "#F8766D" )# red
+
+pe_colour_map = c("DD_lig"         = "#ffd700"   # gold
+                , "SS_lig"         = "#f0e68c"  # khaki
+             
+                , "DD_nucleic_acid"= "#a0522d"  # sienna
+                , "SS_nucleic_acid"= "#d2b48c"  # tan
+                
+                , "DD_ppi2"        = "#da70d6"  # orchid
+                , "SS_ppi2"        = "#ff1493"  # deeppink
+
+                , "DD_stability"   = "#f8766d"  # red
+                , "SS_stability"   = "#00BFC4") # blue
+        
+#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE)
+
+#map the colours
+str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$pe_effect_outcome
+                                 ,function(x){pe_colour_map[[x]]}
+                                 ))
+head(str_df_plot_cols$colour_map)
+table(str_df_plot_cols$colour_map)
+table(str_df_plot_cols$pe_effect_outcome)
+
+# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue
+#                                   , "_"
+#                                   , str_df_plot_cols$colour_map)
+# head(str_df_plot_cols$colours)
+# table(str_df_plot_cols$colours)
+# 
+# 
+# class(str_df_plot_cols$colour_map)
+# str(str_df_plot_cols)
+
+# sort by colour
+head(str_df_plot_cols)
+str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ]
+head(str_df_plot_cols)
+
+#======================================
+# write file with prominent effects
+#======================================
+outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
+write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv"))
+
+################################
+# printing for chimera
+###############################
+chain_suffix = ".A"
+str_df_plot_cols$pos_chain = paste0(str_df_plot_cols[[pos_colname]], chain_suffix)
+table(str_df_plot_cols$colour_map)
+table(str_df_plot_cols$pe_effect_outcome)
+
+#===================================================
+#-------------------
+# Ligand Affinity
+#-------------------
+# -ve Lig Aff
+dd_lig        = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",]
+if (nrow(dd_lig) == table(str_df_plot_cols$pe_effect_outcome)[['DD_lig']]){
+  dd_lig_pos  = dd_lig[[pos_colname]]
+}else{
+  stop("Abort: DD affinity colour numbers mismtatch")
+}
+
+# +ve Lig Aff
+ss_lig            = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",]
+if (!empty(ss_lig)){
+  if (nrow(ss_lig)  == table(str_df_plot_cols$pe_effect_outcome)[['SS_lig']]){
+    ss_lig_pos      = ss_lig[[pos_colname]]
+  }else{
+    stop("Abort: SS affinity colour numbers mismtatch")
+  } 
+  #put in chimera cmd
+  paste0(dd_lig_pos, chain_suffix)
+  paste0(ss_lig_pos, chain_suffix)
+}
+
+
+
+
+#===================================================
+#-------------------
+# PPI2 Affinity
+#-------------------
+# -ve PPI2
+dd_ppi2           =  str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_ppi2",]
+if (nrow(dd_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['DD_ppi2']]){
+  dd_ppi2_pos     =  dd_ppi2[[pos_colname]]
+}else{
+  stop("Abort: DD PPI2 colour numbers mismtatch")
+}
+
+# +ve PPI2
+ss_ppi2            = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_ppi2",]
+if (nrow(ss_ppi2)  == table(str_df_plot_cols$pe_effect_outcome)[['SS_ppi2']]){
+  ss_ppi2_pos      = ss_ppi2[[pos_colname]]
+}else{
+  stop("Abort: SS PPI2 colour numbers mismtatch")
+}
+
+#put in chimera cmd
+paste0(dd_ppi2_pos,chain_suffix)
+paste0(ss_ppi2_pos,chain_suffix)
+
+#=========================================================
+#------------------------
+# Stability
+#------------------------
+# -ve Stability
+dd_stability           =  str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_stability",]
+if (nrow(dd_stability) == table(str_df_plot_cols$pe_effect_outcome)[['DD_stability']]){
+  dd_stability_pos     =  dd_stability[[pos_colname]]
+}else{
+  stop("Abort: DD Stability colour numbers mismtatch")
+}
+
+# +ve Stability
+ss_stability            = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_stability",]
+if (nrow(ss_stability)  == table(str_df_plot_cols$pe_effect_outcome)[['SS_stability']]){
+  ss_stability_pos      = ss_stability[[pos_colname]]
+}else{
+  stop("Abort: SS Stability colour numbers mismtatch")
+}
+
+#put in chimera cmd
+# stabiliting first as it has less numbers
+paste0(ss_stability_pos, chain_suffix)
+paste0(dd_stability_pos, chain_suffix)
+####################################################################
+
diff --git a/scripts/plotting/plotting_thesis/katg/sensitivity_count_katg.R b/scripts/plotting/plotting_thesis/katg/sensitivity_count_katg.R
new file mode 100644
index 0000000..66caef2
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/katg/sensitivity_count_katg.R
@@ -0,0 +1,65 @@
+#=========================
+# Count Sensitivity
+# Mutations and positions
+#=========================
+pos_colname_c ="position"
+
+sensP_df = merged_df3[,c("mutationinformation",
+                         #"position",
+                         pos_colname_c,
+                         "sensitivity")]
+
+head(sensP_df)
+table(sensP_df$sensitivity)
+
+#---------------
+# Total unique positions
+#----------------
+tot_mut_pos = length(unique(sensP_df[[pos_colname_c]]))
+cat("\nNo of Tot muts sites:", tot_mut_pos)
+
+# resistant mut pos
+sens_site_allR = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="R"]
+sens_site_UR   = unique(sens_site_allR)
+length(sens_site_UR)
+
+# Sensitive mut pos
+sens_site_allS = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="S"]
+sens_site_US = unique(sens_site_allS)
+length(sens_site_UR)
+
+#---------------
+# Common Sites
+#----------------
+common_pos = intersect(sens_site_UR,sens_site_US)
+site_Cc = length(common_pos)
+cat("\nNo of Common sites:", site_Cc
+    , "\nThese are:", common_pos)
+
+#---------------
+# Resistant muts
+#----------------
+site_R = sens_site_UR[!sens_site_UR%in%common_pos]
+site_Rc = length(site_R)
+
+if ( length(sens_site_allR) == table(sensP_df$sensitivity)[['R']] ){
+  cat("\nNo of R muts:", length(sens_site_allR) 
+      , "\nNo. of R sites:",site_Rc
+      , "\nThese are:", site_R
+)
+}
+
+#---------------
+# Sensitive muts
+#----------------
+site_S = sens_site_US[!sens_site_US%in%common_pos]
+site_Sc = length(site_S)
+
+if ( length(sens_site_allS) == table(sensP_df$sensitivity)[['S']] ){
+  cat("\nNo of S muts:", length(sens_site_allS) 
+      , "\nNo. of S sites:", site_Sc
+      , "\nThese are:", site_S)
+}
+
+#########################
+
diff --git a/scripts/plotting/plotting_thesis/linage_bp_dist.R b/scripts/plotting/plotting_thesis/linage_bp_dist.R
index bfe6118..dde9ed0 100644
--- a/scripts/plotting/plotting_thesis/linage_bp_dist.R
+++ b/scripts/plotting/plotting_thesis/linage_bp_dist.R
@@ -34,7 +34,12 @@ if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels
 ###################################################
 #                  Lineage barplots               #
 ###################################################
-
+my_xats = 8 # x axis text size # were  25
+my_yats = 8# y axis text sized_lab_size
+my_xals = 8 # x axis label size
+my_yals = 8 # y axis label size
+my_lls  = 8 # legend label size
+d_lab_size = 2.3
 #===============================
 # lineage sample and SNP count
 #===============================
@@ -46,13 +51,13 @@ lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']]
              , bar_fill_categ = "count_categ"
              , display_label_col = "p_count"
              , bar_stat_stype = "identity"
-             , d_lab_size = 8
+             , d_lab_size = d_lab_size
              , d_lab_col = "black"
-             , my_xats = 25 # x axis text size
-             , my_yats = 25 # y axis text sized_lab_size
-             , my_xals = 25 # x axis label size
-             , my_yals = 25 # y axis label size
-             , my_lls  = 25 # legend label size
+             , my_xats = my_xats # x axis text size
+             , my_yats = my_yats # y axis text sized_lab_size
+             , my_xals = my_xals # x axis label size
+             , my_yals = my_yals # y axis label size
+             , my_lls  = my_lls # legend label size
              , bar_col_labels =  c("SNPs", "Total Samples")
              , bar_col_values = c("grey50", "gray75")
              , bar_leg_name = ""
@@ -73,12 +78,12 @@ lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']]
                               , display_label_col = "snp_diversity_f"
                               , bar_stat_stype = "identity"
                               , x_lab_angle = 90
-                              , d_lab_size =9
-                              , my_xats = 25 # x axis text size
-                              , my_yats = 25 # y axis text size
-                              , my_xals = 25 # x axis label size
-                              , my_yals = 25 # y axis label size
-                              , my_lls  = 25 # legend label size
+                              , d_lab_size = d_lab_size
+                              , my_xats = my_xats # x axis text size
+                              , my_yats = my_yats # y axis text sized_lab_size
+                              , my_xals = my_xals # x axis label size
+                              , my_yals = my_yals # y axis label size
+                              , my_lls  = my_lls # legend label size
                               , y_log10 = F
                               , y_scale_percent = F
                               , leg_location = "top"
@@ -86,28 +91,9 @@ lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']]
                               , bp_plot_title = "nsSNP diversity"
                               , title_colour = "black" #"chocolate4"
                               , subtitle_text = NULL
-                              , sts = 20
+                              , sts = 10
                               , subtitle_colour = "#350E20FF")
 lin_diversityP
-#=============================================
-# Output plots: Lineage count and Diversity
-#=============================================
-# lineage_bp_CL =  paste0(outdir_images
-#                         ,tolower(gene)
-#                         ,"_lineage_bp_CL_Tall.svg")
-# 
-# cat("Lineage barplots:", lineage_bp_CL)
-# svg(lineage_bp_CL, width = 8, height = 15)
-# 
-# cowplot::plot_grid(lin_countP, lin_diversityP
-#                      #, labels = c("(a)", "(b)", "(c)", "(d)")
-#                       , nrow = 2
-#                      # , ncols = 2
-#                       , labels = "AUTO"
-#                       , label_size = 25)
-# 
-# dev.off()
-########################################################################
 
 
 ###################################################
@@ -119,6 +105,13 @@ lin_diversityP
 #                           , "foldx_scaled"
 #                           , "avg_stability_scaled")
 
+my_ats = 8 # x axis text size # were  25
+my_als = 8# y axis text sized_lab_size
+my_leg_ts = 8 # x axis label size
+my_leg_title = 8 # y axis label size
+my_strip_ts  = 8 #
+
+
 my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel
 #plotdf = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
 
@@ -133,11 +126,11 @@ linP_dm_om = lineage_distP(merged_df2
                            , fill_categ_cols = c("red", "blue")
                            , label_categories = c("Resistant", "Sensitive")
                            , leg_label = "Mutation group"
-                           , my_ats = 22 # axis text size
-                           , my_als = 22 # axis label size
-                           , my_leg_ts = 22
-                           , my_leg_title = 22
-                           , my_strip_ts = 22
+                           , my_ats = my_ats # axis text size
+                           , my_als = my_als # axis label size
+                           , my_leg_ts = my_leg_ts
+                           , my_leg_title = my_leg_title
+                           , my_strip_ts = my_strip_ts
                            , alpha = 0.56
 )
 
diff --git a/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R b/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R
index 00f3adc..aae8f3d 100644
--- a/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R
+++ b/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R
@@ -1,23 +1,47 @@
 #!/usr/bin/env Rscript  
-
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp_dist.R")
 ###########################################
 # TASK: generate plots for lineage
 # Individual plots in 
 #lineage_bp_both.R
 #linage_dist_ens_stability.R
 ###########################################
-my_label_size = 25
+# svg
+# my_label_size = 12
+# linPlots_combined = paste0(outdir_images
+#                         , tolower(gene)
+#                         ,"_linP_combined.svg")
+# 
+# cat("\nOutput plot:", linPlots_combined)
+# svg(linPlots_combined, width = 18, height = 12)
+# 
+# cowplot::plot_grid(
+#   cowplot::plot_grid(lin_countP, lin_diversityP
+#                      , nrow = 2
+#                      , rel_heights = c(1.2,1)
+#                      , labels = "AUTO"
+#                      , label_size = my_label_size),
+#   NULL,
+#   linP_dm_om,
+#   nrow = 1,
+#   labels = c("", "", "C"),
+#   label_size = my_label_size,
+#   rel_widths = c(35, 3, 52)
+# )
+# dev.off()
 
+# png
 linPlots_combined = paste0(outdir_images
-                        , tolower(gene)
-                        ,"_linP_combined.svg")
+                           , tolower(gene)
+                           ,"_linP_combined.png")
 
 cat("\nOutput plot:", linPlots_combined)
-svg(linPlots_combined, width = 18, height = 12)
+png(linPlots_combined, width = 9, height = 6, units = "in" ,res = 300)
 
 cowplot::plot_grid(
   cowplot::plot_grid(lin_countP, lin_diversityP
                      , nrow = 2
+                     , rel_heights = c(1.2,1)
                      , labels = "AUTO"
                      , label_size = my_label_size),
   NULL,
@@ -27,4 +51,4 @@ cowplot::plot_grid(
   label_size = my_label_size,
   rel_widths = c(35, 3, 52)
 )
-dev.off()
\ No newline at end of file
+dev.off()
diff --git a/scripts/plotting/plotting_thesis/pe_sens_site_count.R b/scripts/plotting/plotting_thesis/pe_sens_site_count.R
new file mode 100644
index 0000000..3bba656
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/pe_sens_site_count.R
@@ -0,0 +1,78 @@
+##############################################################
+#                         PE count
+##############################################################
+rects <- data.frame(x = 1:6,
+                    colors = c("#ffd700" #gold
+                               , "#f0e68c" #khaki
+                               , "#da70d6"# orchid
+                               , "#ff1493"# deeppink
+                               , "#00BFC4" #, "#007d85" #blue
+                               , "#F8766D" )# red,
+)
+rects
+
+rects$text =  c("-ve Lig"
+                , "+ve Lig"
+                , "+ve PPI2"
+                , "-ve PPI2"
+                , "+ve stability"
+                , "-ve stability")
+
+# FOR EMBB ONLY
+rects$numbers = c(38, 0, 22, 9, 108, 681)
+rects$num_labels = paste0("n=", rects$numbers)
+
+rects
+
+#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
+
+peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
+  geom_tile(width = 1, height = 1) + # make square tiles
+  geom_text(color = "black", size = 1.7) + # add white text in the middle
+  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
+  coord_fixed() + # make sure tiles are square
+  coord_flip()+ scale_x_reverse() +
+  # theme_void() # remove any axis markings
+  theme_nothing() # remove any axis markings
+peP
+
+peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
+  geom_tile() + # make square tiles
+  geom_text(color = "black", size = 1.6) + # add white text in the middle
+  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
+  coord_fixed() + # make sure tiles are square
+  theme_nothing() # remove any axis markings
+peP2
+
+
+########################################################
+# MANUAL process
+#===============================
+# Sensitivity count: Site
+#==============================
+table(df3$sensitivity)
+#--------
+# embb
+#--------
+#rsc = 54
+#ccc = 46
+#ssc = 470
+
+rect_rs_siteC =data.frame(mutation_class=c("A_Resistant sites"
+                                           , "B_Common sites"
+                                           , "C_Sensitive sites"), 
+                          tile_colour =c("red",
+                                         "purple",
+                                         "blue"),
+                          numbers = c(rsc, ccc, ssc), 
+                          order  = c(1, 2, 3))
+
+rect_rs_siteC$labels = paste0(rect_rs_siteC$mutation_class, "\nn=", rect_rs_siteC$ numbers)
+
+sens_siteP = ggplot(rect_rs_siteC, aes(mutation_class, y = 0,
+                                       fill = tile_colour,
+                                       label = paste0("n=", numbers))) +
+  geom_tile(width = 1, height = 1) + 
+  geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + 
+  theme_nothing() 
+sens_siteP
diff --git a/scripts/plotting/plotting_thesis/prominent_effects.R b/scripts/plotting/plotting_thesis/prominent_effects.R
deleted file mode 100644
index ad03b14..0000000
--- a/scripts/plotting/plotting_thesis/prominent_effects.R
+++ /dev/null
@@ -1,405 +0,0 @@
-#!/usr/bin/env Rscript       
-#source("~/git/LSHTM_analysis/config/alr.R")
-source("~/git/LSHTM_analysis/config/embb.R")
-#source("~/git/LSHTM_analysis/config/katg.R")
-#source("~/git/LSHTM_analysis/config/gid.R")
-#source("~/git/LSHTM_analysis/config/pnca.R")
-#source("~/git/LSHTM_analysis/config/rpob.R")
-
-# get plotting dfs 
-source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
-
-source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
-
-length(all_stability_cols); length(raw_stability_cols)
-length(scaled_stability_cols); length(outcome_stability_cols)
-length(affinity_dist_colnames)
-
-static_cols =  c("mutationinformation", "position", "sensitivity")
-other_cols_all = c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames)
-
-#omit avg cols and foldx_scaled_signC cols
-other_cols =  other_cols_all[grep("avg", other_cols_all, invert = T)]
-other_cols = other_cols[grep("foldx_scaled_signC",other_cols, invert = T )]
-other_cols
-
-cols_to_extract =  c(static_cols, other_cols)
-expected_ncols = length(static_cols) + length(other_cols)
-
-str_df = merged_df3[, cols_to_extract]
-
-if (ncol(str_df) == expected_ncols){
-  cat("\nPASS: successfully extracted cols for calculating prominent effects")
-}else{
-  stop("\nAbort: Could not extract cols for calculating prominent effects")
-}
-
-#=========================
-# Masking affinity columns
-#=========================
-# First make values for affinity cols 0 when their corresponding dist >10
-head(str_df)
-
-# replace in place affinity values >10
-str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0
-str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0
-
-#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10
-
-if (tolower(gene)%in%geneL_ppi2){
-  str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0
-} 
-  
-# na gene: replace in place na affinity values where na dist >10
-if (tolower(gene)%in%geneL_na){
-  str_df[str_df["XXXX"]>10,"mcsm_na_scaled"]=0
-}
-
-colnames(str_df)
-
-head(str_df)
-
-# get names of cols to calculate the prominent effects from
-scaled_cols_tc = c("duet_scaled", "deepddg_scaled"
-       , "ddg_dynamut2_scaled", "foldx_scaled","affinity_scaled"
-       ,  "mmcsm_lig_scaled"  ,  "mcsm_ppi2_scaled")
-
-#--------------------------------
-#get rowmax for absolute values
-#--------------------------------
-#str_df$row_maximum = apply(str_df[,-1], 1, function(x){max(abs(x))})
-#str_df$row_maximum = apply(str_df[,scaled_cols_tc], 1, function(x){max(abs(x))})
-
-#correct
-#BOO= abs(str_df[,scaled_cols_tc]) == str_df[,'row_maximum']; head(BOO)
-#also corr but weird
-#POO = which(abs(str_df[,scaled_cols_tc]) == str_df[,'row_maximum'], arr.ind =T); head(POO)
-
-################################################
-# #-------------
-# # short df: try
-# #-------------
-# df2_short = str_df[str_df$position%in%c(167, 423, 427),]
-# df2_short = str_df[str_df$position%in%c(170, 167, 493, 453, 435, 433, 480, 456, 445),]
-# df2_short = str_df[str_df$position%in%c(435, 480),]
-# 
-# 
-# give_col=function(x,y,df=df2_short){
-#   df[df$position==x,y]
-# }
-# 
-# for (i in unique(df2_short$position) ){
-#   print(i)
-#   #print(paste0("\nNo. of unique positions:", length(unique(df2$position))) )
-#   #cat(length(unique(df2$position)))
-#   #df2_short[df2_short$position==i,scaled_cols_tc]
-#   
-#   biggest     = max(abs(give_col(i,scaled_cols_tc)))
-#   
-#   df2_short[df2_short$position==i,'abs_max_effect'] = biggest
-#   df2_short[df2_short$position==i,'effect_type']= names(
-#     give_col(i,scaled_cols_tc)[which(
-#       abs(
-#         give_col(i,scaled_cols_tc)
-#       ) == biggest, arr.ind=T
-#     )[, "col"]])
-#   
-#   effect_name = df2_short[df2_short$position==i,'effect_type'][1] # pick first one in case we have multiple exact values
-#   
-#   # get index/rowname for value of max effect, and then use it to get the original sign
-#   # here
-#   #df2_short[df2_short$position==i,c(effect_name)]
-#   #which(abs(df2_short[df2_short$position==i,c('position',effect_name)][effect_name])==biggest, arr.ind=T)
-#   ind = rownames(which(abs(df2_short[df2_short$position==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T))
-#   df2_short[df2_short$position==i,'effect_sign'] = sign(df2_short[effect_name][ind,])
-# }
-# 
-# # ends with suffix 2 if dups
-# df2_short$effect_type = sub("\\.[0-9]+", "", df2_short$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
-# 
-# View(df2_short)
-
-
-
-#===============
-# whole df
-#===============
-give_col=function(x,y,df=str_df){
-  df[df$position==x,y]
-}
-
-for (i in unique(str_df$position) ){
-  print(i)
-  #cat(length(unique(str_df$position)))
-  
-  biggest     = max(abs(give_col(i,scaled_cols_tc)))
-  
-  str_df[str_df$position==i,'abs_max_effect'] = biggest
-  str_df[str_df$position==i,'effect_type']= names(
-    give_col(i,scaled_cols_tc)[which(
-      abs(
-        give_col(i,scaled_cols_tc)
-      ) == biggest, arr.ind=T
-    )[, "col"]])[1]
-  
-  effect_name = unique(str_df[str_df$position==i,'effect_type'])#[1] # pick first one in case we have multiple exact values
-  
-  # get index/rowname for value of max effect, and then use it to get the original sign
-  # here
-  ind = rownames(which(abs(str_df[str_df$position==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T))
-  str_df[str_df$position==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1]
-}
-
-# ends with suffix 2 if dups
-str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
-
-colnames(str_df)
-
-# check
-str_df_check = str_df[str_df$position%in%c(24, 32,160, 303, 334),]
-table(str_df$effect_type)
-
-#================
-# for Plots
-#================
-str_df_short = str_df[, c("mutationinformation","position","sensitivity"
-                          , "effect_type"
-                          , "effect_sign")]
-
-table(str_df_short$effect_type)
-table(str_df_short$effect_sign)
-str(str_df_short)
-
-# assign pe outcome
-str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
-table(str_df_short$pe_outcome )
-table(str_df_short$effect_sign)
-
-#==============
-# group effect type:
-# lig, ppi2, nuc. acid, stability
-#==============
-
-affcols = c("affinity_scaled",  "mmcsm_lig_scaled")
-ppi2_cols = c("mcsm_ppi2_scaled")
-#nuc_na_cols = c("mcsm_a_scaled")
-
-
-#lig 
-table(str_df_short$effect_type)
-str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
-                                     , "lig"
-                                     , str_df_short$effect_type)
-table(str_df_short$effect_grouped)
-
-#ppi2
-str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols
-                                     , "ppi2"
-                                     , str_df_short$effect_grouped)
-table(str_df_short$effect_grouped)
-
-#stability
-str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig", "ppi2")
-                                     , "stability"
-                                     , str_df_short$effect_grouped)
-
-table(str_df_short$effect_grouped)
-
-# create a sign as well
-str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_"
-                                     , str_df_short$effect_grouped)
-
-table(str_df_short$pe_effect_outcome)
-
-
-
-#####################################################################
-# Chimera: for colouring
-####################################################################
-
-#-------------------------------------
-# get df with unique position
-#--------------------------------------
-#data[!duplicated(data$x), ]   
-str_df_plot = str_df[!duplicated(str_df$position),]
-
-if (nrow(str_df_plot) == length(unique(str_df$position))){
-  cat("\nPASS: successfully extracted df with unique positions")
-}else{
-  stop("\nAbort: Could not extract df with unique positions")
-}
-
-#-------------------------------------
-# generate colours for effect types
-#--------------------------------------
-str_df_plot_cols = str_df_plot[, c("position", "sensitivity"
-                              , affinity_dist_colnames
-                              , "abs_max_effect"
-                              , "effect_type"
-                              , "effect_sign")]
-head(str_df_plot_cols)
-
-# colour intensity based on sign
-str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light")
-table(str_df_plot_cols$colour_hue)
-head(str_df_plot_cols)
-# colour based on effect
-table(str_df_plot_cols$effect_type)
-
-pe_colour_map = c("affinity_scaled"    = "salmon"
-                , "mmcsm_lig_scaled"   = "salmon"
-                , "mcsm_ppi2_scaled"   = "pink"
-                , "mcsm_na_scaled"     = "orange"
-                , "duet_scaled"        = "dimgray"
-                , "deepddg_scaled"     = "dimgray"
-                , "ddg_dynamut2_scaled"= "dimgray"
-                , "foldx_scaled"       = "dimgray")
-        
-#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE)
-
-#map the colours
-str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$effect_type
-                                 ,function(x){pe_colour_map[[x]]}
-                                 ))
-
-str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue
-                                  , "_"
-                                  , str_df_plot_cols$colour_map)
-head(str_df_plot_cols$colours)
-table(str_df_plot_cols$colours)
-
-
-class(str_df_plot_cols$colour_map)
-str(str_df_plot_cols)
-
-# sort by colour
-head(str_df_plot_cols)
-str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ]
-head(str_df_plot_cols)
-
-#======================================
-# write file with prominent effects
-#======================================
-outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
-write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv"))
-
-################################
-# printing for chimera
-###############################
-str_df_plot_cols$pos_chain = paste0(str_df_plot_cols$position, ".B,")
-table(str_df_plot_cols$colour_map)
-
-#===================================================
-#-------------------
-# Ligand Affinity
-#-------------------
-foo = str_df_plot_cols[str_df_plot_cols$colours=="yellow",]
-all(foo2$effect_sign == 1)
-
-foo1 = str_df_plot_cols[str_df_plot_cols$colours=="bright_salmon",]
-all(foo3$effect_sign == -1)
-
-#light salmon: stabilising affinity
-table(str_df_plot_cols$colours)
-
-affinity_pos_l = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="light_salmon"]
-affinity_pos_lc = paste(affinity_pos_l, collapse = "")
-affinity_pos_lc
-table(str_df_plot_cols$colours)[["light_salmon"]]
-
-#bright salmon: DEstabilsing affinity
-affinity_pos_b = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="bright_salmon"]
-affinity_pos_bc = paste(affinity_pos_b, collapse = "")
-affinity_pos_bc
-table(str_df_plot_cols$colours)[["bright_salmon"]]
-
-c1 = length(affinity_pos_l) + length(affinity_pos_b) == table(str_df_plot_cols$colour_map)[["salmon"]]
-
-if (c1){
-  cat("PASS: affinity colour numbers checked")
-}else{
-  stop("Abort: affinity colour numbers mismtatch")
-}
-
-#put in chimera cmd
-affinity_pos_lc
-affinity_pos_bc
-
-#===================================================
-#-------------------
-# ppi2 Affinity
-#-------------------
-foo2 = str_df_plot_cols[str_df_plot_cols$colours=="light_pink",]
-all(foo2$effect_sign == 1)
-
-foo3 = str_df_plot_cols[str_df_plot_cols$colours=="bright_pink",]
-all(foo3$effect_sign == -1)
-
-#light_pink: stabilising affinity
-table(str_df_plot_cols$colours)
-
-ppi2_pos_l = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="light_pink"]
-ppi2_pos_lc = paste(ppi2_pos_l, collapse = "")
-ppi2_pos_lc
-table(str_df_plot_cols$colours)[["light_pink"]]
-
-#bright pink: DEstabilsing affinity
-ppi2_pos_b = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="bright_pink"]
-ppi2_pos_bc = paste(ppi2_pos_b, collapse = "")
-ppi2_pos_bc
-table(str_df_plot_cols$colours)[["bright_pink"]]
-
-c2 = length(ppi2_pos_l) + length(ppi2_pos_b) == table(str_df_plot_cols$colour_map)[["pink"]]
-
-if (c2){
-  cat("PASS: ppi2 colour numbers checked")
-}else{
-  stop("Abort: ppi2 colour numbers mismtatch")
-}
-
-#put in chimera cmd
-ppi2_pos_lc
-ppi2_pos_bc
-
-#=========================================================
-#-------------------
-# Stability
-#-------------------
-foo4 = str_df_plot_cols[str_df_plot_cols$colours=="light_dimgray",]
-all(foo2$effect_sign == 1)
-
-foo5 = str_df_plot_cols[str_df_plot_cols$colours=="bright_dimgray",]
-all(foo3$effect_sign == -1)
-
-#light_dimgray: stabilising Stability
-table(str_df_plot_cols$colours)
-
-stab_pos_l = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="light_dimgray"]
-stab_pos_lc = paste(stab_pos_l, collapse = "")
-stab_pos_lc
-table(str_df_plot_cols$colours)[["light_dimgray"]]
-
-#bright_dimgray pink: DEstabilsing Stability
-stab_pos_b = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="bright_dimgray"]
-stab_pos_bc = paste(stab_pos_b, collapse = "")
-stab_pos_bc
-table(str_df_plot_cols$colours)[["bright_dimgray"]]
-
-c3 = length(stab_pos_l) + length(stab_pos_b) == table(str_df_plot_cols$colour_map)[["dimgray"]]
-
-if (c3){
-  cat("PASS: stability colour numbers checked")
-}else{
-  stop("Abort: stability colour numbers mismtatch")
-}
-
-#put in chimera cmd
-stab_pos_lc
-stab_pos_bc
-
-
-# stab tool count
-table(str_df_plot_cols$effect_type)
-
-table(str_df_plot_cols$effect_type, str_df_plot_cols$effect_sign)
-
diff --git a/scripts/plotting/plotting_thesis/rpob/basic_barplots_layout_rpob.R b/scripts/plotting/plotting_thesis/rpob/basic_barplots_layout_rpob.R
new file mode 100644
index 0000000..43080ff
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/rpob/basic_barplots_layout_rpob.R
@@ -0,0 +1,348 @@
+#source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pe_sens_site_count_rpob.R")
+
+if ( tolower(gene)%in%c("rpob") ){
+  cat("\nPlots available for layout are:")
+  
+  duetP
+  foldxP
+  deepddgP
+  dynamut2P
+  proveanP
+  snap2P
+  
+  mLigP
+  mmLigP
+  posC_lig
+  
+  ppi2P
+  posC_ppi2
+  
+  nca_distP
+  posC_nca
+  
+  peP2
+  sens_siteP
+  peP   # not used
+  sensP # not used
+}
+
+
+#========================
+# Common title settings
+#=========================
+theme_georgia <- function(...) {
+  theme_gray(base_family = "sans", ...) + 
+    theme(plot.title = element_text(face = "bold"))
+}
+title_theme <- calc_element("plot.title", theme_georgia())
+
+###############################################################
+common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
+
+# extract common legends
+# lig affinity
+common_legend_outcome = get_legend(mLigP +
+                                     guides(color = guide_legend(nrow = 1)) +
+                                     theme(legend.position = "top"))
+
+# stability
+common_legend_outcome = get_legend(duetP +
+                                     guides(color = guide_legend(nrow = 1)) +
+                                     theme(legend.position = "top"))
+# conservation
+cons_common_legend_outcome = get_legend(snap2P +
+                                          guides(color = guide_legend(nrow = 1)) +
+                                          theme(legend.position = "top"))
+###################################################################
+#==================================
+# Stability+Conservation: COMBINE
+#==================================
+tt_size = 10
+#----------------------------
+# stability and consv title
+#----------------------------
+tt_stab = ggdraw() + 
+  draw_label(
+    paste0("Stability outcome"),
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = tt_size
+  )
+
+tt_cons = ggdraw() + 
+  draw_label(
+    paste0("Conservation outcome"),
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = tt_size
+  )
+
+#----------------------
+# Output plot
+#-----------------------
+stab_cons_CLP =  paste0(outdir_images
+                        ,tolower(gene)
+                        ,"_stab_cons_BP_CLP.png")
+
+print(paste0("plot filename:", stab_cons_CLP))
+png(stab_cons_CLP, units = "in", width = 10, height = 5, res = 300 )
+
+cowplot::plot_grid(
+  cowplot::plot_grid(
+    cowplot::plot_grid(
+      tt_stab,
+      common_legend_outcome,
+      nrow = 2
+    ),
+    cowplot::plot_grid(
+      duetP, 
+      foldxP, 
+      deepddgP, 
+      dynamut2P,
+      nrow = 1,
+      labels = c("A", "B", "C", "D"),
+      label_size = 12),
+    nrow = 2,
+    rel_heights=c(1,10)
+  ),
+  NULL,
+  cowplot::plot_grid(
+    cowplot::plot_grid(
+      cowplot::plot_grid(
+        tt_cons,
+        cons_common_legend_outcome,
+        nrow = 2
+      ),
+      cowplot::plot_grid(
+        proveanP,
+        snap2P,
+        nrow=1, 
+        labels = c("E", "F"),
+        align = "hv"),
+      nrow = 2,
+      rel_heights = c(1, 10),
+      label_size = 12),
+    nrow=1
+  ),
+  rel_widths = c(2,0.15,1),
+  nrow=1
+)
+
+dev.off()
+
+#################################################################
+#=======================================
+# Affinity barplots: COMBINE ALL four
+#========================================
+ligT = paste0(common_bp_title, " ligand")
+lig_affT = ggdraw() +
+  draw_label(
+    ligT,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = 8
+  )
+
+p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT
+                                           , common_legend_outcome
+                                           , nrow=2),
+                        cowplot::plot_grid(mLigP, mmLigP, posC_lig
+                                           , nrow = 1
+                                           , rel_widths = c(1,0.65,1.8)
+                                           , align = "h"),
+                        nrow = 2,
+                        rel_heights = c(1,8)
+                        
+)
+p1
+###########################################################
+ncaT = paste0(common_bp_title, " Nucleic Acid")
+nca_affT = ggdraw() +
+  draw_label(
+    ncaT,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = 8
+  )
+
+p2 = cowplot::plot_grid(cowplot::plot_grid(nca_affT
+                                           , common_legend_outcome
+                                           , nrow=2),
+                        cowplot::plot_grid(nca_distP, posC_nca
+                                             , nrow = 1
+                                             , rel_widths = c(1,1.8)
+                                             , align = "h"),
+                        nrow = 2,
+                        rel_heights = c(1,8)
+)
+p2
+###########################################################
+ppi2T = paste0(common_bp_title, " PP-interface")
+ppi2_affT = ggdraw() +
+  draw_label(
+    ppi2T,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = 8
+  )
+
+p3 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nrow=2),
+                        cowplot::plot_grid(ppi2P, posC_ppi2
+                                           , nrow = 1
+                                           , rel_widths = c(1,1.9)
+                                           , align = "h"),
+                        nrow = 2,
+                        rel_heights = c(1,8)
+)
+p3
+
+#### Combine p1+p2+p3 ####
+w = 11.79
+h = 3.5
+mut_impact_CLP =  paste0(outdir_images
+                         ,tolower(gene)
+                         ,"_mut_impactCLP.png")
+
+#svg(affP, width = 20, height = 5.5)
+print(paste0("plot filename:", mut_impact_CLP))
+png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 )
+
+cowplot::plot_grid(p1,
+                   p2,
+                   p3,
+                   #p4,
+                   nrow = 1,
+                   labels = "AUTO",
+                   label_size = 12,
+                   rel_widths = c(2.5,2,2)
+                   #, rel_heights = c(1)
+)
+
+dev.off()
+w = 11.79
+h = 3.5
+mut_impact_CLP =  paste0(outdir_images
+                         ,tolower(gene)
+                         ,"_mut_impactCLP.png")
+
+#svg(affP, width = 20, height = 5.5)
+print(paste0("plot filename:", mut_impact_CLP))
+png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 )
+
+cowplot::plot_grid(p1,
+                   p2,
+                   p3,
+                   #p4,
+                   nrow = 1,
+                   labels = "AUTO",
+                   label_size = 12,
+                   rel_widths = c(2.5,2,2)
+                   #, rel_heights = c(1)
+)
+
+dev.off()
+
+#### Combine p4: ALL pos count ####
+# PE + All position count
+peT_allT = ggdraw() +
+  draw_label(
+    paste0("All mutation sites"),
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = 8
+  )
+
+p4 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
+                                           , rel_widths = c(1,3),axis = "lr"),
+                        cowplot::plot_grid(
+                          peP2, posC_all,
+                          nrow = 2,
+                          rel_widths = c(1,1),
+                          align = "v",
+                          axis = "lr",
+                          rel_heights = c(1,8)
+                        ),
+                        rel_heights = c(1,18),
+                        nrow = 2,axis = "lr")
+p4
+
+w = 3.5
+h = 4
+mut_impact_CLP_PE =  paste0(outdir_images
+                         ,tolower(gene)
+                         ,"_mut_impactCLP_PE.png")
+
+print(paste0("plot filename:", mut_impact_CLP_PE))
+png(mut_impact_CLP_PE, units = "in", width = w, height = h, res = 300 )
+
+cowplot::plot_grid(p4,
+                   nrow = 1,
+                   labels = c("D"),
+                   label_size = 12#,
+                   #rel_widths = c(2.5,2,2)
+                   #, rel_heights = c(1)
+)
+  
+dev.off()
+
+##################################################
+sensP
+consurfP
+#=================
+#### Combine sensitivity + ConSurf ####
+# or ConSurf
+#=================
+w = 3
+h = 3
+# sens_conP =  paste0(outdir_images
+#                          ,tolower(gene)
+#                          ,"_sens_cons_CLP.png")
+# 
+# print(paste0("plot filename:", sens_conP))
+# png(sens_conP, units = "in", width = w, height = h, res = 300 )
+# 
+# cowplot::plot_grid(sensP, consurfP,
+#                    nrow = 2,
+#                    rel_heights = c(1, 1.5)
+#                    )
+# 
+# dev.off()
+
+conCLP =  paste0(outdir_images
+                    ,tolower(gene)
+                    ,"_consurf_BP.png")
+
+print(paste0("plot filename:", conCLP))
+png(conCLP, units = "in", width = w, height = h, res = 300 )
+consurfP
+
+dev.off()
+#================================
+# Sensitivity mutation numbers: geom_tile
+#================================
+sensCLP =  paste0(outdir_images
+                 ,tolower(gene)
+                 ,"_sensN_tile.png")
+
+print(paste0("plot filename:", sensCLP))
+png(sensCLP, units = "in", width = 1, height = 1, res = 300 )
+sensP
+dev.off()
+#================================
+# Sensitivity SITE numbers: geom_tile
+#================================
+sens_siteCLP =  paste0(outdir_images
+                  ,tolower(gene)
+                  ,"_sens_siteC_tile.png")
+
+print(paste0("plot filename:", sens_siteCLP))
+png(sens_siteCLP, units = "in", width = 1.2, height = 1, res = 300 )
+sens_siteP
+dev.off()
+
+###########################################################
+
diff --git a/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R b/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R
new file mode 100644
index 0000000..769c32c
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R
@@ -0,0 +1,372 @@
+#!/usr/bin/env Rscript   
+#########################################################
+# TASK: Barplots
+# basic barplots with outcome
+# basic barplots with frequency of count of mutations
+#########################################################
+#=============
+# Data: Input
+#==============
+#source("~/git/LSHTM_analysis/config/pnca.R")
+#source("~/git/LSHTM_analysis/config/embb.R")
+#source("~/git/LSHTM_analysis/config/gid.R")
+
+#source("~/git/LSHTM_analysis/config/alr.R")
+#source("~/git/LSHTM_analysis/config/katg.R")
+#source("~/git/LSHTM_analysis/config/rpob.R")
+
+#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above
+
+#cat("\nSourced plotting cols as well:", length(plotting_cols))
+
+####################################################
+class(merged_df3)
+
+df3 = subset(merged_df3, select = -c(pos_count))
+
+#=======
+# output
+#=======
+outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
+cat("plots will output to:", outdir_images)
+
+##########################################################
+# blue, red bp
+sts = 8
+lts = 8
+ats = 8
+als = 8
+ltis = 8
+geom_ls = 2.2
+
+#pos_count
+subtitle_size = 8
+geom_ls_pc = 2.2
+leg_text_size = 8
+axis_text_size = 8
+axis_label_size = 8
+
+###########################################################
+#------------------------------
+# plot default sizes
+#------------------------------
+#=========================
+# Affinity outcome
+# check this var: outcome_cols_affinity
+# get from preformatting or put in globals
+#==========================
+DistCutOff
+LigDist_colname  # = "ligand_distance" # from globals 
+ppi2Dist_colname
+naDist_colname
+
+###########################################################
+# get plotting data within the distance
+df3_lig  = df3[df3[[LigDist_colname]]<DistCutOff,]
+df3_ppi2 = df3[df3[[ppi2Dist_colname]]<DistCutOff,]
+df3_na   = df3[df3[[naDist_colname]]<DistCutOff,]
+common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
+
+#------------------------------
+# barplot for ligand affinity:
+# <10 Ang of ligand
+#------------------------------
+mLigP = stability_count_bp(plotdf = df3_lig
+               , df_colname = "ligand_outcome"
+               #, leg_title  = "mCSM-lig"
+               #, bp_plot_title = paste(common_bp_title, "ligand")
+               , yaxis_title = "Number of nsSNPs"
+               , leg_position = "none"
+               , subtitle_text = "mCSM\nLig"
+               , bar_fill_values = c("#F8766D", "#00BFC4")
+               , subtitle_colour= "black"
+               , sts = sts
+               , lts = lts
+               , ats = ats
+               , als = als
+               , ltis = ltis
+               , geom_ls = geom_ls
+               )
+mLigP
+#------------------------------
+# barplot for ligand affinity:
+# <10 Ang of ligand
+# mmCSM-lig: will be the same no. of sites but the effect will be different
+#------------------------------
+mmLigP = stability_count_bp(plotdf = df3_lig
+                   , df_colname = "mmcsm_lig_outcome"
+                   #, leg_title  = "mmCSM-lig"
+                   #, label_categories = labels_mmlig
+                   #, bp_plot_title = paste(common_bp_title, "ligand")
+                   
+                   , yaxis_title = ""
+                   , leg_position = "none"
+                   , subtitle_text = "mmCSM\nLig"
+                   , bar_fill_values = c("#F8766D", "#00BFC4")
+                   , subtitle_colour= "black"
+                   , sts = sts
+                   , lts = lts
+                   , ats = ats
+                   , als = als
+                   , ltis = ltis
+                   , geom_ls = geom_ls
+                   )
+mmLigP
+#------------------------------
+# barplot for ppi2 affinity
+#  <10 Ang of interface
+#------------------------------
+if (tolower(gene)%in%geneL_ppi2){
+    ppi2P = stability_count_bp(plotdf = df3_ppi2
+                     , df_colname = "mcsm_ppi2_outcome"
+                     #, leg_title  = "mCSM-ppi2"
+                     #, label_categories = labels_ppi2
+                     #, bp_plot_title = paste(common_bp_title, "PP-interface")
+                     
+                     , yaxis_title = "Number of nsSNPs"
+                     , leg_position = "none"
+                     , subtitle_text = "mCSM\nPPI2"
+                     , bar_fill_values = c("#F8766D", "#00BFC4")
+                     , subtitle_colour= "black"
+                     , sts = sts
+                     , lts = lts
+                     , ats = ats
+                     , als = als
+                     , ltis = ltis
+                     , geom_ls = geom_ls
+                     )
+  ppi2P
+}
+#----------------------------
+# barplot for ppi2 affinity
+#  <10 Ang of interface
+#------------------------------
+if (tolower(gene)%in%geneL_na){
+  nca_distP = stability_count_bp(plotdf = df3_na
+                             , df_colname = "mcsm_na_outcome"
+                             #, leg_title  = "mCSM-NA"
+                             #, label_categories = 
+                             #, bp_plot_title = paste(common_bp_title, "Dist to NA")
+                             
+                             , yaxis_title = "Number of nsSNPs"
+                             , leg_position = "none"
+                             , subtitle_text = "mCSM\nNA"
+                             , bar_fill_values = c("#F8766D", "#00BFC4")
+                             , subtitle_colour= "black"
+                             , sts = sts
+                             , lts = lts
+                             , ats = ats
+                             , als = als
+                             , ltis = ltis
+                             , geom_ls = geom_ls
+  )
+  nca_distP
+}
+
+#####################################################################
+# ------------------------------
+# bp site site count: mCSM-lig
+# < 10 Ang ligand
+# ------------------------------
+common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
+
+posC_lig = site_snp_count_bp(plotdf = df3_lig
+                  , df_colname = "position"
+                  , xaxis_title = "Number of nsSNPs"
+                  , yaxis_title = "Number of Sites"
+                  , subtitle_colour = "chocolate4"
+                  , subtitle_text = ""
+                  , subtitle_size = subtitle_size
+                  , geom_ls = geom_ls_pc
+                  , leg_text_size = leg_text_size
+                  , axis_text_size = axis_text_size
+                  , axis_label_size = axis_label_size)
+
+posC_lig
+#------------------------------
+# bp site site count: ppi2
+# < 10 Ang interface
+#------------------------------
+if (tolower(gene)%in%geneL_ppi2){
+  posC_ppi2 = site_snp_count_bp(plotdf = df3_ppi2
+                    , df_colname = "position"
+                    , xaxis_title = "Number of nsSNPs"
+                    , yaxis_title = "Number of Sites"
+                    , subtitle_colour = "chocolate4"
+                    , subtitle_text = ""
+                    , subtitle_size = subtitle_size
+                    , geom_ls = geom_ls_pc
+                    , leg_text_size = leg_text_size
+                    , axis_text_size = axis_text_size
+                    , axis_label_size = axis_label_size)
+  posC_ppi2
+}
+
+#------------------------------
+# bp site site count: NCA dist
+# < 10 Ang nca
+#------------------------------
+if (tolower(gene)%in%geneL_na){
+  posC_nca = site_snp_count_bp(plotdf = df3_na
+                                , df_colname = "position"
+                                , xaxis_title = "Number of nsSNPs"
+                                , yaxis_title = "Number of Sites"
+                                , subtitle_colour = "chocolate4"
+                                , subtitle_text = ""
+                               , subtitle_size = subtitle_size
+                               , geom_ls = geom_ls_pc
+                               , leg_text_size = leg_text_size
+                               , axis_text_size = axis_text_size
+                               , axis_label_size = axis_label_size)
+  posC_nca
+}
+#===============================================================
+#------------------------------
+# bp site site count: ALL
+# <10 Ang ligand
+#------------------------------
+posC_all = site_snp_count_bp(plotdf = df3
+                             , df_colname = "position"
+                             , xaxis_title = "Number of nsSNPs"
+                             , yaxis_title = "Number of Sites"
+                             , subtitle_colour = "chocolate4"
+                             , subtitle_text = "All mutations sites"
+                             , subtitle_size = subtitle_size
+                             , geom_ls = geom_ls_pc
+                             , leg_text_size = leg_text_size
+                             , axis_text_size = axis_text_size
+                             , axis_label_size = axis_label_size)
+posC_all
+##################################################################
+consurfP = stability_count_bp(plotdf = df3
+                              , df_colname = "consurf_outcome"
+                              #, leg_title = "ConSurf"
+                              #, label_categories = labels_consurf
+                              , yaxis_title = "Number of nsSNPs"
+                              , leg_position = "top"
+                              , subtitle_text = "ConSurf"
+                              , bar_fill_values = consurf_colours # from globals
+                              , subtitle_colour= "black"
+                              , sts = sts
+                              , lts = lts
+                              , ats = ats
+                              , als = als
+                              , ltis = ltis
+                              , geom_ls = geom_ls)
+
+consurfP
+
+##############################################################
+sts_so = 10
+lts_so = 10
+ats_so = 10
+als_so = 10
+ltis_so = 10
+geom_ls_so = 2.5
+#===================
+# Stability
+#===================
+# duetP
+duetP = stability_count_bp(plotdf = df3
+                           , df_colname = "duet_outcome"
+                           , leg_title = "mCSM-DUET"
+                           #, label_categories = labels_duet
+                           , yaxis_title = "Number of nsSNPs"
+                           , leg_position = "none"
+                           , subtitle_text = "mCSM-DUET"
+                           , bar_fill_values = c("#F8766D", "#00BFC4")
+                           , subtitle_colour= "black"
+                           , sts = sts_so
+                           , lts = lts_so
+                           , ats = ats_so
+                           , als = als_so
+                           , ltis = ltis_so
+                           , geom_ls = geom_ls_so)
+duetP
+
+# foldx
+foldxP = stability_count_bp(plotdf = df3
+                            , df_colname = "foldx_outcome"
+                            #, leg_title = "FoldX"
+                            #, label_categories = labels_foldx
+                            , yaxis_title = ""
+                            , leg_position = "none"
+                            , subtitle_text = "FoldX"
+                            , bar_fill_values = c("#F8766D", "#00BFC4")
+                            , sts = sts_so
+                            , lts = lts_so
+                            , ats = ats_so
+                            , als = als_so
+                            , ltis = ltis_so
+                            , geom_ls = geom_ls_so)
+foldxP
+
+# deepddg
+deepddgP = stability_count_bp(plotdf = df3
+                              , df_colname = "deepddg_outcome"
+                              #, leg_title = "DeepDDG"
+                              #, label_categories = labels_deepddg
+                              , yaxis_title = ""
+                              , leg_position = "none"
+                              , subtitle_text = "DeepDDG"
+                              , bar_fill_values = c("#F8766D", "#00BFC4")
+                              , sts = sts_so
+                              , lts = lts_so
+                              , ats = ats_so
+                              , als = als_so
+                              , ltis = ltis_so
+                              , geom_ls = geom_ls_so)
+deepddgP
+
+# deepddg
+dynamut2P = stability_count_bp(plotdf = df3
+                               , df_colname = "ddg_dynamut2_outcome"
+                               #, leg_title = "Dynamut2"
+                               #, label_categories = labels_ddg_dynamut2_outcome
+                               , yaxis_title = ""
+                               , leg_position = "none"
+                               , subtitle_text = "Dynamut2"
+                               , bar_fill_values = c("#F8766D", "#00BFC4")
+                               , sts = sts_so
+                               , lts = lts_so
+                               , ats = ats_so
+                               , als = als_so
+                               , ltis = ltis_so
+                               , geom_ls = geom_ls_so)
+dynamut2P
+
+# provean
+proveanP = stability_count_bp(plotdf = df3
+                              , df_colname = "provean_outcome"
+                              #, leg_title = "PROVEAN"
+                              #, label_categories = labels_provean
+                              , yaxis_title = "Number of nsSNPs"
+                              , leg_position = "none" # top
+                              , subtitle_text = "PROVEAN"
+                              , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
+                              , sts = sts_so
+                              , lts = lts_so
+                              , ats = ats_so
+                              , als = als_so
+                              , ltis = ltis_so
+                              , geom_ls = geom_ls_so)
+proveanP
+
+# snap2
+snap2P = stability_count_bp(plotdf = df3
+                            , df_colname = "snap2_outcome"
+                            #, leg_title = "SNAP2"
+                            #, label_categories = labels_snap2
+                            , yaxis_title = ""
+                            , leg_position = "none" # top
+                            , subtitle_text = "SNAP2"
+                            , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
+                            , sts = sts_so
+                            , lts = lts_so
+                            , ats = ats_so
+                            , als = als_so
+                            , ltis = ltis_so
+                            , geom_ls = geom_ls_so)
+snap2P
+#####################################################################################
diff --git a/scripts/plotting/plotting_thesis/rpob/dm_om_plots_layout_rpob.R b/scripts/plotting/plotting_thesis/rpob/dm_om_plots_layout_rpob.R
new file mode 100644
index 0000000..dd9c554
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/rpob/dm_om_plots_layout_rpob.R
@@ -0,0 +1,179 @@
+# source dm_om_plots.R
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/dm_om_plots.R")
+
+##### plots to combine ####
+duetP
+foldxP
+deepddgP
+dynamut2P
+genomicsP
+consurfP
+proveanP
+snap2P
+mcsmligP
+mcsmlig2P
+mcsmppi2P
+mcsmnaP
+
+# Plot labels
+tit1 = "Stability changes"
+tit2 = "Genomic measure"
+tit3 = "Distance to partners"
+tit4 = "Evolutionary Conservation"
+tit5 = "Affinity changes"
+pt_size = 30
+
+theme_georgia <- function(...) {
+  theme_gray(base_family = "sans", ...) + 
+    theme(plot.title = element_text(face = "bold"))
+}
+
+
+title_theme <- calc_element("plot.title", theme_georgia())
+
+pt1 = ggdraw() + 
+  draw_label(
+    tit1,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    #size = title_theme$size
+    size = pt_size
+  )
+
+pt2 = ggdraw() + 
+  draw_label(
+    tit2,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = pt_size
+  )
+
+pt3 = ggdraw() + 
+  draw_label(
+    tit3,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = pt_size
+  )
+
+pt4 = ggdraw() + 
+  draw_label(
+    tit4,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = pt_size
+  )
+
+
+pt5 = ggdraw() + 
+  draw_label(
+    tit5,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = pt_size
+  )
+
+#======================
+# Output plot function
+#======================
+OutPlot_dm_om = function(x){
+  
+  # dist b/w plot title and plot
+  relH_tp = c(0.08, 0.92)
+  
+  my_label_size = 25
+  #----------------
+  # Top panel
+  #----------------
+  top_panel = cowplot::plot_grid(
+    cowplot::plot_grid(pt1, 
+                       cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P
+                                          , nrow = 1
+                                          , labels = c("A", "B", "C", "D")
+                                          , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights = relH_tp
+    ),
+    NULL,
+    cowplot::plot_grid(pt2, 
+                       cowplot::plot_grid(genomicsP
+                                          , nrow = 1
+                                          , labels = c("E")
+                                          , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights = relH_tp
+    ),
+    NULL,
+    cowplot::plot_grid(pt3, 
+                       cowplot::plot_grid( #distanceP
+                         distanceP_lig
+                         #, distanceP_ppi2
+                         , distanceP_na
+                         , nrow = 1
+                         , labels = c("F", "G")
+                         , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights = relH_tp
+    ),
+    nrow = 1,
+    rel_widths = c(2/7, 0.1/7, 0.5/7, 0.1/7,  1/7)
+  )
+  
+  #----------------
+  # Bottom panel
+  #----------------
+  bottom_panel = cowplot::plot_grid(
+    cowplot::plot_grid(pt4, 
+                       cowplot::plot_grid(consurfP, proveanP, snap2P
+                                          , nrow = 1
+                                          , labels = c("H", "I", "J")
+                                          , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights =relH_tp
+    ),NULL,
+    cowplot::plot_grid(pt5, 
+                       cowplot::plot_grid(mcsmligP
+                                          , mcsmlig2P
+                                          , mcsmppi2P
+                                          , mcsmnaP
+                                          , nrow = 1
+                                          , labels = c("K", "L", "M")
+                                          , label_size = my_label_size)
+                       , ncol = 1
+                       , rel_heights = relH_tp
+    ),NULL,
+    nrow = 1, 
+    rel_widths = c(3/6,0.1/6,3/6, 0.1/6 )
+  )
+  
+  #-------------------------------
+  # combine: Top and Bottom panel
+  #-------------------------------
+  cowplot::plot_grid (top_panel, bottom_panel
+                      , nrow =2
+                      , rel_widths = c(1, 1)
+                      , align = "hv")
+}
+
+#=====================
+# OutPlot: svg and png
+#======================
+dm_om_combinedP =  paste0(outdir_images
+                          ,tolower(gene)
+                          ,"_dm_om_all.svg")
+
+cat("DM OM plots with stats:", dm_om_combinedP)
+svg(dm_om_combinedP, width = 32, height = 18)
+
+OutPlot_dm_om()
+dev.off()
+
+
+dm_om_combinedP_png =  paste0(outdir_images
+                              ,tolower(gene)
+                              ,"_dm_om_all.png")
+cat("DM OM plots with stats:", dm_om_combinedP_png)
+png(dm_om_combinedP_png, width = 32, height = 18, units = "in", res = 300)
+
+OutPlot_dm_om()
+dev.off()
diff --git a/scripts/plotting/plotting_thesis/rpob/gg_pairs_all_rpob.R b/scripts/plotting/plotting_thesis/rpob/gg_pairs_all_rpob.R
new file mode 100644
index 0000000..2bc7b89
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/rpob/gg_pairs_all_rpob.R
@@ -0,0 +1,203 @@
+#source("~/git/LSHTM_analysis/config/embb.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+
+my_gg_pairs=function(plot_df, plot_title
+                     , tt_args_size = 2.5
+                     , gp_args_size = 2.5){
+  ggpairs(plot_df,
+          columns = 1:(ncol(plot_df)-1),
+          upper = list(
+            continuous = wrap('cor', # ggally_cor()
+                              method = "spearman",
+                              use = "pairwise.complete.obs",
+                              title="ρ",
+                              digits=2,
+                              justify_labels = "centre",
+                              title_args=list(size=tt_args_size, colour="black"),#2.5
+                              group_args=list(size=gp_args_size)#2.5
+            )
+          ),
+          lower = list(
+            continuous = wrap("points",
+                              alpha = 0.7,
+                              size=0.125),
+            combo     = wrap("dot",
+                             alpha = 0.7,
+                             size=0.125)
+          ),
+          aes(colour = factor(ifelse(dst_mode==0,
+                                     "S",
+                                     "R") ),
+              alpha = 0.5),
+          title=plot_title) +
+    
+    scale_colour_manual(values = c("red", "blue")) +
+    scale_fill_manual(values = c("red", "blue")) #+
+  # theme(text = element_text(size=7,
+  #                           face="bold"))
+}
+
+DistCutOff = 10
+###########################################################################
+geneL_normal  = c("pnca")
+geneL_na      = c("gid", "rpob")
+geneL_ppi2    = c("alr", "embb", "katg", "rpob")
+
+merged_df3 = as.data.frame(merged_df3)
+
+corr_plotdf = corr_data_extract(merged_df3
+                                , gene = gene
+                                , drug = drug
+                                , extract_scaled_cols = F)
+
+aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
+static_cols =  c("Log10(MAF)"
+                 , "Log10(OR)"
+)
+############################################################
+#=============================================
+# Creating masked df for affinity data
+#=============================================
+corr_affinity_df = corr_plotdf
+
+#----------------------
+# Mask affinity columns
+#-----------------------
+corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mCSM-lig"]=0
+corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0
+
+if (tolower(gene)%in%geneL_ppi2){
+  corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0
+}
+
+if (tolower(gene)%in%geneL_na){
+  corr_affinity_df[corr_affinity_df["NCA-Dist"]>DistCutOff,"mCSM-NA"]=0
+}
+
+# count 0
+#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100
+unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0) 
+unmasked_vals
+
+##########################################################
+#================
+# Stability
+#================
+corr_ps_colnames = c(static_cols
+                     , "DUET"
+                     , "FoldX"
+                     , "DeepDDG"
+                     , "Dynamut2"
+                     , aff_dist_cols
+                     , "dst_mode")
+
+corr_df_ps = corr_plotdf[, corr_ps_colnames]
+
+# Plot #1
+plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates")
+
+##########################################################
+#================
+# Conservation
+#================
+corr_conservation_cols = c( static_cols
+                            , "ConSurf"
+                            , "SNAP2"
+                            , "PROVEAN"
+                            #, aff_dist_cols
+                            , "dst_mode"
+)
+
+corr_df_cons = corr_plotdf[, corr_conservation_cols]
+
+# Plot #2
+plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation estimates")
+
+##########################################################
+#================
+# Affinity: lig, ppi and na as applicable
+#================
+#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
+common_aff_colnames = c("mCSM-lig"
+                        , "mmCSM-lig")
+
+if (tolower(gene)%in%geneL_normal){
+  aff_colnames = common_aff_colnames
+}
+if (tolower(gene)%in%geneL_ppi2){
+  aff_colnames = c(common_aff_colnames, "mCSM-PPI2")
+}
+
+if (tolower(gene)%in%geneL_na){
+  aff_colnames = c(common_aff_colnames, "mCSM-NA")
+}
+
+# building ffinal affinity colnames for correlation
+corr_aff_colnames = c(static_cols
+                      , aff_colnames
+                      , "dst_mode") # imp
+
+corr_df_aff = corr_affinity_df[, corr_aff_colnames]
+colnames(corr_df_aff)
+
+# Plot #3
+plot_corr_df_aff = my_gg_pairs(corr_df_aff
+                               , plot_title="Affinity estimates"
+                               #, tt_args_size = 4
+                               #, gp_args_size = 4
+                               )
+
+#### Combine plots #####
+# #png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
+# png(paste0(outdir_images
+#            ,tolower(gene)
+#            ,"_CorrAB.png"), height = 6, width=11.75, unit="in",res=300)
+# 
+# cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
+#                    ggmatrix_gtable(plot_corr_df_cons),
+#                    #                  ggmatrix_gtable(plot_corr_df_aff),
+#                    #                   nrow=1, ncol=3, rel_heights = 7,7,3
+#                    nrow=1,
+#                    #rel_heights = 1,1
+#                    labels = "AUTO",
+#                    label_size = 12)
+# dev.off()
+# 
+# # affinity corr
+# #png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
+# png(paste0(outdir_images
+#            ,tolower(gene)
+#            ,"_CorrC.png"), height =7, width=7, unit="in",res=300)
+# 
+# cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
+#                    labels = "C",
+#                    label_size = 12)
+# dev.off()
+
+#### Combine A ####
+png(paste0(outdir_images
+           ,tolower(gene)
+           ,"_CorrA.png"), height =8, width=8, unit="in",res=300)
+
+cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
+                   labels = "A",
+                   label_size = 12)
+dev.off()
+
+#### Combine B+C ####
+# B + C
+png(paste0(outdir_images
+           ,tolower(gene)
+           ,"_CorrBC.png"), height = 6, width=11.75, unit="in",res=300)
+
+cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_cons),
+                   ggmatrix_gtable(plot_corr_df_aff),
+                   #                  ggmatrix_gtable(plot_corr_df_aff),
+                   #                   nrow=1, ncol=3, rel_heights = 7,7,3
+                   nrow=1,
+                   #rel_heights = 1,1
+                   labels = c("B", "C"),
+                   label_size = 12)
+dev.off()
+
diff --git a/scripts/plotting/plotting_thesis/rpob/katg_other_plots.R b/scripts/plotting/plotting_thesis/rpob/katg_other_plots.R
new file mode 100644
index 0000000..989f8ee
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/rpob/katg_other_plots.R
@@ -0,0 +1,2 @@
+
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R")
\ No newline at end of file
diff --git a/scripts/plotting/plotting_thesis/rpob/linage_bp_dist.R b/scripts/plotting/plotting_thesis/rpob/linage_bp_dist.R
new file mode 100644
index 0000000..bfe6118
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/rpob/linage_bp_dist.R
@@ -0,0 +1,172 @@
+#!/usr/bin/env Rscript  
+
+#########################################################
+# TASK: Lineage plots [merged_df2]
+# Count
+# Diversity
+# Average stability dist
+# Avergae affinity dist: optional
+#########################################################
+#=======
+# output
+#=======
+# outdir_images = paste0("~/git/Writing/thesis/images/results/"
+#                        , tolower(gene), "/")
+# cat("plots will output to:", outdir_images)
+#########################################################
+
+#===============
+#Quick numbers checks
+#===============
+nsample_lin = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
+
+if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels)) ){
+  cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin)
+      , "\nCounting R and S samples")
+  if( sum(table(nsample_lin$sensitivity)) ==  nrow(nsample_lin) ){
+    cat("\nPASSNumbers cross checked:")
+    print(table(nsample_lin$sensitivity))
+  }
+}else{
+  stop("Abort: Numbers mismatch. Please check")
+}
+########################################################################
+###################################################
+#                  Lineage barplots               #
+###################################################
+
+#===============================
+# lineage sample and SNP count
+#===============================
+lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']]
+             , all_lineages = F
+             , x_categ = "sel_lineages"
+             , y_count = "p_count"
+             , use_lineages = c("L1", "L2", "L3", "L4")
+             , bar_fill_categ = "count_categ"
+             , display_label_col = "p_count"
+             , bar_stat_stype = "identity"
+             , d_lab_size = 8
+             , d_lab_col = "black"
+             , my_xats = 25 # x axis text size
+             , my_yats = 25 # y axis text sized_lab_size
+             , my_xals = 25 # x axis label size
+             , my_yals = 25 # y axis label size
+             , my_lls  = 25 # legend label size
+             , bar_col_labels =  c("SNPs", "Total Samples")
+             , bar_col_values = c("grey50", "gray75")
+             , bar_leg_name = ""
+             , leg_location = "top"
+             , y_log10 = F
+             , y_scale_percent = FALSE
+             , y_label = c("Count")
+             )
+lin_countP
+#===============================
+# lineage SNP diversity count
+#===============================
+lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']]
+                              , x_categ = "sel_lineages"
+                              , y_count = "snp_diversity"
+                              #, all_lineages = F
+                              , use_lineages = c("L1", "L2", "L3", "L4")
+                              , display_label_col = "snp_diversity_f"
+                              , bar_stat_stype = "identity"
+                              , x_lab_angle = 90
+                              , d_lab_size =9
+                              , my_xats = 25 # x axis text size
+                              , my_yats = 25 # y axis text size
+                              , my_xals = 25 # x axis label size
+                              , my_yals = 25 # y axis label size
+                              , my_lls  = 25 # legend label size
+                              , y_log10 = F
+                              , y_scale_percent = F
+                              , leg_location = "top"
+                              , y_label = "Percent" #"SNP diversity"
+                              , bp_plot_title = "nsSNP diversity"
+                              , title_colour = "black" #"chocolate4"
+                              , subtitle_text = NULL
+                              , sts = 20
+                              , subtitle_colour = "#350E20FF")
+lin_diversityP
+#=============================================
+# Output plots: Lineage count and Diversity
+#=============================================
+# lineage_bp_CL =  paste0(outdir_images
+#                         ,tolower(gene)
+#                         ,"_lineage_bp_CL_Tall.svg")
+# 
+# cat("Lineage barplots:", lineage_bp_CL)
+# svg(lineage_bp_CL, width = 8, height = 15)
+# 
+# cowplot::plot_grid(lin_countP, lin_diversityP
+#                      #, labels = c("(a)", "(b)", "(c)", "(d)")
+#                       , nrow = 2
+#                      # , ncols = 2
+#                       , labels = "AUTO"
+#                       , label_size = 25)
+# 
+# dev.off()
+########################################################################
+
+
+###################################################
+#                   Stability dist                #
+###################################################
+# scaled_cols_stability = c("duet_scaled"       
+#                           , "deepddg_scaled"   
+#                           , "ddg_dynamut2_scaled"
+#                           , "foldx_scaled"
+#                           , "avg_stability_scaled")
+
+my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel
+#plotdf = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
+
+linP_dm_om = lineage_distP(merged_df2
+                           , with_facet = F
+                           , x_axis = "avg_stability_scaled"
+                           , y_axis = "lineage_labels"
+                           , x_lab = my_xlabel
+                           , use_lineages = c("L1", "L2", "L3", "L4")
+                           #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
+                           , fill_categ = "sensitivity"
+                           , fill_categ_cols = c("red", "blue")
+                           , label_categories = c("Resistant", "Sensitive")
+                           , leg_label = "Mutation group"
+                           , my_ats = 22 # axis text size
+                           , my_als = 22 # axis label size
+                           , my_leg_ts = 22
+                           , my_leg_title = 22
+                           , my_strip_ts = 22
+                           , alpha = 0.56
+)
+
+linP_dm_om
+
+###################################################
+#                 Affinity dist [OPTIONAL]        #
+###################################################
+# scaled_cols_affinity = c("affinity_scaled" 
+#                          , "mmcsm_lig_scaled" 
+#                          , "mcsm_ppi2_scaled" 
+#                          , "mcsm_na_scaled"
+#                          , "avg_lig_affinity_scaled")
+
+# lineage_distP(merged_df2
+#               , with_facet = F
+#               , x_axis = "avg_lig_affinity_scaled"
+#               , y_axis = "lineage_labels"
+#               , x_lab = my_xlabel
+#               , use_lineages = c("L1", "L2", "L3", "L4")
+#               #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
+#               , fill_categ = "sensitivity"
+#               , fill_categ_cols = c("red", "blue")
+#               , label_categories = c("Resistant", "Sensitive")
+#               , leg_label = "Mutation group"
+#               , my_ats = 22 # axis text size
+#               , my_als = 22 # axis label size
+#               , my_leg_ts = 22
+#               , my_leg_title = 22
+#               , my_strip_ts = 22
+#               , alpha = 0.56
+# )
diff --git a/scripts/plotting/plotting_thesis/rpob/linage_bp_dist_layout.R b/scripts/plotting/plotting_thesis/rpob/linage_bp_dist_layout.R
new file mode 100644
index 0000000..00f3adc
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/rpob/linage_bp_dist_layout.R
@@ -0,0 +1,30 @@
+#!/usr/bin/env Rscript  
+
+###########################################
+# TASK: generate plots for lineage
+# Individual plots in 
+#lineage_bp_both.R
+#linage_dist_ens_stability.R
+###########################################
+my_label_size = 25
+
+linPlots_combined = paste0(outdir_images
+                        , tolower(gene)
+                        ,"_linP_combined.svg")
+
+cat("\nOutput plot:", linPlots_combined)
+svg(linPlots_combined, width = 18, height = 12)
+
+cowplot::plot_grid(
+  cowplot::plot_grid(lin_countP, lin_diversityP
+                     , nrow = 2
+                     , labels = "AUTO"
+                     , label_size = my_label_size),
+  NULL,
+  linP_dm_om,
+  nrow = 1,
+  labels = c("", "", "C"),
+  label_size = my_label_size,
+  rel_widths = c(35, 3, 52)
+)
+dev.off()
\ No newline at end of file
diff --git a/scripts/plotting/plotting_thesis/rpob/pe_sens_site_count_rpob.R b/scripts/plotting/plotting_thesis/rpob/pe_sens_site_count_rpob.R
new file mode 100644
index 0000000..741d7fe
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/rpob/pe_sens_site_count_rpob.R
@@ -0,0 +1,179 @@
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/prominent_effects_rpob.R")
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/sensitivity_count_rpob.R")
+
+##############################################################
+#                         PE count
+#lig-- na--ppi2--stab
+# pe_colour_map = c("DD_lig"         = "#ffd700"   # gold
+#                   , "SS_lig"         = "#f0e68c"  # khaki
+#                   
+#                   , "DD_nucleic_acid"= "#a0522d"  # sienna
+#                   , "SS_nucleic_acid"= "#d2b48c"  # tan
+#                   
+#                   , "DD_ppi2"        = "#da70d6"  # orchid
+#                   , "SS_ppi2"        = "#ff1493"  # deeppink
+#                   
+#                   , "DD_stability"   = "#f8766d"  # red
+#                   , "SS_stability"   = "#00BFC4") # blue
+table(str_df_plot_cols$pe_effect_outcome)
+##############################################################
+#===========
+#PE count
+#===========
+rects <- data.frame(x=1:8, 
+                     colors = c("#ffd700" ,
+                                "#f0e68c" ,
+                                
+                                "#a0522d" ,
+                                "#d2b48c" ,
+                                
+                                "#da70d6" ,
+                                "#ff1493" ,
+                                
+                                "#f8766d" ,
+                                "#00BFC4") 
+                     )
+
+rects$text =  c("-ve Lig"
+                , "+ve Lig"
+                
+                , "-ve\nNuc.Acid"
+                , "+ve\nNuc.Acid"
+                
+                , "-ve PPI2"
+                , "+ve PPI2"
+
+                , "-ve stability"
+                , "+ve stability"
+)
+
+cell1 = table(str_df_plot_cols$pe_effect_outcome)[["DD_lig"]]
+cell2 = table(str_df_plot_cols$pe_effect_outcome)[["SS_lig"]]
+
+cell3 = table(str_df_plot_cols$pe_effect_outcome)[["DD_nucleic_acid"]]
+cell4 = table(str_df_plot_cols$pe_effect_outcome)[["SS_nucleic_acid"]]
+
+cell5 = table(str_df_plot_cols$pe_effect_outcome)[["DD_ppi2"]]
+cell6 = table(str_df_plot_cols$pe_effect_outcome)[["SS_ppi2"]]
+
+cell7 = table(str_df_plot_cols$pe_effect_outcome)[["DD_stability"]]
+cell8 = table(str_df_plot_cols$pe_effect_outcome)[["SS_stability"]]
+
+
+#rects$numbers = c(38, 0, 22, 9, 108, 681) #for embb
+rects$numbers = c(cell1, cell2,
+                  cell3, cell4,
+                  cell5, cell6,
+                  cell7, cell8)
+
+rects$num_labels = paste0("n=", rects$numbers)
+
+rects
+#------
+# Plot
+#------
+#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
+peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
+  geom_tile(width = 1, height = 1) + # make square tiles
+  geom_text(color = "black", size = 1.7) + # add white text in the middle
+  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
+  coord_fixed() + # make sure tiles are square
+  coord_flip()+ scale_x_reverse() +
+  # theme_void() # remove any axis markings
+  theme_nothing() # remove any axis markings
+peP
+
+#------
+# Plot: this one is better
+#------
+peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
+  geom_tile() + # make square tiles
+  geom_text(color = "black", size = 1.6) + # add white text in the middle
+  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
+  coord_fixed() + # make sure tiles are square
+  theme_nothing() # remove any axis markings
+peP2
+
+########################################################
+# From: script sensitivity_count per gene
+#===============================
+# Sensitivity count: SITE
+#===============================
+#--------
+# embb
+#--------
+#rsc = 54
+#ccc = 46
+#ssc = 470
+
+rsc = site_Rc; rsc
+ccc = site_Cc; ccc
+ssc = site_Sc; ssc
+
+rect_rs_siteC <- data.frame(x=1:3,
+                    colors = c("red",
+                               "purple",
+                               "blue")
+                    )
+                               
+rect_rs_siteC                          
+rect_rs_siteC$text = c("Resistant",
+                       "Common",
+                       "Sensitive")
+
+rect_rs_siteC$numbers = c(rsc,ccc,ssc)
+rect_rs_siteC$num_labels = paste0("n=", rect_rs_siteC$numbers)
+rect_rs_siteC
+
+#------
+# Plot
+#------
+sens_siteP = ggplot(rect_rs_siteC, aes(x, y = 0,
+                                       fill = colors,
+                                       label = num_labels
+                                       #,label = paste0(text,"\n", num_labels)
+                                       )) +
+  geom_tile(width = 1, height = 1) +
+  #geom_text(color = "black", size = 1.7) +
+  geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + 
+  scale_fill_identity(guide = "none") + 
+  coord_fixed()+
+  theme_nothing() # remove any axis markings
+sens_siteP
+
+################################################################
+#===============================
+# Sensitivity count: Mutations
+#===============================
+table(sensP_df$sensitivity)
+muts_Rc = table(sensP_df$sensitivity)[["R"]]
+muts_Sc = table(sensP_df$sensitivity)[["S"]]
+rect_sens <- data.frame(x=1:2,
+                              colors = c("red",
+                                         "blue")
+                        )
+
+rect_sens$text = c("Resistant",
+                       "Sensitive")
+rect_sens$numbers = c(muts_Rc,muts_Sc)
+rect_sens$num_labels = paste0("n=", rect_sens$numbers)
+rect_sens
+#------
+# Plot
+#------
+sensP = ggplot(rect_sens, aes(x, y = 0,
+                              fill = colors,
+                              label = paste0(text,"\n", num_labels))) +
+  geom_tile(width = 1, height = 1) +
+  #geom_text(color = "black", size = 1.7) +
+  geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + 
+  scale_fill_identity(guide = "none") + 
+  coord_fixed()+
+  theme_nothing() # remove any axis markings
+sensP
+
+sensP2 = sensP + 
+   coord_flip() + scale_x_reverse()
+sensP2
+
+
diff --git a/scripts/plotting/plotting_thesis/rpob/prominent_effects_rpob.R b/scripts/plotting/plotting_thesis/rpob/prominent_effects_rpob.R
new file mode 100644
index 0000000..685a255
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/rpob/prominent_effects_rpob.R
@@ -0,0 +1,398 @@
+#!/usr/bin/env Rscript       
+#source("~/git/LSHTM_analysis/config/alr.R")
+#source("~/git/LSHTM_analysis/config/embb.R")
+#source("~/git/LSHTM_analysis/config/katg.R")
+#source("~/git/LSHTM_analysis/config/gid.R")
+#source("~/git/LSHTM_analysis/config/pnca.R")
+#source("~/git/LSHTM_analysis/config/rpob.R")
+
+# get plotting dfs 
+#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+########################################################
+if (length(merged_df3$position) == length(merged_df3$X5uhc_position) ){
+  pos_colname = "X5uhc_position"
+}else{
+  stop("Abort: position colname could not be found")
+}
+
+#-------------
+# from ~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R
+#-------------
+length(all_stability_cols); length(raw_stability_cols)
+length(scaled_stability_cols); length(outcome_stability_cols)
+length(affinity_dist_colnames)
+
+
+static_cols =  c("mutationinformation",
+                 #"position",
+                 pos_colname,
+                 "sensitivity")
+
+other_cols_all = c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames)
+
+#omit avg cols and foldx_scaled_signC cols
+other_cols =  other_cols_all[grep("avg", other_cols_all, invert = T)]
+other_cols = other_cols[grep("foldx_scaled_signC",other_cols, invert = T )]
+other_cols
+
+cols_to_extract =  c(static_cols, other_cols)
+cat("\nExtracting cols:", cols_to_extract)
+expected_ncols = length(static_cols) + length(other_cols)
+expected_ncols
+
+str_df = merged_df3[, cols_to_extract]
+
+if (ncol(str_df) == expected_ncols){
+  cat("\nPASS: successfully extracted cols for calculating prominent effects")
+}else{
+  stop("\nAbort: Could not extract cols for calculating prominent effects")
+}
+
+#=========================
+# Masking affinity columns
+#=========================
+# First make values for affinity cols 0 when their corresponding dist >10
+head(str_df)
+
+# replace in place affinity values >10
+str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0
+str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0
+
+#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10
+if (tolower(gene)%in%geneL_ppi2){
+  str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0
+} 
+  
+# na gene: replace in place na affinity values where na dist >10
+if (tolower(gene)%in%geneL_na){
+  str_df[str_df["nca_distance"]>10,"mcsm_na_scaled"]=0
+}
+
+colnames(str_df)
+head(str_df)
+
+# get names of cols to calculate the prominent effects from
+# scaled_cols_tc = c("duet_scaled",
+#                    "deepddg_scaled",
+#                    "ddg_dynamut2_scaled",
+#                    "foldx_scaled",
+#                    "affinity_scaled",
+#                    "mmcsm_lig_scaled",
+#                    "mcsm_ppi2_scaled", "mcsm_na_scaled")
+
+scaled_cols_tc = other_cols[grep("scaled", other_cols)]
+
+#--------------------------------
+#get rowmax for absolute values
+#--------------------------------
+#str_df$row_maximum = apply(str_df[,-1], 1, function(x){max(abs(x))})
+#str_df$row_maximum = apply(str_df[,scaled_cols_tc], 1, function(x){max(abs(x))})
+
+#correct
+#BOO= abs(str_df[,scaled_cols_tc]) == str_df[,'row_maximum']; head(BOO)
+#also corr but weird
+#POO = which(abs(str_df[,scaled_cols_tc]) == str_df[,'row_maximum'], arr.ind =T); head(POO)
+
+################################################
+
+
+#===============
+# whole df
+#===============
+give_col=function(x,y,df=str_df){
+  df[df[[pos_colname]]==x,y]
+}
+
+for (i in unique(str_df[[pos_colname]]) ){
+  print(i)
+  #cat(length(unique(str_df[[pos_colname]])))
+  
+  biggest     = max(abs(give_col(i,scaled_cols_tc)))
+  
+  str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest
+  str_df[str_df[[pos_colname]]==i,'effect_type']= names(
+    give_col(i,scaled_cols_tc)[which(
+      abs(
+        give_col(i,scaled_cols_tc)
+      ) == biggest, arr.ind=T
+    )[, "col"]])[1]
+  
+  effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values
+  
+  # get index/rowname for value of max effect, and then use it to get the original sign
+  # here
+  #ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T))
+  ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T))
+  
+  str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1]
+}
+
+# ends with suffix 2 if dups
+str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
+colnames(str_df)
+table(str_df$effect_type)
+
+# check
+str_df_check = str_df[str_df[[pos_colname]]%in%c(24, 32, 160, 303, 334),]
+
+#================
+# for Plots
+#================
+str_df_short = str_df[, c("mutationinformation",
+                          #"position",
+                          pos_colname,
+                          "sensitivity"
+                          , "effect_type"
+                          , "effect_sign")]
+
+table(str_df_short$effect_type)
+table(str_df_short$effect_sign)
+str(str_df_short)
+
+# assign pe outcome
+str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
+table(str_df_short$pe_outcome )
+table(str_df_short$effect_sign)
+
+#==============
+# group effect type:
+# lig, ppi2, nuc. acid, stability
+#==============
+affcols = c("affinity_scaled",  "mmcsm_lig_scaled")
+ppi2_cols = c("mcsm_ppi2_scaled")
+nuc_na_cols = c("mcsm_na_scaled")
+
+
+#lig 
+table(str_df_short$effect_type)
+str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
+                                     , "lig"
+                                     , str_df_short$effect_type)
+table(str_df_short$effect_grouped)
+
+#ppi2
+str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols
+                                     , "ppi2"
+                                     , str_df_short$effect_grouped)
+table(str_df_short$effect_grouped)
+
+
+#na
+str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%nuc_na_cols
+                                     , "nucleic_acid"
+                                     , str_df_short$effect_grouped)
+table(str_df_short$effect_grouped)
+
+#stability
+str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig",
+                                                                       "ppi2", 
+                                                                       "nucleic_acid")
+                                     , "stability"
+                                     , str_df_short$effect_grouped)
+
+table(str_df_short$effect_grouped)
+
+# create a sign as well
+str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_"
+                                     , str_df_short$effect_grouped)
+
+table(str_df_short$pe_effect_outcome)
+
+#####################################################################
+# Chimera: for colouring
+####################################################################
+
+#-------------------------------------
+# get df with unique position
+#pos_colname = "position"
+if (length(merged_df3[[pos_colname]]) == length(merged_df3$X5uhc_position) ){
+  pos_colname = "X5uhc_position"
+}else{
+  stop("Abort: position colname could not be found")
+}
+#--------------------------------------
+#data[!duplicated(data$x), ]   
+str_df_plot = str_df_short[!duplicated(str_df[[pos_colname]]),]
+
+if (nrow(str_df_plot) == length(unique(str_df[[pos_colname]]))){
+  cat("\nPASS: successfully extracted df with unique positions")
+}else{
+  stop("\nAbort: Could not extract df with unique positions")
+}
+
+#-------------------------------------
+# generate colours for effect types
+#--------------------------------------
+str_df_plot_cols = str_df_plot[, c(pos_colname,
+                                   "sensitivity",
+                                   "pe_outcome",
+                                   "effect_grouped",
+                                   "pe_effect_outcome")]
+head(str_df_plot_cols)
+
+# colour intensity based on sign
+#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light")
+str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$pe_outcome=="DD", "bright", "light")
+
+table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome)
+head(str_df_plot_cols)
+
+# colour based on effect
+table(str_df_plot_cols$pe_effect_outcome)
+
+# colors = c("#ffd700" #gold
+#            , "#f0e68c" #khaki
+#            , "#da70d6"# orchid
+#            , "#ff1493"# deeppink
+#            , "#a0522d" #sienna
+#            , "#d2b48c" # tan
+#            , "#00BFC4" #, "#007d85" #blue
+#            , "#F8766D" )# red
+
+pe_colour_map = c("DD_lig"         = "#ffd700"   # gold
+                , "SS_lig"         = "#f0e68c"  # khaki
+             
+                , "DD_nucleic_acid"= "#a0522d"  # sienna
+                , "SS_nucleic_acid"= "#d2b48c"  # tan
+                
+                , "DD_ppi2"        = "#da70d6"  # orchid
+                , "SS_ppi2"        = "#ff1493"  # deeppink
+
+                , "DD_stability"   = "#f8766d"  # red
+                , "SS_stability"   = "#00BFC4") # blue
+        
+#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE)
+
+#map the colours
+str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$pe_effect_outcome
+                                 ,function(x){pe_colour_map[[x]]}
+                                 ))
+head(str_df_plot_cols$colour_map)
+table(str_df_plot_cols$colour_map)
+table(str_df_plot_cols$pe_effect_outcome)
+
+# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue
+#                                   , "_"
+#                                   , str_df_plot_cols$colour_map)
+# head(str_df_plot_cols$colours)
+# table(str_df_plot_cols$colours)
+# 
+# 
+# class(str_df_plot_cols$colour_map)
+# str(str_df_plot_cols)
+
+# sort by colour
+head(str_df_plot_cols)
+str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ]
+head(str_df_plot_cols)
+
+#======================================
+# write file with prominent effects
+#======================================
+outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
+write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv"))
+
+################################
+# printing for chimera
+###############################
+str_df_plot_cols$pos_chain = paste0(str_df_plot_cols[[pos_colname]], ".C,")
+table(str_df_plot_cols$colour_map)
+table(str_df_plot_cols$pe_effect_outcome)
+
+#===================================================
+#-------------------
+# Ligand Affinity
+#-------------------
+# -ve Lig Aff
+dd_lig        = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",]
+if (nrow(dd_lig) == table(str_df_plot_cols$pe_effect_outcome)[['DD_lig']]){
+  dd_lig_pos  = dd_lig[[pos_colname]]
+}else{
+  stop("Abort: DD affinity colour numbers mismtatch")
+}
+
+# +ve Lig Aff
+ss_lig            = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",]
+if (nrow(ss_lig)  == table(str_df_plot_cols$pe_effect_outcome)[['SS_lig']]){
+  ss_lig_pos      = ss_lig[[pos_colname]]
+}else{
+  stop("Abort: SS affinity colour numbers mismtatch")
+}
+
+#put in chimera cmd
+paste0(dd_lig_pos, ".C")
+paste0(ss_lig_pos, ".C")
+
+#===================================================
+#------------------------
+# Nucleic Acid Affinity
+#------------------------
+# -ve NA aff
+dd_nca           =  str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_nucleic_acid",]
+if (nrow(dd_nca) == table(str_df_plot_cols$pe_effect_outcome)[['DD_nucleic_acid']]){
+  dd_nca_pos     =  dd_nca[[pos_colname]]
+}else{
+  stop("Abort: DD nucleic_acid colour numbers mismtatch")
+}
+
+# +ve NA aff
+ss_nca            = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_nucleic_acid",]
+if (nrow(ss_nca)  == table(str_df_plot_cols$pe_effect_outcome)[['SS_nucleic_acid']]){
+  ss_nca_pos      = ss_nca[[pos_colname]]
+}else{
+  stop("Abort: SS nucleic_acid colour numbers mismtatch")
+}
+
+#put in chimera cmd
+paste0(dd_nca_pos, ".C")
+paste0(ss_nca_pos, ".C")
+#===================================================
+#-------------------
+# PPI2 Affinity
+#-------------------
+# -ve PPI2
+dd_ppi2           =  str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_ppi2",]
+if (nrow(dd_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['DD_ppi2']]){
+  dd_ppi2_pos     =  dd_ppi2[[pos_colname]]
+}else{
+  stop("Abort: DD PPI2 colour numbers mismtatch")
+}
+
+# +ve PPI2
+ss_ppi2            = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_ppi2",]
+if (nrow(ss_ppi2)  == table(str_df_plot_cols$pe_effect_outcome)[['SS_ppi2']]){
+  ss_ppi2_pos      = ss_ppi2[[pos_colname]]
+}else{
+  stop("Abort: SS PPI2 colour numbers mismtatch")
+}
+
+#put in chimera cmd
+paste0(dd_ppi2_pos, ".C")
+paste0(ss_ppi2_pos, ".C")
+
+#=========================================================
+#------------------------
+# Stability
+#------------------------
+# -ve Stability
+dd_stability           =  str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_stability",]
+if (nrow(dd_stability) == table(str_df_plot_cols$pe_effect_outcome)[['DD_stability']]){
+  dd_stability_pos     =  dd_stability[[pos_colname]]
+}else{
+  stop("Abort: DD Stability colour numbers mismtatch")
+}
+
+# +ve Stability
+ss_stability            = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_stability",]
+if (nrow(ss_stability)  == table(str_df_plot_cols$pe_effect_outcome)[['SS_stability']]){
+  ss_stability_pos      = ss_stability[[pos_colname]]
+}else{
+  stop("Abort: SS Stability colour numbers mismtatch")
+}
+
+#put in chimera cmd
+# stabiliting first as it has less numbers
+paste0(ss_stability_pos, ".C")
+paste0(dd_stability_pos, ".C")
+####################################################################
+
diff --git a/scripts/plotting/plotting_thesis/rpob/sensitivity_count_rpob.R b/scripts/plotting/plotting_thesis/rpob/sensitivity_count_rpob.R
new file mode 100644
index 0000000..a0fc5dd
--- /dev/null
+++ b/scripts/plotting/plotting_thesis/rpob/sensitivity_count_rpob.R
@@ -0,0 +1,66 @@
+#=========================
+# Count Sensitivity
+# Mutations and positions
+#=========================
+pos_colname_c ="X5uhc_position"
+#pos_colname_c ="position"
+
+sensP_df = merged_df3[,c("mutationinformation",
+                         #"position",
+                         pos_colname_c,
+                         "sensitivity")]
+
+head(sensP_df)
+table(sensP_df$sensitivity)
+
+#---------------
+# Total unique positions
+#----------------
+tot_mut_pos = length(unique(sensP_df[[pos_colname_c]]))
+cat("\nNo of Tot muts sites:", tot_mut_pos)
+
+# resistant mut pos
+sens_site_allR = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="R"]
+sens_site_UR   = unique(sens_site_allR)
+length(sens_site_UR)
+
+# Sensitive mut pos
+sens_site_allS = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="S"]
+sens_site_US = unique(sens_site_allS)
+length(sens_site_UR)
+
+#---------------
+# Common Sites
+#----------------
+common_pos = intersect(sens_site_UR,sens_site_US)
+site_Cc = length(common_pos)
+cat("\nNo of Common sites:", site_Cc
+    , "\nThese are:", common_pos)
+
+#---------------
+# Resistant muts
+#----------------
+site_R = sens_site_UR[!sens_site_UR%in%common_pos]
+site_Rc = length(site_R)
+
+if ( length(sens_site_allR) == table(sensP_df$sensitivity)[['R']] ){
+  cat("\nNo of R muts:", length(sens_site_allR) 
+      , "\nNo. of R sites:",site_Rc
+      , "\nThese are:", site_R
+)
+}
+
+#---------------
+# Sensitive muts
+#----------------
+site_S = sens_site_US[!sens_site_US%in%common_pos]
+site_Sc = length(site_S)
+
+if ( length(sens_site_allS) == table(sensP_df$sensitivity)[['S']] ){
+  cat("\nNo of S muts:", length(sens_site_allS) 
+      , "\nNo. of S sites:", site_Sc
+      , "\nThese are:", site_S)
+}
+
+#########################
+