added pnca plot dir to generate plots that weren't covered in the paper

2022-09-05 14:02:04 +01:00 · 2022-09-05 14:02:04 +01:00 · 2cec743ae0
commit 2cec743ae0
parent 1dacebbaf6
8 changed files with 1391 additions and 0 deletions
--- a/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca.R
+++ b/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca.R
@ -0,0 +1,364 @@
+#!/usr/bin/env Rscript   
+#########################################################
+# TASK: Barplots
+# basic barplots with outcome
+# basic barplots with frequency of count of mutations
+#########################################################
+#=============
+# Data: Input
+#==============
+#source("~/git/LSHTM_analysis/config/pnca.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+
+#cat("\nSourced plotting cols as well:", length(plotting_cols))
+
+####################################################
+# Show the class of the merged input (auto-printed when run at top level)
+class(merged_df3)
+
+# Plotting copy of the merged data with the pos_count column dropped
+df3 <- subset(merged_df3, select = -c(pos_count))
+
+#=======
+# output
+#=======
+# Per-gene image directory; `gene` is expected to be defined by the caller
+outdir_images <- paste0(
+  "~/git/Writing/thesis/images/results/",
+  tolower(gene),
+  "/"
+)
+cat("plots will output to:", outdir_images)
+
+##########################################################
+# Sizes handed to stability_count_bp() for the red/blue outcome barplots
+# (passed on under the same names: sts, lts, ats, als, ltis, geom_ls)
+sts <- lts <- ats <- als <- ltis <- 8
+geom_ls <- 2.2
+
+# Sizes handed to site_snp_count_bp() for the site-count barplots
+subtitle_size <- leg_text_size <- axis_text_size <- axis_label_size <- 8
+geom_ls_pc <- 2.2
+
+###########################################################
+#------------------------------
+# plot default sizes
+#------------------------------
+#=========================
+# Affinity outcome
+# check this var: outcome_cols_affinity
+# get from preformatting or put in globals
+#==========================
+# Echo the distance settings (expected from globals); at top level each
+# bare name is auto-printed
+DistCutOff
+LigDist_colname  # = "ligand_distance" # from globals
+ppi2Dist_colname
+naDist_colname
+
+###########################################################
+# Rows of df3 whose distance column is below DistCutOff, one subset per
+# distance column
+df3_lig  <- df3[df3[[LigDist_colname]] < DistCutOff, ]
+df3_ppi2 <- df3[df3[[ppi2Dist_colname]] < DistCutOff, ]
+df3_na   <- df3[df3[[naDist_colname]] < DistCutOff, ]
+
+# Shared title fragment for the distance-restricted plots
+common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)
+
+#------------------------------
+# Outcome barplot: ligand_outcome (mCSM-lig)
+# rows of df3 within DistCutOff of the ligand
+#------------------------------
+mLigP <- stability_count_bp(
+  plotdf = df3_lig,
+  df_colname = "ligand_outcome",
+  # leg_title and bp_plot_title are left unset here
+  yaxis_title = "Number of nsSNPs",
+  leg_position = "none",
+  subtitle_text = "mCSM\nLig",
+  bar_fill_values = c("#F8766D", "#00BFC4"),
+  subtitle_colour = "black",
+  sts = sts, lts = lts, ats = ats, als = als, ltis = ltis,
+  geom_ls = geom_ls
+)
+mLigP
+#------------------------------
+# Outcome barplot: mmcsm_lig_outcome (mmCSM-lig)
+# same df3_lig rows as the mCSM-lig plot, different outcome column
+#------------------------------
+mmLigP <- stability_count_bp(
+  plotdf = df3_lig,
+  df_colname = "mmcsm_lig_outcome",
+  # leg_title, label_categories and bp_plot_title are left unset here
+  yaxis_title = "",
+  leg_position = "none",
+  subtitle_text = "mmCSM\nLig",
+  bar_fill_values = c("#F8766D", "#00BFC4"),
+  subtitle_colour = "black",
+  sts = sts, lts = lts, ats = ats, als = als, ltis = ltis,
+  geom_ls = geom_ls
+)
+mmLigP
+#------------------------------
+# Outcome barplot: mcsm_ppi2_outcome (mCSM-PPI2)
+# rows of df3 within DistCutOff by ppi2Dist_colname;
+# only built when the gene is listed in geneL_ppi2
+#------------------------------
+if (tolower(gene) %in% geneL_ppi2) {
+  ppi2P <- stability_count_bp(
+    plotdf = df3_ppi2,
+    df_colname = "mcsm_ppi2_outcome",
+    # leg_title, label_categories and bp_plot_title are left unset here
+    yaxis_title = "Number of nsSNPs",
+    leg_position = "none",
+    subtitle_text = "mCSM\nPPI2",
+    bar_fill_values = c("#F8766D", "#00BFC4"),
+    subtitle_colour = "black",
+    sts = sts, lts = lts, ats = ats, als = als, ltis = ltis,
+    geom_ls = geom_ls
+  )
+  ppi2P
+}
+#------------------------------
+# Outcome barplot: mcsm_na_outcome (mCSM-NA)
+# rows of df3 within DistCutOff by naDist_colname;
+# only built when the gene is listed in geneL_na
+#------------------------------
+if (tolower(gene) %in% geneL_na) {
+  nca_distP <- stability_count_bp(
+    plotdf = df3_na,
+    df_colname = "mcsm_na_outcome",
+    # leg_title, label_categories and bp_plot_title are left unset here
+    yaxis_title = "Number of nsSNPs",
+    leg_position = "none",
+    subtitle_text = "mCSM\nNA",
+    bar_fill_values = c("#F8766D", "#00BFC4"),
+    subtitle_colour = "black",
+    sts = sts, lts = lts, ats = ats, als = als, ltis = ltis,
+    geom_ls = geom_ls
+  )
+  nca_distP
+}
+
+#####################################################################
+# ------------------------------
+# Site/SNP count barplot on df3_lig
+# (rows within DistCutOff of the ligand)
+# ------------------------------
+common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)
+
+posC_lig <- site_snp_count_bp(
+  plotdf = df3_lig,
+  df_colname = "position",
+  xaxis_title = "Number of nsSNPs",
+  yaxis_title = "Number of Sites",
+  subtitle_colour = "chocolate4",
+  subtitle_text = "",
+  subtitle_size = subtitle_size,
+  geom_ls = geom_ls_pc,
+  leg_text_size = leg_text_size,
+  axis_text_size = axis_text_size,
+  axis_label_size = axis_label_size
+)
+
+posC_lig
+#------------------------------
+# Site/SNP count barplot on df3_ppi2
+# only built when the gene is listed in geneL_ppi2
+#------------------------------
+if (tolower(gene) %in% geneL_ppi2) {
+  posC_ppi2 <- site_snp_count_bp(
+    plotdf = df3_ppi2,
+    df_colname = "position",
+    xaxis_title = "Number of nsSNPs",
+    yaxis_title = "Number of Sites",
+    subtitle_colour = "chocolate4",
+    subtitle_text = "",
+    subtitle_size = subtitle_size,
+    geom_ls = geom_ls_pc,
+    leg_text_size = leg_text_size,
+    axis_text_size = axis_text_size,
+    axis_label_size = axis_label_size
+  )
+  posC_ppi2
+}
+
+#------------------------------
+# Site/SNP count barplot on df3_na
+# only built when the gene is listed in geneL_na
+#------------------------------
+if (tolower(gene) %in% geneL_na) {
+  posC_nca <- site_snp_count_bp(
+    plotdf = df3_na,
+    df_colname = "position",
+    xaxis_title = "Number of nsSNPs",
+    yaxis_title = "Number of Sites",
+    subtitle_colour = "chocolate4",
+    subtitle_text = "",
+    subtitle_size = subtitle_size,
+    geom_ls = geom_ls_pc,
+    leg_text_size = leg_text_size,
+    axis_text_size = axis_text_size,
+    axis_label_size = axis_label_size
+  )
+  posC_nca
+}
+#===============================================================
+#------------------------------
+# Site/SNP count barplot on the full df3
+# (no distance filter applied)
+#------------------------------
+posC_all <- site_snp_count_bp(
+  plotdf = df3,
+  df_colname = "position",
+  xaxis_title = "Number of nsSNPs",
+  yaxis_title = "Number of Sites",
+  subtitle_colour = "chocolate4",
+  subtitle_text = "All mutations sites",
+  subtitle_size = subtitle_size,
+  geom_ls = geom_ls_pc,
+  leg_text_size = leg_text_size,
+  axis_text_size = axis_text_size,
+  axis_label_size = axis_label_size
+)
+posC_all
+##################################################################
+# Outcome barplot: consurf_outcome on the full df3, legend shown on top
+consurfP <- stability_count_bp(
+  plotdf = df3,
+  df_colname = "consurf_outcome",
+  # leg_title and label_categories are left unset here
+  yaxis_title = "Number of nsSNPs",
+  leg_position = "top",
+  subtitle_text = "ConSurf",
+  bar_fill_values = consurf_colours, # from globals
+  subtitle_colour = "black",
+  sts = sts, lts = lts, ats = ats, als = als, ltis = ltis,
+  geom_ls = geom_ls
+)
+
+consurfP
+
+##############################################################
+# Larger sizes used by the stability and PROVEAN/SNAP2 barplots below
+sts_so <- lts_so <- ats_so <- als_so <- ltis_so <- 10
+geom_ls_so <- 2.5
+#===================
+# Stability
+#===================
+# Outcome barplot: duet_outcome on the full df3
+duetP <- stability_count_bp(
+  plotdf = df3,
+  df_colname = "duet_outcome",
+  leg_title = "mCSM-DUET",
+  # label_categories is left unset here
+  yaxis_title = "Number of nsSNPs",
+  leg_position = "none",
+  subtitle_text = "mCSM-DUET",
+  bar_fill_values = c("#F8766D", "#00BFC4"),
+  subtitle_colour = "black",
+  sts = sts_so, lts = lts_so, ats = ats_so, als = als_so, ltis = ltis_so,
+  geom_ls = geom_ls_so
+)
+duetP
+
+# Outcome barplot: foldx_outcome on the full df3
+foldxP <- stability_count_bp(
+  plotdf = df3,
+  df_colname = "foldx_outcome",
+  # leg_title and label_categories are left unset here
+  yaxis_title = "",
+  leg_position = "none",
+  subtitle_text = "FoldX",
+  bar_fill_values = c("#F8766D", "#00BFC4"),
+  sts = sts_so, lts = lts_so, ats = ats_so, als = als_so, ltis = ltis_so,
+  geom_ls = geom_ls_so
+)
+foldxP
+
+# Outcome barplot: deepddg_outcome on the full df3
+deepddgP <- stability_count_bp(
+  plotdf = df3,
+  df_colname = "deepddg_outcome",
+  # leg_title and label_categories are left unset here
+  yaxis_title = "",
+  leg_position = "none",
+  subtitle_text = "DeepDDG",
+  bar_fill_values = c("#F8766D", "#00BFC4"),
+  sts = sts_so, lts = lts_so, ats = ats_so, als = als_so, ltis = ltis_so,
+  geom_ls = geom_ls_so
+)
+deepddgP
+
+# Outcome barplot: ddg_dynamut2_outcome on the full df3
+dynamut2P <- stability_count_bp(
+  plotdf = df3,
+  df_colname = "ddg_dynamut2_outcome",
+  # leg_title and label_categories are left unset here
+  yaxis_title = "",
+  leg_position = "none",
+  subtitle_text = "Dynamut2",
+  bar_fill_values = c("#F8766D", "#00BFC4"),
+  sts = sts_so, lts = lts_so, ats = ats_so, als = als_so, ltis = ltis_so,
+  geom_ls = geom_ls_so
+)
+dynamut2P
+
+# Outcome barplot: provean_outcome on the full df3 (pink palette)
+proveanP <- stability_count_bp(
+  plotdf = df3,
+  df_colname = "provean_outcome",
+  # leg_title and label_categories are left unset here
+  yaxis_title = "Number of nsSNPs",
+  leg_position = "none", # top
+  subtitle_text = "PROVEAN",
+  bar_fill_values = c("#D01C8B", "#F1B6DA"), # light pink and deep
+  sts = sts_so, lts = lts_so, ats = ats_so, als = als_so, ltis = ltis_so,
+  geom_ls = geom_ls_so
+)
+proveanP
+
+# Outcome barplot: snap2_outcome on the full df3 (pink palette)
+snap2P <- stability_count_bp(
+  plotdf = df3,
+  df_colname = "snap2_outcome",
+  # leg_title and label_categories are left unset here
+  yaxis_title = "",
+  leg_position = "none", # top
+  subtitle_text = "SNAP2",
+  bar_fill_values = c("#D01C8B", "#F1B6DA"), # light pink and deep
+  sts = sts_so, lts = lts_so, ats = ats_so, als = als_so, ltis = ltis_so,
+  geom_ls = geom_ls_so
+)
+snap2P
+#####################################################################################
--- a/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca_layout.R
+++ b/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca_layout.R
@ -0,0 +1,261 @@
+#=============
+# Data: Input
+#==============
+#source("~/git/LSHTM_analysis/config/pnca.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca.R")
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pnca/pe_sens_site_count_pnca.R")
+
+# Listing of the plot objects the layout code below relies on (pnca only).
+# Every bare name is evaluated, so an undefined plot object raises an error
+# here; inside the braces only the final expression is the value of the
+# `if` and can be auto-printed.
+if ( tolower(gene)%in%c("pnca") ){
+  cat("\nPlots available for layout are:")
+  
+  # outcome barplots built on the full df3
+  duetP
+  foldxP
+  deepddgP
+  dynamut2P
+  proveanP
+  snap2P
+  
+  # ligand-distance restricted plots
+  mLigP
+  mmLigP
+  posC_lig
+  
+  # ppi2 plots are not used in this layout
+  #ppi2P
+  #posC_ppi2
+  
+  # presumably created by pe_sens_site_count_pnca.R sourced above - confirm
+  peP2
+  sens_siteP
+  peP   # not used
+  sensP # not used
+}
+
+
+#========================
+# Common title settings
+#=========================
+# theme_gray() with the "sans" base family and a bold plot title;
+# extra arguments are forwarded to theme_gray()
+theme_georgia <- function(...) {
+  grey_base <- theme_gray(base_family = "sans", ...)
+  bold_title <- theme(plot.title = element_text(face = "bold"))
+  grey_base + bold_title
+}
+
+# Resolved plot.title element; its family and face are reused for the
+# draw_label() titles further down
+title_theme <- calc_element("plot.title", theme_georgia())
+
+###############################################################
+common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)
+
+# extract common legends
+# lig affinity: stored under its own name so it is not overwritten by the
+# stability legend assigned right after it
+lig_common_legend_outcome <- get_legend(
+  mLigP +
+    guides(color = guide_legend(nrow = 1)) +
+    theme(legend.position = "top")
+)
+
+# stability
+# NOTE(review): the affinity panel (p1) further down also uses
+# common_legend_outcome, i.e. this duetP-derived legend. Switch p1 to
+# lig_common_legend_outcome if the ligand legend was the intended one.
+common_legend_outcome <- get_legend(
+  duetP +
+    guides(color = guide_legend(nrow = 1)) +
+    theme(legend.position = "top")
+)
+# conservation
+cons_common_legend_outcome <- get_legend(
+  snap2P +
+    guides(color = guide_legend(nrow = 1)) +
+    theme(legend.position = "top")
+)
+###################################################################
+#==================================
+# Stability+Conservation: COMBINE
+#==================================
+# Font size shared by both section titles
+tt_size <- 10
+#----------------------------
+# stability and consv title
+#----------------------------
+# Section titles drawn as standalone labels, reusing the family and face
+# of title_theme
+tt_stab <- ggdraw() +
+  draw_label(
+    "Stability outcome",
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = tt_size
+  )
+
+tt_cons <- ggdraw() +
+  draw_label(
+    "Conservation outcome",
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = tt_size
+  )
+
+#----------------------
+# Output plot
+#-----------------------
+stab_cons_CLP <- paste0(outdir_images
+                        , tolower(gene)
+                        , "_stab_cons_BP_CLP.png")
+
+print(paste0("plot filename:", stab_cons_CLP))
+png(stab_cons_CLP, units = "in", width = 10, height = 5, res = 300)
+
+# Layout: [title + legend over panels A-D] | spacer | [title + legend over
+# panels E-F].
+# label_size = 12 is set on the grid that carries the E/F labels so they
+# match A-D; on the unlabelled wrapper it had no labels to apply to.
+# print() is explicit so the figure is drawn into the png device even when
+# this script is run through source(), where top-level values are not
+# auto-printed.
+print(
+  cowplot::plot_grid(
+    cowplot::plot_grid(
+      cowplot::plot_grid(
+        tt_stab,
+        common_legend_outcome,
+        nrow = 2
+      ),
+      cowplot::plot_grid(
+        duetP,
+        foldxP,
+        deepddgP,
+        dynamut2P,
+        nrow = 1,
+        labels = c("A", "B", "C", "D"),
+        label_size = 12),
+      nrow = 2,
+      rel_heights = c(1, 10)
+    ),
+    NULL,
+    cowplot::plot_grid(
+      cowplot::plot_grid(
+        cowplot::plot_grid(
+          tt_cons,
+          cons_common_legend_outcome,
+          nrow = 2
+        ),
+        cowplot::plot_grid(
+          proveanP,
+          snap2P,
+          nrow = 1,
+          labels = c("E", "F"),
+          label_size = 12,
+          align = "hv"),
+        nrow = 2,
+        rel_heights = c(1, 10)),
+      nrow = 1
+    ),
+    rel_widths = c(2, 0.15, 1),
+    nrow = 1
+  )
+)
+
+dev.off()
+
+#################################################################
+#=======================================
+# Affinity barplots: COMBINE ALL four
+#========================================
+# Title label for the ligand-distance panel
+ligT <- paste0(common_bp_title, " ligand")
+lig_affT <- ggdraw() +
+  draw_label(
+    ligT,
+    fontfamily = title_theme$family,
+    fontface = title_theme$face,
+    size = 8
+  )
+
+# Top strip: title over legend; bottom strip: the three ligand plots
+p1 <- cowplot::plot_grid(
+  cowplot::plot_grid(lig_affT, common_legend_outcome, nrow = 2),
+  cowplot::plot_grid(
+    mLigP, mmLigP, posC_lig,
+    nrow = 1,
+    rel_widths = c(1, 0.65, 1.8),
+    align = "h"
+  ),
+  nrow = 2,
+  rel_heights = c(1, 8)
+)
+p1
+
+#### Combine p1 ####
+# Dimensions (inches) of the ligand-affinity figure
+w <- 11.79
+h <- 3.5
+mut_impact_CLP <- paste0(outdir_images
+                         , tolower(gene)
+                         , "_mut_impactCLP.png")
+
+#svg(affP, width = 20, height = 5.5)
+print(paste0("plot filename:", mut_impact_CLP))
+png(mut_impact_CLP, units = "in", width = w, height = h, res = 300)
+
+# The figure is written a single time to mut_impact_CLP.
+# print() is explicit so the grid is drawn into the png device even when
+# this script is run through source(), where top-level values are not
+# auto-printed.
+print(
+  cowplot::plot_grid(p1,
+                     nrow = 1,
+                     labels = "AUTO",
+                     label_size = 12,
+                     rel_widths = c(2.5, 2, 2)
+                     #, rel_heights = c(1)
+  )
+)
+
+dev.off()
+
+##################################################
+# Evaluate both plot objects (auto-printed when run at top level)
+sensP
+consurfP
+#=================
+#### Combine sensitivity + ConSurf ####
+# or ConSurf
+#=================
+# Dimensions (inches) of the ConSurf figure
+w <- 3
+h <- 3
+# sens_conP =  paste0(outdir_images
+#                          ,tolower(gene)
+#                          ,"_sens_cons_CLP.png")
+# 
+# print(paste0("plot filename:", sens_conP))
+# png(sens_conP, units = "in", width = w, height = h, res = 300 )
+# 
+# cowplot::plot_grid(sensP, consurfP,
+#                    nrow = 2,
+#                    rel_heights = c(1, 1.5)
+#                    )
+# 
+# dev.off()
+
+conCLP <- paste0(outdir_images
+                 , tolower(gene)
+                 , "_consurf_BP.png")
+
+print(paste0("plot filename:", conCLP))
+png(conCLP, units = "in", width = w, height = h, res = 300)
+# explicit print(): a bare plot object is not drawn when the script is run
+# through source()
+print(consurfP)
+
+dev.off()
+#================================
+# Sensitivity mutation numbers: geom_tile
+#================================
+sensCLP <- paste0(outdir_images
+                  , tolower(gene)
+                  , "_sensN_tile.png")
+
+print(paste0("plot filename:", sensCLP))
+png(sensCLP, units = "in", width = 1, height = 1, res = 300)
+# explicit print(): a bare plot object is not drawn when the script is run
+# through source()
+print(sensP)
+dev.off()
+#================================
+# Sensitivity SITE numbers: geom_tile
+#================================
+sens_siteCLP <- paste0(outdir_images
+                       , tolower(gene)
+                       , "_sens_siteC_tile.png")
+
+print(paste0("plot filename:", sens_siteCLP))
+png(sens_siteCLP, units = "in", width = 1.2, height = 1, res = 300)
+# explicit print(): a bare plot object is not drawn when the script is run
+# through source()
+print(sens_siteP)
+dev.off()
+
+###########################################################
+
--- a/scripts/plotting/plotting_thesis/pnca/lineage_bp_dist.R
+++ b/scripts/plotting/plotting_thesis/pnca/lineage_bp_dist.R
@ -0,0 +1,165 @@
+#!/usr/bin/env Rscript  
+
+#########################################################
+# TASK: Lineage plots [merged_df2]
+# Count
+# Diversity
+# Average stability dist
+# Average affinity dist: optional
+#########################################################
+#=======
+# output
+#=======
+# outdir_images = paste0("~/git/Writing/thesis/images/results/"
+#                        , tolower(gene), "/")
+# cat("plots will output to:", outdir_images)
+#########################################################
+
+#===============
+# Quick numbers checks
+#===============
+# Samples assigned to one of the four lineages used in the plots below
+nsample_lin <- merged_df2[merged_df2$lineage %in% c("L1", "L2", "L3", "L4"), ]
+
+# The per-category counts of sensitivity and mutation_info_labels must agree;
+# otherwise the script aborts.
+if (all(table(nsample_lin$sensitivity) == table(nsample_lin$mutation_info_labels))) {
+  cat("\nTotal no. of samples belonging to L1-L4 for", gene, ":", nrow(nsample_lin)
+      , "\nCounting R and S samples")
+  if (sum(table(nsample_lin$sensitivity)) == nrow(nsample_lin)) {
+    cat("\nPASS: Numbers cross checked:")
+    print(table(nsample_lin$sensitivity))
+  } else {
+    # table() leaves out NA by default, so a shortfall points at samples
+    # without a sensitivity value; report it instead of passing silently
+    warning("Sensitivity counts do not add up to the number of L1-L4 samples; "
+            , "check for missing sensitivity values", call. = FALSE)
+  }
+} else {
+  stop("Abort: Numbers mismatch. Please check")
+}
+########################################################################
+###################################################
+#                  Lineage barplots               #
+###################################################
+# Sizes shared by the two lineage barplots below
+my_xats <- 8 # x axis text size # were  25
+my_yats <- 8 # y axis text size
+my_xals <- 8 # x axis label size
+my_yals <- 8 # y axis label size
+my_lls  <- 8 # legend label size
+d_lab_size <- 2.3 # passed as d_lab_size (display label size)
+#===============================
+# lineage sample and SNP count
+#===============================
+# Barplot from the 'lin_lf' element of lineage_dfL, restricted to L1-L4.
+# FALSE is spelled out because the shorthand F is an ordinary variable that
+# can be reassigned.
+lin_countP <- lin_count_bp(lf_data = lineage_dfL[['lin_lf']]
+             , all_lineages = FALSE
+             , x_categ = "sel_lineages"
+             , y_count = "p_count"
+             , use_lineages = c("L1", "L2", "L3", "L4")
+             , bar_fill_categ = "count_categ"
+             , display_label_col = "p_count"
+             , bar_stat_stype = "identity"
+             , d_lab_size = d_lab_size
+             , d_lab_col = "black"
+             , my_xats = my_xats # x axis text size
+             , my_yats = my_yats # y axis text size
+             , my_xals = my_xals # x axis label size
+             , my_yals = my_yals # y axis label size
+             , my_lls  = my_lls # legend label size
+             , bar_col_labels =  c("nsSNPs", "Total Samples")
+             , bar_col_values = c("grey50", "gray75")
+             , bar_leg_name = ""
+             , leg_location = "top"
+             , y_log10 = FALSE
+             , y_scale_percent = FALSE
+             , y_label = c("Count")
+             )
+lin_countP
+#===============================
+# lineage SNP diversity count
+#===============================
+# Barplot from the 'lin_wf' element of lineage_dfL, restricted to L1-L4.
+# FALSE is spelled out because the shorthand F is an ordinary variable that
+# can be reassigned.
+lin_diversityP <- lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']]
+                              , x_categ = "sel_lineages"
+                              , y_count = "snp_diversity"
+                              #, all_lineages = FALSE
+                              , use_lineages = c("L1", "L2", "L3", "L4")
+                              , display_label_col = "snp_diversity_f"
+                              , bar_stat_stype = "identity"
+                              , x_lab_angle = 90
+                              , d_lab_size = d_lab_size
+                              , my_xats = my_xats # x axis text size
+                              , my_yats = my_yats # y axis text size
+                              , my_xals = my_xals # x axis label size
+                              , my_yals = my_yals # y axis label size
+                              , my_lls  = my_lls # legend label size
+                              , y_log10 = FALSE
+                              , y_scale_percent = FALSE
+                              , leg_location = "top"
+                              , y_label = "Percent" #"SNP diversity"
+                              , bp_plot_title = "nsSNP diversity"
+                              , title_colour = "black" #"chocolate4"
+                              , subtitle_text = NULL
+                              , sts = 10
+                              , subtitle_colour = "#350E20FF")
+lin_diversityP
+
+
+###################################################
+#                   Stability dist                #
+###################################################
+# scaled_cols_stability = c("duet_scaled"       
+#                           , "deepddg_scaled"   
+#                           , "ddg_dynamut2_scaled"
+#                           , "foldx_scaled"
+#                           , "avg_stability_scaled")
+
+# Sizes passed to lineage_distP() under the same names
+my_ats <- 8 # axis text size # were  25
+my_als <- 8 # axis label size
+my_leg_ts <- 8 # presumably legend text size - confirm in lineage_distP
+my_leg_title <- 8 # presumably legend title size - confirm in lineage_distP
+my_strip_ts  <- 8 # presumably strip text size - confirm in lineage_distP
+
+
+# x axis label; also echoed (auto-printed when run at top level)
+my_xlabel <- paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel
+#plotdf = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
+
+# Distribution of avg_stability_scaled per lineage label for L1-L4, filled
+# by sensitivity. FALSE is spelled out because the shorthand F is an
+# ordinary variable that can be reassigned.
+linP_dm_om <- lineage_distP(merged_df2
+                           , with_facet = FALSE
+                           , x_axis = "avg_stability_scaled"
+                           , y_axis = "lineage_labels"
+                           , x_lab = my_xlabel
+                           , use_lineages = c("L1", "L2", "L3", "L4")
+                           #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
+                           , fill_categ = "sensitivity"
+                           , fill_categ_cols = c("red", "blue")
+                           , label_categories = c("Resistant", "Sensitive")
+                           , leg_label = "Mutation group"
+                           , my_ats = my_ats # axis text size
+                           , my_als = my_als # axis label size
+                           , my_leg_ts = my_leg_ts
+                           , my_leg_title = my_leg_title
+                           , my_strip_ts = my_strip_ts
+                           , alpha = 0.56
+)
+
+linP_dm_om
+
+###################################################
+#                 Affinity dist [OPTIONAL]        #
+###################################################
+# scaled_cols_affinity = c("affinity_scaled" 
+#                          , "mmcsm_lig_scaled" 
+#                          , "mcsm_ppi2_scaled" 
+#                          , "mcsm_na_scaled"
+#                          , "avg_lig_affinity_scaled")
+
+# lineage_distP(merged_df2
+#               , with_facet = F
+#               , x_axis = "avg_lig_affinity_scaled"
+#               , y_axis = "lineage_labels"
+#               , x_lab = my_xlabel
+#               , use_lineages = c("L1", "L2", "L3", "L4")
+#               #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
+#               , fill_categ = "sensitivity"
+#               , fill_categ_cols = c("red", "blue")
+#               , label_categories = c("Resistant", "Sensitive")
+#               , leg_label = "Mutation group"
+#               , my_ats = 22 # axis text size
+#               , my_als = 22 # axis label size
+#               , my_leg_ts = 22
+#               , my_leg_title = 22
+#               , my_strip_ts = 22
+#               , alpha = 0.56
+# )
--- a/scripts/plotting/plotting_thesis/pnca/lineage_bp_dist_layout.R
+++ b/scripts/plotting/plotting_thesis/pnca/lineage_bp_dist_layout.R
@ -0,0 +1,62 @@
+#!/usr/bin/env Rscript  
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pnca/lineage_bp_dist.R")
+
+#=======
+# output
+#=======
+#outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
+#cat("plots will output to:", outdir_images)
+###########################################
+# TASK: generate plots for lineage
+# Individual plots in 
+#lineage_bp_both.R
+#linage_dist_ens_stability.R
+###########################################
+# svg
+# linPlots_combined = paste0(outdir_images
+#                         , tolower(gene)
+#                         ,"_linP_combined.svg")
+# 
+# cat("\nOutput plot:", linPlots_combined)
+# svg(linPlots_combined, width = 18, height = 12)
+# 
+# cowplot::plot_grid(
+#   cowplot::plot_grid(lin_countP, lin_diversityP
+#                      , nrow = 2
+#                      , rel_heights = c(1.2,1)
+#                      , labels = "AUTO"
+#                      , label_size = my_label_size),
+#   NULL,
+#   linP_dm_om,
+#   nrow = 1,
+#   labels = c("", "", "C"),
+#   label_size = my_label_size,
+#   rel_widths = c(35, 3, 52)
+# )
+# dev.off()
+
+# png
+# Panel label size used for A, B and C
+my_label_size <- 12
+linPlots_combined <- paste0(outdir_images
+                            , tolower(gene)
+                            , "_linP_combined.png")
+
+cat("\nOutput plot:", linPlots_combined)
+png(linPlots_combined, width = 9, height = 6, units = "in", res = 300)
+
+# Left column: count (A) over diversity (B); NULL is a spacer column; right:
+# stability distribution (C).
+# print() is explicit so the grid is drawn into the png device even when
+# this script is run through source(), where top-level values are not
+# auto-printed.
+print(
+  cowplot::plot_grid(
+    cowplot::plot_grid(lin_countP, lin_diversityP,
+                       nrow = 2,
+                       rel_heights = c(1.2, 1),
+                       labels = "AUTO",
+                       label_size = my_label_size),
+    NULL,
+    linP_dm_om,
+    nrow = 1,
+    labels = c("", "", "C"),
+    label_size = my_label_size,
+    rel_widths = c(35, 3, 52)
+  )
+)
+dev.off()
+
+
--- a/scripts/plotting/plotting_thesis/pnca/pe_sens_site_count_pnca.R
+++ b/scripts/plotting/plotting_thesis/pnca/pe_sens_site_count_pnca.R
@ -0,0 +1,173 @@
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pnca/prominent_effects_pnca.R")
+source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pnca/sensitivity_count_pnca.R")
+
+##############################################################
+#                         PE count
+#pe_colour_map = c("DD_lig"           = "#f0e68c" # khaki
+#                  , "SS_lig"         = "#ffd700"  # gold
+                  
+#                  , "DD_nucleic_acid"= "#d2b48c"  # sandybrown
+#                  , "SS_nucleic_acid"= "#a0522d"  # sienna
+                  
+#                  , "DD_ppi2"        = "#da70d6"  # orchid
+#                  , "SS_ppi2"        = "#ff1493"  # deeppink
+                  
+#                  , "DD_stability"   = "#f8766d"  # red
+#                  , "SS_stability"   = "#00BFC4") # blue
+# Number of positions per prominent-effect category (e.g. "DD_lig")
+pe_counts = table(str_df_plot_cols$pe_effect_outcome)
+pe_counts
+##############################################################
+#===========
+#PE count:
+# lig, ppi2, stability
+#===========
+# One row per tile: x = tile order, colors = tile fill, text = tile label
+rects <- data.frame(x=1:6, 
+                     colors = c("#f0e68c" ,
+                                "#ffd700" ,
+                                
+                                "#da70d6" ,
+                                "#ff1493" ,
+                                
+                                "#f8766d" ,
+                                "#00BFC4") 
+                     )
+
+rects$text =  c("-ve Lig"
+                , "+ve Lig"
+                
+                , "-ve PPI2"
+                , "+ve PPI2"
+
+                , "-ve stability"
+                , "+ve stability"
+)
+
+# Count for one category, 0 when that category does not occur in the data.
+# Indexing the table directly with [["name"]] stops with "subscript out of
+# bounds" for an absent name, which aborted this script whenever a category
+# (e.g. the ppi2 ones) had no positions.
+pe_count = function(category){
+  if (category %in% names(pe_counts)) pe_counts[[category]] else 0L
+}
+
+cell1 = pe_count("DD_lig")
+cell2 = pe_count("SS_lig") # previously hard-coded to 0
+
+#cell3 = pe_count("DD_nucleic_acid")
+#cell4 = pe_count("SS_nucleic_acid")
+
+cell5 = pe_count("DD_ppi2")
+cell6 = pe_count("SS_ppi2")
+
+cell7 = pe_count("DD_stability")
+cell8 = pe_count("SS_stability")
+
+
+#rects$numbers = c(38, 0, 22, 9, 108, 681) #for embb
+# order must match rects$text above
+rects$numbers = c(cell1, cell2,
+                  #cell3, cell4,
+                  cell5, cell6,
+                  cell7, cell8)
+
+rects$num_labels = paste0("n=", rects$numbers)
+
+rects
+#------
+# Plot
+#------
+# Vertical strip of labelled colour tiles, one per row of rects.
+# ggplot()/aes()/geom_*() and theme_nothing() are assumed to be attached
+# already (ggplot2 and cowplot) - nothing in this script loads them.
+#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
+peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
+  geom_tile(width = 1, height = 1) + # one unit-sized tile per category
+  geom_text(color = "black", size = 1.7) + # black label in the middle of each tile
+  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
+  # No coord_fixed() here: a plot holds a single coordinate system, so the
+  # coord_flip() below replaced it (with a message) and it never had an effect.
+  coord_flip()+ scale_x_reverse() + # tiles stacked vertically, first row of rects on top
+  # theme_void() # remove any axis markings
+  theme_nothing() # remove any axis markings
+peP
+
+#------
+# Plot: this one is better
+#------
+# Same tiles laid out horizontally; coord_fixed() is the only coordinate
+# system here, so it does keep the tiles square.
+peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
+  geom_tile() + # one tile per category (default tile size)
+  geom_text(color = "black", size = 1.6) + # black label in the middle of each tile
+  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
+  coord_fixed() + # make sure tiles are square
+  theme_nothing() # remove any axis markings
+peP2
+
+########################################################
+# From: script sensitivity_count per gene
+#===============================
+# Sensitivity count: SITE
+#===============================
+#--------
+# embb
+#--------
+#rsc = 54
+#ccc = 46
+#ssc = 470
+
+# Site counts: resistant-only, common (both) and sensitive-only positions.
+# site_Rc / site_Cc / site_Sc are expected to exist already.
+rsc <- site_Rc
+rsc
+ccc <- site_Cc
+ccc
+ssc <- site_Sc
+ssc
+
+# one tile per site class
+rect_rs_siteC <- data.frame(
+  x = 1:3,
+  colors = c("red", "purple", "blue")
+)
+
+rect_rs_siteC
+rect_rs_siteC[["text"]] <- c("Resistant", "Common", "Sensitive")
+
+rect_rs_siteC[["numbers"]] <- c(rsc, ccc, ssc)
+rect_rs_siteC[["num_labels"]] <- paste0("n=", rect_rs_siteC[["numbers"]])
+rect_rs_siteC
+
+#------
+# Plot
+#------
+# Row of three coloured tiles, each carrying only its "n=..." label
+# (alternative label with the class name: paste0(text,"\n", num_labels))
+sens_siteP <- ggplot(
+  rect_rs_siteC,
+  aes(x, y = 0, fill = colors, label = num_labels)
+) +
+  geom_tile(width = 1, height = 1) +
+  #geom_text(color = "black", size = 1.7) +
+  geom_label(color = "black", size = 1.7, fill = "white", alpha = 0.7) +
+  scale_fill_identity(guide = "none") +
+  coord_fixed() +
+  theme_nothing() # remove any axis markings
+sens_siteP
+
+################################################################
+#===============================
+# Sensitivity count: Mutations
+#===============================
+# mutation counts per sensitivity class, read off the same tabulation
+table(sensP_df$sensitivity)
+muts_Rc <- table(sensP_df$sensitivity)[["R"]]
+muts_Sc <- table(sensP_df$sensitivity)[["S"]]
+
+# one tile per class: resistant (red), sensitive (blue)
+rect_sens <- data.frame(
+  x = 1:2,
+  colors = c("red", "blue")
+)
+
+rect_sens[["text"]] <- c("Resistant", "Sensitive")
+rect_sens[["numbers"]] <- c(muts_Rc, muts_Sc)
+rect_sens[["num_labels"]] <- paste0("n=", rect_sens[["numbers"]])
+rect_sens
+#------
+# Plot
+#------
+# Two tiles side by side, each labelled "<class>\nn=<count>"
+sensP <- ggplot(
+  rect_sens,
+  aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))
+) +
+  geom_tile(width = 1, height = 1) +
+  #geom_text(color = "black", size = 1.7) +
+  geom_label(color = "black", size = 1.7, fill = "white", alpha = 0.7) +
+  scale_fill_identity(guide = "none") +
+  coord_fixed() +
+  theme_nothing() # remove any axis markings
+sensP
+
+# same tiles stacked vertically, first row of rect_sens on top
+sensP2 <- sensP +
+  coord_flip() +
+  scale_x_reverse()
+sensP2
+
+
--- a/scripts/plotting/plotting_thesis/pnca/plot_data_pnca.R
+++ b/scripts/plotting/plotting_thesis/pnca/plot_data_pnca.R
@ -0,0 +1,16 @@
+#!/usr/bin/env Rscript   
+#=============
+# Data: Input
+#==============
+source("~/git/LSHTM_analysis/config/pnca.R")
+source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+####################################################
+class(merged_df3)
+
+# plotting copy of merged_df3 without its pos_count column
+df3 <- subset(merged_df3, select = -pos_count)
+
+#=======
+# output
+#=======
+# per-gene image folder; gene is presumably set by the config sourced above
+outdir_images <- paste0(
+  "~/git/Writing/thesis/images/results/",
+  tolower(gene),
+  "/"
+)
+cat("plots will output to:", outdir_images)
--- a/scripts/plotting/plotting_thesis/pnca/prominent_effects_pnca.R
+++ b/scripts/plotting/plotting_thesis/pnca/prominent_effects_pnca.R
@ -0,0 +1,285 @@
+########################################################
+# name of the position column used throughout this script
+pos_colname <- "position"
+
+#-------------
+# from ~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R
+#-------------
+length(all_stability_cols)
+length(raw_stability_cols)
+length(scaled_stability_cols)
+length(outcome_stability_cols)
+length(affinity_dist_colnames)
+
+
+# identifier columns that are always carried along
+static_cols <- c("mutationinformation", pos_colname, "sensitivity")
+
+other_cols_all <- c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames)
+
+#omit avg cols and foldx_scaled_signC cols
+other_cols <- other_cols_all[!grepl("avg", other_cols_all)]
+other_cols <- other_cols[!grepl("foldx_scaled_signC", other_cols)]
+other_cols
+
+cols_to_extract <- c(static_cols, other_cols)
+cat("\nExtracting cols:", cols_to_extract)
+expected_ncols <- length(cols_to_extract)
+expected_ncols
+
+str_df <- merged_df3[, cols_to_extract]
+
+# sanity check: the extracted frame must have exactly the requested columns
+if (ncol(str_df) != expected_ncols){
+  stop("\nAbort: Could not extract cols for calculating prominent effects")
+}
+cat("\nPASS: successfully extracted cols for calculating prominent effects")
+
+#=========================
+# Masking affinity columns
+#=========================
+# Affinity values are set to 0 wherever the matching distance is >10
+head(str_df)
+
+# ligand affinity: both ligand columns share the same distance mask
+far_from_lig <- str_df[["ligand_distance"]] > 10
+str_df[far_from_lig, "affinity_scaled"] <- 0
+str_df[far_from_lig, "mmcsm_lig_scaled"] <- 0
+
+#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10
+gene_lc <- tolower(gene)
+if (gene_lc %in% geneL_ppi2){
+  far_from_ppi2 <- str_df[["interface_dist"]] > 10
+  str_df[far_from_ppi2, "mcsm_ppi2_scaled"] <- 0
+}
+
+# na gene: replace in place na affinity values where na dist >10
+if (gene_lc %in% geneL_na){
+  far_from_na <- str_df[["nca_distance"]] > 10
+  str_df[far_from_na, "mcsm_na_scaled"] <- 0
+}
+
+colnames(str_df)
+head(str_df)
+
+# the scaled effect columns that compete for "prominent effect"
+scaled_cols_tc <- grep("scaled", other_cols, value = TRUE)
+
+
+################################################
+#===============
+# whole df
+#===============
+# Rows of df whose pos_colname column equals x, restricted to column(s) y.
+# df defaults to the global str_df as it stands when the call is made.
+give_col=function(x,y,df=str_df){
+  at_position = df[[pos_colname]] == x
+  df[at_position, y]
+}
+
+# For every distinct position in str_df: find the largest absolute value over
+# all rows at that position and all columns in scaled_cols_tc, then store on
+# every row of that position
+#   abs_max_effect - that largest absolute value
+#   effect_type    - name of the column it was found in
+#   effect_sign    - sign of the value that produced it
+# NOTE(review): max() is called without na.rm, so an NA in any scaled column
+# at a position would presumably propagate into all three fields - confirm
+# the inputs are complete.
+for (i in unique(str_df[[pos_colname]]) ){
+  print(i)
+  #cat(length(unique(str_df[[pos_colname]])))
+  
+  # largest absolute scaled value among all mutations at position i
+  biggest     = max(abs(give_col(i,scaled_cols_tc)))
+  
+  str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest
+  # which(..., arr.ind=T) returns the (row, col) index of every cell whose
+  # absolute value equals biggest; the "col" indices pick those columns and
+  # [1] keeps the name of the first one when several cells tie
+  str_df[str_df[[pos_colname]]==i,'effect_type']= names(
+    give_col(i,scaled_cols_tc)[which(
+      abs(
+        give_col(i,scaled_cols_tc)
+      ) == biggest, arr.ind=T
+    )[, "col"]])[1]
+  
+  # all rows at position i were just given the same effect_type, so this is a single name
+  effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values
+  
+  # get index/rowname for value of max effect, and then use it to get the original sign
+  # here
+  #ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T))
+  # ind = row names of the rows at position i whose effect_name value has absolute value biggest
+  ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T))
+  
+  # sign of the first such row, applied to every row of the position
+  str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1]
+}
+
+# ends with suffix 2 if dups
+# cull duplicate effect types that happen when there are exact duplicate values:
+# remove the first ".<digits>" run from each effect_type
+str_df[["effect_type"]] <- sub(
+  pattern = "\\.[0-9]+",
+  replacement = "",
+  x = str_df[["effect_type"]]
+)
+colnames(str_df)
+table(str_df$effect_type)
+
+# check
+# rows at a handful of positions, kept for manual inspection
+check_rows <- str_df[[pos_colname]] %in% c(24, 32, 160, 303, 334)
+str_df_check <- str_df[check_rows, ]
+
+#================
+# for Plots
+#================
+# keep only the identifiers plus the prominent-effect result columns
+str_df_short <- str_df[, c("mutationinformation", pos_colname, "sensitivity",
+                           "effect_type", "effect_sign")]
+
+table(str_df_short$effect_type)
+table(str_df_short$effect_sign)
+str(str_df_short)
+
+# assign pe outcome
+# negative sign -> "DD", zero or positive sign -> "SS"
+str_df_short$pe_outcome <- ifelse(str_df_short$effect_sign >= 0, "SS", "DD")
+table(str_df_short$pe_outcome )
+table(str_df_short$effect_sign)
+
+#==============
+# group effect type:
+# lig, ppi2, nuc. acid, stability
+#==============
+affcols <- c("affinity_scaled",  "mmcsm_lig_scaled")
+
+#lig 
+# step 1: the two ligand-affinity columns collapse to "lig", the rest keep
+# their column name for now
+table(str_df_short$effect_type)
+is_lig_effect <- str_df_short$effect_type %in% affcols
+str_df_short$effect_grouped <- str_df_short$effect_type
+str_df_short$effect_grouped[is_lig_effect] <- "lig"
+table(str_df_short$effect_grouped)
+
+
+#stability
+# step 2: everything that is not "lig" becomes "stability"
+is_not_lig <- !(str_df_short$effect_grouped %in% c("lig"))
+str_df_short$effect_grouped[is_not_lig] <- "stability"
+
+table(str_df_short$effect_grouped)
+
+# create a sign as well
+# combined label "<pe_outcome>_<effect_grouped>", e.g. "DD_lig"
+str_df_short$pe_effect_outcome <- paste(str_df_short$pe_outcome,
+                                        str_df_short$effect_grouped,
+                                        sep = "_")
+
+table(str_df_short$pe_effect_outcome)
+
+#####################################################################
+# Chimera: for colouring
+####################################################################
+
+#-------------------------------------
+# get df with unique position
+#--------------------------------------
+#data[!duplicated(data$x), ]   
+# keep the first row seen for each position
+first_row_of_position <- !duplicated(str_df[[pos_colname]])
+str_df_plot <- str_df_short[first_row_of_position, ]
+
+n_unique_positions <- length(unique(str_df[[pos_colname]]))
+if (nrow(str_df_plot) != n_unique_positions){
+  stop("\nAbort: Could not extract df with unique positions")
+}
+cat("\nPASS: successfully extracted df with unique positions")
+
+#-------------------------------------
+# generate colours for effect types
+#--------------------------------------
+colour_input_cols <- c(pos_colname, "sensitivity", "pe_outcome",
+                       "effect_grouped", "pe_effect_outcome")
+str_df_plot_cols <- str_df_plot[, colour_input_cols]
+head(str_df_plot_cols)
+
+# colour intensity based on sign
+#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light")
+# "DD" outcomes are tagged "bright", everything else "light"
+str_df_plot_cols$colour_hue <- ifelse(
+  str_df_plot_cols$pe_outcome == "DD",
+  "bright",
+  "light"
+)
+
+table(str_df_plot_cols$colour_hue)
+table(str_df_plot$pe_outcome)
+head(str_df_plot_cols)
+
+# colour based on effect
+table(str_df_plot_cols$pe_effect_outcome)
+
+# hex colour for every "<outcome>_<effect group>" label
+pe_colour_map <- c(
+  "DD_lig"          = "#f0e68c",  # khaki
+  "SS_lig"          = "#ffd700",  # gold
+
+  "DD_nucleic_acid" = "#d2b48c",  # sandybrown
+  "SS_nucleic_acid" = "#a0522d",  # sienna
+
+  "DD_ppi2"         = "#da70d6",  # orchid
+  "SS_ppi2"         = "#ff1493",  # deeppink
+
+  "DD_stability"    = "#f8766d",  # red
+  "SS_stability"    = "#00BFC4"   # blue
+)
+                  
+#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE)
+
+#map the colours
+# [[ ]] lookup per label, so a label missing from the map is an error
+colour_of_outcome <- function(outcome){
+  pe_colour_map[[outcome]]
+}
+str_df_plot_cols$colour_map <- unlist(
+  map(str_df_plot_cols$pe_effect_outcome, colour_of_outcome)
+)
+head(str_df_plot_cols$colour_map)
+table(str_df_plot_cols$colour_map)
+table(str_df_plot_cols$pe_effect_outcome)
+
+# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue
+#                                   , "_"
+#                                   , str_df_plot_cols$colour_map)
+# head(str_df_plot_cols$colours)
+# table(str_df_plot_cols$colours)
+# 
+# 
+# class(str_df_plot_cols$colour_map)
+# str(str_df_plot_cols)
+
+# sort by colour
+head(str_df_plot_cols)
+rows_by_colour <- order(str_df_plot_cols$colour_map)
+str_df_plot_cols <- str_df_plot_cols[rows_by_colour, ]
+head(str_df_plot_cols)
+
+#======================================
+# write file with prominent effects
+#======================================
+# <outdir_images>/<gene>_prominent_effects.csv, one row per position
+outdir_images <- paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
+prominent_effects_csv <- paste0(outdir_images, tolower(gene), "_prominent_effects.csv")
+write.csv(str_df_plot_cols, prominent_effects_csv)
+
+################################
+# printing for chimera
+###############################
+# residue labels are built as "<position><chain_suffix>"
+chain_suffix <- ".A"
+str_df_plot_cols$pos_chain <- paste0(str_df_plot_cols[[pos_colname]], chain_suffix)
+table(str_df_plot_cols$colour_map)
+table(str_df_plot_cols$pe_effect_outcome)
+
+#===================================================
+#-------------------
+# Ligand Affinity
+#-------------------
+# -ve Lig Aff
+dd_lig <- str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",]
+# the subset must agree with the tabulated count before it is used
+if (nrow(dd_lig) != table(str_df_plot_cols$pe_effect_outcome)[['DD_lig']]){
+  stop("Abort: DD affinity colour numbers mismtatch")
+}
+dd_lig_pos <- dd_lig[[pos_colname]]
+toString(paste0(dd_lig_pos, chain_suffix))
+
+# +ve Lig Aff
+# only checked and listed when at least one such position exists
+ss_lig <- str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",]
+if (!empty(ss_lig)){
+  if (nrow(ss_lig) != table(str_df_plot_cols$pe_effect_outcome)[['SS_lig']]){
+    stop("Abort: SS affinity colour numbers mismtatch")
+  }
+  ss_lig_pos <- ss_lig[[pos_colname]]
+  #put in chimera cmd
+  toString(paste0(ss_lig_pos, chain_suffix))
+}
+
+#=========================================================
+#------------------------
+# Stability
+#------------------------
+# -ve Stability
+dd_stability <- str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_stability",]
+# the subset must agree with the tabulated count before it is used
+if (nrow(dd_stability) != table(str_df_plot_cols$pe_effect_outcome)[['DD_stability']]){
+  stop("Abort: DD Stability colour numbers mismtatch")
+}
+dd_stability_pos <- dd_stability[[pos_colname]]
+
+# +ve Stability
+ss_stability <- str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_stability",]
+if (nrow(ss_stability) != table(str_df_plot_cols$pe_effect_outcome)[['SS_stability']]){
+  stop("Abort: SS Stability colour numbers mismtatch")
+}
+ss_stability_pos <- ss_stability[[pos_colname]]
+
+#put in chimera cmd
+# comma-separated "<position><chain_suffix>" lists
+toString(paste0(dd_stability_pos, chain_suffix))
+toString(paste0(ss_stability_pos, chain_suffix))
+####################################################################
+
--- a/scripts/plotting/plotting_thesis/pnca/sensitivity_count_pnca.R
+++ b/scripts/plotting/plotting_thesis/pnca/sensitivity_count_pnca.R
@ -0,0 +1,65 @@
+#=========================
+# Count Sensitivity
+# Mutations and positions
+#=========================
+# name of the position column in merged_df3
+pos_colname_c ="position"
+
+# one row per mutation: mutation id, its position and its sensitivity call
+sensP_df = merged_df3[,c("mutationinformation",
+                         #"position",
+                         pos_colname_c,
+                         "sensitivity")]
+
+head(sensP_df)
+table(sensP_df$sensitivity)
+
+#---------------
+# Total unique positions
+#----------------
+tot_mut_pos = length(unique(sensP_df[[pos_colname_c]]))
+cat("\nNo of Tot muts sites:", tot_mut_pos)
+
+# resistant mut pos
+# positions of all "R" mutations (with repeats), then the distinct ones
+sens_site_allR = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="R"]
+sens_site_UR   = unique(sens_site_allR)
+length(sens_site_UR)
+
+# Sensitive mut pos
+# positions of all "S" mutations (with repeats), then the distinct ones
+sens_site_allS = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="S"]
+sens_site_US = unique(sens_site_allS)
+# report the sensitive count; this line previously repeated length(sens_site_UR)
+length(sens_site_US)
+
+#---------------
+# Common Sites
+#----------------
+# positions that carry both resistant and sensitive mutations
+common_pos <- intersect(sens_site_UR, sens_site_US)
+site_Cc <- length(common_pos)
+cat("\nNo of Common sites:", site_Cc,
+    "\nThese are:", common_pos)
+
+#---------------
+# Resistant muts
+#----------------
+# resistant positions that are not shared with sensitive mutations
+is_r_only <- !(sens_site_UR %in% common_pos)
+site_R <- sens_site_UR[is_r_only]
+site_Rc <- length(site_R)
+
+# report only when the extracted R positions match the tabulated R count
+if (length(sens_site_allR) == table(sensP_df$sensitivity)[['R']]){
+  cat("\nNo of R muts:", length(sens_site_allR),
+      "\nNo. of R sites:", site_Rc,
+      "\nThese are:", site_R)
+}
+
+#---------------
+# Sensitive muts
+#----------------
+# sensitive positions that are not shared with resistant mutations
+is_s_only <- !(sens_site_US %in% common_pos)
+site_S <- sens_site_US[is_s_only]
+site_Sc <- length(site_S)
+
+# report only when the extracted S positions match the tabulated S count
+if (length(sens_site_allS) == table(sensP_df$sensitivity)[['S']]){
+  cat("\nNo of S muts:", length(sens_site_allS),
+      "\nNo. of S sites:", site_Sc,
+      "\nThese are:", site_S)
+}
+
+#########################
+