diff --git a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_adjusted_PS_LIG.R b/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_adjusted_PS_LIG.R
deleted file mode 100644
index 4355a23..0000000
--- a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_adjusted_PS_LIG.R
+++ /dev/null
@@ -1,330 +0,0 @@
-#!/usr/bin/env Rscript
-#########################################################
-# TASK: Corr plots for PS and Lig 
-
-# Output: 1 svg
-
-#=======================================================================
-# working dir and loading libraries
-getwd()
-setwd("~/git/LSHTM_analysis/scripts/plotting/")
-getwd()
-
-
-source("~/git/LSHTM_analysis/scripts/Header_TT.R")
-require(cowplot)
-source("combining_dfs_plotting.R")
-source("my_pairs_panel.R")
-# should return the following dfs, directories and variables
-
-# PS combined: 
-# 1) merged_df2
-# 2) merged_df2_comp
-# 3) merged_df3
-# 4) merged_df3_comp
-
-# LIG combined: 
-# 5) merged_df2_lig
-# 6) merged_df2_comp_lig
-# 7) merged_df3_lig
-# 8) merged_df3_comp_lig
-
-# 9) my_df_u
-# 10) my_df_u_lig
-
-cat(paste0("Directories imported:"
-           , "\ndatadir:", datadir
-           , "\nindir:", indir
-           , "\noutdir:", outdir
-           , "\nplotdir:", plotdir))
-
-cat(paste0("Variables imported:"
-           , "\ndrug:", drug
-           , "\ngene:", gene
-           , "\ngene_match:", gene_match
-           , "\nAngstrom symbol:", angstroms_symbol
-           , "\nNo. of duplicated muts:", dup_muts_nu
-           , "\nNA count for ORs:", na_count
-           , "\nNA count in df2:", na_count_df2
-           , "\nNA count in df3:", na_count_df3))     
-
-#=======
-# output
-#=======
-# can't combine by cowplot because not ggplots
-#corr_plot_combined = "corr_combined.svg"
-#plot_corr_plot_combined  =  paste0(plotdir,"/", corr_plot_combined)
-
-# PS
-corr_ps_adjusted = "corr_PS_adjusted.svg"
-plot_corr_ps_adjusted =  paste0(plotdir,"/", corr_ps)
-
-# LIG
-corr_lig_adjusted = "corr_LIG_adjusted.svg"
-plot_corr_lig_adjusted =  paste0(plotdir,"/", corr_lig)
-
-####################################################################
-#               end of loading libraries and functions                 #
-########################################################################
-
-#%%%%%%%%%%%%%%%%%%%%%%%%%
-df_ps = merged_df3_comp 
-df_lig = merged_df3_comp_lig
-#%%%%%%%%%%%%%%%%%%%%%%%%%
-
-rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig)
-
-########################################################################
-#               end of data extraction and cleaning for plots          #
-########################################################################
-
-#===========================
-# Data for Correlation plots:PS
-#===========================
-table(df_ps$duet_outcome)
-
-
-#===========================
-# Data for Correlation plots:foldx
-#===========================
-#============================
-# adding foldx scaled values
-# scale data b/w -1 and 1
-#============================
-n = which(colnames(df_ps) == "ddg"); n 
-
-my_min = min(df_ps[,n]); my_min 
-my_max = max(df_ps[,n]); my_max 
-
-df_ps$foldx_scaled = ifelse(df_ps[,n] < 0
-                            , df_ps[,n]/abs(my_min)
-                            , df_ps[,n]/my_max) 
-# sanity check
-my_min = min(df_ps$foldx_scaled); my_min 
-my_max = max(df_ps$foldx_scaled); my_max
-
-if (my_min == -1 && my_max == 1){
-  cat("PASS: foldx ddg successfully scaled b/w -1 and 1"
-      , "\nProceeding with assigning foldx outcome category")
-}else{
-  cat("FAIL: could not scale foldx ddg values"
-      , "Aborting!")
-}
-
-
-#================================
-# adding foldx outcome category
-# ddg<0 = "Stabilising" (-ve)
-#=================================
-
-c1 = table(df_ps$ddg < 0)
-df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising")
-c2 = table(df_ps$ddg < 0)
-
-if ( all(c1 == c2) ){
-  cat("PASS: foldx outcome successfully created")
-}else{
-  cat("FAIL: foldx outcome could not be created. Aborting!")
-  exit()
-}
-
-table(df_ps$foldx_outcome)
-
-
-#======================
-# adding log cols 
-#======================
- 
-df_ps$log10_or_mychisq = log10(df_ps$or_mychisq)
-df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher)
-
-df_ps$log10_or_kin = log10(df_ps$or_kin)
-df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin)
-
-# subset data to generate pairwise correlations
-cols_to_select =  c("duet_scaled"
-                    
-                    , "foldx_scaled"
-                    
-                    #, "log10_or_mychisq"
-                    #, "neglog_pval_fisher"
-                    
-                    , "or_kin"
-                    , "neglog_pwald_kin"
-                    
-                    , "af"
-                    
-                    , "asa"
-                    , "rsa"
-                    , "kd_values"
-                    , "rd_values"
-                    
-                    , "duet_outcome"
-                    , drug)
-
-corr_data_ps = df_ps[, cols_to_select]
-
-dim(corr_data_ps)
-
-#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic 
-#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic
-
-# assign nice colnames (for display)
-my_corr_colnames = c("DUET"
-                     
-                     , "Foldx"
-                     #, "Log(OR)"
-                     #, "-Log(P)"
-                     
-                     , "OR adjusted"
-                     , "-Log(P wald)"
-                     
-                     , "AF"
-                     
-                     , "ASA"
-                     , "RSA"
-                     , "KD"
-                     , "RD"
-                     
-                     , "duet_outcome"
-                     , drug)
-
-length(my_corr_colnames)
-
-colnames(corr_data_ps)
-colnames(corr_data_ps) <- my_corr_colnames
-colnames(corr_data_ps)
-
-#-----------------
-# generate corr PS plot
-#-----------------
-start = 1
-end = which(colnames(corr_data_ps) == drug); end # should be the last column
-offset = 1
-
-my_corr_ps = corr_data_ps[start:(end-offset)]
-head(my_corr_ps)
-
-#my_cols = c("#f8766d", "#00bfc4")
-# deep blue :#007d85
-# deep red: #ae301e
-
-cat("Corr plot PS:", plot_corr_ps_adjusted)
-svg(plot_corr_ps_adjusted, width = 15, height = 15)
-
-OutPlot1 = pairs.panels(my_corr_ps[1:(length(my_corr_ps)-1)]
-             , method = "spearman" # correlation method
-             , hist.col = "grey" ##00AFBB
-             , density = TRUE  # show density plots
-             , ellipses = F # show correlation ellipses
-             , stars = T
-             , rug = F
-             , breaks = "Sturges"
-             , show.points = T
-             , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps$duet_outcome))]
-             , pch = 21
-             , jitter = T
-             #, alpha = .05
-             #, points(pch = 19, col = c("#f8766d", "#00bfc4"))
-             , cex = 2
-             , cex.axis = 1.5
-             , cex.labels = 1.5
-             , cex.cor = 1
-             , smooth = F
-)
-
-print(OutPlot1)
-dev.off()
-
-#===========================
-# Data for Correlation plots: LIG
-#===========================
-table(df_lig$ligand_outcome)
-
-df_lig$log10_or_mychisq = log10(df_lig$or_mychisq)
-df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher)
-
-
-df_lig$log10_or_kin = log10(df_lig$or_kin)
-df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin)
-
-
-# subset data to generate pairwise correlations
-cols_to_select =  c("affinity_scaled"
-                    
-                    , "log10_or_mychisq"
-                    , "neglog_pval_fisher"
-                    
-                    #, "or_kin"
-                    #, "neglog_pwald_kin"
-                    
-                    , "af"
-                    
-                    , "ligand_outcome"
-                    , drug)
-
-corr_data_lig = df_lig[, cols_to_select]
-
-
-dim(corr_data_lig)
-
-# assign nice colnames (for display)
-my_corr_colnames = c("Ligand Affinity"
-                     
-                     , "Log(OR)"
-                     , "-Log(P)"
-                     
-                     #, "OR adjusted"
-                     #, "-Log(P wald)"
-                     
-                     , "AF"
-                     
-                     , "ligand_outcome"
-                     , drug)
-
-length(my_corr_colnames)
-
-colnames(corr_data_lig)
-colnames(corr_data_lig) <- my_corr_colnames
-colnames(corr_data_lig)
-
-#-----------------
-# generate corr LIG plot
-#-----------------
-
-start = 1
-end = which(colnames(corr_data_lig) == drug); end # should be the last column
-offset = 1
-
-my_corr_lig = corr_data_lig[start:(end-offset)]
-head(my_corr_lig)
-
-cat("Corr LIG plot:", plot_corr_lig_adjusted)
-svg(plot_corr_lig_adjusted, width = 15, height = 15)
-
-OutPlot2  = pairs.panels(my_corr_lig[1:(length(my_corr_lig)-1)]
-                  , method = "spearman" # correlation method
-                  , hist.col = "grey" ##00AFBB
-                  , density = TRUE  # show density plots
-                  , ellipses = F # show correlation ellipses
-                  , stars = T
-                  , rug = F
-                  , breaks = "Sturges"
-                  , show.points = T
-                  , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig$ligand_outcome))]
-                  , pch = 21
-                  , jitter = T
-                  #, alpha = .05
-                  #, points(pch = 19, col = c("#f8766d", "#00bfc4"))
-                  , cex = 3
-                  , cex.axis = 2.5
-                  , cex.labels = 2.1
-                  , cex.cor = 1
-                  , smooth = F
-)
-
-print(OutPlot2)
-dev.off()
-#######################################################
-
-
diff --git a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots.R b/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots.R
deleted file mode 100755
index ed5979d..0000000
--- a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots.R
+++ /dev/null
@@ -1,242 +0,0 @@
-#!/usr/bin/env Rscript
-getwd()
-setwd("~/git/LSHTM_analysis/scripts/plotting")
-getwd()
-source("~/git/LSHTM_analysis/scripts/Header_TT.R")
-
-spec = matrix(c(
-  "drug"       , "d",  1, "character",
-  "gene"       , "g",  1, "character",
-  "data_file1" , "fa", 2, "character",
-  "data_file2" , "fb", 2, "character" 
-), byrow = TRUE, ncol = 4)
-
-opt = getopt(spec)
-
-drug            = opt$drug
-gene            = opt$gene
-infile_params   = opt$data_file1
-infile_metadata = opt$data_file2
-
-if(is.null(drug)|is.null(gene)) {
-  stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
-}
-
-#===========
-# Input
-#===========
-
-source("get_plotting_dfs.R")
-
-#===========
-# output
-#===========
-# PS
-corr_ps = "corr_PS.svg"
-plot_corr_ps =  paste0(plotdir,"/", corr_ps)
-
-corr_ps_all = "corr_PS_all.svg"
-plot_corr_ps_all =  paste0(plotdir,"/", corr_ps_all)
-
-
-# LIG
-corr_lig = "corr_LIG.svg"
-plot_corr_lig =  paste0(plotdir,"/", corr_lig)
-
-corr_lig_all = "corr_LIG_all.svg"
-plot_corr_lig_all =  paste0(plotdir,"/", corr_lig_all)
-
-##############################################################################
-foo = corr_ps_df3
-#foo2 = corr_ps_df2
-
-bar = corr_lig_df3
-#bar2 = corr_lig_df2
-
-#================================
-# Data for Correlation plots: PS
-#================================
-# subset data to generate pairwise correlations
-cols_to_select = c("DUET"
-                     , "Foldx"
-                     , "Log (OR)"
-                     , "-Log (P)"
-                     , "MAF"
-                     , "duet_outcome"
-                     , drug)
-corr_data_ps = foo[names(foo)%in%cols_to_select]
-length(cols_to_select)
-
-colnames(corr_data_ps)
-
-start = 1
-end = which(colnames(corr_data_ps) == drug); end # should be the last column
-offset = 1
-
-my_corr_ps = corr_data_ps[start:(end - offset)]
-head(my_corr_ps)
-
-#---------------------
-# Corr plot PS: short
-# data: corr_ps_df3
-# cols: 7
-#---------------------
-cat("Corr plot PS DUET with coloured dots:", plot_corr_ps)
-svg(plot_corr_ps, width = 15, height = 15)
-
-pairs.panels(my_corr_ps[1:(length(my_corr_ps)-1)]
-             , method = "spearman" # correlation method
-             , hist.col = "grey" ##00AFBB
-             , density = TRUE  # show density plots
-             , ellipses = F # show correlation ellipses
-             , stars = T
-             , rug = F
-             , breaks = "Sturges"
-             , show.points = T
-             , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps$duet_outcome))] # foldx colours are reveresed
-             , pch = 21 # for bg
-             , jitter = T
-             , alpha = 1
-             , cex = 1.8
-             , cex.axis = 2
-             , cex.labels = 4
-             , cex.cor = 1
-             , smooth = F
-)
-dev.off()
-
-corr_ps_rho = corr.test(my_corr_ps[1:5], method = "spearman")$r
-corr_ps_p = corr.test(my_corr_ps[1:5], method = "spearman")$p
-
-#---------------------
-# Corr plot PS: ALL
-# data: corr_ps_df3
-# cols: 10
-#---------------------
-end_ps_all = which(colnames(foo) == drug); end_ps_all # should be the last column
-
-my_corr_ps_all = foo[start:(end_ps_all - offset)]
-cols_to_drop = "Mutation"
-my_corr_ps_all = my_corr_ps_all[, !(names(my_corr_ps_all)%in%cols_to_drop)]
-head(my_corr_ps_all)
-length(colnames(my_corr_ps_all))
-
-cat("Corr plot PS DUET with coloured dots:", plot_corr_ps_all)
-svg(plot_corr_ps_all, width = 15, height = 15)
-
-pairs.panels(my_corr_ps_all[1:(length(my_corr_ps_all)-1)]
-             , method = "spearman" # correlation method
-             , hist.col = "grey" ##00AFBB
-             , density = TRUE  # show density plots
-             , ellipses = F # show correlation ellipses
-             , stars = T
-             , rug = F
-             , breaks = "Sturges"
-             , show.points = T
-             , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps_all$duet_outcome))] # foldx colours are reveresed
-             , pch = 21 # for bg
-             , jitter = T
-             , alpha = 1
-             , cex = 1.5
-             , cex.axis = 2
-             , cex.labels = 2.5
-             , cex.cor = 1
-             , smooth = F
-)
-dev.off()
-
-#==================================
-# Data for Correlation plots: LIG
-#==================================
-cols_to_select_lig = c("Ligand Affinity"
-                     , "Log (OR)"
-                     , "-Log (P)"
-                     , "MAF"
-                     , "ligand_outcome"
-                     , drug)
-
-corr_data_lig = bar[names(bar)%in%cols_to_select_lig]
-length(cols_to_select_lig)
-
-colnames(corr_data_lig)
-
-start_lig = 1
-end_lig = which(colnames(corr_data_lig) == drug); end_lig # should be the last column
-offset_lig = 1
-
-my_corr_lig = corr_data_lig[start_lig:(end_lig-offset_lig)]
-head(my_corr_lig)
-
-#---------------------
-# Corr plot LIG: short
-# data: corr_lig_df3
-# cols: 7
-#---------------------
-cat("Corr LIG plot with coloured dots:", plot_corr_lig)
-svg(plot_corr_lig, width = 15, height = 15)
-
-pairs.panels(my_corr_lig[1:(length(my_corr_lig)-1)]
-             , method = "spearman" # correlation method
-             , hist.col = "grey" ##00AFBB
-             , density = TRUE  # show density plots
-             , ellipses = F # show correlation ellipses
-             , stars = T
-             , rug = F
-             , breaks = "Sturges"
-             , show.points = T
-             , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig$ligand_outcome))] 
-             , pch = 21 # for bg
-             , jitter = T
-             , cex = 2
-             , cex.axis = 2
-             , cex.labels = 4
-             , cex.cor = 1
-             , smooth = F
-)
-
-dev.off()
-
-corr_lig_rho = corr.test(my_corr_lig[1:4], method = "spearman")$r
-corr_lig_p = corr.test(my_corr_lig[1:4], method = "spearman")$p
-
-#---------------------
-# Corr plot LIG: ALL
-# data: corr_lig_df3
-# cols: 9
-#---------------------
-end_lig_all = which(colnames(bar) == drug); end_lig_all # should be the last column
-
-my_corr_lig_all = bar[start_lig:(end_lig_all - offset_lig)]
-cols_to_drop = "Mutation"
-my_corr_lig_all = my_corr_lig_all[, !(names(my_corr_lig_all)%in%cols_to_drop)]
-head(my_corr_lig_all)
-length(colnames(my_corr_lig_all))
-
-cat("Corr plot LIG with coloured dots:", plot_corr_lig_all)
-svg(plot_corr_lig_all, width = 15, height = 15)
-
-pairs.panels(my_corr_lig_all[1:(length(my_corr_lig_all)-1)]
-             , method = "spearman" # correlation method
-             , hist.col = "grey" ##00AFBB
-             , density = TRUE  # show density plots
-             , ellipses = F # show correlation ellipses
-             , stars = T
-             , rug = F
-             , breaks = "Sturges"
-             , show.points = T
-             , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig_all$ligand_outcome))] # foldx colours are reveresed
-             , pch = 21 # for bg
-             , jitter = T
-             , alpha = 1
-             , cex = 1.5
-             , cex.axis = 2
-             , cex.labels = 2.2
-             , cex.cor = 1
-             , smooth = F
-)
-dev.off()
-
-
-######################################################################=
-#                             End of script
-######################################################################=
\ No newline at end of file
diff --git a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_i.R b/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_i.R
deleted file mode 100644
index 64cf7ff..0000000
--- a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_i.R
+++ /dev/null
@@ -1,276 +0,0 @@
-#!/usr/bin/env Rscript       
-source("~/git/LSHTM_analysis/config/gid.R")
-source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
-
-#===================================================================
-corr_data = corr_data_extract(merged_df3, drug_name = drug)
-#corr_data = corr_data_extract(merged_df2, drug_name = drug)
-
-geneL_normal = c("pnca")
-geneL_na_dy = c("gid")
-geneL_na = c("rpob")
-geneL_ppi2 = c("alr", "embb", "katg", "rpob")
-
-core_cols <- c( "Log (OR)" , "MAF", "-Log (P)"
-                    , "DUET", "FoldX"
-                    , "DeepDDG", "Dynamut2"
-                    , "ASA", "RSA", "RD", "KD"
-                    , "Consurf", "SNAP2"
-                    #, "mutation_info_labels"
-)
-
-
-if (tolower(gene)%in%geneL_normal){
-  corrplot_cols = core_cols
-}
-
-if (tolower(gene)%in%geneL_na_dy){
-  additional_cols = c("mCSM-NA"
-                    , "Dynamut"
-                    , "ENCoM-DDG"
-                    , "ENCoM-DDS"
-                    , "mCSM"
-                    , "SDM"
-                    , "DUET-d"
-                    , "mutation_info_labels")
-  corrplot_cols = c(core_cols, additional_cols)
-}
-if (tolower(gene)%in%geneL_na){
-  additional_cols = c("mCSM-NA"
-                    , "mutation_info_labels")
-  corrplot_cols = c(core_cols, additional_cols)
-  
-}
-
-if (tolower(gene)%in%geneL_ppi2){
-  additional_cols = c("mCSM-PPI2"
-                    , "mutation_info_labels")
-  corrplot_cols = c(core_cols, additional_cols)
-}
-
-#========================================
-# corrplot_cols <- c( "Log (OR)"
-#                     , "MAF"
-#                     , "-Log (P)"
-#                     , "DUET"
-#                     , "FoldX"
-#                     , "DeepDDG"
-#                     , "Dynamut2"
-#                     , "mCSM-NA"
-#                     , "Dynamut"
-#                     , "ENCoM-DDG"
-#                     , "ENCoM-DDS"
-#                     , "mCSM"
-#                     , "SDM"
-#                     , "DUET-d"
-#                     , "ASA"
-#                     , "RSA"
-#                     , "RD"
-#                     , "KD"
-#                     , "mutation_info_labels"
-#                    )
-
-corr_df <- corr_data[, corrplot_cols] # col order is according to corrplot_cols
-head(corr_df); names(corr_df)
-
-if ( all( corrplot_cols%in%names(corr_df) ) ){
-  cat("\nPASS: Successfully selected"
-      , length(corrplot_cols)
-      , "columns for building correlation df")
-} else {
-  cat("\nFAIl: Something went wrong, numbers mismatch"
-      , "\nExpected cols:", length(corrplot_cols)
-      , "\nGot:", length(corr_df) )
-}
-
-#=====================================================
-corrplot_df <- corr_df 
-
-# stat_df = corrplot_df[, c("Log (OR)"
-#                           , "MAF" 
-#                           , "-Log (P)")]
-
-plot_title <- "Correlation plots (stability)"
-
-# Checkbox Names
-# FIXME: select columns conditionally based on gene and grey out the ones that are not present!
-
-cBCorrNames = c( "Odds Ratio"
-                , "Allele Frequency"
-                , "P-value"
-                , "DUET"
-                , "FoldX"
-                , "DeepDDG"
-                , "Dynamut2"
-                , "ASA"
-                , "RSA"
-                , "RD"
-                , "KD"
-                , "Consurf"
-                , "SNAP2"
-                , "Nucleic Acid affinity"
-                , "PPi2 affinity"
-                
-                #, "Dynamut"
-                #, "ENCoM-Stability"
-                #, "ENCoM-Flexibility"
-                #, "mCSM"
-                #, "SDM"
-                #, "DUET-d"
-)
-
-# Checkbox Values (aka Column Names that are in corrplot_df)
-cBCorrVals = c("Log (OR)"
-              , "MAF"
-              , "-Log (P)"
-              , "DUET"
-              , "FoldX"
-              , "DeepDDG"
-              , "Dynamut2"
-              , "ASA"
-              , "RSA"
-              , "RD"
-              , "KD"
-              , "Consurf"
-              , "SNAP2"
-              , "mCSM-NA"
-              , "mCSM-PPI2"
-              # , "Dynamut"
-              # , "ENCoM-DDG"
-              # , "ENCoM-DDS"
-              # , "mCSM"
-              # , "SDM"
-              # , "DUET-d"
- )
-
-# Pre-selected checkboxes
-cBCorrSelected = c("Log (OR)"
-                   , "MAF"
-                   , "-Log (P)")
-
-#################
-# Define UI
-#################
-u_corr <- fluidPage(
-  
-  headerPanel(plot_title),
-  
-  sidebarLayout(position = "left"
-                , sidebarPanel(
-                 checkboxGroupInput("variable", "Choose parameter:"
-                                       , choiceNames  = cBCorrNames
-                                       , choiceValues = cBCorrVals
-                                       , selected     = cBCorrSelected
-                  )
-                  
-                  # could be a fluid Row
-                  , actionButton("add_col"     , "Render")
-                  , actionButton("reset_graph" , "Reset Graphs")
-                  , actionButton("select_all"  , "Select All")
-                 
-                  )
-                
-                # output/display
-                , mainPanel(plotOutput(outputId = 'corrplot'
-                                    , height = "1200px"
-                                    , width  = "1500px")
-#                           , height = "800px"
-#                          , width  = "600px")
-                          , textOutput("txt")
-                )
-  )
-)
-
-#################
-# Define server
-#################
-s_corr <- shinyServer(function(input, output, session)
-  
-{
-  
- #================
- # Initial render
- #================
-  output$corrplot <- renderPlot({ 
-  
-    #---------------------
-    # My correlation plot: initial plot
-    #---------------------
-    c_plot <- my_corr_pairs(corr_data_all = corrplot_df
-                            , corr_cols = cBCorrSelected
-                            , corr_method = "spearman"
-                            , dot_size = 2
-                            , ats = 1.5
-                            , corr_lab_size = length(cBCorrNames)/length(cBCorrSelected) * 1.3
-                            , corr_value_size = 1)
-  })
-  
- #====================
- # Interactive render
- #====================
-  observeEvent(
-    input$add_col, {
-     
-      # select cols for corrplot
-      corr_cols_s <- c(input$variable)
-    
-      # render plot
-      if (length(c(input$variable)) >= 2) {
-      output$corrplot <- renderPlot({
-        
-        #---------------------
-        # My correlation plot: user selects columns
-        #---------------------
-        c_plot <- my_corr_pairs(corr_data_all     = corrplot_df
-                                , corr_cols       = corr_cols_s
-                                , dot_size        = 2
-                                , ats             = 1.5
-                                , corr_lab_size   = length(cBCorrNames)/length(corr_cols_s) * 1.3
-                                , corr_value_size = 1)
-
-      })
-      } else{ output$txt = renderText({"Argh, common! It's a correlation plot. Select >=2 vars!"})
-        
-      }
-      
-    })
-  
- #==================================
- # Add button: Select All checkbox
- #==================================
-  observeEvent(
-    input$select_all,{
-
-      updateCheckboxGroupInput(session, "variable", selected = cBCorrVals)
-    }
-)
-
- #================
- # Reset render
- #================
-  observeEvent(
-    input$reset_graph,{
-
-      # reset checkboxes to default selection
-      updateCheckboxGroupInput(session, "variable", selected = cBCorrSelected)
-
-
-      # render plot
-      output$corrplot <- renderPlot({
-
-        #---------------------
-        # My correlation plot: reset plot
-        #---------------------
-        c_plot <- my_corr_pairs(corr_data_all     = corrplot_df
-                                , corr_cols       = cBCorrSelected
-                                , dot_size        = 1.2
-                                , ats             = 1.5
-                                , corr_lab_size   = length(cBCorrNames)/length(cBCorrSelected) * 1.3
-                                , corr_value_size = 1)
-      })
-    }
-  )
-}
-)
-
-shinyApp(ui = u_corr, server = s_corr)
diff --git a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_lig_i.R b/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_lig_i.R
deleted file mode 100644
index 6ca6337..0000000
--- a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_lig_i.R
+++ /dev/null
@@ -1,220 +0,0 @@
-#!/usr/bin/env Rscript       
-
-source("~/git/LSHTM_analysis/config/gid.R")
-source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
-
-#===================================================================
-corr_data = corr_data_extract(merged_df3, drug_name = drug)
-#corr_data = corr_data_extract(merged_df2, drug_name = drug)
-#================================================================
-#other globals
-dist_colname <- LigDist_colname # ligand_distance (from globals)
-dist_cutoff <- LigDist_cutoff # 10 (from globals)
-
-cat("\nLigand distance cut off, colname:", dist_colname
-    , "\nThe max distance", gene, "structure df" , ":", max_ang, "\u212b"
-    , "\nThe min distance", gene, "structure df" , ":", min_ang, "\u212b")
-
-########################################################################
-
-#==========================================
-#####################
-# Correlation plot
-#####################
-colnames(corr_df_m3_f)
-
-corrplot_cols_lig <- c( "Log (OR)"
-                    ,  "MAF"
-                    , "-Log (P)"
-                    , "mCSM-lig"
-                    , "mCSM-NA"
-                    , "ASA"
-                    , "RSA"
-                    , "RD"
-                    , "KD" 
-                    , dist_colname
-                   , "mutation_info_labels"
-                   )
-
-corr_df_lig <- corr_df_m3_f[, corrplot_cols_lig]
-head(corr_df_lig)
-
-corrplot_df_lig <- corr_df_lig 
-
-# static df
-# stat_df = corrplot_df_lig[, c("Log (OR)"
-#                           , "MAF"
-#                           , "-Log (P)"
-#                           )]
-
-plot_title_lig <- "Correlation plots (ligand affinity)"
-
-# Checkbox Names
-cCorrNames = c( "Odds Ratio"
-             , "Allele Frequency"
-             , "P-value"
-             , "Ligand affinity"
-             , "Nucleic Acid affinity"
-             , "ASA"
-             , "RSA"
-             , "RD"
-             , "KD" 
-             , "Ligand Distance")
-
-# Checkbox Values (aka Column Names that are in corrplot_df_lig)
-cCorrVals = c("Log (OR)"
-          , "MAF"
-          , "-Log (P)"
-          , "mCSM-lig"
-          , "mCSM-NA"
-          , "ASA"
-          , "RSA"
-          , "RD"
-          , "KD" 
-          , dist_colname)
-
-# Pre-selected checkboxes
-cCorrSelected = c("Log (OR)"
-                   , "MAF"
-                   , "-Log (P)")
-#============
-# Define UI 
-#============
-u_corr_lig<- fluidPage(
-  headerPanel(plot_title_lig),
-  sidebarLayout(position = "left"
-                , sidebarPanel("Correlations: Filtered data data"
-                  , numericInput(inputId = "lig_dist"
-                                 , label = "Ligand distance cutoff"
-                                 , value =  dist_cutoff # 10 default from globals
-                                 , min = min_ang
-                                 , max = max_ang)
-                  , checkboxGroupInput("variable", "Choose parameter:"
-                                       , choiceNames  = cCorrNames
-                                       , choiceValues = cCorrVals
-                                       , selected     = cCorrSelected
-                  )
-                  # could be a fluid Row
-                  , actionButton("add_col"     , "Render")
-                  , actionButton("reset_graph" , "Reset Graphs")
-                  , actionButton("select_all"  , "Select All")
-                  
-                )
-                
-                # output/display
-                , mainPanel(plotOutput(outputId = 'corrplot'
-                                     , height = "1000px"
-                                     , width  = "1200px")
-                          # , height = "800px"
-                          # , width  = "600px")
-                , textOutput("txt")
-                )
-  )
-)
-
-#===============
-# Define server
-#===============
-s_corr_lig <- shinyServer(function(input, output, session)
-  
-{ 
-
- #================
- # Initial render
- #================
-  output$corrplot <- renderPlot({ 
-
-    # get the user-specified lig_list
-    dist_cutoff_ini = input$lig_dist
-    
-    # subset data for plot
-    corrplot_df_lig_ini = corrplot_df_lig[corrplot_df_lig[[dist_colname]] < dist_cutoff_ini,]
-    
-    #---------------------
-    # My correlation plot: initial plot
-    #---------------------
-    c_plot <- my_corr_pairs(
-                            #corr_data_all = corrplot_df_lig
-                            corr_data_all = corrplot_df_lig_ini
-                            , corr_cols = cCorrSelected
-                            , dot_size = 2
-                            , ats = 1.5
-                            , corr_lab_size = length(cCorrNames)/length(cCorrSelected) * 1.3
-                            , corr_value_size = 1)
-    
-  })
-  
- #====================
- # Interactive render
- #====================
-  observeEvent(
-    input$add_col, {
-      
-      # get the user-specified lig_list
-      dist_cutoff_user = input$lig_dist
-
-      # subset data for plot
-      corrplot_df_lig_s = corrplot_df_lig[corrplot_df_lig[[dist_colname]] < dist_cutoff_user,]
-      
-      # select cols for corrplot
-      corr_cols_s = c(input$variable)
-    
-      # render plot
-      if (length(c(input$variable)) >= 2) {
-      
-        output$corrplot <- renderPlot({ 
-          
-          #---------------------
-          # My correlation plot: user selects columns
-          #---------------------
-          c_plot <- my_corr_pairs(corr_data_all = corrplot_df_lig_s
-                                  , corr_cols = corr_cols_s
-                                  , dot_size = 1.6
-                                  , ats = 1.5
-                                  , corr_lab_size = length(cCorrNames)/length(corr_cols_s) * 1.3
-                                  , corr_value_size = 1)
-        })
-      } else { output$txt = renderText({"Fuddu! It's a correlation plot. Select >=2 vars bewakoof!"})}
-      
-    })
-    
- #==================================
- # Add button: Select All checkbox
- #==================================
-  observeEvent(
-    input$select_all,{
-
-      updateCheckboxGroupInput(session, "variable", selected = cCorrVals)
-    }
-  )
-  
- #================
- # Reset render
- #================
-  observeEvent(
-    input$reset_graph,{
-      
-      # reset checkboxes
-      updateCheckboxGroupInput(session, "variable", selected = cCorrSelected)
-
-      # render plot
-      output$corrplot <- renderPlot({ 
-        
-        #---------------------
-        # My correlation plot: reset plot
-        #---------------------
-        c_plot <- my_corr_pairs(corr_data_all = corrplot_df_lig
-                                , corr_cols = cCorrSelected
-                                , dot_size = 2
-                                , ats = 1.5
-                                , corr_lab_size = length(cCorrNames)/length(cCorrSelected) * 1.3
-                                , corr_value_size = 1)
-        
-      })
-    }
-  )
-}
-)
-
-shinyApp(ui = u_corr_lig, server = s_corr_lig)
-
diff --git a/scripts/plotting/plotting_thesis/corr_thesis_ref/ggcorr_all_PS_LIG.R b/scripts/plotting/plotting_thesis/corr_thesis_ref/ggcorr_all_PS_LIG.R
deleted file mode 100644
index e7701e2..0000000
--- a/scripts/plotting/plotting_thesis/corr_thesis_ref/ggcorr_all_PS_LIG.R
+++ /dev/null
@@ -1,323 +0,0 @@
-#!/usr/bin/env Rscript
-#########################################################
-# TASK: Corr plots for PS and Lig 
-
-# Output: 1 svg
-
-#=======================================================================
-# working dir and loading libraries
-getwd()
-setwd("~/git/LSHTM_analysis/scripts/plotting/")
-getwd()
-
-source("~/git/LSHTM_analysis/scripts/Header_TT.R")
-require(cowplot)
-source("combining_dfs_plotting.R")
-#source("my_pairs_panel.R")
-# should return the following dfs, directories and variables
-
-# FIXME: Can't output from here
-
-# PS combined: 
-# 1) merged_df2
-# 2) merged_df2_comp
-# 3) merged_df3
-# 4) merged_df3_comp
-
-# LIG combined: 
-# 5) merged_df2_lig
-# 6) merged_df2_comp_lig
-# 7) merged_df3_lig
-# 8) merged_df3_comp_lig
-
-# 9) my_df_u
-# 10) my_df_u_lig
-
-cat(paste0("Directories imported:"
-           , "\ndatadir:", datadir
-           , "\nindir:", indir
-           , "\noutdir:", outdir
-           , "\nplotdir:", plotdir))
-
-cat(paste0("Variables imported:"
-           , "\ndrug:", drug
-           , "\ngene:", gene
-           , "\ngene_match:", gene_match
-           , "\nAngstrom symbol:", angstroms_symbol
-           , "\nNo. of duplicated muts:", dup_muts_nu
-           , "\nNA count for ORs:", na_count
-           , "\nNA count in df2:", na_count_df2
-           , "\nNA count in df3:", na_count_df3))     
-
-#=======
-# output
-#=======
-# can't combine by cowplot because not ggplots
-#corr_plot_combined = "corr_combined.svg"
-#plot_corr_plot_combined  =  paste0(plotdir,"/", corr_plot_combined)
-
-# PS
-#ggcorr_all_ps = "ggcorr_all_PS.svg"
-ggcorr_all_ps = "ggcorr_all_PS.png"
-plot_ggcorr_all_ps =  paste0(plotdir,"/", ggcorr_all_ps)
-
-# LIG
-#ggcorr_all_lig = "ggcorr_all_LIG.svg"
-ggcorr_all_lig = "ggcorr_all_LIG.png"
-plot_ggcorr_all_lig =  paste0(plotdir,"/", ggcorr_all_lig )
-
-# combined
-ggcorr_all_combined_labelled = "ggcorr_all_combined_labelled.png"
-plot_ggcorr_all_combined_labelled  =  paste0(plotdir,"/", ggcorr_all_combined_labelled)
-
-####################################################################
-#               end of loading libraries and functions                 #
-########################################################################
-
-#%%%%%%%%%%%%%%%%%%%%%%%%%
-#df_ps = merged_df3_comp 
-#df_lig = merged_df3_comp_lig
-merged_df3 = as.data.frame(merged_df3)
-df_ps = merged_df3 
-df_lig = merged_df3_lig
-
-#%%%%%%%%%%%%%%%%%%%%%%%%%
-
-rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig)
-
-########################################################################
-#               end of data extraction and cleaning for plots          #
-########################################################################
-
-
-#======================
-# adding log cols 
-#======================
-# subset data to generate pairwise correlations
-cols_to_select =  c("duet_scaled"
-                    
-                    , "foldx_scaled"
-                    
-                    , "log10_or_mychisq"
-                    , "neglog_pval_fisher"
-                    
-                    #, "or_kin"
-                    #, "neglog_pwald_kin"
-                    
-                    , "af"
-                    
-                    , "asa"
-                    , "rsa"
-                    , "kd_values"
-                    , "rd_values"
-                    
-                    , "duet_outcome"
-                    , drug)
-
-corr_data_ps = df_ps[, cols_to_select]
-
-dim(corr_data_ps)
-
-#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic 
-#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic
-
-# assign nice colnames (for display)
-my_corr_colnames = c("DUET"
-                     
-                     , "Foldx"
-                     
-                     , "Log (OR)"
-                     , "-Log (P)"
-                     
-                     #, "OR (adjusted)"
-                     #, "-Log (P wald)"
-                     
-                     , "AF"
-                     
-                     , "ASA"
-                     , "RSA"
-                     , "KD"
-                     , "RD"
-                     
-                     , "duet_outcome"
-                     , drug)
-
-length(my_corr_colnames)
-
-colnames(corr_data_ps)
-colnames(corr_data_ps) <- my_corr_colnames
-colnames(corr_data_ps)
-
-#------------------------
-# Data for ggcorr PS plot
-#------------------------
-start = 1
-end_ggcorr = which(colnames(corr_data_ps) == "duet_outcome"); end_ggcorr # should be the last column
-offset = 1
-
-my_ggcorr_ps = corr_data_ps[start:(end_ggcorr-1)]
-head(my_ggcorr_ps)
-
-# correlation matrix
-corr1 <- round(cor(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs"), 1)
-
-# p-value matrix
-pmat1 <- cor_pmat(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs"
-                  ,   conf.level = 0.99)
-
-corr2 = psych::corr.test(my_ggcorr_ps
-                        , method = "spearman"
-                        , use =  "pairwise.complete.obs")$r
-corr2 = round(corr2, 1)
-
-pmat2 =  psych::corr.test(my_ggcorr_ps
-                            , method = "spearman"
-                            , adjust = "none"
-                            , use =  "pairwise.complete.obs")$p
-
-corr1== corr2
-pmat1==pmat2
-
-#------------------------
-# Generate ggcorr PS plot
-#------------------------
-cat("ggCorr plot PS:", plot_ggcorr_all_ps)
-#png(filename = plot_ggcorr_all_ps, width = 1024, height = 768, units = "px", pointsize = 20)
-ggcorr_ps = ggcorrplot(corr1
-                       , p.mat = pmat1
-                       , hc.order = TRUE
-                       , outline.col = "black"
-                       , ggtheme = ggplot2::theme_gray
-                       , colors = c("#6D9EC1", "white", "#E46726")
-                       , title = "DUET and Foldx stability")    
-
-
-ggcorr_ps
-#dev.off()
-
-#===========================
-# Data for Correlation plots: LIG
-#===========================
-table(df_lig$ligand_outcome)
-
-df_lig$log10_or_mychisq = log10(df_lig$or_mychisq)
-df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher)
-
-
-df_lig$log10_or_kin = log10(df_lig$or_kin)
-df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin)
-
-# subset data to generate pairwise correlations
-cols_to_select_lig =  c("affinity_scaled"
-                    
-                    , "log10_or_mychisq"
-                    , "neglog_pval_fisher"
-                    
-                    , "or_kin"
-                    , "neglog_pwald_kin"
-                    
-                    , "af"
-                    
-                    , "asa"
-                    , "rsa"
-                    , "kd_values"
-                    , "rd_values"
-                    
-                    , "ligand_outcome"
-                    , drug)
-
-corr_data_lig = df_lig[, cols_to_select_lig]
-
-dim(corr_data_lig)
-
-# assign nice colnames (for display)
-my_corr_colnames_lig = c("Ligand Affinity"
-                     
-                     , "Log (OR)"
-                     , "-Log (P)"
-                     
-                     , "OR (adjusted)"
-                     , "-Log(P wald)"
-                     
-                     , "AF"
-                     
-                     , "ASA"
-                     , "RSA"
-                     , "KD"
-                     , "RD"
-                     
-                     , "ligand_outcome"
-                     , drug)
-
-length(my_corr_colnames)
-
-colnames(corr_data_lig)
-colnames(corr_data_lig) <- my_corr_colnames_lig
-colnames(corr_data_lig)
-
-#------------------------
-# Data for ggcorr LIG plot
-#------------------------
-
-start = 1
-end_ggcorr_lig = which(colnames(corr_data_lig) == "ligand_outcome"); end_ggcorr_lig # should be the last column
-offset = 1
-
-my_ggcorr_lig = corr_data_lig[start:(end_ggcorr_lig-1)]
-head(my_ggcorr_lig); str(my_ggcorr_lig)
-
-# correlation matrix
-corr1_lig <- round(cor(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs"), 1)
-
-# p-value matrix
-pmat1_lig <- cor_pmat(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs")
-
-corr2_lig = psych::corr.test(my_ggcorr_lig
-                             , method = "spearman"
-                             , use =  "pairwise.complete.obs")$r
-
-corr2_lig = round(corr2_lig, 1)
-
-pmat2_lig =  psych::corr.test(my_ggcorr_lig
-                              , method = "spearman"
-                              , adjust = "none"
-                              , use =  "pairwise.complete.obs")$p
-
-corr1_lig == corr2_lig
-pmat1_lig == pmat2_lig
-
-
-# for display order columns by hc order of ps 
-
-#col_order = levels(ggcorr_ps$data[2])
-
-#col_order <- c("Species", "Petal.Width", "Sepal.Length",
-               #"Sepal.Width", "Petal.Length")
-#my_data2 <- my_data[, col_order]
-#my_data2
-
-#------------------------
-# Generate ggcorr LIG plot
-#------------------------
-cat("ggCorr LIG plot:", plot_ggcorr_all_lig)
-#svg(plot_ggcorr_all_lig, width = 15, height = 15)
-#png(plot_ggcorr_all_lig, width = 1024, height = 768, units = "px", pointsize = 20)
-
-ggcorr_lig = ggcorrplot(corr1_lig
-                        , p.mat = pmat1_lig
-                        , hc.order = TRUE
-                        , outline.col = "black"
-                        
-                        , ggtheme = ggplot2::theme_gray
-                        , colors = c("#6D9EC1", "white", "#E46726")
-                        , title = "Ligand affinty")    
-
-
-ggcorr_lig
-#dev.off()
-
-#######################################################
-#=============================
-# combine plots for output
-#=============================
-+
\ No newline at end of file
diff --git a/scripts/plotting/plotting_thesis/preformatting.R b/scripts/plotting/plotting_thesis/preformatting.R
index f88afa8..74fbbf7 100644
--- a/scripts/plotting/plotting_thesis/preformatting.R
+++ b/scripts/plotting/plotting_thesis/preformatting.R
@@ -88,7 +88,7 @@ all_cols = c(common_cols
 #=======
 # output
 #=======
-outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene))
+outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
 
 ####################################
 # merged_df3: NECESSARY pre-processing
@@ -228,6 +228,15 @@ corr_lig_colnames = c("mCSM-lig"
                      , drug)
 
 corr_ppi2_colnames = c("mCSM-PPI2"
+                       , "SNAP2"             
+                       , "Log (OR)"       
+                       , "-Log (P)"
+                       , "interface_dist" 
+                       , "dst_mode"
+                       , drug)
+
+
+corr_conservation = c("Consurf"
                        , "MAF"             
                        , "Log (OR)"       
                        , "-Log (P)"