generated ggpairs plots finally

2022-08-15 19:05:22 +01:00 · 2022-08-15 19:05:22 +01:00 · a3e5283a9b
commit a3e5283a9b
parent b68841b337
11 changed files with 657 additions and 939 deletions
--- a/scripts/functions/position_count_bp.R
+++ b/scripts/functions/position_count_bp.R
@ -143,6 +143,9 @@ site_snp_count_bp <- function (plotdf
          #, legend.position = c(0.73,0.8)
          #, legend.text = element_text(size = leg_text_size)
          #, legend.title = element_text(size =  axis_label_size)
          #, panel.grid.major = element_blank(),
          #, panel.grid.minor = element_blank(),
          , panel.grid = element_blank()
          , plot.title = element_text(size = leg_text_size
                                      , colour = title_colour
                                      , hjust = 0.5)
--- a/scripts/functions/stability_count_bp.R
+++ b/scripts/functions/stability_count_bp.R
@ -56,6 +56,9 @@ stability_count_bp <- function(plotdf
          , legend.position = leg_position
          , legend.text = element_text(size = lts)
          , legend.title = element_text(size =  ltis)
          #, panel.grid.major = element_blank(),
          #, panel.grid.minor = element_blank(),
          , panel.grid = element_blank()
          , legend.key.size = unit(lts,"pt")
          , plot.title = element_text(size =  als
                                      , colour = title_colour
--- a/scripts/plotting/LINEAGE2.R
+++ b/scripts/plotting/LINEAGE2.R
@ -4,22 +4,41 @@ library("ggforce")
 #install.packages("gginference")
 library(gginference)
 library(ggpubr)
 library(svglite)
 ##################################################
 #%% read data
 # DOME: read data using gene and drug combination
 # gene must be lowercase
 # tolower(gene)
-#################################################
+############################################################
 #gene="pncA"
 #drug="pyrazinamide"
 #lineage_filename=paste0(tolower(gene),"_merged_df2.csv")
 #lineage_data_path="~/git/Data/pyrazinamide/output"
-df2 = read.csv(paste0(lineage_data_path,"/",lineage_filename))
+#=============
 # Data: Input
 #==============
 #source("~/git/LSHTM_analysis/config/alr.R")
 #source("~/git/LSHTM_analysis/config/embb.R")
 # source("~/git/LSHTM_analysis/config/gid.R")
 source("~/git/LSHTM_analysis/config/katg.R")
 #source("~/git/LSHTM_analysis/config/pnca.R")
 #source("~/git/LSHTM_analysis/config/rpob.R")
-foo = as.data.frame(colnames(df2))
+source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
 source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
 #=======
 # output
 #=======
 outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
 cat("plots will output to:", outdir_images)
 ###########################################################
 class(merged_df2)
 foo = as.data.frame(colnames(merged_df2))
 cols_to_subset = c('mutationinformation'
                   , 'snp_frequency'
@ -36,7 +55,7 @@ cols_to_subset = c('mutationinformation'
 #cols_to_subset%in%foo
-my_df = df2[ ,cols_to_subset]
+my_df = merged_df2[ ,cols_to_subset]
 # r24p_embb = df_embb[df_embb$mutationinformation == "R24P",]
 # #tm = c("A102P", "M1T")
@ -73,10 +92,9 @@ table(my_df2$lineage)
 sel_lineages2 = c("L1", "L2", "L3", "L4")
 my_df2 = my_df2[my_df2$lineage%in%sel_lineages2,]
 table(my_df2$lineage)
 sum(table(my_df2$lineage)) == nrow(my_df2)
 table(my_df2$lineage)
 table(my_df2$lineage, my_df2$sensitivity)
 # %%
 # str(my_df2)
@ -85,6 +103,7 @@ table(my_df2$lineage)
 #%% get only muts which belong to > 1 lineage and have different sensitivity classifications
 muts = unique(my_df2$mutationinformation)
 cat ("Total unique muts in L1-L4", tolower(gene), ":", length(muts))
 #-----------------------------------------------
 # step 0 : get muts with more than one lineage
 #-----------------------------------------------
@ -100,7 +119,6 @@ for (i in muts) {
 }
 cat("\nGot:", length(lin_muts), "mutations belonging to >1 lineage with differing drug sensitivities")
 #-----------------------------------------------
 # step 1 : get other muts that do not have this
 #-----------------------------------------------
@ -111,7 +129,6 @@ cat("\nGot:", length(consist_muts), "mutations that are consistent")
 # step 2: subset these muts for plotting
 #-----------------------------------------------
 plot_df = my_df2[my_df2$mutationinformation%in%lin_muts,]
 cat("\nnrow of plot_df:", nrow(plot_df))
 #-----------------------------------------------
@ -125,7 +142,9 @@ for (i in lin_muts) {
  s_tab = table(s_mut$lineage, s_mut$sens2)
  #print(s_tab)
  #ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3)
-  ft_pvalue_i = fisher.test(s_tab)$p.value
+  ft_pvalue_i = fisher.test(s_tab
                            #, workspace=2e9
                            , simulate.p.value=TRUE,B=1e7)$p.value
  #print(ft_pvalue_i)
  plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i
  #print(s_tab)
@ -155,8 +174,6 @@ plot_df
 head(plot_df)
 table(plot_df$pvalR<0.05)
 # format p value
 # TODO: add case statement for correct pvalue formatting
 #plot_df$pvalF = ifelse(plot_df$pval <= 0.0001, paste0(round(plot_df$pval, 3), "**** "), plot_df$pval )
@ -233,6 +250,7 @@ cat("\nGot:", sig_muts, "mutations that are significant")
 plot_df_ns = plot_df2[plot_df2$pvalR>0.05,]
 ns_muts = length(unique(plot_df_ns$mutationinformation))
 cat("\nGot:", ns_muts, "mutations that are NOT significant")
 p_title = gene
 ts = 8
 gls = 3
@ -244,7 +262,7 @@ gls = 3
 #3) Add *: Extend yaxis for each plot to allow geom_label to have space (or see
 # if this is self-resolving with facet_wrap_paginate())
 #================================================
-#svg(paste0(outdir_images, "embb_linDS.svg"), width = 6, height = 10 ) # old-school square 4:3 CRT shape 1.3:1
+#svg(paste0(outdir_images, tolower(gene), "_linDS.svg"), width = 6, height = 10 ) # old-school square 4:3 CRT shape 1.3:1
 ds_s =  ggplot(plot_df_sig, aes(x = lineage
                         , fill = sens2)) + 
      geom_bar(stat = 'count') +
@ -280,7 +298,7 @@ ds_s =  ggplot(plot_df_sig, aes(x = lineage
 ###################################
 #ns muts
-#svg(paste0(outdir_images, "embb_linDS_ns.svg"), width =10 , height = 8) # old-school square 4:3 CRT shape 1.3:1
+#svg(paste0(outdir_images, tolower(gene), "_linDS_ns.svg"), width =10 , height = 8) # old-school square 4:3 CRT shape 1.3:1
 ds_ns =  ggplot(plot_df_ns, aes(x = lineage
                         , fill = sens2)) + 
    geom_bar(stat = 'count') +
@ -309,31 +327,57 @@ ds_ns =  ggplot(plot_df_ns, aes(x = lineage
    labs(title = paste0(p_title, ": sensitivity by lineage")
         , y = 'Sample Count')
 #dev.off()
 #####################################################################
 #===================
 # Combine output
 #====================
-
+# svg(paste0(outdir_images, tolower(gene), "_linDS_CL.svg")
 # svg(paste0(outdir_images, "embb_linDS_CL.svg")
 #     , width = 11
 #     , height = 8 ) 
-png(paste0(outdir_images, "embb_linDS_CL.png")
+png(paste0(outdir_images, tolower(gene), "_linDS_CL2.png")
-    , width = 11.75
+    , width = 11.75*1.15
    , height = 8, units = "in", res = 300 ) 
 cowplot::plot_grid(ds_s, ds_ns
                   , ncol = 2
-                   ,rel_widths = c(1,2)
+                   #, align = "hv"
                   , rel_widths = c(1,2.5)
                   , labels = "AUTO")
 dev.off()
 ########################################################################
 #==================
 # Summary output
 #==================
 cat ("Total unique muts in ALL samples for", tolower(gene), ":", length(unique(merged_df2$mutationinformation)))
 other_lin_muts = unique(merged_df2$mutationinformation)[!unique(merged_df2$mutationinformation)%in%unique(my_df2$mutationinformation)]
 cat ("Total unique muts NOT in L1-L4:", length(other_lin_muts))
 cat("These are:\n", other_lin_muts)
 other_lin_muts_df = merged_df2[merged_df2$mutationinformation%in%other_lin_muts,]
 if ( length(unique(other_lin_muts_df$mutationinformation)) == length(other_lin_muts)) {
  cat("\nPASS: other lin muts extracted")
 }else{
  stop("\nAbort: other lin muts numbers mismatch")
 }
 table(other_lin_muts_df$mutationinformation, other_lin_muts_df$lineage)
 cat("\n==============================================\n")
 cat ("Total samples L1-L4:", nrow(my_df2))
 table(my_df2$lineage)
 table(my_df2$lineage, my_df2$sensitivity)
 cat ("Total unique muts in L1-L4", tolower(gene), ":", length(muts))
 cat("\nGot:", length(lin_muts), "mutations belonging to >1 lineage with differing drug sensitivities")
 cat("\nGot:", sig_muts, "mutations that are significant"
    , "\nThese are:", unique(plot_df_sig$mutationinformation))
-#geom_text(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1))# +
+cat("\nGot:", ns_muts, "mutations that are NOT significant"
    , "\nThese are:", unique(plot_df_ns$mutationinformation))
-  #geom_segment(aes(x = 1, y = ypos_label+0.5, xend = 4, yend = ypos_label+0.5))
+cat("\n==============================================\n")
  #geom_hline(data = lin_muts_dfM, aes(yintercept=ypos_label+0.5))
  #geom_bracket(data=lin_muts_dfM, aes(xmin = 1, xmax = 4, y.position = ypos_label+0.5, label=''))
--- a/scripts/plotting/get_plotting_dfs.R
+++ b/scripts/plotting/get_plotting_dfs.R
@ -109,137 +109,137 @@ merged_df3      = all_plot_dfs[[2]]
 ####################################################################
 #                        Data for logoplots
 ####################################################################
-
+# 
-source(paste0(plot_script_path, "logo_data_msa.R"))
+# source(paste0(plot_script_path, "logo_data_msa.R"))
-s1 = c("\nSuccessfully sourced logo_data_msa.R")
+# s1 = c("\nSuccessfully sourced logo_data_msa.R")
-cat(s1)
+# cat(s1)
-
+# 
-####################################################################
+# ####################################################################
-#                     Data for DM OM Plots: WF and LF dfs
+# #                     Data for DM OM Plots: WF and LF dfs
-#                   My function: dm_om_wf_lf_data()
+# #                   My function: dm_om_wf_lf_data()
-#                 location: scripts/functions/dm_om_data.R
+# #                 location: scripts/functions/dm_om_data.R
-#source("other_plots_data.R")
+# #source("other_plots_data.R")
-####################################################################
+# ####################################################################
-
+# 
-#source(paste0(plot_script_path, "dm_om_data.R")) # calling the function directly instead
+# #source(paste0(plot_script_path, "dm_om_data.R")) # calling the function directly instead
-geneL_normal  = c("pnca")
+# geneL_normal  = c("pnca")
-geneL_na      = c("gid", "rpob")
+# geneL_na      = c("gid", "rpob")
-geneL_ppi2    = c("alr", "embb", "katg", "rpob")
+# geneL_ppi2    = c("alr", "embb", "katg", "rpob")
-
+# 
-all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
+# all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
- 
+#  
-wf_duet      = all_dm_om_df[['wf_duet']]
+# wf_duet      = all_dm_om_df[['wf_duet']]
-lf_duet      = all_dm_om_df[['lf_duet']]
+# lf_duet      = all_dm_om_df[['lf_duet']]
- 
+#  
-wf_mcsm_lig  = all_dm_om_df[['wf_mcsm_lig']]
+# wf_mcsm_lig  = all_dm_om_df[['wf_mcsm_lig']]
-lf_mcsm_lig  = all_dm_om_df[['lf_mcsm_lig']]
+# lf_mcsm_lig  = all_dm_om_df[['lf_mcsm_lig']]
- 
+#  
-wf_foldx     = all_dm_om_df[['wf_foldx']]
+# wf_foldx     = all_dm_om_df[['wf_foldx']]
-lf_foldx     = all_dm_om_df[['lf_foldx']]
+# lf_foldx     = all_dm_om_df[['lf_foldx']]
- 
+#  
-wf_deepddg   = all_dm_om_df[['wf_deepddg']]
+# wf_deepddg   = all_dm_om_df[['wf_deepddg']]
-lf_deepddg   = all_dm_om_df[['lf_deepddg']]
+# lf_deepddg   = all_dm_om_df[['lf_deepddg']]
- 
+#  
-wf_dynamut2  = all_dm_om_df[['wf_dynamut2']]
+# wf_dynamut2  = all_dm_om_df[['wf_dynamut2']]
-lf_dynamut2  = all_dm_om_df[['lf_dynamut2']]
+# lf_dynamut2  = all_dm_om_df[['lf_dynamut2']]
- 
+#  
-wf_consurf   = all_dm_om_df[['wf_consurf']]
+# wf_consurf   = all_dm_om_df[['wf_consurf']]
-lf_consurf   = all_dm_om_df[['lf_consurf']]
+# lf_consurf   = all_dm_om_df[['lf_consurf']]
- 
+#  
-wf_snap2     = all_dm_om_df[['wf_snap2']]
+# wf_snap2     = all_dm_om_df[['wf_snap2']]
-lf_snap2     = all_dm_om_df[['lf_snap2']]
+# lf_snap2     = all_dm_om_df[['lf_snap2']]
- 
+#  
-wf_provean   = all_dm_om_df[['wf_provean']]
+# wf_provean   = all_dm_om_df[['wf_provean']]
-lf_provean   = all_dm_om_df[['lf_provean']]
+# lf_provean   = all_dm_om_df[['lf_provean']]
-
+# 
-# NEW
+# # NEW
-wf_dist_gen   = all_dm_om_df[['wf_dist_gen']]
+# wf_dist_gen   = all_dm_om_df[['wf_dist_gen']]
-lf_dist_gen   = all_dm_om_df[['lf_dist_gen']]
+# lf_dist_gen   = all_dm_om_df[['lf_dist_gen']]
- 
+#  
-if (tolower(gene)%in%geneL_na){
+# if (tolower(gene)%in%geneL_na){
-   wf_mcsm_na   = all_dm_om_df[['wf_mcsm_na']]
+#    wf_mcsm_na   = all_dm_om_df[['wf_mcsm_na']]
-   lf_mcsm_na   = all_dm_om_df[['lf_mcsm_na']]
+#    lf_mcsm_na   = all_dm_om_df[['lf_mcsm_na']]
-}
+# }
- 
+#  
-if (tolower(gene)%in%geneL_ppi2){
+# if (tolower(gene)%in%geneL_ppi2){
-   wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
+#    wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
-   lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
+#    lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
-}
+# }
-
+# 
-s2 = c("\nSuccessfully sourced other_plots_data.R")
+# s2 = c("\nSuccessfully sourced other_plots_data.R")
-cat(s2)
+# cat(s2)
-
+# 
-####################################################################
+# ####################################################################
-#                  Data for Lineage barplots: WF and LF dfs
+# #                  Data for Lineage barplots: WF and LF dfs
-#               My function: lineage_plot_data()
+# #               My function: lineage_plot_data()
-#           location: scripts/functions/lineage_plot_data.R
+# #           location: scripts/functions/lineage_plot_data.R
-####################################################################
+# ####################################################################
- 
+#  
-#source(paste0(plot_script_path, "lineage_data.R"))
+# #source(paste0(plot_script_path, "lineage_data.R"))
-# converted to a function. Moved lineage_data.R to redundant/
+# # converted to a function. Moved lineage_data.R to redundant/
-lineage_dfL = lineage_plot_data(merged_df2
+# lineage_dfL = lineage_plot_data(merged_df2
-                                , lineage_column_name = "lineage"
+#                                 , lineage_column_name = "lineage"
-                                , remove_empty_lineage = F
+#                                 , remove_empty_lineage = F
-                                , lineage_label_col_name = "lineage_labels"
+#                                 , lineage_label_col_name = "lineage_labels"
-                                , id_colname = "id"
+#                                 , id_colname = "id"
-                                , snp_colname = "mutationinformation"
+#                                 , snp_colname = "mutationinformation"
-                                )
+#                                 )
-
+# 
-lin_wf = lineage_dfL[['lin_wf']]
+# lin_wf = lineage_dfL[['lin_wf']]
-lin_lf = lineage_dfL[['lin_lf']]
+# lin_lf = lineage_dfL[['lin_lf']]
-
+# 
-s3 = c("\nSuccessfully sourced lineage_data.R")
+# s3 = c("\nSuccessfully sourced lineage_data.R")
-cat(s3)
+# cat(s3)
-
+# 
-####################################################################
+# ####################################################################
-#                  Data for corr plots:
+# #                  Data for corr plots:
-#               My function: corr_data_extract()
+# #               My function: corr_data_extract()
-#          location: scripts/functions/corr_plot_data.R
+# #          location: scripts/functions/corr_plot_data.R
-####################################################################
+# ####################################################################
-# make sure the above script works because merged_df2_combined is needed
+# # make sure the above script works because merged_df2_combined is needed
-merged_df3 = as.data.frame(merged_df3)
+# merged_df3 = as.data.frame(merged_df3)
-
+# 
-corr_df_m3_f = corr_data_extract(merged_df3
+# corr_df_m3_f = corr_data_extract(merged_df3
                                 , gene = gene
                                 , drug = drug
                                 , extract_scaled_cols = F)
 head(corr_df_m3_f)
 # corr_df_m2_f = corr_data_extract(merged_df2
 #                                  , gene = gene
 #                                  , drug = drug
 #                                  , extract_scaled_cols = F)
-# head(corr_df_m2_f)
+# head(corr_df_m3_f)
-
+# 
-s4 = c("\nSuccessfully sourced Corr_data.R")
+# # corr_df_m2_f = corr_data_extract(merged_df2
-cat(s4)
+# #                                  , gene = gene
-
+# #                                  , drug = drug
-########################################################################
+# #                                  , extract_scaled_cols = F)
-#                           End of script
+# # head(corr_df_m2_f)
-########################################################################
+# 
-if (  all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){
+# s4 = c("\nSuccessfully sourced Corr_data.R")
- cat(
+# cat(s4)
-  "\n##################################################"
+# 
- , "\nSuccessful: get_plotting_dfs.R worked!"
+# ########################################################################
- , "\n###################################################\n")
+# #                           End of script
- } else {
+# ########################################################################
- cat(
+# if (  all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){
-  "\n#################################################"
+#  cat(
- , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
+#   "\n##################################################"
- , "\n###################################################\n" )
+#  , "\nSuccessful: get_plotting_dfs.R worked!"
-}
+#  , "\n###################################################\n")
- 
+#  } else {
-########################################################################
+#  cat(
-# clear excess variables: from the global environment
+#   "\n#################################################"
-
+#  , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
-vars0 = ls(envir = .GlobalEnv)[grepl("curr_*", ls(envir = .GlobalEnv))] 
+#  , "\n###################################################\n" )
-vars1 = ls(envir = .GlobalEnv)[grepl("^cols_to*", ls(envir = .GlobalEnv))] 
+# }
-vars2 = ls(envir = .GlobalEnv)[grepl("pivot_cols_*", ls(envir = .GlobalEnv))]
+#  
-vars3 = ls(envir = .GlobalEnv)[grepl("expected_*", ls(envir = .GlobalEnv))]
+# ########################################################################
-
+# # clear excess variables: from the global environment
-rm( infile_metadata
+# 
-   , infile_params
+# vars0 = ls(envir = .GlobalEnv)[grepl("curr_*", ls(envir = .GlobalEnv))] 
-   , vars0
+# vars1 = ls(envir = .GlobalEnv)[grepl("^cols_to*", ls(envir = .GlobalEnv))] 
-   , vars1
+# vars2 = ls(envir = .GlobalEnv)[grepl("pivot_cols_*", ls(envir = .GlobalEnv))]
-   , vars2
+# vars3 = ls(envir = .GlobalEnv)[grepl("expected_*", ls(envir = .GlobalEnv))]
-   , vars3)
+# 
 # rm( infile_metadata
 #    , infile_params
 #    , vars0
 #    , vars1
 #    , vars2
 #    , vars3)
--- a/scripts/plotting/plotting_thesis/basic_barplots2.R
+++ b/scripts/plotting/plotting_thesis/basic_barplots2.R
@ -38,7 +38,7 @@ source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
 class(merged_df3)
 merged_df3 = as.data.frame(merged_df3)
-class(df3)
+class(merged_df3)
 head(merged_df3$pos_count)
 nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE
@ -198,10 +198,10 @@ rects <- data.frame(x = 1:6,
 )
 rects
-rects$text =  c("-ve Lig affinty"
+rects$text =  c("-ve Lig"
-                , "+ve Lig affinity"
+                , "+ve Lig"
-                , "+ve PPI2 affinity"
+                , "+ve PPI2"
-                , "-ve PPI2 affinity"
+                , "-ve PPI2"
                , "+ve stability"
                , "-ve stability")
@ -221,7 +221,7 @@ peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_l
  coord_flip()+ scale_x_reverse() +
  # theme_void() # remove any axis markings
  theme_nothing() # remove any axis markings
-
+peP
 peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
  geom_tile() + # make square tiles
@ -229,7 +229,7 @@ peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_
  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
  coord_fixed() + # make sure tiles are square
  theme_nothing() # remove any axis markings
-
+peP2
 # ------------------------------
 # bp site site count: ALL
@ -252,24 +252,24 @@ posC_all = site_snp_count_bp(plotdf = df3
 #------------------------------
 # barplot for sensitivity:
 #------------------------------
-sensP = stability_count_bp(plotdf = df3
+# sensP = stability_count_bp(plotdf = df3
-                          , df_colname = "sensitivity"
+#                           , df_colname = "sensitivity"
-                          #, leg_title  = "mCSM-ppi2"
+#                           #, leg_title  = "mCSM-ppi2"
-                          #, label_categories = labels_ppi2
+#                           #, label_categories = labels_ppi2
-                          #, bp_plot_title = paste(common_bp_title, "PP-interface")
+#                           #, bp_plot_title = paste(common_bp_title, "PP-interface")
-                          
+#                           
-                          , yaxis_title = "Number of nsSNPs"
+#                           , yaxis_title = "Number of nsSNPs"
-                          , leg_position = "none"
+#                           , leg_position = "none"
-                          , subtitle_text = "Sensitivity"
+#                           , subtitle_text = "Sensitivity"
-                          , bar_fill_values = c("red", "blue")
+#                           , bar_fill_values = c("red", "blue")
-                          , subtitle_colour= "black"
+#                           , subtitle_colour= "black"
-                          , sts = 10
+#                           , sts = 10
-                          , lts = 8
+#                           , lts = 8
-                          , ats = 8
+#                           , ats = 8
-                          , als =8
+#                           , als =8
-                          , ltis = 11
+#                           , ltis = 11
-                          , geom_ls =2
+#                           , geom_ls =2
-)
+# )
 consurfP = stability_count_bp(plotdf = df3
@ -290,3 +290,95 @@ consurfP = stability_count_bp(plotdf = df3
 consurfP
 ####################
 # Sensitivity count
 ####################
 # Sensitivity count tiles: one coloured tile per mutation class, labelled
 # with the number of entries of that class in df3$sensitivity.
 # Print the class counts for inspection.
 table(df3$sensitivity)
 # One row per class: display name, tile colour and count.
 # NOTE(review): the counts are taken positionally ([[1]], [[2]]) from table(),
 # so this assumes the first level of df3$sensitivity is the resistant class
 # and the second the sensitive class -- confirm against the actual levels.
 rect_sens=data.frame(mutation_class=c("Resistant","Sensitive")
                    , tile_colour =c("red","blue")
                    , numbers = c(table(df3$sensitivity)[[1]], table(df3$sensitivity)[[2]]))
 # Build the plot: x = class, constant y so the tiles sit in a single row,
 # fill taken verbatim from tile_colour, label text "n=<count>".
 sensP = ggplot(rect_sens, aes(mutation_class, y = 0
                              , fill = tile_colour
                              , label = paste0("n=", numbers)
                              )) +
  geom_tile(width = 1, height = 1) + # make square tiles
  geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + # add the count label (black text on a semi-transparent white box) in the middle
  scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
  coord_fixed() + # make sure tiles are square
  #coord_flip()+ scale_x_reverse() +
  # theme_void() # remove any axis markings
  theme_nothing() # remove any axis markings
 # Print the plot so it renders when the script is run interactively.
 sensP
 # sensP2 = sensP + 
 #   coord_flip() + scale_x_reverse()
 # sensP2
 ##############################
 # FIXME for other genes: ATTEMPTED to derive numbers
 ##############################
 # 
 # table(str_df_short$pe_effect_outcome)
 # # extract the numbers
 # DD_lig_n       = table(str_df_short$pe_effect_outcome)[[1]]
 # SS_lig_n       = 0
 # DD_ppi2_n      = table(str_df_short$pe_effect_outcome)[[2]]
 # SS_ppi2_n      = table(str_df_short$pe_effect_outcome)[[4]]
 # DD_stability_n = table(str_df_short$pe_effect_outcome)[[3]]
 # SS_stability_n = table(str_df_short$pe_effect_outcome)[[5]]
 # 
 # nums = c(DD_lig_n, SS_lig_n,DD_ppi2_n,SS_ppi2_n, DD_stability_n, SS_stability_n )
 # 
 # rect_pe = data.frame(x = 1:6
 #                      , pe_effect_type=c("-ve Lig aff"
 #                                       , "+ve Lig aff"
 #                                       , "-ve PPI2 aff"
 #                                       , " +ve PPI2 aff"
 #                                       , "-ve stability"
 #                                       , "+ve stability")
 #                      
 #                      , tile_colour =c("#ffd700" #gold
 #                                       ,"#f0e68c" # khaki
 #                                       , "#ff1493" #deeppink
 #                                       , "#da70d6" #orchid
 #                                       , "#F8766D" # Sred
 #                                       , "#00BFC4") #Sblue
 #                      # , numbers = c(DD_lig_n
 #                      #               , SS_lig_n
 #                      #               , DD_ppi2_n
 #                      #               , SS_ppi2_n
 #                      #               , DD_stability_n
 #                      #               , SS_stability_n )
 #                      , numbers = nums
 #                      )
 # 
 # rect_pe$num_labels = paste0("n=", rect_pe$numbers)
 # rect_pe
 # 
 # # create plot
 # peP = ggplot(rect_pe, aes(x=pe_effect_type , y = 0, fill = tile_colour
 #                           , label = paste0(pe_effect_type,"\n", num_labels))) +
 #   geom_tile(width = 1, height = 1) + # make square tiles
 #   geom_text(color = "black", size = 1.7) + # add black text in the middle
 #   scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
 #   coord_fixed() + # make sure tiles are square
 #   coord_flip()+ scale_x_reverse() +
 #   # theme_void() # remove any axis markings
 #   theme_nothing() # remove any axis markings
 # peP
 # 
 # peP2 = ggplot(rect_pe, aes(x=pe_effect_type, y = 0, fill = tile_colour
 #                            , label = paste0(pe_effect_type,"\n", num_labels))) +
 #   geom_tile() + 
 #   geom_text(color = "black", size = 1.6) + 
 #   scale_fill_identity(guide = "none") + 
 #   coord_fixed() +
 #   theme_nothing() 
 # peP2
--- a/scripts/plotting/plotting_thesis/basic_barplots_layout_v2.R
+++ b/scripts/plotting/plotting_thesis/basic_barplots_layout_v2.R
@ -4,7 +4,6 @@ posC_lig
 ppi2P
 posC_ppi2
 peP
 pe_allCL
 theme_georgia <- function(...) {
@ -22,12 +21,127 @@ common_legend_outcome = get_legend(mLigP +
                                     guides(color = guide_legend(nrow = 1)) +
                                     theme(legend.position = "top"))
-###############################################################
+# ###############################################################
 # #================================
 # # Lig Affinity: outcome + site
 # #================================
 # ligT = paste0(common_bp_title, " ligand")
 # lig_affT = ggdraw() +
 #   draw_label(
 #     ligT,
 #     fontfamily = title_theme$family,
 #     fontface = title_theme$face,
 #     #size = title_theme$size
 #     size = 8
 #   )
 # #-------------
 # # Outplot
 # #-------------
 # ligaffP =  paste0(outdir_images
 #                   ,tolower(gene)
 #                   ,"_lig_oc.png")
 # 
 # #svg(affP, width = 20, height = 5.5)
 # print(paste0("plot filename:", ligaffP))
 # png(ligaffP, units = "in", width = 6, height = 4, res = 300 )
 # cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome,
 #                                       nrow = 2,
 #                                       rel_heights = c(1,1)
 # ), 
 # cowplot::plot_grid(mLigP, mmLigP, posC_lig
 #                    , nrow = 1
 #                    #, labels = c("A", "B", "C","D")
 #                    , rel_widths = c(1,1,1.8)
 #                    , align = "h"),
 # nrow = 2,
 # labels = c("A", ""),
 # label_size = 12,
 # rel_heights = c(1,8))
 # dev.off()
 # #############################################################
 # #================================
 # # PPI2 Affinity: outcome + site
 # #================================
 # ppi2T = paste0(common_bp_title, " PP-interface")
 # ppi2_affT = ggdraw() + 
 #   draw_label(
 #     ppi2T,
 #     fontfamily = title_theme$family,
 #     fontface = title_theme$face,
 #     #size = title_theme$size
 #     size = 8
 #   )
 # 
 # 
 # #-------------
 # # Outplot: PPI2
 # #-------------
 # ppiaffP =  paste0(outdir_images
 #                   ,tolower(gene)
 #                   ,"_ppi2_oc.png")
 # 
 # #svg(affP, width = 20, height = 5.5)
 # print(paste0("plot filename:", ppiaffP))
 # png(ppiaffP, units = "in", width = 6, height = 4, res = 300 )
 # 
 # 
 # cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome,
 #                                       nrow = 2,
 #                                       rel_heights = c(1,1)), 
 #                    cowplot::plot_grid(ppi2P, posC_ppi2
 #                                       , nrow = 1
 #                                       , rel_widths = c(1.2,1.8)
 #                                       , align = "h"
 #                                       , label_size = my_label_size),
 #                    nrow = 2,
 #                    labels = c("B", ""),
 #                    label_size = 12,
 #                    rel_heights = c(1,8)
 # )
 # 
 # dev.off()
 # #############################################################
 #peP # pe counts
 #================================
-# Lig Affinity: outcome + site
+# PE + All position count
 #================================
 # peT_allT = ggdraw() +
 #   draw_label(
 #     paste0("All mutation sites"),
 #     fontfamily = title_theme$family,
 #     fontface = title_theme$face,
 #     #size = title_theme$size
 #     size = 8
 #   )
 # #------------------------
 # # Outplot: lig+ppi2+pe
 # #------------------------
 # pe_allCL =  paste0(outdir_images
 #                    ,tolower(gene)
 #                    ,"_pe_oc.png")
 # 
 # #svg(affP, width = 20, height = 5.5)
 # print(paste0("plot filename:", pe_allCL))
 # png(pe_allCL, units = "in", width = 6, height = 4, res = 300 )
 # 
 # 
 # cowplot::plot_grid(peT_allT,
 #                    cowplot::plot_grid(peP, posC_all
 #                                       , nrow = 1
 #                                       , rel_widths = c(1, 2)
 #                                       , align = "h"),
 #                    nrow = 2,
 #                    labels = c("C", "", ""),
 #                    label_size = 12,
 #                    rel_heights = c(1,8))
 # 
 # dev.off()
 #===========================================
 # COMBINE ALL three
 #==========================================
 ligT = paste0(common_bp_title, " ligand")
-lig_affT = ggdraw() + 
+lig_affT = ggdraw() +
  draw_label(
    ligT,
    fontfamily = title_theme$family,
@ -36,113 +150,6 @@ lig_affT = ggdraw() +
    size = 8
  )
 #-------------
 # Outplot
 #-------------
 # Ligand-affinity outcome figure, written to <outdir_images><gene>_lig_oc.png
 ligaffP =  paste0(outdir_images
                  ,tolower(gene)
                  ,"_lig_oc.png")
 #svg(affP, width = 20, height = 5.5)
 print(paste0("plot filename:", ligaffP))
 png(ligaffP, units = "in", width = 6, height = 4, res = 300 )
 # Explicit print(): a ggplot/cowplot object is only drawn when it is printed,
 # and top-level auto-printing does not happen when the script is source()d,
 # which would leave an empty PNG.
 # Layout: title + shared legend on top (1 part), the three panels below (8 parts).
 print(
   cowplot::plot_grid(
     cowplot::plot_grid(lig_affT, common_legend_outcome,
                        nrow = 2,
                        rel_heights = c(1,1)),
     cowplot::plot_grid(mLigP, mmLigP, posC_lig
                        , nrow = 1
                        #, labels = c("A", "B", "C","D")
                        , rel_widths = c(1,1,1.8)
                        , align = "h"),
     nrow = 2,
     labels = c("A", ""),
     label_size = 12,
     rel_heights = c(1,8))
 )
 dev.off()
 #############################################################
 #================================
 # PPI2 Affinity: outcome + site
 #================================
 ppi2T = paste0(common_bp_title, " PP-interface")
 ppi2_affT = ggdraw() + 
  draw_label(
    ppi2T,
    fontfamily = title_theme$family,
    fontface = title_theme$face,
    #size = title_theme$size
    size = 8
  )
 #-------------
 # Outplot: PPI2
 #-------------
 # PP-interface affinity outcome figure, written to <outdir_images><gene>_ppi2_oc.png
 ppiaffP =  paste0(outdir_images
                  ,tolower(gene)
                  ,"_ppi2_oc.png")
 #svg(affP, width = 20, height = 5.5)
 print(paste0("plot filename:", ppiaffP))
 png(ppiaffP, units = "in", width = 6, height = 4, res = 300 )
 # Explicit print(): the plot object is only rendered to the open device when
 # printed; auto-printing is skipped when the script is run via source().
 # Layout: title + shared legend on top (1 part), the two panels below (8 parts).
 print(
   cowplot::plot_grid(
     cowplot::plot_grid(ppi2_affT, common_legend_outcome,
                        nrow = 2,
                        rel_heights = c(1,1)),
     cowplot::plot_grid(ppi2P, posC_ppi2
                        , nrow = 1
                        , rel_widths = c(1.2,1.8)
                        , align = "h"
                        , label_size = my_label_size),
     nrow = 2,
     labels = c("B", ""),
     label_size = 12,
     rel_heights = c(1,8))
 )
 dev.off()
 #############################################################
 peP # pe counts
 #================================
 # PE + All position count
 #================================
 peT_allT = ggdraw() + 
  draw_label(
    paste0("All mutation sites"),
    fontfamily = title_theme$family,
    fontface = title_theme$face,
    #size = title_theme$size
    size = 8
  )
 #-------------
 # Outplot: PE + all position count
 #-------------
 # PE counts + all-position counts figure, written to <outdir_images><gene>_pe_oc.png
 pe_allCL =  paste0(outdir_images
                   ,tolower(gene)
                   ,"_pe_oc.png")
 #svg(affP, width = 20, height = 5.5)
 print(paste0("plot filename:", pe_allCL))
 png(pe_allCL, units = "in", width = 6, height = 4, res = 300 )
 # Explicit print(): the plot object is only rendered to the open device when
 # printed; auto-printing is skipped when the script is run via source().
 # Layout: title row on top (1 part), the two panels below (8 parts).
 print(
   cowplot::plot_grid(peT_allT,
                      cowplot::plot_grid(peP, posC_all
                                         , nrow = 1
                                         , rel_widths = c(1, 2)
                                         , align = "h"),
                      nrow = 2,
                      labels = c("C", "", ""),
                      label_size = 12,
                      rel_heights = c(1,8))
 )
 dev.off()
 #===========================================
 # COMBINE ALL three
 #==========================================
 p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome, nrow=2),
                        cowplot::plot_grid(mLigP, mmLigP, posC_lig
                                           , nrow = 1
@ -152,8 +159,18 @@ p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome, nrow=
                        rel_heights = c(1,8)
 )
-
+#p1
-
+###########################################################
 ppi2T = paste0(common_bp_title, " PP-interface")
 ppi2_affT = ggdraw() +
  draw_label(
    ppi2T,
    fontfamily = title_theme$family,
    fontface = title_theme$face,
    #size = title_theme$size
    size = 8
  )
 ###########################################################
 p2 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nrow=2),
                        cowplot::plot_grid(ppi2P, posC_ppi2
                                             , nrow = 1
@ -162,7 +179,17 @@ p2 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nro
                        nrow = 2,
                        rel_heights = c(1,8)
 )
-
+#p2
 ###########################################################
 # PE + All position count
 peT_allT = ggdraw() +
  draw_label(
    paste0("All mutation sites"),
    fontfamily = title_theme$family,
    fontface = title_theme$face,
    #size = title_theme$size
    size = 8
  )
 p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
                                           , rel_widths = c(1,3),axis = "lr"),
@ -174,16 +201,14 @@ p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
                          axis = "lr",
                          rel_heights = c(1,8)
                          ),
-                        rel_heights = c(1,10),
+                        rel_heights = c(1,18),
                        nrow = 2,axis = "lr")
-
+p3
 #===============
 # Final combine
 #===============
-w = 11.75
+w = 11.79
-h = 3.7
+h = 3.5
 mut_impact_CLP =  paste0(outdir_images
                         ,tolower(gene)
                         ,"_mut_impactCLP.png")
@ -229,9 +254,21 @@ conCLP =  paste0(outdir_images
                    ,tolower(gene)
                    ,"_consurf_BP.png")
-print(paste0("plot filename:", sens_conP))
+print(paste0("plot filename:", conCLP))
-png(sens_conP, units = "in", width = w, height = h, res = 300 )
+png(conCLP, units = "in", width = w, height = h, res = 300 )
 consurfP
 dev.off()
 #================================
 # Sensitivity  numbers: geom_tile
 #================================
 # Sensitivity-count tile figure, written to <outdir_images><gene>_sensN_tile.png
 sensCLP =  paste0(outdir_images
                 ,tolower(gene)
                 ,"_sensN_tile.png")
 print(paste0("plot filename:", sensCLP))
 png(sensCLP, units = "in", width = 1, height = 1, res = 300 )
 # Explicit print() so the plot is drawn even when the script is source()d
 # (top-level auto-printing only happens interactively / under Rscript).
 print(sensP)
 dev.off()
--- a/scripts/plotting/plotting_thesis/bp_PE.R
+++ b/scripts/plotting/plotting_thesis/bp_PE.R
@ -1,182 +0,0 @@
 colnames(str_df_short)
 table(str_df_short$effect_type)
 table(str_df_short$effect_sign)
 str(str_df_short)
 str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
 table(str_df_short$pe_outcome )
 table(str_df_short$effect_sign)
 affcols = c("affinity_scaled",  "mmcsm_lig_scaled")
 ppi2_cols = c("mcsm_ppi2_scaled")
 #lig 
 table(str_df_short$effect_type)
 str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
                                     , "affinity"
                                     , str_df_short$effect_type)
 table(str_df_short$effect_grouped)
 #ppi2
 str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols
                                     , "ppi2"
                                     , str_df_short$effect_grouped)
 table(str_df_short$effect_grouped)
 #stability
 str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("affinity", "ppi2")
                                     , "stability"
                                     , str_df_short$effect_grouped)
 table(str_df_short$effect_grouped)
 # create a sign as well
 str_df_short$effect_outcome = paste0(str_df_short$pe_outcome
                                     , str_df_short$effect_grouped)
 table(str_df_short$effect_outcome)
 # Named colour lookup keyed by effect_outcome, i.e. paste0(pe_outcome, effect_grouped).
 # Keys must match those pasted values exactly: the "DDstability" key previously
 # carried a trailing space and therefore could never be matched.
 pe_colour_map2 = c(   "DDaffinity"    = 	"#ffd700" # gold
                  ,    "SSaffinity"    = 	"#f0e68c" # khaki
                  ,  "DDppi2"        =  "#ff1493" # deeppink
                  ,  "SSppi2"         =  "#da70d6" # orchid
                  ,  "DDstability"   =  "#ae301e"
                  ,  "SSstability"    =  "#007d85"
 )
 str_df_short$effect_colours = str_df_short$effect_outcome
 str_df_short = dplyr::mutate(str_df_short
                         , effect_colours = case_when(effect_colours == "DDaffinity" ~ "#ffd700"
                                                  , effect_colours == "DDppi2" ~ '#ff1493'
                                                  , effect_colours == "SSppi2" ~ '#da70d6' 
                                                  , effect_colours == "DDstability" ~ '#ae301e'
                                                  , effect_colours =="SSstability" ~ '#007d85'
                                                  , TRUE ~ 'ns'))
 "#F8766D" #red
 "#00BFC4" #blue
 table(str_df_short$effect_colours)
 ###########################################
 ggplot(str_df_short
       , aes( x=effect_grouped
              , fill = effect_colours)) + 
  geom_bar() +
    scale_fill_manual(values = str_df_short$effect_colours)
 # Hand-entered counts, one vector per column; within each vector the first
 # value is the "Destabilising" row and the second the "Stabilising" row.
 first_col  = c(38, 0)
 second_col = c(9, 22)
 third_col  = c(681, 108)
 # Build the table from the vectors defined above (the previous call referenced
 # first_row/second_row/third_row, which are never defined, and so errored).
 thing_df = data.frame(first_col, second_col, third_col)
 rownames(thing_df) = c("Destabilising","Stabilising")
 thing_df
 ###############################################
 rect_colour_map = c("EMB"   = "green"
                    ,"DSL"  = "slategrey"
                    , "CDL" = "navyblue"
                    , "Ca"  = "purple")
 rects <- data.frame(x = 1:6,
                    colors = c("#ffd700" #gold
                               , "#f0e68c" #khaki
                               , "#da70d6"# orchid
                               , "#ff1493"# deeppink
                               , "#00BFC4" #, "#007d85" #blue
                               , "#F8766D" )# red,
 )
 rects
 rects$text =  c("-ve Lig affinty"
                , "+ve Lig affinity"
                , "+ve PPI2 affinity"
                , "-ve PPI2 affinity"
                , "+ve stability"
                , "-ve stability")
 rects$numbers = c(38, 0, 22, 9, 108, 681)
 rects$num_labels = paste0("n=", rects$numbers)
 rects
 outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
 #https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
 # Render the labelled colour tiles in `rects` as a narrow vertical strip.
 png(paste0(outdir_images, "test.png")
    , width = 0.5
    , height = 2.5
    , units = "in", res = 300)
 # Explicit print() so the plot is drawn even when the script is source()d.
 # coord_fixed() was dropped: a plot has a single coordinate system, so the
 # coord_flip() that followed replaced it and it never took effect.
 print(
   ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
     geom_tile(width = 1, height = 1) + # one unit tile per row of rects
     geom_text(color = "black", size = 1.5) + # black label text centred on each tile
     scale_fill_identity(guide = "none") + # colour the tiles with the colours in the data frame
     coord_flip()+ scale_x_reverse() + # stack tiles top-to-bottom in row order
     # theme_void() # remove any axis markings
     theme_nothing() # remove any axis markings
 )
 dev.off()
 ##########################################################
 tile_map=data.frame(tile=c("EMB","DSL","CDL","Ca")
                    ,tile_colour =c("green","darkslategrey","navyblue","purple"))
 # great
 tile_colour_map = c("EMB"   = "green"
                    ,"DSL"  = "darkslategrey"
                    , "CDL" = "navyblue"
                    , "Ca"  = "purple")
 tile_legend=get_legend(
  ggplot(tile_map, aes(factor(tile),y=0
                       , colour=tile_colour
                       , fill=tile_colour))+
    geom_tile() +
    theme(legend.direction="horizontal") +
    scale_colour_manual(name=NULL
                        #, values = tile_map$tile_colour
                        , values=tile_colour_map) +
    scale_fill_manual(name=NULL 
                      #,values=tile_map$tile_colour
                      , values = tile_colour_map)
 )
 #############################################################
 ###############################################
 library(ggplot2)
 library(viridis)
 library(hrbrthemes)
 ggplot(str_df_short, aes(fill=effect_colours,x=effect_type)) + 
  geom_bar() +
  scale_fill_viridis(discrete = T) +
  ggtitle("Studying 4 species..") 
 ####################################################
--- a/scripts/plotting/plotting_thesis/corr_plots_thesis_ggpairs.R
+++ b/scripts/plotting/plotting_thesis/corr_plots_thesis_ggpairs.R
@ -1,366 +0,0 @@
 #!/usr/bin/env Rscript       
 #source("~/git/LSHTM_analysis/config/alr.R")
 source("~/git/LSHTM_analysis/config/embb.R")
 #source("~/git/LSHTM_analysis/config/katg.R")
 #source("~/git/LSHTM_analysis/config/gid.R")
 #source("~/git/LSHTM_analysis/config/pnca.R")
 #source("~/git/LSHTM_analysis/config/rpob.R")
 # get plotting dfs
 source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
 source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
 ####################################################
 # ggpairs wrapper
 # Draw a ggpairs() matrix for every column of plot_df except the last one,
 # colouring observations by resistance class.
 #
 # plot_df    : data frame containing a `dst_mode` column; rows with
 #              dst_mode == 0 are labelled "S", all others "R". The last column
 #              is excluded from the panels (callers in this file place
 #              dst_mode last).
 # plot_title : title of the plot matrix (defaults to the previously
 #              hard-coded "Stability").
 #
 # Returns the object produced by ggpairs() with the colour/fill scales and
 # text theme added.
 my_gg_pairs=function(plot_df, plot_title = "Stability"){
  # seq_len() instead of 1:(n-1): the latter yields c(1, 0) for a 1-column input
  ggpairs(plot_df, columns = seq_len(ncol(plot_df)-1),
          upper = list(continuous = wrap('cor',
                                         method = "spearman",
                                         title="ρ",
                                         digits=2,
                                         title_args=c(colour="black")
          )
          ),
          lower = list(
            continuous = wrap("points", alpha = 0.7, size=0.5),
            combo     = wrap("dot", alpha = 0.7, size=0.5)
          ),
          # refer to the column by name so the mapping is evaluated against the
          # data of each panel rather than a captured copy of plot_df$dst_mode
          aes(colour = factor(ifelse(dst_mode==0, "S", "R")), alpha = 0.5),
          title=plot_title) +
    # named values pin R -> red and S -> blue even if only one class is present
    scale_colour_manual(values = c(R = "red", S = "blue")) +
    scale_fill_manual(values = c(R = "red", S = "blue")) + 
    theme(
      text = element_text(size=12, face="bold")
    )
 }
 #=======
 # output
 #=======
 outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
 #=======
 # Input
 #=======
 # Build the correlation data frame for this gene/drug from merged_df3
 # (unscaled columns) and confirm it has the same columns as the dashboard one.
 merged_df3 = as.data.frame(merged_df3)
 corr_plotdf = corr_data_extract(merged_df3
                                , gene = gene
                                , drug = drug
                                , extract_scaled_cols = FALSE)
 colnames(corr_plotdf)
 # identical() rather than all(a == b): '==' silently recycles when the two
 # name vectors differ in length, so a mismatch could still pass.
 if (identical(colnames(corr_df_m3_f), colnames(corr_plotdf))){
  cat("PASS: corr plot colnames match for dashboard")
 }else{
  stop("Abort: corr plot colnames DO NOT match for dashboard")
 }
 #corr_plotdf = corr_df_m3_f  #for downstream code
 aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
 aff_dist_cols
 static_cols =  c("Log10(MAF)"
                 , "Log10(OR)"
                 #, "-Log10(P)"
 )
 #================
 # stability
 #================
 #affinity_dist_colnames # Lig-Dist and PPI-Dist
 # Columns for the stability correlation plot; dst_mode (colour category) is last.
 corr_ps_colnames = c(static_cols
                     , "DUET"
                     , "FoldX"
                     , "DeepDDG"
                     , "Dynamut2"
                     , aff_dist_cols
                     , "dst_mode")
 if (all(corr_ps_colnames%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for correlation")
 }else{
  stop("Abort: all colnames DO NOT exist for correlation")
 }
 corr_df_ps = corr_plotdf[, corr_ps_colnames]
 # Count mutations with a non-missing odds ratio. The column is "Log10(OR)"
 # (see static_cols); the previous `Log(OR)` lookup returned NULL, so the
 # count was always nrow(corr_df_ps).
 complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps[["Log10(OR)"]]))
 cat("\nComplete muts for stability for", gene, ":", complete_obs_ps)
 color_coln = which(colnames(corr_df_ps) == "dst_mode")
 #end = which(colnames(corr_df_ps) == drug)
 #ncol_omit = 2
 #corr_end = end-ncol_omit
 # plot every column that precedes the colour column
 corr_end = color_coln-1
 #------------------------
 # Output: stability corrP
 #------------------------
 corr_psP =  paste0(outdir_images
                   ,tolower(gene)
                   ,"_corr_stability.svg" )
 cat("Corr plot stability with coloured dots:", corr_psP)
 svg(corr_psP, width = 15, height = 15)
 my_corr_pairs(corr_data_all = corr_df_ps
              , corr_cols = colnames(corr_df_ps[1:corr_end])
              , corr_method = "spearman"
              , colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode"
              , categ_colour =  c("red", "blue")
              , density_show = FALSE
              , hist_col = "coral4"
              , dot_size = 1.6
              , ats = 1.5
              , corr_lab_size =2.5
              , corr_value_size = 1)
 dev.off()
 #===============
 # CONSERVATION
 #==============
 # Columns for the conservation correlation plot; only those before dst_mode
 # are plotted, dst_mode supplies the colour category.
 corr_conservation_cols = c( static_cols
                            , "ConSurf"
                            , "SNAP2"
                            , "PROVEAN"
                            , aff_dist_cols
                            , "dst_mode"
                            , drug)
 if (all(corr_conservation_cols%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for ConSurf-correlation")
 }else{
  stop("Abort: all colnames DO NOT exist for ConSurf-correlation")
 }
 corr_df_cons = corr_plotdf[, corr_conservation_cols]
 # Count mutations with a non-missing odds ratio. The column is "Log10(OR)"
 # (see static_cols); the previous `Log(OR)` lookup returned NULL, so the
 # count was always nrow(corr_df_cons).
 complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons[["Log10(OR)"]]))
 cat("\nComplete muts for Conservation for", gene, ":", complete_obs_cons)
 color_coln = which(colnames(corr_df_cons) == "dst_mode")
 # end = which(colnames(corr_df_cons) == drug)
 # ncol_omit = 2 
 # corr_end = end-ncol_omit
 # plot every column that precedes the colour column
 corr_end = color_coln-1
 #---------------------------
 # Output: Conservation corrP
 #----------------------------
 corr_consP =  paste0(outdir_images
                     ,tolower(gene)
                     ,"_corr_conservation.svg" )
 cat("Corr plot conservation coloured dots:", corr_consP)
 svg(corr_consP, width = 10, height = 10)
 my_corr_pairs(corr_data_all = corr_df_cons
              , corr_cols = colnames(corr_df_cons[1:corr_end])
              , corr_method = "spearman" 
              , colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode"
              , categ_colour =  c("red", "blue")
              , density_show = FALSE
              , hist_col = "coral4"
              , dot_size =1.1
              , ats = 1.5
              , corr_lab_size = 1.8
              , corr_value_size = 1)
 dev.off()
 #####################################################
 #DistCutOff = 10
 #LigDist_colname  # = "ligand_distance" # from globals 
 #ppi2Dist_colname  = "interface_dist"
 #naDist_colname    = "TBC"
 #####################################################
 #================
 # ligand affinity
 #================
 # Ligand-affinity correlation: restricted to mutations whose Lig-Dist is
 # below DistCutOff.
 corr_lig_colnames = c(static_cols
                      , "mCSM-lig"
                      , "mmCSM-lig"
                      , "dst_mode")
 #, drug)
 if (all(c(corr_lig_colnames, "Lig-Dist")%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for Lig-correlation")
 }else{
  stop("Abort: all colnames DO NOT exist for Lig-correlation")
 }
 # Apply the distance filter and the column selection in one step. Previously
 # the filtered frame was immediately overwritten by an unfiltered column
 # subset, so the cut-off had no effect. which() drops rows with NA distance.
 corr_df_lig = corr_plotdf[which(corr_plotdf[["Lig-Dist"]]<DistCutOff), corr_lig_colnames]
 # The odds-ratio column is "Log10(OR)"; `Log(OR)` does not exist and made the
 # count always equal nrow().
 complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig[["Log10(OR)"]]))
 cat("\nComplete muts for lig affinity for", gene, ":", complete_obs_lig)
 color_coln = which(colnames(corr_df_lig) == "dst_mode")
 # end = which(colnames(corr_df_lig) == drug)
 # ncol_omit = 2 
 # corr_end = end-ncol_omit
 # plot every column that precedes the colour column
 corr_end = color_coln-1
 #------------------------
 # Output: ligand corrP
 #------------------------
 corr_ligP =  paste0(outdir_images
                    ,tolower(gene)
                    ,"_corr_lig.svg" )
 cat("Corr plot affinity with coloured dots:", corr_ligP)
 svg(corr_ligP, width = 10, height = 10)
 my_corr_pairs(corr_data_all = corr_df_lig
              , corr_cols = colnames(corr_df_lig[1:corr_end])
              , corr_method = "spearman"
              , colour_categ_col = colnames(corr_df_lig[color_coln]) #"dst_mode"
              , categ_colour =  c("red", "blue")
              , density_show = FALSE
              , hist_col = "coral4"
              , dot_size = 2
              , ats = 1.5
              , corr_lab_size =3
              , corr_value_size = 1)
 dev.off()
 ####################################################
 #================
 # ppi2 affinity
 #================
 # PP-interface affinity correlation, only for genes listed in geneL_ppi2 and
 # restricted to mutations whose PPI-Dist is below DistCutOff.
 if (tolower(gene)%in%geneL_ppi2){
  corr_ppi2_colnames = c(static_cols
                         , "mCSM-PPI2"
                         , "dst_mode"
                         , drug)
  if (all(c(corr_ppi2_colnames, "PPI-Dist")%in%colnames(corr_plotdf))){
    cat("PASS: all colnames exist for mcsm-ppi2 correlation")
  }else{
    stop("Abort: all colnames DO NOT exist for mcsm-ppi2 correlation")
  }
  # Filter rows and select columns together; previously the filtered frame was
  # overwritten by an unfiltered column subset. which() drops NA distances.
  corr_df_ppi2 = corr_plotdf[which(corr_plotdf[["PPI-Dist"]]<DistCutOff), corr_ppi2_colnames]
  # The odds-ratio column is "Log10(OR)"; `Log(OR)` does not exist and made the
  # count always equal nrow().
  complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2[["Log10(OR)"]]))
  cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2)
  color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
  # end = which(colnames(corr_df_ppi2) == drug)
  # ncol_omit = 2 
  # corr_end = end-ncol_omit
  # plot every column that precedes the colour column
  corr_end = color_coln-1
  #------------------------
  # Output: ppi2 corrP
  #------------------------
  corr_ppi2P =  paste0(outdir_images
                       ,tolower(gene)
                       ,"_corr_ppi2.svg" )
  cat("Corr plot ppi2 with coloured dots:", corr_ppi2P)
  svg(corr_ppi2P, width = 10, height = 10)
  my_corr_pairs(corr_data_all = corr_df_ppi2
                , corr_cols = colnames(corr_df_ppi2[1:corr_end])
                , corr_method = "spearman"
                , colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode"
                , categ_colour =  c("red", "blue")
                , density_show = FALSE
                , hist_col = "coral4"
                , dot_size = 2
                , ats = 1.5
                , corr_lab_size = 3
                , corr_value_size = 1)
  dev.off()
 }
 # FIXME: ADD distance
 #==================
 # mCSM-NA affinity
 #==================
 #================
 # NA affinity
 #================
 # Nucleic-acid affinity correlation, only for genes listed in geneL_na and
 # restricted to mutations whose NA-Dist is below DistCutOff.
 if (tolower(gene)%in%geneL_na){
  corr_na_colnames = c(static_cols
                       , "mCSM-NA"
                       , "dst_mode"
                       , drug)
  if (all(c(corr_na_colnames, "NA-Dist")%in%colnames(corr_plotdf))){
    cat("PASS: all colnames exist for mcsm-NA-correlation")
  }else{
    stop("Abort: all colnames DO NOT exist for mcsm-NA-correlation")
  }
  # Subset from corr_plotdf: the previous code filtered corr_df_na by itself
  # before it had been created, then overwrote the result with an unfiltered
  # column subset. which() drops rows with NA distance.
  corr_df_na = corr_plotdf[which(corr_plotdf[["NA-Dist"]]<DistCutOff), corr_na_colnames]
  # The odds-ratio column is "Log10(OR)"; `Log(OR)` does not exist and made the
  # count always equal nrow().
  complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na[["Log10(OR)"]]))
  cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na)
  color_coln = which(colnames(corr_df_na) == "dst_mode")
  # end = which(colnames(corr_df_na) == drug)
  # ncol_omit = 2
  # corr_end = end-ncol_omit
  # plot every column that precedes the colour column
  corr_end = color_coln-1
  #------------------------
  # Output: mCSM-NA corrP
  #------------------------
  corr_naP =  paste0(outdir_images
                     ,tolower(gene)
                     ,"_corr_na.svg" )
  cat("Corr plot mCSM-NA with coloured dots:", corr_naP)
  svg(corr_naP, width = 10, height = 10)
  my_corr_pairs(corr_data_all = corr_df_na
                , corr_cols = colnames(corr_df_na[1:corr_end])
                , corr_method = "spearman" 
                , colour_categ_col = colnames(corr_df_na[color_coln]) #"dst_mode"
                , categ_colour =  c("red", "blue")
                , density_show = FALSE
                , hist_col = "coral4"
                , dot_size = 2
                , ats = 1.5
                , corr_lab_size = 3
                , corr_value_size = 1)
  dev.off()
 }
 ####################################################
 #===============
 #ggpairs:
 #================
 #corr_df_ps$dst_mode = ifelse(corr_df_cons$dst_mode=="1", "R", "S")
 # Export the stability ggpairs matrix as both SVG and PNG.
 corr_plotting_df = corr_df_ps
 # svg() takes width/height in inches and has no `units` or `res` arguments;
 # passing them aborts the call with "unused arguments".
 svg('~/tmp/foo.svg',
    width=10,
    height=10)
 # explicit print() so the plot is drawn even when the script is source()d
 print(my_gg_pairs(corr_plotting_df))
 dev.off()
 png('~/tmp/foo.png',
    width=10,
    height=10,
    units="in",
    res=300)
 print(my_gg_pairs(corr_plotting_df))
 dev.off()
 # 
--- a/scripts/plotting/plotting_thesis/gg_pairs.R
+++ b/scripts/plotting/plotting_thesis/gg_pairs.R
@ -21,12 +21,19 @@ png('~/tmp/foo.png',
    units="in",
    res=300)
 # 
-corr_plotting_df = corr_df_ps
+#corr_plotting_df = corr_df_ps
-
+colnames(corr_plotdf)
-
+corr_plotting_df = subset(corr_plotdf, select = -c(ethambutol,`Log10(OR)`,`-Log10(P)`, ASA, RSA, KD, RD
-ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
+                                                   , FoldX
                                                   , DeepDDG
                                                   , Dynamut2 ))
 colnames(corr_plotting_df)
 #ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
 ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)),
        upper = list(continuous = wrap('cor',
                                       method = "spearman",
                                       use = "pairwise.complete.obs",
                                       title="ρ",
                                       digits=2,
                                       title_args=c(colour="black")
@ -36,7 +43,7 @@ ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
          continuous = wrap("points", alpha = 0.7, size=0.5),
          combo     = wrap("dot", alpha = 0.7, size=0.5)
        ),
-        aes(colour = factor(ifelse(corr_plotting_df$dst_mode==0, "S", "R")), alpha = 0.5),
+        aes(colour = factor(ifelse(dst_mode==0, "S", "R")), alpha = 0.5),
        title="Stability") +
  scale_colour_manual(values = c("red", "blue")) +
--- a/scripts/plotting/plotting_thesis/gg_pairs_all.R
+++ b/scripts/plotting/plotting_thesis/gg_pairs_all.R
@ -1,51 +1,88 @@
-source("~/git/LSHTM_analysis/config/embb.R")
+#source("~/git/LSHTM_analysis/config/embb.R")
-source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
-source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
-my_gg_pairs=function(plot_df){
+my_gg_pairs=function(plot_df, plot_title
                     , tt_args_size = 2.5
                     , gp_args_size = 2.5){
  ggpairs(plot_df,
          columns = 1:(ncol(plot_df)-1),
          upper = list(
-            continuous = wrap('cor',
+            continuous = wrap('cor', # ggally_cor()
                              method = "spearman",
                              use = "pairwise.complete.obs",
                              title="ρ",
                              digits=2,
-                              justify_labels = "left",
+                              justify_labels = "centre",
-                              title_args=c(colour="black")
+                              #title_args=c(colour="black"),
                              title_args=c(size=tt_args_size),#2.5
                              group_args=c(size=gp_args_size)#2.5
            )
          ),
          lower = list(
            continuous = wrap("points",
                              alpha = 0.7,
-                              size=0.5),
+                              size=0.125),
            combo     = wrap("dot",
                             alpha = 0.7,
-                             size=0.5)
+                             size=0.125)
          ),
-          aes(colour = factor(ifelse(plot_df$dst_mode==0,
+          aes(colour = factor(ifelse(dst_mode==0,
                                     "S",
                                     "R") ),
              alpha = 0.5),
-          title="Stability") +
+          title=plot_title) +
    scale_colour_manual(values = c("red", "blue")) +
-    scale_fill_manual(values = c("red", "blue")) +
+    scale_fill_manual(values = c("red", "blue")) #+
-    theme(text = element_text(size=12,
+  # theme(text = element_text(size=7,
-                              face="bold") )
+  #                           face="bold"))
 }
 DistCutOff = 10
 ###########################################################################
 geneL_normal  = c("pnca")
 geneL_na      = c("gid", "rpob")
 geneL_ppi2    = c("alr", "embb", "katg", "rpob")
 merged_df3 = as.data.frame(merged_df3)
 corr_plotdf = corr_data_extract(merged_df3
                                , gene = gene
                                , drug = drug
                                , extract_scaled_cols = F)
 aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
-static_cols =  c("Log10(MAF)"
+static_cols =  c("Log10(MAF)")
-                 , "Log10(OR)")
+#, "Log10(OR)")
 ############################################################
 #=============================================
 # Creating masked df for affinity data
 #=============================================
 # Copy of corr_plotdf in which affinity values of mutations lying further
 # than DistCutOff from the relevant site are overwritten with 0.
 corr_affinity_df = corr_plotdf
 #----------------------
 # Mask affinity columns
 #-----------------------
 # which() turns the comparison into row indices and drops NA distances; a
 # logical subscript containing NA aborts a data-frame assignment.
 # NOTE(review): rows with a missing distance are therefore left unmasked --
 # confirm that is the intended treatment.
 far_from_lig = which(corr_affinity_df[["Lig-Dist"]]>DistCutOff)
 corr_affinity_df[far_from_lig,"mCSM-lig"]=0
 corr_affinity_df[far_from_lig,"mmCSM-lig"]=0
 if (tolower(gene)%in%geneL_ppi2){
  corr_affinity_df[which(corr_affinity_df[["PPI-Dist"]]>DistCutOff),"mCSM-PPI2"]=0
 }
 # if (tolower(gene)%in%geneL_na){
 #   corr_affinity_df[corr_affinity_df["NA-Dist"]>DistCutOff,"mCSM-NA"]=0
 # }
 # count 0
 #res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100
 # Per column: number of rows whose value is not exactly 0. na.rm = TRUE keeps
 # the count defined for columns containing NA (NA counts as non-zero).
 unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0, na.rm = TRUE) 
 unmasked_vals
 ##########################################################
 #================
 # Stability
 #================
 corr_ps_colnames = c(static_cols
                     , "DUET"
                     , "FoldX"
@ -54,14 +91,13 @@ corr_ps_colnames = c(static_cols
                     , aff_dist_cols
                     , "dst_mode")
 corr_df_ps = corr_plotdf[, corr_ps_colnames]
 complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps$`Log(OR)`))
 color_coln = which(colnames(corr_df_ps) == "dst_mode")
 corr_end = color_coln-1
 # Plot #1
-plot_corr_df_ps = my_gg_pairs(corr_df_ps)
+plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability features")
-
+##########################################################
-
+#================
 # Conservation
 #================
 corr_conservation_cols = c( static_cols
                            , "ConSurf"
                            , "SNAP2"
@ -71,74 +107,66 @@ corr_conservation_cols = c( static_cols
 )
 corr_df_cons = corr_plotdf[, corr_conservation_cols]
 complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons$`Log(OR)`))
 color_coln = which(colnames(corr_df_cons) == "dst_mode")
 corr_end = color_coln-1
 # Plot #2
 plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation features")
-#my_gg_pairs(corr_df_cons)
+##########################################################
-plot_corr_df_cons = my_gg_pairs(corr_df_cons)
+#================
 # Affinity: lig, ppi and na as applicable
 #================
 #corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
 common_aff_colnames = c("mCSM-lig"
                        , "mmCSM-lig")
 if (tolower(gene)%in%geneL_normal){
  aff_colnames = common_aff_colnames
 }
 if (tolower(gene)%in%geneL_ppi2){
  aff_colnames = c(common_aff_colnames, "mCSM-PPI2")
 }
-corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
+if (tolower(gene)%in%geneL_na){
-corr_lig_colnames = c(static_cols
+  aff_colnames = c(common_aff_colnames, "mCSM-NA")
-                      , "mCSM-lig"
+}
                      , "mmCSM-lig"
                      , "dst_mode")
-corr_df_lig = corr_plotdf[, corr_lig_colnames]
+# building final affinity column names for correlation
 corr_aff_colnames = c(static_cols
                      , aff_colnames
                      , "dst_mode") # imp
 corr_df_aff = corr_affinity_df[, corr_aff_colnames]
 colnames(corr_df_aff)
 complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig$`Log(OR)`))
 color_coln = which(colnames(corr_df_lig) == "dst_mode")
 corr_end = color_coln-1
 # Plot #3
 plot_corr_df_aff = my_gg_pairs(corr_df_aff, plot_title="Affinity features", tt_args_size = 4, gp_args_size =4)
-#my_gg_pairs(corr_df_lig)
+#=============
-plot_corr_df_lig = my_gg_pairs(corr_df_lig)
+# combine
 #=============
-corr_df_ppi2 = corr_plotdf[corr_plotdf["PPI-Dist"]<DistCutOff,]
+#png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
-corr_ppi2_colnames = c(static_cols
+png(paste0(outdir_images
-                       , "mCSM-PPI2"
+           ,tolower(gene)
-                       , "dst_mode"
+           ,"_CorrAB.png"), height = 6, width=11.75, unit="in",res=300)
 )
 corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
 complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2$`Log(OR)`))
 color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
 corr_end = color_coln-1
-# NOTE: DELETE LOG OR FROM CORRELATION PLOTS!!!!!
+cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
-# NOTE: ALSO MAYBE DELETE DISTANCES AS WELL
+                   ggmatrix_gtable(plot_corr_df_cons),
-# NOTE: http://ggobi.github.io/ggally/reference/ggally_cor.html
+                   #                  ggmatrix_gtable(plot_corr_df_aff),
-
+                   #                   nrow=1, ncol=3, rel_heights = 7,7,3
-# "***" if the p-value is < 0.001
+                   nrow=1,
-# "**" if the p-value is < 0.01
+                   #rel_heights = 1,1
-# "*" if the p-value is < 0.05
+                   labels = "AUTO",
-# "." if the p-value is < 0.10
+                   label_size = 12)
-# "" otherwise
+dev.off()
-# 
+
-
+# affinity corr
-# Plot #4
+#png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
-#my_gg_pairs(corr_df_ppi2)
+png(paste0(outdir_images
-plot_corr_df_ppi2 = my_gg_pairs(corr_df_ppi2)
+           ,tolower(gene)
-
+           ,"_CorrC.png"), height =7, width=7, unit="in",res=300)
-
+
-# corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
+cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
-# corr_na_colnames = c(static_cols
+                  labels = "C",
-#                      , "mCSM-NA"
+                   label_size = 12)
 #                      , "dst_mode"
 # )
 # 
 # corr_df_na = corr_plotdf[, corr_na_colnames]
 # complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
 # color_coln = which(colnames(corr_df_na) == "dst_mode")
 # corr_end = color_coln-1
 # 
 # # Plot #5
 # #my_gg_pairs(corr_df_na)
 # plot_corr_df_na = my_gg_pairs(corr_df_na) 
 png("/tmp/gg_pairs_all.png", height = 8, width=11.75, unit="in",res=300)
 cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),ggmatrix_gtable(plot_corr_df_cons),
                   ggmatrix_gtable(plot_corr_df_lig),ggmatrix_gtable(plot_corr_df_ppi2),
                   nrow=2, ncol=2, rel_heights = 7,7,3,3)
 dev.off()
--- a/scripts/plotting/plotting_thesis/prominent_effects.R
+++ b/scripts/plotting/plotting_thesis/prominent_effects.R
@ -154,6 +154,11 @@ for (i in unique(str_df$position) ){
 str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
 colnames(str_df)
 # check
 str_df_check = str_df[str_df$position%in%c(24, 32,160, 303, 334),]
 table(str_df$effect_type)
 #================
 # for Plots
 #================
@ -161,9 +166,56 @@ str_df_short = str_df[, c("mutationinformation","position","sensitivity"
                          , "effect_type"
                          , "effect_sign")]
-# check
+table(str_df_short$effect_type)
-str_df_check = str_df[str_df$position%in%c(24, 32,160, 303, 334),]
+table(str_df_short$effect_sign)
-table(str_df$effect_type)
+str(str_df_short)
 # Assign the prominent-effect (pe) outcome label from the sign of the effect:
 # "DD" when effect_sign is negative, "SS" otherwise (NA stays NA).
 # NOTE(review): presumably DD/SS stand for destabilising/stabilising -- confirm.
 str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
 # Diagnostic counts of the new labels and of the signs they were derived from.
 table(str_df_short$pe_outcome )
 table(str_df_short$effect_sign)
 #==============
 # group effect type:
 # lig, ppi2, nuc. acid, stability
 # NOTE(review): only lig, ppi2 and stability are produced below; the
 # nucleic-acid group is commented out, so any nucleic-acid effect type
 # currently ends up in "stability" -- confirm this is intended.
 #==============
 # Effect-type names that are collapsed into each group.
 affcols = c("affinity_scaled",  "mmcsm_lig_scaled")
 ppi2_cols = c("mcsm_ppi2_scaled")
 # NOTE(review): "mcsm_a_scaled" may be a typo for a nucleic-acid column name -- verify before enabling.
 #nuc_na_cols = c("mcsm_a_scaled")
 # Step 1 (lig): effect types listed in affcols become "lig"; all other values
 # are carried over unchanged into the new effect_grouped column.
 table(str_df_short$effect_type)
 str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
                                     , "lig"
                                     , str_df_short$effect_type)
 table(str_df_short$effect_grouped)
 # Step 2 (ppi2): values listed in ppi2_cols become "ppi2"; the rest is kept.
 str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols
                                     , "ppi2"
                                     , str_df_short$effect_grouped)
 table(str_df_short$effect_grouped)
 # Step 3 (stability): everything that is neither "lig" nor "ppi2" becomes
 # "stability". `%in%` binds tighter than `!`, so the test is !(x %in% ...).
 str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig", "ppi2")
                                     , "stability"
                                     , str_df_short$effect_grouped)
 table(str_df_short$effect_grouped)
 # Combine outcome label and effect group into one key, e.g. "DD_lig".
 # paste0() turns a missing pe_outcome into the text "NA", e.g. "NA_lig".
 str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_"
                                     , str_df_short$effect_grouped)
 table(str_df_short$pe_effect_outcome)
 #####################################################################
 # Chimera: for colouring
 ####################################################################
 #-------------------------------------
 # get df with unique position