moved old corr files

2022-08-04 19:28:39 +01:00 · 2022-08-04 19:28:39 +01:00 · 7f9facc1e6
commit 7f9facc1e6
parent dab8294a01
6 changed files with 10 additions and 1392 deletions
--- a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_adjusted_PS_LIG.R
+++ b/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_adjusted_PS_LIG.R
@ -1,330 +0,0 @@
 #!/usr/bin/env Rscript
 #########################################################
 # TASK: Corr plots for PS and Lig 
 # Output: 1 svg
 #=======================================================================
 # working dir and loading libraries
 getwd()
 setwd("~/git/LSHTM_analysis/scripts/plotting/")
 getwd()
 source("~/git/LSHTM_analysis/scripts/Header_TT.R")
 require(cowplot)
 source("combining_dfs_plotting.R")
 source("my_pairs_panel.R")
 # should return the following dfs, directories and variables
 # PS combined: 
 # 1) merged_df2
 # 2) merged_df2_comp
 # 3) merged_df3
 # 4) merged_df3_comp
 # LIG combined: 
 # 5) merged_df2_lig
 # 6) merged_df2_comp_lig
 # 7) merged_df3_lig
 # 8) merged_df3_comp_lig
 # 9) my_df_u
 # 10) my_df_u_lig
 cat(paste0("Directories imported:"
           , "\ndatadir:", datadir
           , "\nindir:", indir
           , "\noutdir:", outdir
           , "\nplotdir:", plotdir))
 cat(paste0("Variables imported:"
           , "\ndrug:", drug
           , "\ngene:", gene
           , "\ngene_match:", gene_match
           , "\nAngstrom symbol:", angstroms_symbol
           , "\nNo. of duplicated muts:", dup_muts_nu
           , "\nNA count for ORs:", na_count
           , "\nNA count in df2:", na_count_df2
           , "\nNA count in df3:", na_count_df3))     
 #=======
 # output
 #=======
 # can't combine by cowplot because not ggplots
 #corr_plot_combined = "corr_combined.svg"
 #plot_corr_plot_combined  =  paste0(plotdir,"/", corr_plot_combined)
 # PS
 corr_ps_adjusted = "corr_PS_adjusted.svg"
 plot_corr_ps_adjusted =  paste0(plotdir,"/", corr_ps)
 # LIG
 corr_lig_adjusted = "corr_LIG_adjusted.svg"
 plot_corr_lig_adjusted =  paste0(plotdir,"/", corr_lig)
 ####################################################################
 #               end of loading libraries and functions                 #
 ########################################################################
 #%%%%%%%%%%%%%%%%%%%%%%%%%
 df_ps = merged_df3_comp 
 df_lig = merged_df3_comp_lig
 #%%%%%%%%%%%%%%%%%%%%%%%%%
 rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig)
 ########################################################################
 #               end of data extraction and cleaning for plots          #
 ########################################################################
 #===========================
 # Data for Correlation plots:PS
 #===========================
 table(df_ps$duet_outcome)
 #===========================
 # Data for Correlation plots:foldx
 #===========================
 #============================
 # adding foldx scaled values
 # scale data b/w -1 and 1
 #============================
 n = which(colnames(df_ps) == "ddg"); n 
 my_min = min(df_ps[,n]); my_min 
 my_max = max(df_ps[,n]); my_max 
 df_ps$foldx_scaled = ifelse(df_ps[,n] < 0
                            , df_ps[,n]/abs(my_min)
                            , df_ps[,n]/my_max) 
 # sanity check
 my_min = min(df_ps$foldx_scaled); my_min 
 my_max = max(df_ps$foldx_scaled); my_max
 if (my_min == -1 && my_max == 1){
  cat("PASS: foldx ddg successfully scaled b/w -1 and 1"
      , "\nProceeding with assigning foldx outcome category")
 }else{
  cat("FAIL: could not scale foldx ddg values"
      , "Aborting!")
 }
 #================================
 # adding foldx outcome category
 # ddg<0 = "Stabilising" (-ve)
 #=================================
 c1 = table(df_ps$ddg < 0)
 df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising")
 c2 = table(df_ps$ddg < 0)
 if ( all(c1 == c2) ){
  cat("PASS: foldx outcome successfully created")
 }else{
  cat("FAIL: foldx outcome could not be created. Aborting!")
  exit()
 }
 table(df_ps$foldx_outcome)
 #======================
 # adding log cols 
 #======================
 df_ps$log10_or_mychisq = log10(df_ps$or_mychisq)
 df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher)
 df_ps$log10_or_kin = log10(df_ps$or_kin)
 df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin)
 # subset data to generate pairwise correlations
 cols_to_select =  c("duet_scaled"
                    , "foldx_scaled"
                    #, "log10_or_mychisq"
                    #, "neglog_pval_fisher"
                    , "or_kin"
                    , "neglog_pwald_kin"
                    , "af"
                    , "asa"
                    , "rsa"
                    , "kd_values"
                    , "rd_values"
                    , "duet_outcome"
                    , drug)
 corr_data_ps = df_ps[, cols_to_select]
 dim(corr_data_ps)
 #p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic 
 #p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic
 # assign nice colnames (for display)
 my_corr_colnames = c("DUET"
                     , "Foldx"
                     #, "Log(OR)"
                     #, "-Log(P)"
                     , "OR adjusted"
                     , "-Log(P wald)"
                     , "AF"
                     , "ASA"
                     , "RSA"
                     , "KD"
                     , "RD"
                     , "duet_outcome"
                     , drug)
 length(my_corr_colnames)
 colnames(corr_data_ps)
 colnames(corr_data_ps) <- my_corr_colnames
 colnames(corr_data_ps)
 #-----------------
 # generate corr PS plot
 #-----------------
 start = 1
 end = which(colnames(corr_data_ps) == drug); end # should be the last column
 offset = 1
 my_corr_ps = corr_data_ps[start:(end-offset)]
 head(my_corr_ps)
 #my_cols = c("#f8766d", "#00bfc4")
 # deep blue :#007d85
 # deep red: #ae301e
 cat("Corr plot PS:", plot_corr_ps_adjusted)
 svg(plot_corr_ps_adjusted, width = 15, height = 15)
 OutPlot1 = pairs.panels(my_corr_ps[1:(length(my_corr_ps)-1)]
             , method = "spearman" # correlation method
             , hist.col = "grey" ##00AFBB
             , density = TRUE  # show density plots
             , ellipses = F # show correlation ellipses
             , stars = T
             , rug = F
             , breaks = "Sturges"
             , show.points = T
             , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps$duet_outcome))]
             , pch = 21
             , jitter = T
             #, alpha = .05
             #, points(pch = 19, col = c("#f8766d", "#00bfc4"))
             , cex = 2
             , cex.axis = 1.5
             , cex.labels = 1.5
             , cex.cor = 1
             , smooth = F
 )
 print(OutPlot1)
 dev.off()
 #===========================
 # Data for Correlation plots: LIG
 #===========================
 table(df_lig$ligand_outcome)
 df_lig$log10_or_mychisq = log10(df_lig$or_mychisq)
 df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher)
 df_lig$log10_or_kin = log10(df_lig$or_kin)
 df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin)
 # subset data to generate pairwise correlations
 cols_to_select =  c("affinity_scaled"
                    , "log10_or_mychisq"
                    , "neglog_pval_fisher"
                    #, "or_kin"
                    #, "neglog_pwald_kin"
                    , "af"
                    , "ligand_outcome"
                    , drug)
 corr_data_lig = df_lig[, cols_to_select]
 dim(corr_data_lig)
 # assign nice colnames (for display)
 my_corr_colnames = c("Ligand Affinity"
                     , "Log(OR)"
                     , "-Log(P)"
                     #, "OR adjusted"
                     #, "-Log(P wald)"
                     , "AF"
                     , "ligand_outcome"
                     , drug)
 length(my_corr_colnames)
 colnames(corr_data_lig)
 colnames(corr_data_lig) <- my_corr_colnames
 colnames(corr_data_lig)
 #-----------------
 # generate corr LIG plot
 #-----------------
 start = 1
 end = which(colnames(corr_data_lig) == drug); end # should be the last column
 offset = 1
 my_corr_lig = corr_data_lig[start:(end-offset)]
 head(my_corr_lig)
 cat("Corr LIG plot:", plot_corr_lig_adjusted)
 svg(plot_corr_lig_adjusted, width = 15, height = 15)
 OutPlot2  = pairs.panels(my_corr_lig[1:(length(my_corr_lig)-1)]
                  , method = "spearman" # correlation method
                  , hist.col = "grey" ##00AFBB
                  , density = TRUE  # show density plots
                  , ellipses = F # show correlation ellipses
                  , stars = T
                  , rug = F
                  , breaks = "Sturges"
                  , show.points = T
                  , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig$ligand_outcome))]
                  , pch = 21
                  , jitter = T
                  #, alpha = .05
                  #, points(pch = 19, col = c("#f8766d", "#00bfc4"))
                  , cex = 3
                  , cex.axis = 2.5
                  , cex.labels = 2.1
                  , cex.cor = 1
                  , smooth = F
 )
 print(OutPlot2)
 dev.off()
 #######################################################
--- a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots.R
+++ b/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots.R
@ -1,242 +0,0 @@
 #!/usr/bin/env Rscript
 getwd()
 setwd("~/git/LSHTM_analysis/scripts/plotting")
 getwd()
 source("~/git/LSHTM_analysis/scripts/Header_TT.R")
 spec = matrix(c(
  "drug"       , "d",  1, "character",
  "gene"       , "g",  1, "character",
  "data_file1" , "fa", 2, "character",
  "data_file2" , "fb", 2, "character" 
 ), byrow = TRUE, ncol = 4)
 opt = getopt(spec)
 drug            = opt$drug
 gene            = opt$gene
 infile_params   = opt$data_file1
 infile_metadata = opt$data_file2
 if(is.null(drug)|is.null(gene)) {
  stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
 }
 #===========
 # Input
 #===========
 source("get_plotting_dfs.R")
 #===========
 # output
 #===========
 # PS
 corr_ps = "corr_PS.svg"
 plot_corr_ps =  paste0(plotdir,"/", corr_ps)
 corr_ps_all = "corr_PS_all.svg"
 plot_corr_ps_all =  paste0(plotdir,"/", corr_ps_all)
 # LIG
 corr_lig = "corr_LIG.svg"
 plot_corr_lig =  paste0(plotdir,"/", corr_lig)
 corr_lig_all = "corr_LIG_all.svg"
 plot_corr_lig_all =  paste0(plotdir,"/", corr_lig_all)
 ##############################################################################
 foo = corr_ps_df3
 #foo2 = corr_ps_df2
 bar = corr_lig_df3
 #bar2 = corr_lig_df2
 #================================
 # Data for Correlation plots: PS
 #================================
 # subset data to generate pairwise correlations
 cols_to_select = c("DUET"
                     , "Foldx"
                     , "Log (OR)"
                     , "-Log (P)"
                     , "MAF"
                     , "duet_outcome"
                     , drug)
 corr_data_ps = foo[names(foo)%in%cols_to_select]
 length(cols_to_select)
 colnames(corr_data_ps)
 start = 1
 end = which(colnames(corr_data_ps) == drug); end # should be the last column
 offset = 1
 my_corr_ps = corr_data_ps[start:(end - offset)]
 head(my_corr_ps)
 #---------------------
 # Corr plot PS: short
 # data: corr_ps_df3
 # cols: 7
 #---------------------
 cat("Corr plot PS DUET with coloured dots:", plot_corr_ps)
 svg(plot_corr_ps, width = 15, height = 15)
 pairs.panels(my_corr_ps[1:(length(my_corr_ps)-1)]
             , method = "spearman" # correlation method
             , hist.col = "grey" ##00AFBB
             , density = TRUE  # show density plots
             , ellipses = F # show correlation ellipses
             , stars = T
             , rug = F
             , breaks = "Sturges"
             , show.points = T
             , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps$duet_outcome))] # foldx colours are reveresed
             , pch = 21 # for bg
             , jitter = T
             , alpha = 1
             , cex = 1.8
             , cex.axis = 2
             , cex.labels = 4
             , cex.cor = 1
             , smooth = F
 )
 dev.off()
 corr_ps_rho = corr.test(my_corr_ps[1:5], method = "spearman")$r
 corr_ps_p = corr.test(my_corr_ps[1:5], method = "spearman")$p
 #---------------------
 # Corr plot PS: ALL
 # data: corr_ps_df3
 # cols: 10
 #---------------------
 end_ps_all = which(colnames(foo) == drug); end_ps_all # should be the last column
 my_corr_ps_all = foo[start:(end_ps_all - offset)]
 cols_to_drop = "Mutation"
 my_corr_ps_all = my_corr_ps_all[, !(names(my_corr_ps_all)%in%cols_to_drop)]
 head(my_corr_ps_all)
 length(colnames(my_corr_ps_all))
 cat("Corr plot PS DUET with coloured dots:", plot_corr_ps_all)
 svg(plot_corr_ps_all, width = 15, height = 15)
 pairs.panels(my_corr_ps_all[1:(length(my_corr_ps_all)-1)]
             , method = "spearman" # correlation method
             , hist.col = "grey" ##00AFBB
             , density = TRUE  # show density plots
             , ellipses = F # show correlation ellipses
             , stars = T
             , rug = F
             , breaks = "Sturges"
             , show.points = T
             , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps_all$duet_outcome))] # foldx colours are reveresed
             , pch = 21 # for bg
             , jitter = T
             , alpha = 1
             , cex = 1.5
             , cex.axis = 2
             , cex.labels = 2.5
             , cex.cor = 1
             , smooth = F
 )
 dev.off()
 #==================================
 # Data for Correlation plots: LIG
 #==================================
 cols_to_select_lig = c("Ligand Affinity"
                     , "Log (OR)"
                     , "-Log (P)"
                     , "MAF"
                     , "ligand_outcome"
                     , drug)
 corr_data_lig = bar[names(bar)%in%cols_to_select_lig]
 length(cols_to_select_lig)
 colnames(corr_data_lig)
 start_lig = 1
 end_lig = which(colnames(corr_data_lig) == drug); end_lig # should be the last column
 offset_lig = 1
 my_corr_lig = corr_data_lig[start_lig:(end_lig-offset_lig)]
 head(my_corr_lig)
 #---------------------
 # Corr plot LIG: short
 # data: corr_lig_df3
 # cols: 7
 #---------------------
 cat("Corr LIG plot with coloured dots:", plot_corr_lig)
 svg(plot_corr_lig, width = 15, height = 15)
 pairs.panels(my_corr_lig[1:(length(my_corr_lig)-1)]
             , method = "spearman" # correlation method
             , hist.col = "grey" ##00AFBB
             , density = TRUE  # show density plots
             , ellipses = F # show correlation ellipses
             , stars = T
             , rug = F
             , breaks = "Sturges"
             , show.points = T
             , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig$ligand_outcome))] 
             , pch = 21 # for bg
             , jitter = T
             , cex = 2
             , cex.axis = 2
             , cex.labels = 4
             , cex.cor = 1
             , smooth = F
 )
 dev.off()
 corr_lig_rho = corr.test(my_corr_lig[1:4], method = "spearman")$r
 corr_lig_p = corr.test(my_corr_lig[1:4], method = "spearman")$p
 #---------------------
 # Corr plot LIG: ALL
 # data: corr_lig_df3
 # cols: 9
 #---------------------
 end_lig_all = which(colnames(bar) == drug); end_lig_all # should be the last column
 my_corr_lig_all = bar[start_lig:(end_lig_all - offset_lig)]
 cols_to_drop = "Mutation"
 my_corr_lig_all = my_corr_lig_all[, !(names(my_corr_lig_all)%in%cols_to_drop)]
 head(my_corr_lig_all)
 length(colnames(my_corr_lig_all))
 cat("Corr plot LIG with coloured dots:", plot_corr_lig_all)
 svg(plot_corr_lig_all, width = 15, height = 15)
 pairs.panels(my_corr_lig_all[1:(length(my_corr_lig_all)-1)]
             , method = "spearman" # correlation method
             , hist.col = "grey" ##00AFBB
             , density = TRUE  # show density plots
             , ellipses = F # show correlation ellipses
             , stars = T
             , rug = F
             , breaks = "Sturges"
             , show.points = T
             , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig_all$ligand_outcome))] # foldx colours are reveresed
             , pch = 21 # for bg
             , jitter = T
             , alpha = 1
             , cex = 1.5
             , cex.axis = 2
             , cex.labels = 2.2
             , cex.cor = 1
             , smooth = F
 )
 dev.off()
 ######################################################################=
 #                             End of script
 ######################################################################=
--- a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_i.R
+++ b/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_i.R
@ -1,276 +0,0 @@
 #!/usr/bin/env Rscript       
 source("~/git/LSHTM_analysis/config/gid.R")
 source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
 #===================================================================
 corr_data = corr_data_extract(merged_df3, drug_name = drug)
 #corr_data = corr_data_extract(merged_df2, drug_name = drug)
 geneL_normal = c("pnca")
 geneL_na_dy = c("gid")
 geneL_na = c("rpob")
 geneL_ppi2 = c("alr", "embb", "katg", "rpob")
 core_cols <- c( "Log (OR)" , "MAF", "-Log (P)"
                    , "DUET", "FoldX"
                    , "DeepDDG", "Dynamut2"
                    , "ASA", "RSA", "RD", "KD"
                    , "Consurf", "SNAP2"
                    #, "mutation_info_labels"
 )
 if (tolower(gene)%in%geneL_normal){
  corrplot_cols = core_cols
 }
 if (tolower(gene)%in%geneL_na_dy){
  additional_cols = c("mCSM-NA"
                    , "Dynamut"
                    , "ENCoM-DDG"
                    , "ENCoM-DDS"
                    , "mCSM"
                    , "SDM"
                    , "DUET-d"
                    , "mutation_info_labels")
  corrplot_cols = c(core_cols, additional_cols)
 }
 if (tolower(gene)%in%geneL_na){
  additional_cols = c("mCSM-NA"
                    , "mutation_info_labels")
  corrplot_cols = c(core_cols, additional_cols)
 }
 if (tolower(gene)%in%geneL_ppi2){
  additional_cols = c("mCSM-PPI2"
                    , "mutation_info_labels")
  corrplot_cols = c(core_cols, additional_cols)
 }
 #========================================
 # corrplot_cols <- c( "Log (OR)"
 #                     , "MAF"
 #                     , "-Log (P)"
 #                     , "DUET"
 #                     , "FoldX"
 #                     , "DeepDDG"
 #                     , "Dynamut2"
 #                     , "mCSM-NA"
 #                     , "Dynamut"
 #                     , "ENCoM-DDG"
 #                     , "ENCoM-DDS"
 #                     , "mCSM"
 #                     , "SDM"
 #                     , "DUET-d"
 #                     , "ASA"
 #                     , "RSA"
 #                     , "RD"
 #                     , "KD"
 #                     , "mutation_info_labels"
 #                    )
 corr_df <- corr_data[, corrplot_cols] # col order is according to corrplot_cols
 head(corr_df); names(corr_df)
 if ( all( corrplot_cols%in%names(corr_df) ) ){
  cat("\nPASS: Successfully selected"
      , length(corrplot_cols)
      , "columns for building correlation df")
 } else {
  cat("\nFAIl: Something went wrong, numbers mismatch"
      , "\nExpected cols:", length(corrplot_cols)
      , "\nGot:", length(corr_df) )
 }
 #=====================================================
 corrplot_df <- corr_df 
 # stat_df = corrplot_df[, c("Log (OR)"
 #                           , "MAF" 
 #                           , "-Log (P)")]
 plot_title <- "Correlation plots (stability)"
 # Checkbox Names
 # FIXME: select columns conditionally based on gene and grey out the ones that are not present!
 cBCorrNames = c( "Odds Ratio"
                , "Allele Frequency"
                , "P-value"
                , "DUET"
                , "FoldX"
                , "DeepDDG"
                , "Dynamut2"
                , "ASA"
                , "RSA"
                , "RD"
                , "KD"
                , "Consurf"
                , "SNAP2"
                , "Nucleic Acid affinity"
                , "PPi2 affinity"
                #, "Dynamut"
                #, "ENCoM-Stability"
                #, "ENCoM-Flexibility"
                #, "mCSM"
                #, "SDM"
                #, "DUET-d"
 )
 # Checkbox Values (aka Column Names that are in corrplot_df)
 cBCorrVals = c("Log (OR)"
              , "MAF"
              , "-Log (P)"
              , "DUET"
              , "FoldX"
              , "DeepDDG"
              , "Dynamut2"
              , "ASA"
              , "RSA"
              , "RD"
              , "KD"
              , "Consurf"
              , "SNAP2"
              , "mCSM-NA"
              , "mCSM-PPI2"
              # , "Dynamut"
              # , "ENCoM-DDG"
              # , "ENCoM-DDS"
              # , "mCSM"
              # , "SDM"
              # , "DUET-d"
 )
 # Pre-selected checkboxes
 cBCorrSelected = c("Log (OR)"
                   , "MAF"
                   , "-Log (P)")
 #################
 # Define UI
 #################
 u_corr <- fluidPage(
  headerPanel(plot_title),
  sidebarLayout(position = "left"
                , sidebarPanel(
                 checkboxGroupInput("variable", "Choose parameter:"
                                       , choiceNames  = cBCorrNames
                                       , choiceValues = cBCorrVals
                                       , selected     = cBCorrSelected
                  )
                  # could be a fluid Row
                  , actionButton("add_col"     , "Render")
                  , actionButton("reset_graph" , "Reset Graphs")
                  , actionButton("select_all"  , "Select All")
                  )
                # output/display
                , mainPanel(plotOutput(outputId = 'corrplot'
                                    , height = "1200px"
                                    , width  = "1500px")
 #                           , height = "800px"
 #                          , width  = "600px")
                          , textOutput("txt")
                )
  )
 )
 #################
 # Define server
 #################
 s_corr <- shinyServer(function(input, output, session)
 {
 #================
 # Initial render
 #================
  output$corrplot <- renderPlot({ 
    #---------------------
    # My correlation plot: initial plot
    #---------------------
    c_plot <- my_corr_pairs(corr_data_all = corrplot_df
                            , corr_cols = cBCorrSelected
                            , corr_method = "spearman"
                            , dot_size = 2
                            , ats = 1.5
                            , corr_lab_size = length(cBCorrNames)/length(cBCorrSelected) * 1.3
                            , corr_value_size = 1)
  })
 #====================
 # Interactive render
 #====================
  observeEvent(
    input$add_col, {
      # select cols for corrplot
      corr_cols_s <- c(input$variable)
      # render plot
      if (length(c(input$variable)) >= 2) {
      output$corrplot <- renderPlot({
        #---------------------
        # My correlation plot: user selects columns
        #---------------------
        c_plot <- my_corr_pairs(corr_data_all     = corrplot_df
                                , corr_cols       = corr_cols_s
                                , dot_size        = 2
                                , ats             = 1.5
                                , corr_lab_size   = length(cBCorrNames)/length(corr_cols_s) * 1.3
                                , corr_value_size = 1)
      })
      } else{ output$txt = renderText({"Argh, common! It's a correlation plot. Select >=2 vars!"})
      }
    })
 #==================================
 # Add button: Select All checkbox
 #==================================
  observeEvent(
    input$select_all,{
      updateCheckboxGroupInput(session, "variable", selected = cBCorrVals)
    }
 )
 #================
 # Reset render
 #================
  observeEvent(
    input$reset_graph,{
      # reset checkboxes to default selection
      updateCheckboxGroupInput(session, "variable", selected = cBCorrSelected)
      # render plot
      output$corrplot <- renderPlot({
        #---------------------
        # My correlation plot: reset plot
        #---------------------
        c_plot <- my_corr_pairs(corr_data_all     = corrplot_df
                                , corr_cols       = cBCorrSelected
                                , dot_size        = 1.2
                                , ats             = 1.5
                                , corr_lab_size   = length(cBCorrNames)/length(cBCorrSelected) * 1.3
                                , corr_value_size = 1)
      })
    }
  )
 }
 )
 shinyApp(ui = u_corr, server = s_corr)
--- a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_lig_i.R
+++ b/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_lig_i.R
@ -1,220 +0,0 @@
 #!/usr/bin/env Rscript       
 source("~/git/LSHTM_analysis/config/gid.R")
 source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
 #===================================================================
 corr_data = corr_data_extract(merged_df3, drug_name = drug)
 #corr_data = corr_data_extract(merged_df2, drug_name = drug)
 #================================================================
 #other globals
 dist_colname <- LigDist_colname # ligand_distance (from globals)
 dist_cutoff <- LigDist_cutoff # 10 (from globals)
 cat("\nLigand distance cut off, colname:", dist_colname
    , "\nThe max distance", gene, "structure df" , ":", max_ang, "\u212b"
    , "\nThe min distance", gene, "structure df" , ":", min_ang, "\u212b")
 ########################################################################
 #==========================================
 #####################
 # Correlation plot
 #####################
 colnames(corr_df_m3_f)
 corrplot_cols_lig <- c( "Log (OR)"
                    ,  "MAF"
                    , "-Log (P)"
                    , "mCSM-lig"
                    , "mCSM-NA"
                    , "ASA"
                    , "RSA"
                    , "RD"
                    , "KD" 
                    , dist_colname
                   , "mutation_info_labels"
                   )
 corr_df_lig <- corr_df_m3_f[, corrplot_cols_lig]
 head(corr_df_lig)
 corrplot_df_lig <- corr_df_lig 
 # static df
 # stat_df = corrplot_df_lig[, c("Log (OR)"
 #                           , "MAF"
 #                           , "-Log (P)"
 #                           )]
 plot_title_lig <- "Correlation plots (ligand affinity)"
 # Checkbox Names
 cCorrNames = c( "Odds Ratio"
             , "Allele Frequency"
             , "P-value"
             , "Ligand affinity"
             , "Nucleic Acid affinity"
             , "ASA"
             , "RSA"
             , "RD"
             , "KD" 
             , "Ligand Distance")
 # Checkbox Values (aka Column Names that are in corrplot_df_lig)
 cCorrVals = c("Log (OR)"
          , "MAF"
          , "-Log (P)"
          , "mCSM-lig"
          , "mCSM-NA"
          , "ASA"
          , "RSA"
          , "RD"
          , "KD" 
          , dist_colname)
 # Pre-selected checkboxes
 cCorrSelected = c("Log (OR)"
                   , "MAF"
                   , "-Log (P)")
 #============
 # Define UI 
 #============
 u_corr_lig<- fluidPage(
  headerPanel(plot_title_lig),
  sidebarLayout(position = "left"
                , sidebarPanel("Correlations: Filtered data data"
                  , numericInput(inputId = "lig_dist"
                                 , label = "Ligand distance cutoff"
                                 , value =  dist_cutoff # 10 default from globals
                                 , min = min_ang
                                 , max = max_ang)
                  , checkboxGroupInput("variable", "Choose parameter:"
                                       , choiceNames  = cCorrNames
                                       , choiceValues = cCorrVals
                                       , selected     = cCorrSelected
                  )
                  # could be a fluid Row
                  , actionButton("add_col"     , "Render")
                  , actionButton("reset_graph" , "Reset Graphs")
                  , actionButton("select_all"  , "Select All")
                )
                # output/display
                , mainPanel(plotOutput(outputId = 'corrplot'
                                     , height = "1000px"
                                     , width  = "1200px")
                          # , height = "800px"
                          # , width  = "600px")
                , textOutput("txt")
                )
  )
 )
 #===============
 # Define server
 #===============
 s_corr_lig <- shinyServer(function(input, output, session)
 { 
 #================
 # Initial render
 #================
  output$corrplot <- renderPlot({ 
    # get the user-specified lig_list
    dist_cutoff_ini = input$lig_dist
    # subset data for plot
    corrplot_df_lig_ini = corrplot_df_lig[corrplot_df_lig[[dist_colname]] < dist_cutoff_ini,]
    #---------------------
    # My correlation plot: initial plot
    #---------------------
    c_plot <- my_corr_pairs(
                            #corr_data_all = corrplot_df_lig
                            corr_data_all = corrplot_df_lig_ini
                            , corr_cols = cCorrSelected
                            , dot_size = 2
                            , ats = 1.5
                            , corr_lab_size = length(cCorrNames)/length(cCorrSelected) * 1.3
                            , corr_value_size = 1)
  })
 #====================
 # Interactive render
 #====================
  observeEvent(
    input$add_col, {
      # get the user-specified lig_list
      dist_cutoff_user = input$lig_dist
      # subset data for plot
      corrplot_df_lig_s = corrplot_df_lig[corrplot_df_lig[[dist_colname]] < dist_cutoff_user,]
      # select cols for corrplot
      corr_cols_s = c(input$variable)
      # render plot
      if (length(c(input$variable)) >= 2) {
        output$corrplot <- renderPlot({ 
          #---------------------
          # My correlation plot: user selects columns
          #---------------------
          c_plot <- my_corr_pairs(corr_data_all = corrplot_df_lig_s
                                  , corr_cols = corr_cols_s
                                  , dot_size = 1.6
                                  , ats = 1.5
                                  , corr_lab_size = length(cCorrNames)/length(corr_cols_s) * 1.3
                                  , corr_value_size = 1)
        })
      } else { output$txt = renderText({"Fuddu! It's a correlation plot. Select >=2 vars bewakoof!"})}
    })
 #==================================
 # Add button: Select All checkbox
 #==================================
  observeEvent(
    input$select_all,{
      updateCheckboxGroupInput(session, "variable", selected = cCorrVals)
    }
  )
 #================
 # Reset render
 #================
  observeEvent(
    input$reset_graph,{
      # reset checkboxes
      updateCheckboxGroupInput(session, "variable", selected = cCorrSelected)
      # render plot
      output$corrplot <- renderPlot({ 
        #---------------------
        # My correlation plot: reset plot
        #---------------------
        c_plot <- my_corr_pairs(corr_data_all = corrplot_df_lig
                                , corr_cols = cCorrSelected
                                , dot_size = 2
                                , ats = 1.5
                                , corr_lab_size = length(cCorrNames)/length(cCorrSelected) * 1.3
                                , corr_value_size = 1)
      })
    }
  )
 }
 )
 shinyApp(ui = u_corr_lig, server = s_corr_lig)
--- a/scripts/plotting/plotting_thesis/corr_thesis_ref/ggcorr_all_PS_LIG.R
+++ b/scripts/plotting/plotting_thesis/corr_thesis_ref/ggcorr_all_PS_LIG.R
@ -1,323 +0,0 @@
 #!/usr/bin/env Rscript
 #########################################################
 # TASK: Corr plots for PS and Lig 
 # Output: 1 svg
 #=======================================================================
 # working dir and loading libraries
 getwd()
 setwd("~/git/LSHTM_analysis/scripts/plotting/")
 getwd()
 source("~/git/LSHTM_analysis/scripts/Header_TT.R")
 require(cowplot)
 source("combining_dfs_plotting.R")
 #source("my_pairs_panel.R")
 # should return the following dfs, directories and variables
 # FIXME: Can't output from here
 # PS combined: 
 # 1) merged_df2
 # 2) merged_df2_comp
 # 3) merged_df3
 # 4) merged_df3_comp
 # LIG combined: 
 # 5) merged_df2_lig
 # 6) merged_df2_comp_lig
 # 7) merged_df3_lig
 # 8) merged_df3_comp_lig
 # 9) my_df_u
 # 10) my_df_u_lig
 cat(paste0("Directories imported:"
           , "\ndatadir:", datadir
           , "\nindir:", indir
           , "\noutdir:", outdir
           , "\nplotdir:", plotdir))
 cat(paste0("Variables imported:"
           , "\ndrug:", drug
           , "\ngene:", gene
           , "\ngene_match:", gene_match
           , "\nAngstrom symbol:", angstroms_symbol
           , "\nNo. of duplicated muts:", dup_muts_nu
           , "\nNA count for ORs:", na_count
           , "\nNA count in df2:", na_count_df2
           , "\nNA count in df3:", na_count_df3))     
 #=======
 # output
 #=======
 # can't combine by cowplot because not ggplots
 #corr_plot_combined = "corr_combined.svg"
 #plot_corr_plot_combined  =  paste0(plotdir,"/", corr_plot_combined)
 # PS
 #ggcorr_all_ps = "ggcorr_all_PS.svg"
 ggcorr_all_ps = "ggcorr_all_PS.png"
 plot_ggcorr_all_ps =  paste0(plotdir,"/", ggcorr_all_ps)
 # LIG
 #ggcorr_all_lig = "ggcorr_all_LIG.svg"
 ggcorr_all_lig = "ggcorr_all_LIG.png"
 plot_ggcorr_all_lig =  paste0(plotdir,"/", ggcorr_all_lig )
 # combined
 ggcorr_all_combined_labelled = "ggcorr_all_combined_labelled.png"
 plot_ggcorr_all_combined_labelled  =  paste0(plotdir,"/", ggcorr_all_combined_labelled)
 ####################################################################
 #               end of loading libraries and functions                 #
 ########################################################################
 #%%%%%%%%%%%%%%%%%%%%%%%%%
 #df_ps = merged_df3_comp 
 #df_lig = merged_df3_comp_lig
 merged_df3 = as.data.frame(merged_df3)
 df_ps = merged_df3 
 df_lig = merged_df3_lig
 #%%%%%%%%%%%%%%%%%%%%%%%%%
 rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig)
 ########################################################################
 #               end of data extraction and cleaning for plots          #
 ########################################################################
 #======================
 # adding log cols 
 #======================
 # subset data to generate pairwise correlations
 cols_to_select =  c("duet_scaled"
                    , "foldx_scaled"
                    , "log10_or_mychisq"
                    , "neglog_pval_fisher"
                    #, "or_kin"
                    #, "neglog_pwald_kin"
                    , "af"
                    , "asa"
                    , "rsa"
                    , "kd_values"
                    , "rd_values"
                    , "duet_outcome"
                    , drug)
 corr_data_ps = df_ps[, cols_to_select]
 dim(corr_data_ps)
 #p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic 
 #p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic
 # assign nice colnames (for display)
 my_corr_colnames = c("DUET"
                     , "Foldx"
                     , "Log (OR)"
                     , "-Log (P)"
                     #, "OR (adjusted)"
                     #, "-Log (P wald)"
                     , "AF"
                     , "ASA"
                     , "RSA"
                     , "KD"
                     , "RD"
                     , "duet_outcome"
                     , drug)
 length(my_corr_colnames)
 colnames(corr_data_ps)
 colnames(corr_data_ps) <- my_corr_colnames
 colnames(corr_data_ps)
 #------------------------
 # Data for ggcorr PS plot
 #------------------------
 start = 1
 end_ggcorr = which(colnames(corr_data_ps) == "duet_outcome"); end_ggcorr # should be the last column
 offset = 1
 my_ggcorr_ps = corr_data_ps[start:(end_ggcorr-1)]
 head(my_ggcorr_ps)
 # correlation matrix
 corr1 <- round(cor(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs"), 1)
 # p-value matrix
 pmat1 <- cor_pmat(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs"
                  ,   conf.level = 0.99)
 corr2 = psych::corr.test(my_ggcorr_ps
                        , method = "spearman"
                        , use =  "pairwise.complete.obs")$r
 corr2 = round(corr2, 1)
 pmat2 =  psych::corr.test(my_ggcorr_ps
                            , method = "spearman"
                            , adjust = "none"
                            , use =  "pairwise.complete.obs")$p
 corr1== corr2
 pmat1==pmat2
 #------------------------
 # Generate ggcorr PS plot
 #------------------------
 cat("ggCorr plot PS:", plot_ggcorr_all_ps)
 #png(filename = plot_ggcorr_all_ps, width = 1024, height = 768, units = "px", pointsize = 20)
 ggcorr_ps = ggcorrplot(corr1
                       , p.mat = pmat1
                       , hc.order = TRUE
                       , outline.col = "black"
                       , ggtheme = ggplot2::theme_gray
                       , colors = c("#6D9EC1", "white", "#E46726")
                       , title = "DUET and Foldx stability")    
 ggcorr_ps
 #dev.off()
 #===========================
 # Data for Correlation plots: LIG
 #===========================
 table(df_lig$ligand_outcome)
 df_lig$log10_or_mychisq = log10(df_lig$or_mychisq)
 df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher)
 df_lig$log10_or_kin = log10(df_lig$or_kin)
 df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin)
 # subset data to generate pairwise correlations
 cols_to_select_lig =  c("affinity_scaled"
                    , "log10_or_mychisq"
                    , "neglog_pval_fisher"
                    , "or_kin"
                    , "neglog_pwald_kin"
                    , "af"
                    , "asa"
                    , "rsa"
                    , "kd_values"
                    , "rd_values"
                    , "ligand_outcome"
                    , drug)
 corr_data_lig = df_lig[, cols_to_select_lig]
 dim(corr_data_lig)
 # assign nice colnames (for display)
 my_corr_colnames_lig = c("Ligand Affinity"
                     , "Log (OR)"
                     , "-Log (P)"
                     , "OR (adjusted)"
                     , "-Log(P wald)"
                     , "AF"
                     , "ASA"
                     , "RSA"
                     , "KD"
                     , "RD"
                     , "ligand_outcome"
                     , drug)
 length(my_corr_colnames)
 colnames(corr_data_lig)
 colnames(corr_data_lig) <- my_corr_colnames_lig
 colnames(corr_data_lig)
 #------------------------
 # Data for ggcorr LIG plot
 #------------------------
 start = 1
 end_ggcorr_lig = which(colnames(corr_data_lig) == "ligand_outcome"); end_ggcorr_lig # should be the last column
 offset = 1
 my_ggcorr_lig = corr_data_lig[start:(end_ggcorr_lig-1)]
 head(my_ggcorr_lig); str(my_ggcorr_lig)
 # correlation matrix
 corr1_lig <- round(cor(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs"), 1)
 # p-value matrix
 pmat1_lig <- cor_pmat(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs")
 corr2_lig = psych::corr.test(my_ggcorr_lig
                             , method = "spearman"
                             , use =  "pairwise.complete.obs")$r
 corr2_lig = round(corr2_lig, 1)
 pmat2_lig =  psych::corr.test(my_ggcorr_lig
                              , method = "spearman"
                              , adjust = "none"
                              , use =  "pairwise.complete.obs")$p
 corr1_lig == corr2_lig
 pmat1_lig == pmat2_lig
 # for display order columns by hc order of ps 
 #col_order = levels(ggcorr_ps$data[2])
 #col_order <- c("Species", "Petal.Width", "Sepal.Length",
               #"Sepal.Width", "Petal.Length")
 #my_data2 <- my_data[, col_order]
 #my_data2
 #------------------------
 # Generate ggcorr LIG plot
 #------------------------
 cat("ggCorr LIG plot:", plot_ggcorr_all_lig)
 #svg(plot_ggcorr_all_lig, width = 15, height = 15)
 #png(plot_ggcorr_all_lig, width = 1024, height = 768, units = "px", pointsize = 20)
 ggcorr_lig = ggcorrplot(corr1_lig
                        , p.mat = pmat1_lig
                        , hc.order = TRUE
                        , outline.col = "black"
                        , ggtheme = ggplot2::theme_gray
                        , colors = c("#6D9EC1", "white", "#E46726")
                        , title = "Ligand affinty")    
 ggcorr_lig
 #dev.off()
 #######################################################
 #=============================
 # combine plots for output
 #=============================
 +
--- a/scripts/plotting/plotting_thesis/preformatting.R
+++ b/scripts/plotting/plotting_thesis/preformatting.R
@ -88,7 +88,7 @@ all_cols = c(common_cols
 #=======
 # output
 #=======
-outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene))
+outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
 ####################################
 # merged_df3: NECESSARY pre-processing
@ -228,6 +228,15 @@ corr_lig_colnames = c("mCSM-lig"
                     , drug)
 corr_ppi2_colnames = c("mCSM-PPI2"
                       , "SNAP2"             
                       , "Log (OR)"       
                       , "-Log (P)"
                       , "interface_dist" 
                       , "dst_mode"
                       , drug)
 corr_conservation = c("Consurf"
                       , "MAF"             
                       , "Log (OR)"       
                       , "-Log (P)"