going through functions and script for interactive plots

2022-01-12 17:58:16 +00:00 · 2022-01-12 17:58:16 +00:00 · 3f7bc908ec
commit 3f7bc908ec
parent 1f266c4cb8
7 changed files with 83 additions and 95 deletions
--- a/scripts/functions/combining_dfs_plotting.R
+++ b/scripts/functions/combining_dfs_plotting.R
@ -21,7 +21,7 @@
 # 1) large combined df including NAs for AF, OR,etc
 # 		Dim: same no. of rows as gene associated meta_data_with_AFandOR
 # 2) small combined df including NAs for AF, OR, etc.
-# 		Dim: same as mcsm data
+# 		Dim: same as mcsm data or foldX
 # 3) large combined df excluding NAs 
 # 		Dim: dim(#1) - na_count_df2
 # 4) small combined df excluding NAs
@ -31,10 +31,13 @@
 # 6) LIGAND small combined df excluding NAs
 # 		Dim: dim()
 #==========================================================
+#lig_dist_colname = 'ligand_distance' or global var LigDist_colname
+#lig_dist_cutoff  =  10 or global var LigDist_cutoff
+
 combining_dfs_plotting <- function(  my_df_u
                                   , gene_metadata
-                                   , lig_dist_colname = 'ligand_distance'
-                                   , lig_dist_cutoff = 10){
+                                   , lig_dist_colname = ''
+                                   , lig_dist_cutoff = ''){

  # counting NAs in AF, OR cols
  # or_mychisq
--- a/scripts/functions/plotting_data.R
+++ b/scripts/functions/plotting_data.R
@ -16,9 +16,12 @@ library(dplyr)
  ## my_df_u_lig
  ## dup_muts
 #========================================================
+#lig_dist_colname = 'ligand_distance' or global var LigDist_colname
+#lig_dist_cutoff  =  10 or global var LigDist_cutoff
+
 plotting_data <- function(df
-                          , lig_dist_colname = 'ligand_distance'
-                          , lig_dist_cutoff = 10) {
+                          , lig_dist_colname = '' 
+                          , lig_dist_cutoff = '') {
 my_df       = data.frame()
 my_df_u     = data.frame()
 my_df_u_lig = data.frame()
--- a/scripts/functions/plotting_globals.R
+++ b/scripts/functions/plotting_globals.R
@ -23,12 +23,16 @@ import_dirs <- function(drug_name, gene_name) {
  
  dr_muts_col    <<- paste0('dr_mutations_', drug_name)
  other_muts_col <<- paste0('other_mutations_', drug_name)
-  resistance_col <<- "drtype"
  gene_match     <<- paste0(gene_name,"_p.")
  
 }

-# other globals
+# Other globals
+#=====================
+# Resistance colname
+#=====================
+resistance_col <<- "drtype"
+
 #===============================
 # mcsm ligand distance cut off
 #===============================
@ -39,7 +43,6 @@ LigDist_cutoff <<- 10
 # Angstroms symbol
 #==================
 angstroms_symbol <<- "\u212b"
-#cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))

 #===============
 # Delta symbol
--- a/scripts/plotting/Header_TT.R
+++ b/scripts/plotting/Header_TT.R
@ -186,3 +186,7 @@ func_path = "~/git/LSHTM_analysis/scripts/functions/"
 source_files <- list.files(func_path, "\\.R$")  # locate all .R files
 map(paste0(func_path, source_files), source)  # source all your R scripts!

+# set plot script dir
+plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/"
+
+
--- a/scripts/plotting/get_plotting_dfs.R
+++ b/scripts/plotting/get_plotting_dfs.R
@ -12,20 +12,24 @@ source("/home/tanu/git/LSHTM_analysis/scripts/plotting/Header_TT.R")
 # in from other scripts
 # to call this
 #********************
+# set drug and gene name

-#====================
-# variables for lig
-#====================
+#==========================================
+# variables for lig:
+# comes from functions/plotting_globals.R
+#==========================================

-#LigDist_colname = "ligand_distance"
-#LigDist_cutoff = 10
+cat("\nGlobal variables for Ligand:"
+    , "\nligand distance colname:", LigDist_colname
+    , "\nligand distance cut off:", LigDist_cutoff)

 #===========
 # input
 #===========
-#---------------------
+#--------------------------------------------
 # call: import_dirs()
-#---------------------
+# comes from functions/plotting_globals.R
+#--------------------------------------------
 import_dirs(drug, gene)

 #---------------------------
@ -72,6 +76,8 @@ gene_metadata <- read.csv(infile_metadata
                          , stringsAsFactors = F
                          , header = T)

+cat("\nDim of meta data file: ", dim(gene_metadata))
+
 all_plot_dfs = combining_dfs_plotting(my_df_u
                                      , gene_metadata
                                      , lig_dist_colname = LigDist_colname
@ -82,34 +88,6 @@ merged_df3      = all_plot_dfs[[2]]
 merged_df2_comp = all_plot_dfs[[3]]
 merged_df3_comp = all_plot_dfs[[4]]
 #======================================================================
-#TODO: Think! MOVE TO COMBINE or singular file for deepddg
-
-#============================
-# adding deepddg scaled values
-# scale data b/w -1 and 1
-#============================
-# n = which(colnames(merged_df3) == "deepddg"); n 
-# 
-# my_min = min(merged_df3[,n]); my_min 
-# my_max = max(merged_df3[,n]); my_max 
-# 
-# merged_df3$deepddg_scaled = ifelse(merged_df3[,n] < 0
-#                                    , merged_df3[,n]/abs(my_min)
-#                                    , merged_df3[,n]/my_max) 
-# # sanity check
-# my_min = min(merged_df3$deepddg_scaled); my_min 
-# my_max = max(merged_df3$deepddg_scaled); my_max
-# 
-# if (my_min == -1 && my_max == 1){
-#    cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1"
-#        #, "\nProceeding with assigning deep outcome category")
-#        , "\n")
-# }else{
-#    cat("\nFAIL: could not scale DeepDDG ddg values"
-#        , "Aborting!")
-# }
-# 
-
 ####################################################################
 #                        Data for combining other dfs
 ####################################################################
@ -131,7 +109,7 @@ merged_df3_comp = all_plot_dfs[[4]]
 #                        Data for logoplots
 ####################################################################

-source("logo_data.R")
+source(paste0(plot_script_path, "logo_data.R"))

 s1 = c("\nSuccessfully sourced logo_data.R")
 cat(s1)
@ -142,7 +120,7 @@ cat(s1)

 #source("other_plots_data.R")

-source("dm_om_data.R")
+source(paste0(plot_script_path, "dm_om_data.R"))

 s2 = c("\nSuccessfully sourced other_plots_data.R")
 cat(s2)
@ -151,7 +129,7 @@ cat(s2)
 #                  Data for Lineage barplots: WF and LF dfs
 ####################################################################

-source("lineage_data.R")
+source(paste0(plot_script_path, "lineage_data.R"))

 s3 = c("\nSuccessfully sourced lineage_data.R")
 cat(s3)
@ -160,7 +138,7 @@ cat(s3)
 #                  Data for corr plots:
 ####################################################################
 # make sure the above script works because merged_df2_combined is needed
-source("corr_data.R")
+source(paste0(plot_script_path, "corr_data.R"))

 s4 = c("\nSuccessfully sourced corr_data.R")
 cat(s4)
@ -181,45 +159,18 @@ if (  all( length(s1), length(s2), length(s3), length(s4) ) >0 ){
 }   
 
 ########################################################################
-# clear excess variables
-rm(c1, c2, c3, c4, check1
-   , curr_count, curr_total
-   , cols_check
-   , cols_to_select
-   , cols_to_select_deepddg
-   , cols_to_select_duet
-   , cols_to_select_dynamut
-   , cols_to_select_dynamut2
-   , cols_to_select_encomddg
-   , cols_to_select_encomdds
-   , cols_to_select_mcsm
-   , cols_to_select_mcsm_na
-   , cols_to_select_sdm
+# clear excess variables: from the global environment
+# match names by anchored prefix; rm(list = ) removes the objects they name
+vars0 = ls(envir = .GlobalEnv, pattern = "^curr_")
+vars1 = ls(envir = .GlobalEnv, pattern = "^cols_to")
+vars2 = ls(envir = .GlobalEnv, pattern = "^pivot_cols")
+vars3 = ls(envir = .GlobalEnv, pattern = "^expected_")
+rm(list = c(vars0, vars1, vars2, vars3), envir = .GlobalEnv)
+rm(c1
+   , fact_cols
   , infile_metadata
   , infile_params
-   #, infilename_dynamut
-   #, infilename_dynamut2
-   #, infilename_mcsm_f_snps
-   #, infilename_mcsm_na
-   )
-
-rm(pivot_cols
-, pivot_cols_deepddg
-, pivot_cols_duet
-, pivot_cols_dynamut
-, pivot_cols_dynamut2
-, pivot_cols_encomddg
-, pivot_cols_encomdds
-, pivot_cols_foldx
-, pivot_cols_mcsm
-, pivot_cols_mcsm_na
-, pivot_cols_n
-, pivot_cols_sdm)
-
-rm(expected_cols
-, expected_ncols
-, expected_rows
-, expected_rows_lf
-, fact_cols)
-
-   
+   , vars0
+   , vars1
+   , vars2
+   , vars3)
--- a/scripts/plotting/logo_data.R
+++ b/scripts/plotting/logo_data.R
@ -140,3 +140,4 @@ wide_df_or_mult = wide_df_or_mult[,-1]
 str(wide_df_or_mult)

 position_or_mult = as.numeric(colnames(wide_df_or_mult))
+
--- a/scripts/plotting/logo_plots.R
+++ b/scripts/plotting/logo_plots.R
@ -56,6 +56,8 @@ logo_combined_labelled = "logo_combined_labelled.svg"
 plot_logo_combined_labelled  = paste0(plotdir,"/", logo_combined_labelled)

 #########################################################
+#logo_or_mult_p + theme_dark()
+#logo_or_mult_p + theme(plot.background = element_rect(fill = "black"))

 #==================================
 # Output
@ -124,8 +126,24 @@ print(logo_logOR)
 #*****************************
 # Mutant logo plot: >1 nsSNP
 #******************************
+aa_col_choices = c('chemistry', 'hydrophobicity', 'clustalx', 'taylor')
+my_logo_col = aa_col_choices[[1]]
+# pick plot background and axis font colours to suit the chosen col_scheme
+if (my_logo_col %in% c('clustalx', 'taylor')){
+  cat("\nSelected colour scheme:", my_logo_col
+      , "\nUsing black theme\n")
+  theme_bgc = "black"
+  font_bgc  = "white"
+} else if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
+  cat('\nSelected colour scheme:', my_logo_col
+      , "\nUsing grey theme")
+  theme_bgc = "grey"
+  font_bgc  = "black"
+}
+
 p0 = ggseqlogo(tab_mt
               , method = 'custom'
+               , col_scheme = my_logo_col
               , seq_type = 'aa') + 
  #ylab('my custom height') +
  theme(axis.text.x = element_blank()) +
@ -143,9 +161,12 @@ cat('\nDone: p0')
 mut_logo_p = p0 + theme(legend.position = "none"
                , legend.title = element_blank()
                , legend.text = element_text(size = 20)
-                , axis.text.x = element_text(size = 14, angle = 90)
-                , axis.text.y = element_blank())
-#mut_logo_p
+                , axis.text.x = element_text(size = 14
+                                             , angle = 90
+                                             , colour = font_bgc)
+                , axis.text.y = element_blank()
+                , plot.background = element_rect(fill = theme_bgc))
+mut_logo_p
 cat('\nDone: p0+mut_logo_p')

 #*************************
@ -154,9 +175,7 @@ cat('\nDone: p0+mut_logo_p')
 p2 = ggseqlogo(tab_wt
               , method = 'custom'
               , seq_type = 'aa'
-               #, col_scheme = "taylor"
-               #, col_scheme = chemistry2
-) + 
+               , col_scheme = my_logo_col) + 
  #ylab('my custom height') +
  theme(text=element_text(family="FreeSans"))+
  theme(axis.text.x = element_blank()
@ -185,7 +204,11 @@ cat('\nDone: wt_logo_p')
 #***********************
 # Logo OR >1 nsSNP
 #***********************
-logo_or_mult_p = ggseqlogo(wide_df_or_mult, method="custom", seq_type="aa") + ylab("my custom height") +
+logo_or_mult_p = ggseqlogo(wide_df_or_mult
+                           , method = "custom"
+                           , col_scheme = my_logo_col
+                           , seq_type="aa") + 
+  ylab("my custom height") +
  theme(axis.text.x = element_text(size = 14
                                   , angle = 90
                                   , hjust = 1