going through functions and script for interactive plots

2022-01-12 17:58:16 +00:00 · 2022-01-12 17:58:16 +00:00 · 3f7bc908ec
commit 3f7bc908ec
parent 1f266c4cb8
7 changed files with 83 additions and 95 deletions
--- a/scripts/functions/combining_dfs_plotting.R
+++ b/scripts/functions/combining_dfs_plotting.R
@ -21,7 +21,7 @@
 # 1) large combined df including NAs for AF, OR,etc
 # 		Dim: same no. of rows as gene associated meta_data_with_AFandOR
 # 2) small combined df including NAs for AF, OR, etc.
-# 		Dim: same as mcsm data
+# 		Dim: same as mcsm data or foldX
 # 3) large combined df excluding NAs 
 # 		Dim: dim(#1) - na_count_df2
 # 4) small combined df excluding NAs
@ -31,10 +31,13 @@
 # 6) LIGAND small combined df excluding NAs
 # 		Dim: dim()
 #==========================================================
 #lig_dist_colname = 'ligand_distance' or global var LigDist_colname
 #lig_dist_cutoff  =  10 or global var LigDist_cutoff
 combining_dfs_plotting <- function(  my_df_u
                                   , gene_metadata
-                                   , lig_dist_colname = 'ligand_distance'
+                                   , lig_dist_colname = ''
-                                   , lig_dist_cutoff = 10){
+                                   , lig_dist_cutoff = ''){
  # counting NAs in AF, OR cols
  # or_mychisq
--- a/scripts/functions/plotting_data.R
+++ b/scripts/functions/plotting_data.R
@ -16,9 +16,12 @@ library(dplyr)
  ## my_df_u_lig
  ## dup_muts
 #========================================================
 #lig_dist_colname = 'ligand_distance' or global var LigDist_colname
 #lig_dist_cutoff  =  10 or global var LigDist_cutoff
 plotting_data <- function(df
-                          , lig_dist_colname = 'ligand_distance'
+                          , lig_dist_colname = '' 
-                          , lig_dist_cutoff = 10) {
+                          , lig_dist_cutoff = '') {
 my_df       = data.frame()
 my_df_u     = data.frame()
 my_df_u_lig = data.frame()
--- a/scripts/functions/plotting_globals.R
+++ b/scripts/functions/plotting_globals.R
@ -23,12 +23,16 @@ import_dirs <- function(drug_name, gene_name) {
  dr_muts_col    <<- paste0('dr_mutations_', drug_name)
  other_muts_col <<- paste0('other_mutations_', drug_name)
  resistance_col <<- "drtype"
  gene_match     <<- paste0(gene_name,"_p.")
 }
-# other globals
+# Other globals
 #=====================
 # Resistance colname
 #=====================
 resistance_col <<- "drtype"
 #===============================
 # mcsm ligand distance cut off
 #===============================
@ -39,7 +43,6 @@ LigDist_cutoff <<- 10
 # Angstroms symbol
 #==================
 angstroms_symbol <<- "\u212b"
 #cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))
 #===============
 # Delta symbol
--- a/scripts/plotting/Header_TT.R
+++ b/scripts/plotting/Header_TT.R
@ -186,3 +186,7 @@ func_path = "~/git/LSHTM_analysis/scripts/functions/"
 source_files <- list.files(func_path, "\\.R$")  # locate all .R files
 map(paste0(func_path, source_files), source)  # source all your R scripts!
 # set plot script dir
 plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/"
--- a/scripts/plotting/get_plotting_dfs.R
+++ b/scripts/plotting/get_plotting_dfs.R
@ -12,20 +12,24 @@ source("/home/tanu/git/LSHTM_analysis/scripts/plotting/Header_TT.R")
 # in from other scripts
 # to call this
 #********************
 # set drug and gene name
-#====================
+#==========================================
-# variables for lig
+# variables for lig:
-#====================
+# comes from functions/plotting_globals.R
 #==========================================
-#LigDist_colname = "ligand_distance"
+cat("\nGlobal variables for Ligand:"
-#LigDist_cutoff = 10
+    , "\nligand distance colname:", LigDist_colname
    , "\nligand distance cut off:", LigDist_cutoff)
 #===========
 # input
 #===========
-#---------------------
+#--------------------------------------------
 # call: import_dirs()
-#---------------------
+# comes from functions/plotting_globals.R
 #--------------------------------------------
 import_dirs(drug, gene)
 #---------------------------
@ -72,6 +76,8 @@ gene_metadata <- read.csv(infile_metadata
                          , stringsAsFactors = F
                          , header = T)
 cat("\nDim of meta data file: ", dim(gene_metadata))
 all_plot_dfs = combining_dfs_plotting(my_df_u
                                      , gene_metadata
                                      , lig_dist_colname = LigDist_colname
@ -82,34 +88,6 @@ merged_df3      = all_plot_dfs[[2]]
 merged_df2_comp = all_plot_dfs[[3]]
 merged_df3_comp = all_plot_dfs[[4]]
 #======================================================================
 #TODO: Think! MOVE TO COMBINE or singular file for deepddg
 #============================
 # adding deepddg scaled values
 # scale data b/w -1 and 1
 #============================
 # n = which(colnames(merged_df3) == "deepddg"); n 
 # 
 # my_min = min(merged_df3[,n]); my_min 
 # my_max = max(merged_df3[,n]); my_max 
 # 
 # merged_df3$deepddg_scaled = ifelse(merged_df3[,n] < 0
 #                                    , merged_df3[,n]/abs(my_min)
 #                                    , merged_df3[,n]/my_max) 
 # # sanity check
 # my_min = min(merged_df3$deepddg_scaled); my_min 
 # my_max = max(merged_df3$deepddg_scaled); my_max
 # 
 # if (my_min == -1 && my_max == 1){
 #    cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1"
 #        #, "\nProceeding with assigning deep outcome category")
 #        , "\n")
 # }else{
 #    cat("\nFAIL: could not scale DeepDDG ddg values"
 #        , "Aborting!")
 # }
 # 
 ####################################################################
 #                        Data for combining other dfs
 ####################################################################
@ -131,7 +109,7 @@ merged_df3_comp = all_plot_dfs[[4]]
 #                        Data for logoplots
 ####################################################################
-source("logo_data.R")
+source(paste0(plot_script_path, "logo_data.R"))
 s1 = c("\nSuccessfully sourced logo_data.R")
 cat(s1)
@ -142,7 +120,7 @@ cat(s1)
 #source("other_plots_data.R")
-source("dm_om_data.R")
+source(paste0(plot_script_path, "dm_om_data.R"))
 s2 = c("\nSuccessfully sourced other_plots_data.R")
 cat(s2)
@ -151,7 +129,7 @@ cat(s2)
 #                  Data for Lineage barplots: WF and LF dfs
 ####################################################################
-source("lineage_data.R")
+source(paste0(plot_script_path, "lineage_data.R"))
 s3 = c("\nSuccessfully sourced lineage_data.R")
 cat(s3)
@ -160,7 +138,7 @@ cat(s3)
 #                  Data for corr plots:
 ####################################################################
 # make sure the above script works because merged_df2_combined is needed
-source("corr_data.R")
+source(paste0(plot_script_path, "corr_data.R"))
 s4 = c("\nSuccessfully sourced corr_data.R")
 cat(s4)
@ -168,7 +146,7 @@ cat(s4)
 ########################################################################
 #                           End of script
 ########################################################################
-if (  all( length(s1), length(s2), length(s3), length(s4) ) >0 ){
+if (  all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){
 cat(
  "\n##################################################"
 , "\nSuccessful: get_plotting_dfs.R worked!"
@ -181,45 +159,18 @@ if (  all( length(s1), length(s2), length(s3), length(s4) ) >0 ){
 }   
 ########################################################################
-# clear excess variables
+# clear excess variables: from the global environment
-rm(c1, c2, c3, c4, check1
+
-   , curr_count, curr_total
+vars0 = ls(envir = .GlobalEnv)[grepl("curr_*", ls(envir = .GlobalEnv))] 
-   , cols_check
+vars1 = ls(envir = .GlobalEnv)[grepl("^cols_to*", ls(envir = .GlobalEnv))] 
-   , cols_to_select
+vars2 = ls(envir = .GlobalEnv)[grepl("pivot_cols_*", ls(envir = .GlobalEnv))]
-   , cols_to_select_deepddg
+vars3 = ls(envir = .GlobalEnv)[grepl("expected_*", ls(envir = .GlobalEnv))]
-   , cols_to_select_duet
+
-   , cols_to_select_dynamut
+rm(c1
-   , cols_to_select_dynamut2
+   , fact_cols
   , cols_to_select_encomddg
   , cols_to_select_encomdds
   , cols_to_select_mcsm
   , cols_to_select_mcsm_na
   , cols_to_select_sdm
   , infile_metadata
   , infile_params
-   #, infilename_dynamut
+   , vars0
-   #, infilename_dynamut2
+   , vars1
-   #, infilename_mcsm_f_snps
+   , vars2
-   #, infilename_mcsm_na
+   , vars3)
   )
 rm(pivot_cols
 , pivot_cols_deepddg
 , pivot_cols_duet
 , pivot_cols_dynamut
 , pivot_cols_dynamut2
 , pivot_cols_encomddg
 , pivot_cols_encomdds
 , pivot_cols_foldx
 , pivot_cols_mcsm
 , pivot_cols_mcsm_na
 , pivot_cols_n
 , pivot_cols_sdm)
 rm(expected_cols
 , expected_ncols
 , expected_rows
 , expected_rows_lf
 , fact_cols)
--- a/scripts/plotting/logo_data.R
+++ b/scripts/plotting/logo_data.R
@ -140,3 +140,4 @@ wide_df_or_mult = wide_df_or_mult[,-1]
 str(wide_df_or_mult)
 position_or_mult = as.numeric(colnames(wide_df_or_mult))
--- a/scripts/plotting/logo_plots.R
+++ b/scripts/plotting/logo_plots.R
@ -56,6 +56,8 @@ logo_combined_labelled = "logo_combined_labelled.svg"
 plot_logo_combined_labelled  = paste0(plotdir,"/", logo_combined_labelled)
 #########################################################
 #logo_or_mult_p + theme_dark()
 #logo_or_mult_p + theme(plot.background = element_rect(fill = "black"))
 #==================================
 # Output
@ -124,8 +126,24 @@ print(logo_logOR)
 #*****************************
 # Mutant logo plot: >1 nsSNP
 #******************************
 # Colour scheme for the logo plots and the matching plot theme colours.
 #   aa_col_choices : colour scheme names passed as col_scheme to ggseqlogo()
 #   my_logo_col    : the scheme currently selected (change the index to switch)
 #   theme_bgc      : plot background fill used for the logo plots
 #   font_bgc       : axis text colour chosen to contrast with theme_bgc
 aa_col_choices = c('chemistry', 'hydrophobicity', 'clustalx', 'taylor')
 my_logo_col = aa_col_choices[[1]]
 # Membership is tested with %in%: comparing against the single string
 # 'clustalx || taylor' can never match any entry of aa_col_choices.
 if (my_logo_col %in% c('clustalx', 'taylor')){
  cat("\nSelected colour scheme:", my_logo_col
      , "\nUsing black theme\n")
  theme_bgc = "black"
  font_bgc  = "white"
 } else if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
  # 'else if' must follow the closing brace on the same line; a bare
  # '} if (...)' is a parse error in R.
  cat('\nSelected colour scheme:', my_logo_col
      , "\nUsing grey theme")
  theme_bgc = "grey"
  font_bgc  = "black"
 }
 p0 = ggseqlogo(tab_mt
               , method = 'custom'
               , col_scheme = my_logo_col
               , seq_type = 'aa') + 
  #ylab('my custom height') +
  theme(axis.text.x = element_blank()) +
@ -143,9 +161,12 @@ cat('\nDone: p0')
 mut_logo_p = p0 + theme(legend.position = "none"
                , legend.title = element_blank()
                , legend.text = element_text(size = 20)
-                , axis.text.x = element_text(size = 14, angle = 90)
+                , axis.text.x = element_text(size = 14
-                , axis.text.y = element_blank())
+                                             , angle = 90
-#mut_logo_p
+                                             , colour = font_bgc)
                , axis.text.y = element_blank()
                , plot.background = element_rect(fill = theme_bgc))
 mut_logo_p
 cat('\nDone: p0+mut_logo_p')
 #*************************
@ -154,9 +175,7 @@ cat('\nDone: p0+mut_logo_p')
 p2 = ggseqlogo(tab_wt
               , method = 'custom'
               , seq_type = 'aa'
-               #, col_scheme = "taylor"
+               , col_scheme = my_logo_col) + 
               #, col_scheme = chemistry2
 ) + 
  #ylab('my custom height') +
  theme(text=element_text(family="FreeSans"))+
  theme(axis.text.x = element_blank()
@ -185,7 +204,11 @@ cat('\nDone: wt_logo_p')
 #***********************
 # Logo OR >1 nsSNP
 #***********************
-logo_or_mult_p = ggseqlogo(wide_df_or_mult, method="custom", seq_type="aa") + ylab("my custom height") +
+logo_or_mult_p = ggseqlogo(wide_df_or_mult
                           , method = "custom"
                           , col_scheme = my_logo_col
                           , seq_type="aa") + 
  ylab("my custom height") +
  theme(axis.text.x = element_text(size = 14
                                   , angle = 90
                                   , hjust = 1
`@ -140,3 +140,4 @@ wide_df_or_mult = wide_df_or_mult[,-1]`
	`str(wide_df_or_mult)`	`str(wide_df_or_mult)`

	`position_or_mult = as.numeric(colnames(wide_df_or_mult))`	`position_or_mult = as.numeric(colnames(wide_df_or_mult))`