going through functions and script for interactive plots

This commit is contained in:
Tanushree Tunstall 2022-01-12 17:58:16 +00:00
parent 1f266c4cb8
commit 3f7bc908ec
7 changed files with 83 additions and 95 deletions

View file

@ -21,7 +21,7 @@
# 1) large combined df including NAs for AF, OR,etc
# Dim: same no. of rows as gene associated meta_data_with_AFandOR
# 2) small combined df including NAs for AF, OR, etc.
# Dim: same as mcsm data
# Dim: same as mcsm data or foldX
# 3) large combined df excluding NAs
# Dim: dim(#1) - na_count_df2
# 4) small combined df excluding NAs
@ -31,10 +31,13 @@
# 6) LIGAND small combined df excluding NAs
# Dim: dim()
#==========================================================
#lig_dist_colname = 'ligand_distance' or global var LigDist_colname
#lig_dist_cutoff = 10 or global var LigDist_cutoff
combining_dfs_plotting <- function( my_df_u
, gene_metadata
, lig_dist_colname = 'ligand_distance'
, lig_dist_cutoff = 10){
, lig_dist_colname = ''
, lig_dist_cutoff = ''){
# counting NAs in AF, OR cols
# or_mychisq

View file

@ -16,9 +16,12 @@ library(dplyr)
## my_df_u_lig
## dup_muts
#========================================================
#lig_dist_colname = 'ligand_distance' or global var LigDist_colname
#lig_dist_cutoff = 10 or global var LigDist_cutoff
plotting_data <- function(df
, lig_dist_colname = 'ligand_distance'
, lig_dist_cutoff = 10) {
, lig_dist_colname = ''
, lig_dist_cutoff = '') {
my_df = data.frame()
my_df_u = data.frame()
my_df_u_lig = data.frame()

View file

@ -23,12 +23,16 @@ import_dirs <- function(drug_name, gene_name) {
dr_muts_col <<- paste0('dr_mutations_', drug_name)
other_muts_col <<- paste0('other_mutations_', drug_name)
resistance_col <<- "drtype"
gene_match <<- paste0(gene_name,"_p.")
}
# other globals
# Other globals
#=====================
# Resistance colname
#=====================
resistance_col <<- "drtype"
#===============================
# mcsm ligand distance cut off
#===============================
@ -39,7 +43,6 @@ LigDist_cutoff <<- 10
# Angstroms symbol
#==================
angstroms_symbol <<- "\u212b"
#cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))
#===============
# Delta symbol

View file

@ -186,3 +186,7 @@ func_path = "~/git/LSHTM_analysis/scripts/functions/"
source_files <- list.files(func_path, "\\.R$") # locate all .R files
map(paste0(func_path, source_files), source) # source all your R scripts!
# set plot script dir
plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/"

View file

@ -12,20 +12,24 @@ source("/home/tanu/git/LSHTM_analysis/scripts/plotting/Header_TT.R")
# in from other scripts
# to call this
#********************
# set drug and gene name
#====================
# variables for lig
#====================
#==========================================
# variables for lig:
# comes from functions/plotting_globals.R
#==========================================
#LigDist_colname = "ligand_distance"
#LigDist_cutoff = 10
cat("\nGlobal variables for Ligand:"
, "\nligand distance colname:", LigDist_colname
, "\nligand distance cut off:", LigDist_cutoff)
#===========
# input
#===========
#---------------------
#--------------------------------------------
# call: import_dirs()
#---------------------
# comes from functions/plotting_globals.R
#--------------------------------------------
import_dirs(drug, gene)
#---------------------------
@ -72,6 +76,8 @@ gene_metadata <- read.csv(infile_metadata
, stringsAsFactors = F
, header = T)
cat("\nDim of meta data file: ", dim(gene_metadata))
all_plot_dfs = combining_dfs_plotting(my_df_u
, gene_metadata
, lig_dist_colname = LigDist_colname
@ -82,34 +88,6 @@ merged_df3 = all_plot_dfs[[2]]
merged_df2_comp = all_plot_dfs[[3]]
merged_df3_comp = all_plot_dfs[[4]]
#======================================================================
#TODO: Think! MOVE TO COMBINE or singular file for deepddg
#============================
# adding deepddg scaled values
# scale data b/w -1 and 1
#============================
# n = which(colnames(merged_df3) == "deepddg"); n
#
# my_min = min(merged_df3[,n]); my_min
# my_max = max(merged_df3[,n]); my_max
#
# merged_df3$deepddg_scaled = ifelse(merged_df3[,n] < 0
# , merged_df3[,n]/abs(my_min)
# , merged_df3[,n]/my_max)
# # sanity check
# my_min = min(merged_df3$deepddg_scaled); my_min
# my_max = max(merged_df3$deepddg_scaled); my_max
#
# if (my_min == -1 && my_max == 1){
# cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1"
# #, "\nProceeding with assigning deep outcome category")
# , "\n")
# }else{
# cat("\nFAIL: could not scale DeepDDG ddg values"
# , "Aborting!")
# }
#
####################################################################
# Data for combining other dfs
####################################################################
@ -131,7 +109,7 @@ merged_df3_comp = all_plot_dfs[[4]]
# Data for logoplots
####################################################################
source("logo_data.R")
source(paste0(plot_script_path, "logo_data.R"))
s1 = c("\nSuccessfully sourced logo_data.R")
cat(s1)
@ -142,7 +120,7 @@ cat(s1)
#source("other_plots_data.R")
source("dm_om_data.R")
source(paste0(plot_script_path, "dm_om_data.R"))
# Status message for the script sourced just above (dm_om_data.R); the text
# previously named other_plots_data.R, whose source() call is commented out.
s2 = c("\nSuccessfully sourced dm_om_data.R")
cat(s2)
@ -151,7 +129,7 @@ cat(s2)
# Data for Lineage barplots: WF and LF dfs
####################################################################
source("lineage_data.R")
source(paste0(plot_script_path, "lineage_data.R"))
s3 = c("\nSuccessfully sourced lineage_data.R")
cat(s3)
@ -160,7 +138,7 @@ cat(s3)
# Data for corr plots:
####################################################################
# make sure the above script works because merged_df2_combined is needed
source("corr_data.R")
source(paste0(plot_script_path, "corr_data.R"))
s4 = c("\nSuccessfully sourced corr_data.R")
cat(s4)
@ -181,45 +159,18 @@ if ( all( length(s1), length(s2), length(s3), length(s4) ) >0 ){
}
########################################################################
# clear excess variables: from the global environment
#
# Temporary objects are collected by name and removed with rm(list = ...).
# Passing a character vector as a bare argument, e.g. rm(vars0), only deletes
# the vector itself and leaves the objects it names in place.
global_vars = ls(envir = .GlobalEnv)

# grepl() takes regular expressions, not globs, so the prefixes are anchored
# explicitly ("curr_*" as a regex matches any name containing "curr").
tmp_vars = global_vars[grepl("^(curr_|cols_to|pivot_cols|expected_)", global_vars)]

# Individually named objects; intersect() keeps only those that exist so the
# clean-up can be re-run without "object not found" warnings.
named_vars = intersect(c("c1"
                         , "fact_cols"
                         , "infile_metadata"
                         , "infile_params")
                       , global_vars)

rm(list = c(tmp_vars, named_vars), envir = .GlobalEnv)

# finally drop the helper vectors used for the clean-up itself
rm(global_vars, tmp_vars, named_vars)

View file

@ -140,3 +140,4 @@ wide_df_or_mult = wide_df_or_mult[,-1]
str(wide_df_or_mult)
position_or_mult = as.numeric(colnames(wide_df_or_mult))

View file

@ -56,6 +56,8 @@ logo_combined_labelled = "logo_combined_labelled.svg"
plot_logo_combined_labelled = paste0(plotdir,"/", logo_combined_labelled)
#########################################################
#logo_or_mult_p + theme_dark()
#logo_or_mult_p + theme(plot.background = element_rect(fill = "black"))
#==================================
# Output
@ -124,8 +126,24 @@ print(logo_logOR)
#*****************************
# Mutant logo plot: >1 nsSNP
#******************************
# Colour scheme for the logo plots: my_logo_col is passed as col_scheme to
# the ggseqlogo() calls below; aa_col_choices lists the supported options.
aa_col_choices = c('chemistry', 'hydrophobicity', 'clustalx', 'taylor')
my_logo_col = aa_col_choices[[1]]

# Choose the plot background (theme_bgc) and axis text colour (font_bgc) to
# suit the selected scheme. Membership is tested with %in%: comparing against
# the single string 'clustalx || taylor' can never equal a real scheme name.
if (my_logo_col %in% c('clustalx', 'taylor')) {
  cat("\nSelected colour scheme:", my_logo_col
      , "\nUsing black theme\n")
  theme_bgc = "black"
  font_bgc = "white"
} else if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
  cat('\nSelected colour scheme:', my_logo_col
      , "\nUsing grey theme")
  theme_bgc = "grey"
  font_bgc = "black"
} else {
  # fail early: theme_bgc/font_bgc would otherwise be undefined when the
  # plots are themed further down
  stop("Unknown logo colour scheme: ", my_logo_col, call. = FALSE)
}
p0 = ggseqlogo(tab_mt
, method = 'custom'
, col_scheme = my_logo_col
, seq_type = 'aa') +
#ylab('my custom height') +
theme(axis.text.x = element_blank()) +
@ -143,9 +161,12 @@ cat('\nDone: p0')
mut_logo_p = p0 + theme(legend.position = "none"
, legend.title = element_blank()
, legend.text = element_text(size = 20)
, axis.text.x = element_text(size = 14, angle = 90)
, axis.text.y = element_blank())
#mut_logo_p
, axis.text.x = element_text(size = 14
, angle = 90
, colour = font_bgc)
, axis.text.y = element_blank()
, plot.background = element_rect(fill = theme_bgc))
mut_logo_p
cat('\nDone: p0+mut_logo_p')
#*************************
@ -154,9 +175,7 @@ cat('\nDone: p0+mut_logo_p')
p2 = ggseqlogo(tab_wt
, method = 'custom'
, seq_type = 'aa'
#, col_scheme = "taylor"
#, col_scheme = chemistry2
) +
, col_scheme = my_logo_col) +
#ylab('my custom height') +
theme(text=element_text(family="FreeSans"))+
theme(axis.text.x = element_blank()
@ -185,7 +204,11 @@ cat('\nDone: wt_logo_p')
#***********************
# Logo OR >1 nsSNP
#***********************
logo_or_mult_p = ggseqlogo(wide_df_or_mult, method="custom", seq_type="aa") + ylab("my custom height") +
logo_or_mult_p = ggseqlogo(wide_df_or_mult
, method = "custom"
, col_scheme = my_logo_col
, seq_type="aa") +
ylab("my custom height") +
theme(axis.text.x = element_text(size = 14
, angle = 90
, hjust = 1