going through functions and script for interactive plots
This commit is contained in:
parent
1f266c4cb8
commit
3f7bc908ec
7 changed files with 83 additions and 95 deletions
|
@ -21,7 +21,7 @@
|
|||
# 1) large combined df including NAs for AF, OR, etc.
|
||||
# Dim: same no. of rows as gene associated meta_data_with_AFandOR
|
||||
# 2) small combined df including NAs for AF, OR, etc.
|
||||
# Dim: same as mcsm data
|
||||
# Dim: same as mcsm data or foldX
|
||||
# 3) large combined df excluding NAs
|
||||
# Dim: dim(#1) - na_count_df2
|
||||
# 4) small combined df excluding NAs
|
||||
|
@ -31,10 +31,13 @@
|
|||
# 6) LIGAND small combined df excluding NAs
|
||||
# Dim: dim()
|
||||
#==========================================================
|
||||
#lig_dist_colname = 'ligand_distance' or global var LigDist_colname
|
||||
#lig_dist_cutoff = 10 or global var LigDist_cutoff
|
||||
|
||||
combining_dfs_plotting <- function( my_df_u
|
||||
, gene_metadata
|
||||
, lig_dist_colname = 'ligand_distance'
|
||||
, lig_dist_cutoff = 10){
|
||||
, lig_dist_colname = ''
|
||||
, lig_dist_cutoff = ''){
|
||||
|
||||
# counting NAs in AF, OR cols
|
||||
# or_mychisq
|
||||
|
|
|
@ -16,9 +16,12 @@ library(dplyr)
|
|||
## my_df_u_lig
|
||||
## dup_muts
|
||||
#========================================================
|
||||
#lig_dist_colname = 'ligand_distance' or global var LigDist_colname
|
||||
#lig_dist_cutoff = 10 or global var LigDist_cutoff
|
||||
|
||||
plotting_data <- function(df
|
||||
, lig_dist_colname = 'ligand_distance'
|
||||
, lig_dist_cutoff = 10) {
|
||||
, lig_dist_colname = ''
|
||||
, lig_dist_cutoff = '') {
|
||||
my_df = data.frame()
|
||||
my_df_u = data.frame()
|
||||
my_df_u_lig = data.frame()
|
||||
|
|
|
@ -23,12 +23,16 @@ import_dirs <- function(drug_name, gene_name) {
|
|||
|
||||
dr_muts_col <<- paste0('dr_mutations_', drug_name)
|
||||
other_muts_col <<- paste0('other_mutations_', drug_name)
|
||||
resistance_col <<- "drtype"
|
||||
gene_match <<- paste0(gene_name,"_p.")
|
||||
|
||||
}
|
||||
|
||||
# other globals
|
||||
# Other globals
|
||||
#=====================
|
||||
# Resistance colname
|
||||
#=====================
|
||||
resistance_col <<- "drtype"
|
||||
|
||||
#===============================
|
||||
# mcsm ligand distance cut off
|
||||
#===============================
|
||||
|
@ -39,7 +43,6 @@ LigDist_cutoff <<- 10
|
|||
# Angstroms symbol
|
||||
#==================
|
||||
angstroms_symbol <<- "\u212b"
|
||||
#cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))
|
||||
|
||||
#===============
|
||||
# Delta symbol
|
||||
|
|
|
@ -186,3 +186,7 @@ func_path = "~/git/LSHTM_analysis/scripts/functions/"
|
|||
source_files <- list.files(func_path, "\\.R$") # locate all .R files
|
||||
map(paste0(func_path, source_files), source) # source all your R scripts!
|
||||
|
||||
# set plot script dir
|
||||
plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/"
|
||||
|
||||
|
||||
|
|
|
@ -12,20 +12,24 @@ source("/home/tanu/git/LSHTM_analysis/scripts/plotting/Header_TT.R")
|
|||
# in from other scripts
|
||||
# to call this
|
||||
#********************
|
||||
# set drug and gene name
|
||||
|
||||
#====================
|
||||
# variables for lig
|
||||
#====================
|
||||
#==========================================
|
||||
# variables for lig:
|
||||
# comes from functions/plotting_globals.R
|
||||
#==========================================
|
||||
|
||||
#LigDist_colname = "ligand_distance"
|
||||
#LigDist_cutoff = 10
|
||||
cat("\nGlobal variables for Ligand:"
|
||||
, "\nligand distance colname:", LigDist_colname
|
||||
, "\nligand distance cut off:", LigDist_cutoff)
|
||||
|
||||
#===========
|
||||
# input
|
||||
#===========
|
||||
#---------------------
|
||||
#--------------------------------------------
|
||||
# call: import_dirs()
|
||||
#---------------------
|
||||
# comes from functions/plotting_globals.R
|
||||
#--------------------------------------------
|
||||
import_dirs(drug, gene)
|
||||
|
||||
#---------------------------
|
||||
|
@ -72,6 +76,8 @@ gene_metadata <- read.csv(infile_metadata
|
|||
, stringsAsFactors = F
|
||||
, header = T)
|
||||
|
||||
cat("\nDim of meta data file: ", dim(gene_metadata))
|
||||
|
||||
all_plot_dfs = combining_dfs_plotting(my_df_u
|
||||
, gene_metadata
|
||||
, lig_dist_colname = LigDist_colname
|
||||
|
@ -82,34 +88,6 @@ merged_df3 = all_plot_dfs[[2]]
|
|||
merged_df2_comp = all_plot_dfs[[3]]
|
||||
merged_df3_comp = all_plot_dfs[[4]]
|
||||
#======================================================================
|
||||
#TODO: Think! MOVE TO COMBINE or singular file for deepddg
|
||||
|
||||
#============================
|
||||
# adding deepddg scaled values
|
||||
# scale data b/w -1 and 1
|
||||
#============================
|
||||
# n = which(colnames(merged_df3) == "deepddg"); n
|
||||
#
|
||||
# my_min = min(merged_df3[,n]); my_min
|
||||
# my_max = max(merged_df3[,n]); my_max
|
||||
#
|
||||
# merged_df3$deepddg_scaled = ifelse(merged_df3[,n] < 0
|
||||
# , merged_df3[,n]/abs(my_min)
|
||||
# , merged_df3[,n]/my_max)
|
||||
# # sanity check
|
||||
# my_min = min(merged_df3$deepddg_scaled); my_min
|
||||
# my_max = max(merged_df3$deepddg_scaled); my_max
|
||||
#
|
||||
# if (my_min == -1 && my_max == 1){
|
||||
# cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1"
|
||||
# #, "\nProceeding with assigning deep outcome category")
|
||||
# , "\n")
|
||||
# }else{
|
||||
# cat("\nFAIL: could not scale DeepDDG ddg values"
|
||||
# , "Aborting!")
|
||||
# }
|
||||
#
|
||||
|
||||
####################################################################
|
||||
# Data for combining other dfs
|
||||
####################################################################
|
||||
|
@ -131,7 +109,7 @@ merged_df3_comp = all_plot_dfs[[4]]
|
|||
# Data for logoplots
|
||||
####################################################################
|
||||
|
||||
source("logo_data.R")
|
||||
source(paste0(plot_script_path, "logo_data.R"))
|
||||
|
||||
s1 = c("\nSuccessfully sourced logo_data.R")
|
||||
cat(s1)
|
||||
|
@ -142,7 +120,7 @@ cat(s1)
|
|||
|
||||
#source("other_plots_data.R")
|
||||
|
||||
source("dm_om_data.R")
|
||||
source(paste0(plot_script_path, "dm_om_data.R"))
|
||||
|
||||
s2 = c("\nSuccessfully sourced other_plots_data.R")
|
||||
cat(s2)
|
||||
|
@ -151,7 +129,7 @@ cat(s2)
|
|||
# Data for Lineage barplots: WF and LF dfs
|
||||
####################################################################
|
||||
|
||||
source("lineage_data.R")
|
||||
source(paste0(plot_script_path, "lineage_data.R"))
|
||||
|
||||
s3 = c("\nSuccessfully sourced lineage_data.R")
|
||||
cat(s3)
|
||||
|
@ -160,7 +138,7 @@ cat(s3)
|
|||
# Data for corr plots:
|
||||
####################################################################
|
||||
# make sure the above script works because merged_df2_combined is needed
|
||||
source("corr_data.R")
|
||||
source(paste0(plot_script_path, "corr_data.R"))
|
||||
|
||||
s4 = c("\nSuccessfully sourced corr_data.R")
|
||||
cat(s4)
|
||||
|
@ -168,7 +146,7 @@ cat(s4)
|
|||
########################################################################
|
||||
# End of script
|
||||
########################################################################
|
||||
if ( all( length(s1), length(s2), length(s3), length(s4) ) >0 ){
|
||||
if ( all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){
|
||||
cat(
|
||||
"\n##################################################"
|
||||
, "\nSuccessful: get_plotting_dfs.R worked!"
|
||||
|
@ -181,45 +159,18 @@ if ( all( length(s1), length(s2), length(s3), length(s4) ) >0 ){
|
|||
}
|
||||
|
||||
########################################################################
|
||||
# clear excess variables
|
||||
rm(c1, c2, c3, c4, check1
|
||||
, curr_count, curr_total
|
||||
, cols_check
|
||||
, cols_to_select
|
||||
, cols_to_select_deepddg
|
||||
, cols_to_select_duet
|
||||
, cols_to_select_dynamut
|
||||
, cols_to_select_dynamut2
|
||||
, cols_to_select_encomddg
|
||||
, cols_to_select_encomdds
|
||||
, cols_to_select_mcsm
|
||||
, cols_to_select_mcsm_na
|
||||
, cols_to_select_sdm
|
||||
# clear excess variables: from the global environment
|
||||
|
||||
vars0 = ls(envir = .GlobalEnv)[grepl("curr_*", ls(envir = .GlobalEnv))]
|
||||
vars1 = ls(envir = .GlobalEnv)[grepl("^cols_to*", ls(envir = .GlobalEnv))]
|
||||
vars2 = ls(envir = .GlobalEnv)[grepl("pivot_cols_*", ls(envir = .GlobalEnv))]
|
||||
vars3 = ls(envir = .GlobalEnv)[grepl("expected_*", ls(envir = .GlobalEnv))]
|
||||
|
||||
rm(c1
|
||||
, fact_cols
|
||||
, infile_metadata
|
||||
, infile_params
|
||||
#, infilename_dynamut
|
||||
#, infilename_dynamut2
|
||||
#, infilename_mcsm_f_snps
|
||||
#, infilename_mcsm_na
|
||||
)
|
||||
|
||||
rm(pivot_cols
|
||||
, pivot_cols_deepddg
|
||||
, pivot_cols_duet
|
||||
, pivot_cols_dynamut
|
||||
, pivot_cols_dynamut2
|
||||
, pivot_cols_encomddg
|
||||
, pivot_cols_encomdds
|
||||
, pivot_cols_foldx
|
||||
, pivot_cols_mcsm
|
||||
, pivot_cols_mcsm_na
|
||||
, pivot_cols_n
|
||||
, pivot_cols_sdm)
|
||||
|
||||
rm(expected_cols
|
||||
, expected_ncols
|
||||
, expected_rows
|
||||
, expected_rows_lf
|
||||
, fact_cols)
|
||||
|
||||
|
||||
, vars0
|
||||
, vars1
|
||||
, vars2
|
||||
, vars3)
|
|
@ -140,3 +140,4 @@ wide_df_or_mult = wide_df_or_mult[,-1]
|
|||
str(wide_df_or_mult)
|
||||
|
||||
position_or_mult = as.numeric(colnames(wide_df_or_mult))
|
||||
|
||||
|
|
|
@ -56,6 +56,8 @@ logo_combined_labelled = "logo_combined_labelled.svg"
|
|||
plot_logo_combined_labelled = paste0(plotdir,"/", logo_combined_labelled)
|
||||
|
||||
#########################################################
|
||||
#logo_or_mult_p + theme_dark()
|
||||
#logo_or_mult_p + theme(plot.background = element_rect(fill = "black"))
|
||||
|
||||
#==================================
|
||||
# Output
|
||||
|
@ -124,8 +126,24 @@ print(logo_logOR)
|
|||
#*****************************
|
||||
# Mutant logo plot: >1 nsSNP
|
||||
#******************************
|
||||
# Colour scheme for the logo plots, and the matching plot theme.
# my_logo_col is passed as `col_scheme` to the ggseqlogo() calls below;
# theme_bgc / font_bgc are used for plot.background fill and axis text colour.
aa_col_choices = c('chemistry', 'hydrophobicity', 'clustalx', 'taylor')
my_logo_col = aa_col_choices[[1]]

# Test set membership with %in%: comparing against the literal string
# 'clustalx || taylor' can never match any of the choices above.
# The branches are chained with `else if`; at top level the `else` must sit
# on the same line as the closing brace for R to parse it.
if (my_logo_col %in% c('clustalx', 'taylor')) {
  cat("\nSelected colour scheme:", my_logo_col
      , "\nUsing black theme\n")
  theme_bgc = "black"
  font_bgc = "white"
} else if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
  cat('\nSelected colour scheme:', my_logo_col
      , "\nUsing grey theme")
  theme_bgc = "grey"
  font_bgc = "black"
} else {
  # Fail early rather than leaving theme_bgc / font_bgc undefined.
  stop("Unknown logo colour scheme: ", my_logo_col
       , "\nExpected one of: ", paste(aa_col_choices, collapse = ", ")
       , call. = FALSE)
}
|
||||
|
||||
p0 = ggseqlogo(tab_mt
|
||||
, method = 'custom'
|
||||
, col_scheme = my_logo_col
|
||||
, seq_type = 'aa') +
|
||||
#ylab('my custom height') +
|
||||
theme(axis.text.x = element_blank()) +
|
||||
|
@ -143,9 +161,12 @@ cat('\nDone: p0')
|
|||
mut_logo_p = p0 + theme(legend.position = "none"
|
||||
, legend.title = element_blank()
|
||||
, legend.text = element_text(size = 20)
|
||||
, axis.text.x = element_text(size = 14, angle = 90)
|
||||
, axis.text.y = element_blank())
|
||||
#mut_logo_p
|
||||
, axis.text.x = element_text(size = 14
|
||||
, angle = 90
|
||||
, colour = font_bgc)
|
||||
, axis.text.y = element_blank()
|
||||
, plot.background = element_rect(fill = theme_bgc))
|
||||
mut_logo_p
|
||||
cat('\nDone: p0+mut_logo_p')
|
||||
|
||||
#*************************
|
||||
|
@ -154,9 +175,7 @@ cat('\nDone: p0+mut_logo_p')
|
|||
p2 = ggseqlogo(tab_wt
|
||||
, method = 'custom'
|
||||
, seq_type = 'aa'
|
||||
#, col_scheme = "taylor"
|
||||
#, col_scheme = chemistry2
|
||||
) +
|
||||
, col_scheme = my_logo_col) +
|
||||
#ylab('my custom height') +
|
||||
theme(text=element_text(family="FreeSans"))+
|
||||
theme(axis.text.x = element_blank()
|
||||
|
@ -185,7 +204,11 @@ cat('\nDone: wt_logo_p')
|
|||
#***********************
|
||||
# Logo OR >1 nsSNP
|
||||
#***********************
|
||||
logo_or_mult_p = ggseqlogo(wide_df_or_mult, method="custom", seq_type="aa") + ylab("my custom height") +
|
||||
logo_or_mult_p = ggseqlogo(wide_df_or_mult
|
||||
, method = "custom"
|
||||
, col_scheme = my_logo_col
|
||||
, seq_type="aa") +
|
||||
ylab("my custom height") +
|
||||
theme(axis.text.x = element_text(size = 14
|
||||
, angle = 90
|
||||
, hjust = 1
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue