going through functions and script for interactive plots
This commit is contained in:
parent
1f266c4cb8
commit
3f7bc908ec
7 changed files with 83 additions and 95 deletions
|
@ -21,7 +21,7 @@
|
||||||
# 1) large combined df including NAs for AF, OR,etc
|
# 1) large combined df including NAs for AF, OR,etc
|
||||||
# Dim: same no. of rows as gene associated meta_data_with_AFandOR
|
# Dim: same no. of rows as gene associated meta_data_with_AFandOR
|
||||||
# 2) small combined df including NAs for AF, OR, etc.
|
# 2) small combined df including NAs for AF, OR, etc.
|
||||||
# Dim: same as mcsm data
|
# Dim: same as mcsm data or foldX
|
||||||
# 3) large combined df excluding NAs
|
# 3) large combined df excluding NAs
|
||||||
# Dim: dim(#1) - na_count_df2
|
# Dim: dim(#1) - na_count_df2
|
||||||
# 4) small combined df excluding NAs
|
# 4) small combined df excluding NAs
|
||||||
|
@ -31,10 +31,13 @@
|
||||||
# 6) LIGAND small combined df excluding NAs
|
# 6) LIGAND small combined df excluding NAs
|
||||||
# Dim: dim()
|
# Dim: dim()
|
||||||
#==========================================================
|
#==========================================================
|
||||||
|
#lig_dist_colname = 'ligand_distance' or global var LigDist_colname
|
||||||
|
#lig_dist_cutoff = 10 or global var LigDist_cutoff
|
||||||
|
|
||||||
combining_dfs_plotting <- function( my_df_u
|
combining_dfs_plotting <- function( my_df_u
|
||||||
, gene_metadata
|
, gene_metadata
|
||||||
, lig_dist_colname = 'ligand_distance'
|
, lig_dist_colname = ''
|
||||||
, lig_dist_cutoff = 10){
|
, lig_dist_cutoff = ''){
|
||||||
|
|
||||||
# counting NAs in AF, OR cols
|
# counting NAs in AF, OR cols
|
||||||
# or_mychisq
|
# or_mychisq
|
||||||
|
|
|
@ -16,9 +16,12 @@ library(dplyr)
|
||||||
## my_df_u_lig
|
## my_df_u_lig
|
||||||
## dup_muts
|
## dup_muts
|
||||||
#========================================================
|
#========================================================
|
||||||
|
#lig_dist_colname = 'ligand_distance' or global var LigDist_colname
|
||||||
|
#lig_dist_cutoff = 10 or global var LigDist_cutoff
|
||||||
|
|
||||||
plotting_data <- function(df
|
plotting_data <- function(df
|
||||||
, lig_dist_colname = 'ligand_distance'
|
, lig_dist_colname = ''
|
||||||
, lig_dist_cutoff = 10) {
|
, lig_dist_cutoff = '') {
|
||||||
my_df = data.frame()
|
my_df = data.frame()
|
||||||
my_df_u = data.frame()
|
my_df_u = data.frame()
|
||||||
my_df_u_lig = data.frame()
|
my_df_u_lig = data.frame()
|
||||||
|
|
|
@ -23,12 +23,16 @@ import_dirs <- function(drug_name, gene_name) {
|
||||||
|
|
||||||
dr_muts_col <<- paste0('dr_mutations_', drug_name)
|
dr_muts_col <<- paste0('dr_mutations_', drug_name)
|
||||||
other_muts_col <<- paste0('other_mutations_', drug_name)
|
other_muts_col <<- paste0('other_mutations_', drug_name)
|
||||||
resistance_col <<- "drtype"
|
|
||||||
gene_match <<- paste0(gene_name,"_p.")
|
gene_match <<- paste0(gene_name,"_p.")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# other globals
|
# Other globals
|
||||||
|
#=====================
|
||||||
|
# Resistance colname
|
||||||
|
#=====================
|
||||||
|
resistance_col <<- "drtype"
|
||||||
|
|
||||||
#===============================
|
#===============================
|
||||||
# mcsm ligand distance cut off
|
# mcsm ligand distance cut off
|
||||||
#===============================
|
#===============================
|
||||||
|
@ -39,7 +43,6 @@ LigDist_cutoff <<- 10
|
||||||
# Angstroms symbol
|
# Angstroms symbol
|
||||||
#==================
|
#==================
|
||||||
angstroms_symbol <<- "\u212b"
|
angstroms_symbol <<- "\u212b"
|
||||||
#cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))
|
|
||||||
|
|
||||||
#===============
|
#===============
|
||||||
# Delta symbol
|
# Delta symbol
|
||||||
|
|
|
@ -186,3 +186,7 @@ func_path = "~/git/LSHTM_analysis/scripts/functions/"
|
||||||
source_files <- list.files(func_path, "\\.R$") # locate all .R files
|
source_files <- list.files(func_path, "\\.R$") # locate all .R files
|
||||||
map(paste0(func_path, source_files), source) # source all your R scripts!
|
map(paste0(func_path, source_files), source) # source all your R scripts!
|
||||||
|
|
||||||
|
# set plot script dir
|
||||||
|
plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/"
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -12,20 +12,24 @@ source("/home/tanu/git/LSHTM_analysis/scripts/plotting/Header_TT.R")
|
||||||
# in from other scripts
|
# in from other scripts
|
||||||
# to call this
|
# to call this
|
||||||
#********************
|
#********************
|
||||||
|
# set drug and gene name
|
||||||
|
|
||||||
#====================
|
#==========================================
|
||||||
# variables for lig
|
# variables for lig:
|
||||||
#====================
|
# comes from functions/plotting_globals.R
|
||||||
|
#==========================================
|
||||||
|
|
||||||
#LigDist_colname = "ligand_distance"
|
cat("\nGlobal variables for Ligand:"
|
||||||
#LigDist_cutoff = 10
|
, "\nligand distance colname:", LigDist_colname
|
||||||
|
, "\nligand distance cut off:", LigDist_cutoff)
|
||||||
|
|
||||||
#===========
|
#===========
|
||||||
# input
|
# input
|
||||||
#===========
|
#===========
|
||||||
#---------------------
|
#--------------------------------------------
|
||||||
# call: import_dirs()
|
# call: import_dirs()
|
||||||
#---------------------
|
# comes from functions/plotting_globals.R
|
||||||
|
#--------------------------------------------
|
||||||
import_dirs(drug, gene)
|
import_dirs(drug, gene)
|
||||||
|
|
||||||
#---------------------------
|
#---------------------------
|
||||||
|
@ -72,6 +76,8 @@ gene_metadata <- read.csv(infile_metadata
|
||||||
, stringsAsFactors = F
|
, stringsAsFactors = F
|
||||||
, header = T)
|
, header = T)
|
||||||
|
|
||||||
|
cat("\nDim of meta data file: ", dim(gene_metadata))
|
||||||
|
|
||||||
all_plot_dfs = combining_dfs_plotting(my_df_u
|
all_plot_dfs = combining_dfs_plotting(my_df_u
|
||||||
, gene_metadata
|
, gene_metadata
|
||||||
, lig_dist_colname = LigDist_colname
|
, lig_dist_colname = LigDist_colname
|
||||||
|
@ -82,34 +88,6 @@ merged_df3 = all_plot_dfs[[2]]
|
||||||
merged_df2_comp = all_plot_dfs[[3]]
|
merged_df2_comp = all_plot_dfs[[3]]
|
||||||
merged_df3_comp = all_plot_dfs[[4]]
|
merged_df3_comp = all_plot_dfs[[4]]
|
||||||
#======================================================================
|
#======================================================================
|
||||||
#TODO: Think! MOVE TO COMBINE or singular file for deepddg
|
|
||||||
|
|
||||||
#============================
|
|
||||||
# adding deepddg scaled values
|
|
||||||
# scale data b/w -1 and 1
|
|
||||||
#============================
|
|
||||||
# n = which(colnames(merged_df3) == "deepddg"); n
|
|
||||||
#
|
|
||||||
# my_min = min(merged_df3[,n]); my_min
|
|
||||||
# my_max = max(merged_df3[,n]); my_max
|
|
||||||
#
|
|
||||||
# merged_df3$deepddg_scaled = ifelse(merged_df3[,n] < 0
|
|
||||||
# , merged_df3[,n]/abs(my_min)
|
|
||||||
# , merged_df3[,n]/my_max)
|
|
||||||
# # sanity check
|
|
||||||
# my_min = min(merged_df3$deepddg_scaled); my_min
|
|
||||||
# my_max = max(merged_df3$deepddg_scaled); my_max
|
|
||||||
#
|
|
||||||
# if (my_min == -1 && my_max == 1){
|
|
||||||
# cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1"
|
|
||||||
# #, "\nProceeding with assigning deep outcome category")
|
|
||||||
# , "\n")
|
|
||||||
# }else{
|
|
||||||
# cat("\nFAIL: could not scale DeepDDG ddg values"
|
|
||||||
# , "Aborting!")
|
|
||||||
# }
|
|
||||||
#
|
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# Data for combining other dfs
|
# Data for combining other dfs
|
||||||
####################################################################
|
####################################################################
|
||||||
|
@ -131,7 +109,7 @@ merged_df3_comp = all_plot_dfs[[4]]
|
||||||
# Data for logoplots
|
# Data for logoplots
|
||||||
####################################################################
|
####################################################################
|
||||||
|
|
||||||
source("logo_data.R")
|
source(paste0(plot_script_path, "logo_data.R"))
|
||||||
|
|
||||||
s1 = c("\nSuccessfully sourced logo_data.R")
|
s1 = c("\nSuccessfully sourced logo_data.R")
|
||||||
cat(s1)
|
cat(s1)
|
||||||
|
@ -142,7 +120,7 @@ cat(s1)
|
||||||
|
|
||||||
#source("other_plots_data.R")
|
#source("other_plots_data.R")
|
||||||
|
|
||||||
source("dm_om_data.R")
|
source(paste0(plot_script_path, "dm_om_data.R"))
|
||||||
|
|
||||||
s2 = c("\nSuccessfully sourced other_plots_data.R")
|
s2 = c("\nSuccessfully sourced other_plots_data.R")
|
||||||
cat(s2)
|
cat(s2)
|
||||||
|
@ -151,7 +129,7 @@ cat(s2)
|
||||||
# Data for Lineage barplots: WF and LF dfs
|
# Data for Lineage barplots: WF and LF dfs
|
||||||
####################################################################
|
####################################################################
|
||||||
|
|
||||||
source("lineage_data.R")
|
source(paste0(plot_script_path, "lineage_data.R"))
|
||||||
|
|
||||||
s3 = c("\nSuccessfully sourced lineage_data.R")
|
s3 = c("\nSuccessfully sourced lineage_data.R")
|
||||||
cat(s3)
|
cat(s3)
|
||||||
|
@ -160,7 +138,7 @@ cat(s3)
|
||||||
# Data for corr plots:
|
# Data for corr plots:
|
||||||
####################################################################
|
####################################################################
|
||||||
# make sure the above script works because merged_df2_combined is needed
|
# make sure the above script works because merged_df2_combined is needed
|
||||||
source("corr_data.R")
|
source(paste0(plot_script_path, "corr_data.R"))
|
||||||
|
|
||||||
s4 = c("\nSuccessfully sourced corr_data.R")
|
s4 = c("\nSuccessfully sourced corr_data.R")
|
||||||
cat(s4)
|
cat(s4)
|
||||||
|
@ -168,7 +146,7 @@ cat(s4)
|
||||||
########################################################################
|
########################################################################
|
||||||
# End of script
|
# End of script
|
||||||
########################################################################
|
########################################################################
|
||||||
if ( all( length(s1), length(s2), length(s3), length(s4) ) >0 ){
|
if ( all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){
|
||||||
cat(
|
cat(
|
||||||
"\n##################################################"
|
"\n##################################################"
|
||||||
, "\nSuccessful: get_plotting_dfs.R worked!"
|
, "\nSuccessful: get_plotting_dfs.R worked!"
|
||||||
|
@ -181,45 +159,18 @@ if ( all( length(s1), length(s2), length(s3), length(s4) ) >0 ){
|
||||||
}
|
}
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
# clear excess variables
|
# clear excess variables: from the global environment
|
||||||
rm(c1, c2, c3, c4, check1
|
|
||||||
, curr_count, curr_total
|
# Collect the names of temporary helper variables that currently exist in the
# global environment. The patterns are kept exactly as before; note that a
# trailing `_*` / `o*` means "zero or more" of that character, so e.g.
# "pivot_cols_*" also matches a variable called `pivot_cols`.
vars0 = ls(envir = .GlobalEnv)[grepl("curr_*", ls(envir = .GlobalEnv))]
vars1 = ls(envir = .GlobalEnv)[grepl("^cols_to*", ls(envir = .GlobalEnv))]
vars2 = ls(envir = .GlobalEnv)[grepl("pivot_cols_*", ls(envir = .GlobalEnv))]
vars3 = ls(envir = .GlobalEnv)[grepl("expected_*", ls(envir = .GlobalEnv))]

# rm(vars0) would only delete the character vector `vars0` itself, not the
# objects whose names it holds. Names held in character vectors must be
# passed through `list =`. intersect() drops names that do not exist, so no
# "object not found" warnings are raised for them.
rm(list = intersect(c("c1"
                      , "fact_cols"
                      , "infile_metadata"
                      , "infile_params"
                      , vars0
                      , vars1
                      , vars2
                      , vars3)
                    , ls(envir = .GlobalEnv))
   , envir = .GlobalEnv)

# finally drop the helper vectors themselves
rm(vars0
   , vars1
   , vars2
   , vars3)
|
||||||
)
|
|
||||||
|
|
||||||
rm(pivot_cols
|
|
||||||
, pivot_cols_deepddg
|
|
||||||
, pivot_cols_duet
|
|
||||||
, pivot_cols_dynamut
|
|
||||||
, pivot_cols_dynamut2
|
|
||||||
, pivot_cols_encomddg
|
|
||||||
, pivot_cols_encomdds
|
|
||||||
, pivot_cols_foldx
|
|
||||||
, pivot_cols_mcsm
|
|
||||||
, pivot_cols_mcsm_na
|
|
||||||
, pivot_cols_n
|
|
||||||
, pivot_cols_sdm)
|
|
||||||
|
|
||||||
rm(expected_cols
|
|
||||||
, expected_ncols
|
|
||||||
, expected_rows
|
|
||||||
, expected_rows_lf
|
|
||||||
, fact_cols)
|
|
||||||
|
|
||||||
|
|
|
@ -140,3 +140,4 @@ wide_df_or_mult = wide_df_or_mult[,-1]
|
||||||
str(wide_df_or_mult)
|
str(wide_df_or_mult)
|
||||||
|
|
||||||
position_or_mult = as.numeric(colnames(wide_df_or_mult))
|
position_or_mult = as.numeric(colnames(wide_df_or_mult))
|
||||||
|
|
||||||
|
|
|
@ -56,6 +56,8 @@ logo_combined_labelled = "logo_combined_labelled.svg"
|
||||||
plot_logo_combined_labelled = paste0(plotdir,"/", logo_combined_labelled)
|
plot_logo_combined_labelled = paste0(plotdir,"/", logo_combined_labelled)
|
||||||
|
|
||||||
#########################################################
|
#########################################################
|
||||||
|
#logo_or_mult_p + theme_dark()
|
||||||
|
#logo_or_mult_p + theme(plot.background = element_rect(fill = "black"))
|
||||||
|
|
||||||
#==================================
|
#==================================
|
||||||
# Output
|
# Output
|
||||||
|
@ -124,8 +126,24 @@ print(logo_logOR)
|
||||||
#*****************************
|
#*****************************
|
||||||
# Mutant logo plot: >1 nsSNP
|
# Mutant logo plot: >1 nsSNP
|
||||||
#******************************
|
#******************************
|
||||||
|
# Colour schemes that may be selected for the logo plots; the chosen value is
# stored in my_logo_col.
aa_col_choices = c('chemistry', 'hydrophobicity', 'clustalx', 'taylor')
my_logo_col = aa_col_choices[[1]]

# Pick the background colour (theme_bgc) and the font colour (font_bgc) that
# go with the selected scheme.
# The scheme has to be tested with %in% against the individual names:
# comparing with the single string 'clustalx || taylor' never matches, which
# would leave theme_bgc and font_bgc undefined.
if (my_logo_col %in% c('clustalx', 'taylor')) {
  cat("\nSelected colour scheme:", my_logo_col
      , "\nUsing black theme\n")
  theme_bgc = "black"
  font_bgc = "white"
} else if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
  cat('\nSelected colour scheme:', my_logo_col
      , "\nUsing grey theme")
  theme_bgc = "grey"
  font_bgc = "black"
} else {
  # fail early instead of continuing with undefined theme variables
  stop("Unknown logo colour scheme: ", my_logo_col, call. = FALSE)
}
|
||||||
|
|
||||||
p0 = ggseqlogo(tab_mt
|
p0 = ggseqlogo(tab_mt
|
||||||
, method = 'custom'
|
, method = 'custom'
|
||||||
|
, col_scheme = my_logo_col
|
||||||
, seq_type = 'aa') +
|
, seq_type = 'aa') +
|
||||||
#ylab('my custom height') +
|
#ylab('my custom height') +
|
||||||
theme(axis.text.x = element_blank()) +
|
theme(axis.text.x = element_blank()) +
|
||||||
|
@ -143,9 +161,12 @@ cat('\nDone: p0')
|
||||||
mut_logo_p = p0 + theme(legend.position = "none"
|
mut_logo_p = p0 + theme(legend.position = "none"
|
||||||
, legend.title = element_blank()
|
, legend.title = element_blank()
|
||||||
, legend.text = element_text(size = 20)
|
, legend.text = element_text(size = 20)
|
||||||
, axis.text.x = element_text(size = 14, angle = 90)
|
, axis.text.x = element_text(size = 14
|
||||||
, axis.text.y = element_blank())
|
, angle = 90
|
||||||
#mut_logo_p
|
, colour = font_bgc)
|
||||||
|
, axis.text.y = element_blank()
|
||||||
|
, plot.background = element_rect(fill = theme_bgc))
|
||||||
|
mut_logo_p
|
||||||
cat('\nDone: p0+mut_logo_p')
|
cat('\nDone: p0+mut_logo_p')
|
||||||
|
|
||||||
#*************************
|
#*************************
|
||||||
|
@ -154,9 +175,7 @@ cat('\nDone: p0+mut_logo_p')
|
||||||
p2 = ggseqlogo(tab_wt
|
p2 = ggseqlogo(tab_wt
|
||||||
, method = 'custom'
|
, method = 'custom'
|
||||||
, seq_type = 'aa'
|
, seq_type = 'aa'
|
||||||
#, col_scheme = "taylor"
|
, col_scheme = my_logo_col) +
|
||||||
#, col_scheme = chemistry2
|
|
||||||
) +
|
|
||||||
#ylab('my custom height') +
|
#ylab('my custom height') +
|
||||||
theme(text=element_text(family="FreeSans"))+
|
theme(text=element_text(family="FreeSans"))+
|
||||||
theme(axis.text.x = element_blank()
|
theme(axis.text.x = element_blank()
|
||||||
|
@ -185,7 +204,11 @@ cat('\nDone: wt_logo_p')
|
||||||
#***********************
|
#***********************
|
||||||
# Logo OR >1 nsSNP
|
# Logo OR >1 nsSNP
|
||||||
#***********************
|
#***********************
|
||||||
logo_or_mult_p = ggseqlogo(wide_df_or_mult, method="custom", seq_type="aa") + ylab("my custom height") +
|
logo_or_mult_p = ggseqlogo(wide_df_or_mult
|
||||||
|
, method = "custom"
|
||||||
|
, col_scheme = my_logo_col
|
||||||
|
, seq_type="aa") +
|
||||||
|
ylab("my custom height") +
|
||||||
theme(axis.text.x = element_text(size = 14
|
theme(axis.text.x = element_text(size = 14
|
||||||
, angle = 90
|
, angle = 90
|
||||||
, hjust = 1
|
, hjust = 1
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue