diff --git a/scripts/functions/combining_dfs_plotting.R b/scripts/functions/combining_dfs_plotting.R index 107c114..60731ad 100644 --- a/scripts/functions/combining_dfs_plotting.R +++ b/scripts/functions/combining_dfs_plotting.R @@ -21,7 +21,7 @@ # 1) large combined df including NAs for AF, OR,etc # Dim: same no. of rows as gene associated meta_data_with_AFandOR # 2) small combined df including NAs for AF, OR, etc. -# Dim: same as mcsm data +# Dim: same as mcsm data or foldX # 3) large combined df excluding NAs # Dim: dim(#1) - na_count_df2 # 4) small combined df excluding NAs @@ -31,10 +31,13 @@ # 6) LIGAND small combined df excluding NAs # Dim: dim() #========================================================== +#lig_dist_colname = 'ligand_distance' or global var LigDist_colname +#lig_dist_cutoff = 10 or global var LigDist_cutoff + combining_dfs_plotting <- function( my_df_u , gene_metadata - , lig_dist_colname = 'ligand_distance' - , lig_dist_cutoff = 10){ + , lig_dist_colname = '' + , lig_dist_cutoff = ''){ # counting NAs in AF, OR cols # or_mychisq diff --git a/scripts/functions/plotting_data.R b/scripts/functions/plotting_data.R index faaebca..e32813b 100755 --- a/scripts/functions/plotting_data.R +++ b/scripts/functions/plotting_data.R @@ -16,9 +16,12 @@ library(dplyr) ## my_df_u_lig ## dup_muts #======================================================== +#lig_dist_colname = 'ligand_distance' or global var LigDist_colname +#lig_dist_cutoff = 10 or global var LigDist_cutoff + plotting_data <- function(df - , lig_dist_colname = 'ligand_distance' - , lig_dist_cutoff = 10) { + , lig_dist_colname = '' + , lig_dist_cutoff = '') { my_df = data.frame() my_df_u = data.frame() my_df_u_lig = data.frame() diff --git a/scripts/functions/plotting_globals.R b/scripts/functions/plotting_globals.R index c28047e..b2a29b9 100644 --- a/scripts/functions/plotting_globals.R +++ b/scripts/functions/plotting_globals.R @@ -23,12 +23,16 @@ import_dirs <- function(drug_name, gene_name) { 
dr_muts_col <<- paste0('dr_mutations_', drug_name) other_muts_col <<- paste0('other_mutations_', drug_name) - resistance_col <<- "drtype" gene_match <<- paste0(gene_name,"_p.") } -# other globals +# Other globals +#===================== +# Resistance colname +#===================== +resistance_col <<- "drtype" + #=============================== # mcsm ligand distance cut off #=============================== @@ -39,7 +43,6 @@ LigDist_cutoff <<- 10 # Angstroms symbol #================== angstroms_symbol <<- "\u212b" -#cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n")) #=============== # Delta symbol diff --git a/scripts/plotting/Header_TT.R b/scripts/plotting/Header_TT.R index 2fa892c..0b78307 100755 --- a/scripts/plotting/Header_TT.R +++ b/scripts/plotting/Header_TT.R @@ -186,3 +186,7 @@ func_path = "~/git/LSHTM_analysis/scripts/functions/" source_files <- list.files(func_path, "\\.R$") # locate all .R files map(paste0(func_path, source_files), source) # source all your R scripts! 
+# set plot script dir +plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/" + + diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index 23a99c7..56e8a67 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -12,20 +12,24 @@ source("/home/tanu/git/LSHTM_analysis/scripts/plotting/Header_TT.R") # in from other scripts # to call this #******************** +# set drug and gene name -#==================== -# variables for lig -#==================== +#========================================== +# variables for lig: +# comes from functions/plotting_globals.R +#========================================== -#LigDist_colname = "ligand_distance" -#LigDist_cutoff = 10 +cat("\nGlobal variables for Ligand:" + , "\nligand distance colname:", LigDist_colname + , "\nligand distance cut off:", LigDist_cutoff) #=========== # input #=========== -#--------------------- +#-------------------------------------------- # call: import_dirs() -#--------------------- +# comes from functions/plotting_globals.R +#-------------------------------------------- import_dirs(drug, gene) #--------------------------- @@ -72,6 +76,8 @@ gene_metadata <- read.csv(infile_metadata , stringsAsFactors = F , header = T) +cat("\nDim of meta data file: ", dim(gene_metadata)) + all_plot_dfs = combining_dfs_plotting(my_df_u , gene_metadata , lig_dist_colname = LigDist_colname @@ -82,34 +88,6 @@ merged_df3 = all_plot_dfs[[2]] merged_df2_comp = all_plot_dfs[[3]] merged_df3_comp = all_plot_dfs[[4]] #====================================================================== -#TODO: Think! 
MOVE TO COMBINE or singular file for deepddg - -#============================ -# adding deepddg scaled values -# scale data b/w -1 and 1 -#============================ -# n = which(colnames(merged_df3) == "deepddg"); n -# -# my_min = min(merged_df3[,n]); my_min -# my_max = max(merged_df3[,n]); my_max -# -# merged_df3$deepddg_scaled = ifelse(merged_df3[,n] < 0 -# , merged_df3[,n]/abs(my_min) -# , merged_df3[,n]/my_max) -# # sanity check -# my_min = min(merged_df3$deepddg_scaled); my_min -# my_max = max(merged_df3$deepddg_scaled); my_max -# -# if (my_min == -1 && my_max == 1){ -# cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1" -# #, "\nProceeding with assigning deep outcome category") -# , "\n") -# }else{ -# cat("\nFAIL: could not scale DeepDDG ddg values" -# , "Aborting!") -# } -# - #################################################################### # Data for combining other dfs #################################################################### @@ -131,7 +109,7 @@ merged_df3_comp = all_plot_dfs[[4]] # Data for logoplots #################################################################### -source("logo_data.R") +source(paste0(plot_script_path, "logo_data.R")) s1 = c("\nSuccessfully sourced logo_data.R") cat(s1) @@ -142,7 +120,7 @@ cat(s1) #source("other_plots_data.R") -source("dm_om_data.R") +source(paste0(plot_script_path, "dm_om_data.R")) s2 = c("\nSuccessfully sourced other_plots_data.R") cat(s2) @@ -151,7 +129,7 @@ cat(s2) # Data for Lineage barplots: WF and LF dfs #################################################################### -source("lineage_data.R") +source(paste0(plot_script_path, "lineage_data.R")) s3 = c("\nSuccessfully sourced lineage_data.R") cat(s3) @@ -160,7 +138,7 @@ cat(s3) # Data for corr plots: #################################################################### # make sure the above script works because merged_df2_combined is needed -source("corr_data.R") +source(paste0(plot_script_path, "corr_data.R")) s4 = c("\nSuccessfully 
sourced corr_data.R") cat(s4) @@ -168,7 +146,7 @@ cat(s4) ######################################################################## # End of script ######################################################################## -if ( all( length(s1), length(s2), length(s3), length(s4) ) >0 ){ +if ( all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){ cat( "\n##################################################" , "\nSuccessful: get_plotting_dfs.R worked!" @@ -181,45 +159,18 @@ if ( all( length(s1), length(s2), length(s3), length(s4) ) >0 ){ } ######################################################################## -# clear excess variables -rm(c1, c2, c3, c4, check1 - , curr_count, curr_total - , cols_check - , cols_to_select - , cols_to_select_deepddg - , cols_to_select_duet - , cols_to_select_dynamut - , cols_to_select_dynamut2 - , cols_to_select_encomddg - , cols_to_select_encomdds - , cols_to_select_mcsm - , cols_to_select_mcsm_na - , cols_to_select_sdm +# clear excess variables: from the global environment + +vars0 = ls(envir = .GlobalEnv)[grepl("curr_*", ls(envir = .GlobalEnv))] +vars1 = ls(envir = .GlobalEnv)[grepl("^cols_to*", ls(envir = .GlobalEnv))] +vars2 = ls(envir = .GlobalEnv)[grepl("pivot_cols_*", ls(envir = .GlobalEnv))] +vars3 = ls(envir = .GlobalEnv)[grepl("expected_*", ls(envir = .GlobalEnv))] + +rm(c1 + , fact_cols , infile_metadata , infile_params - #, infilename_dynamut - #, infilename_dynamut2 - #, infilename_mcsm_f_snps - #, infilename_mcsm_na - ) - -rm(pivot_cols -, pivot_cols_deepddg -, pivot_cols_duet -, pivot_cols_dynamut -, pivot_cols_dynamut2 -, pivot_cols_encomddg -, pivot_cols_encomdds -, pivot_cols_foldx -, pivot_cols_mcsm -, pivot_cols_mcsm_na -, pivot_cols_n -, pivot_cols_sdm) - -rm(expected_cols -, expected_ncols -, expected_rows -, expected_rows_lf -, fact_cols) - - + , vars0 + , vars1 + , vars2 + , vars3) \ No newline at end of file diff --git a/scripts/plotting/logo_data.R b/scripts/plotting/logo_data.R index 7eaf1b6..87d503b 
100644 --- a/scripts/plotting/logo_data.R +++ b/scripts/plotting/logo_data.R @@ -140,3 +140,4 @@ wide_df_or_mult = wide_df_or_mult[,-1] str(wide_df_or_mult) position_or_mult = as.numeric(colnames(wide_df_or_mult)) + diff --git a/scripts/plotting/logo_plots.R b/scripts/plotting/logo_plots.R index 8b2e856..95ccfa4 100755 --- a/scripts/plotting/logo_plots.R +++ b/scripts/plotting/logo_plots.R @@ -56,6 +56,8 @@ logo_combined_labelled = "logo_combined_labelled.svg" plot_logo_combined_labelled = paste0(plotdir,"/", logo_combined_labelled) ######################################################### +#logo_or_mult_p + theme_dark() +#logo_or_mult_p + theme(plot.background = element_rect(fill = "black")) #================================== # Output @@ -124,8 +126,24 @@ print(logo_logOR) #***************************** # Mutant logo plot: >1 nsSNP #****************************** +aa_col_choices = c('chemistry', 'hydrophobicity', 'clustalx', 'taylor') +my_logo_col = aa_col_choices[[1]] + +if (my_logo_col == 'clustalx || taylor'){ + cat("\nSelected colour scheme:", my_logo_col + , "\nUsing black theme\n") + theme_bgc = "black" + font_bgc = "white" +} if (my_logo_col == 'chemistry || hydrophobicity') { + cat('\nSelected colour scheme:', my_logo_col + , "\nUsing grey theme") + theme_bgc = "grey" + font_bgc = "black" +} + p0 = ggseqlogo(tab_mt , method = 'custom' + , col_scheme = my_logo_col , seq_type = 'aa') + #ylab('my custom height') + theme(axis.text.x = element_blank()) + @@ -143,9 +161,12 @@ cat('\nDone: p0') mut_logo_p = p0 + theme(legend.position = "none" , legend.title = element_blank() , legend.text = element_text(size = 20) - , axis.text.x = element_text(size = 14, angle = 90) - , axis.text.y = element_blank()) -#mut_logo_p + , axis.text.x = element_text(size = 14 + , angle = 90 + , colour = font_bgc) + , axis.text.y = element_blank() + , plot.background = element_rect(fill = theme_bgc)) +mut_logo_p cat('\nDone: p0+mut_logo_p') #************************* @@ -154,9 +175,7 
@@ cat('\nDone: p0+mut_logo_p') p2 = ggseqlogo(tab_wt , method = 'custom' , seq_type = 'aa' - #, col_scheme = "taylor" - #, col_scheme = chemistry2 -) + + , col_scheme = my_logo_col) + #ylab('my custom height') + theme(text=element_text(family="FreeSans"))+ theme(axis.text.x = element_blank() @@ -185,7 +204,11 @@ cat('\nDone: wt_logo_p') #*********************** # Logo OR >1 nsSNP #*********************** -logo_or_mult_p = ggseqlogo(wide_df_or_mult, method="custom", seq_type="aa") + ylab("my custom height") + +logo_or_mult_p = ggseqlogo(wide_df_or_mult + , method = "custom" + , col_scheme = my_logo_col + , seq_type="aa") + + ylab("my custom height") + theme(axis.text.x = element_text(size = 14 , angle = 90 , hjust = 1