From 534643125617cef1fb3c72fa315a3979462ab6e7 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Sat, 29 Jan 2022 17:24:15 +0000 Subject: [PATCH] repurposing corr_data.R into a function to allow required params to be passed in --- scripts/Header_TT.R | 58 +++++++++++++++++--------- scripts/plotting/corr_data.R | 64 +++++++++++++++++++++++++---- scripts/plotting/get_plotting_dfs.R | 44 ++++++++++++-------- scripts/plotting/other_plots_data.R | 10 ++--- 4 files changed, 126 insertions(+), 50 deletions(-) diff --git a/scripts/Header_TT.R b/scripts/Header_TT.R index 1151e22..acb2e5e 100755 --- a/scripts/Header_TT.R +++ b/scripts/Header_TT.R @@ -8,11 +8,35 @@ require("getopt", quietly = TRUE) # cmd parse arguments +if (!require ("DT")){ + install.packages("DT") + library(DT) +} + +if (!require ("plyr")){ + install.packages("plyr") + library(plyr) +} + if (!require("tidyverse")) { install.packages("tidyverse", dependencies = TRUE) library(tidyverse) } +#--------------------------- +# covered by tidyverse + +# if (!require("ggplot2")) { +# install.packages("ggplot2", dependencies = TRUE) +# library(ggplot2) +# } + +# if (!require ("dplyr")){ +# install.packages("dplyr") +# library(dplyr) +# } +#----------------------------- + if (!require("shiny")) { install.packages("shiny", dependencies = TRUE) library(shiny) @@ -33,26 +57,6 @@ if (!require("ggridges")) { library(ggridges) } -# if (!require("ggplot2")) { -# install.packages("ggplot2", dependencies = TRUE) -# library(ggplot2) -# } - -# if (!require ("dplyr")){ -# install.packages("dplyr") -# library(dplyr) -# } - -if (!require ("DT")){ - install.packages("DT") - library(DT) -} - -if (!require ("plyr")){ - install.packages("plyr") - library(plyr) - } - # Install #if(!require(devtools)) install.packages("devtools") #devtools::install_github("kassambara/ggcorrplot") @@ -188,3 +192,17 @@ map(paste0(func_path, source_files), source) # source all your R scripts! # set plot script dir plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/" + +################################################## + +# Function name clashes with plyr and dplyr +# # loading dplyr after plyr causes issues +# if("dplyr" %in% (.packages())){ +# detach("package:dplyr", unload=TRUE) +# detach("package:plyr", unload=TRUE) +# } +# library(plyr) +# library(dplyr) + +# another solution is to requireNamespace() instead of library() +# so its function names don't collide with dplyr's. diff --git a/scripts/plotting/corr_data.R b/scripts/plotting/corr_data.R index 3120763..861e5da 100644 --- a/scripts/plotting/corr_data.R +++ b/scripts/plotting/corr_data.R @@ -2,6 +2,7 @@ ######################################################### # TASK: Script to format data for corr plots ######################################################### +#library(dplyr) #================================================= # Data for Corrplots @@ -12,6 +13,10 @@ cat("\n==========================================" # use data #merged_df2 +geneL_normal = c("pnca") +geneL_na_dy = c("gid") +geneL_na = c("rpob") +geneL_ppi2 = c("alr", "embb", "katg", "rpob") #---------------------------- # columns for corr plots:PS @@ -19,11 +24,55 @@ cat("\n==========================================" # NOTE: you can add mcsm_ppi column as well, and it will only select what it can find! big_df_colnames = data.frame(names(merged_df2)) -corr_cols_select <- c("mutationinformation", drug, "mutation_info_labels" - , "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa" - , "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af" - , "deepddg", "ddg_dynamut", "ddg_dynamut2", "mcsm_na_affinity" - , "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet", "ligand_distance") +core_cols = c("mutationinformation", drug, "mutation_info_labels" + , "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa" + , "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af" + , "deepddg" , "ddg_dynamut2" + , "consurf_score" + #, "consurf_scaled" + , "snap2_score" + #, "snap2_scaled", "snap2_accuracy_pc" + , "ligand_distance") + +if (tolower(gene)%in%geneL_normal){ + corr_cols_select = core_cols +} +if (tolower(gene)%in%geneL_na_dy){ + additional_cols = c("mcsm_na_affinity" + , "ddg_dynamut" + , "ddg_encom", "dds_encom" + , "ddg_mcsm", "ddg_sdm" + , "ddg_duet" + #, "mcsm_na_scaled" + #, "ddg_dynamut_scaled" + #, "ddg_encom_scaled", "dds_encom_scaled" + #, "ddg_mcsm_scaled", "ddg_sdm_scaled" + #, "ddg_duet_scaled" + ) + + corr_cols_select = c(core_cols, additional_cols) + +} + +if (tolower(gene)%in%geneL_na){ + additional_cols = c("mcsm_na_affinity" + #, "mcsm_na_scaled" + ) + + corr_cols_select = c(core_cols, additional_cols) + +} + +if (tolower(gene)%in%geneL_ppi2){ + additional_cols = c("mcsm_ppi2_affinity") + corr_cols_select = c(core_cols, additional_cols) +} + +# corr_cols_select <- c("mutationinformation", drug, "mutation_info_labels" +# , "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa" +# , "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af" +# , "deepddg", "ddg_dynamut", "ddg_dynamut2", "mcsm_na_affinity" +# , "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet", "ligand_distance") #=========================== # Corr data for plots: PS @@ -36,9 +85,8 @@ corr_df_m2 = merged_df2[,colnames(merged_df2)%in%corr_cols_select] # formatting: some cols # Add pretty colnames #----------------------- -corr_df_m2_f <- corr_df_m2 %>% - rename( - DUET = duet_stability_change +corr_df_m2_f <- corr_df_m2 %>% dplyr::rename( + 'DUET' = duet_stability_change , 'mCSM-lig' = ligand_affinity_change , FoldX = ddg_foldx , DeepDDG = deepddg diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index fd07df2..e7f631f 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -124,31 +124,41 @@ cat(s1) #################################################################### # Data for DM OM Plots: Long format dfs #################################################################### - #source("other_plots_data.R") -# source(paste0(plot_script_path, "dm_om_data.R")) -# -# s2 = c("\nSuccessfully sourced other_plots_data.R") -# cat(s2) +#source(paste0(plot_script_path, "dm_om_data.R")) + +#s2 = c("\nSuccessfully sourced other_plots_data.R") +#cat(s2) #################################################################### # Data for Lineage barplots: WF and LF dfs #################################################################### -# -# source(paste0(plot_script_path, "lineage_data.R")) -# -# s3 = c("\nSuccessfully sourced lineage_data.R") -# cat(s3) + +source(paste0(plot_script_path, "lineage_data.R")) + +s3 = c("\nSuccessfully sourced lineage_data.R") +cat(s3) #################################################################### # Data for corr plots: #################################################################### # make sure the above script works because merged_df2_combined is needed -# source(paste0(plot_script_path, "corr_data.R")) -# -# s4 = c("\nSuccessfully sourced corr_data.R") -# cat(s4) +#source(paste0(plot_script_path, "corr_data.R")) + +#s4 = c("\nSuccessfully sourced corr_data.R") +#cat(s4) + +# Moved "logo_data.R" to redundant/ +#----------------------------------------- +# Replaced above with my function +# corr_data_extract() + +corr_df_m3_f = corr_data_extract(merged_df3, extract_scaled_cols = F) +head(corr_df_m3_f) + +corr_df_m2_f = corr_data_extract(merged_df2, extract_scaled_cols = F) +head(corr_df_m2_f) ######################################################################## # End of script @@ -158,13 +168,13 @@ cat(s1) # "\n##################################################" # , "\nSuccessful: get_plotting_dfs.R worked!" # , "\n###################################################\n") -# } else { +# } else { # cat( # "\n#################################################" # , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check" # , "\n###################################################\n" ) -# } -# +# } + ######################################################################## # clear excess variables: from the global enviornment diff --git a/scripts/plotting/other_plots_data.R b/scripts/plotting/other_plots_data.R index a3e052f..3e2232e 100644 --- a/scripts/plotting/other_plots_data.R +++ b/scripts/plotting/other_plots_data.R @@ -6,7 +6,7 @@ #======================================================================= # working dir and loading libraries getwd() -setwd("~/git/LSHTM_analysis/scripts/plotting") +#setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() #source("~/git/LSHTM_analysis/scripts/Header_TT.R") @@ -14,11 +14,11 @@ library(ggplot2) library(data.table) library(dplyr) library(tidyverse) -source("combining_dfs_plotting.R") +#source("combining_dfs_plotting.R") -rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig - , merged_df3_comp, merged_df3_comp_lig - , my_df_u, my_df_u_lig) +#rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig +# , merged_df3_comp, merged_df3_comp_lig +# , my_df_u, my_df_u_lig) cols_to_select = c("mutation", "mutationinformation"