repurposing corr_data.R into a function to allow required params to be passed in

This commit is contained in:
Tanushree Tunstall 2022-01-29 17:24:15 +00:00
parent 7317156bba
commit 5346431256
4 changed files with 126 additions and 50 deletions

View file

@ -8,11 +8,35 @@
# Command-line argument parsing; attached quietly and never auto-installed here.
require("getopt", quietly = TRUE) # cmd parse arguments

# Attach DT and then plyr, installing either one first when it cannot be loaded.
invisible(lapply(c("DT", "plyr"), function(pkg_name) {
  if (!require(pkg_name, character.only = TRUE)) {
    install.packages(pkg_name)
    library(pkg_name, character.only = TRUE)
  }
}))

# tidyverse is attached after plyr; when missing it is installed together
# with its dependencies.
if (!require("tidyverse")) {
  install.packages("tidyverse", dependencies = TRUE)
  library(tidyverse)
}
#---------------------------
# covered by tidyverse
# if (!require("ggplot2")) {
# install.packages("ggplot2", dependencies = TRUE)
# library(ggplot2)
# }
# if (!require ("dplyr")){
# install.packages("dplyr")
# library(dplyr)
# }
#-----------------------------
if (!require("shiny")) {
install.packages("shiny", dependencies = TRUE)
library(shiny)
@ -33,26 +57,6 @@ if (!require("ggridges")) {
library(ggridges)
}
# if (!require("ggplot2")) {
# install.packages("ggplot2", dependencies = TRUE)
# library(ggplot2)
# }
# if (!require ("dplyr")){
# install.packages("dplyr")
# library(dplyr)
# }
# Attach DT and then plyr, installing either one first when it cannot be loaded.
invisible(lapply(c("DT", "plyr"), function(pkg_name) {
  if (!require(pkg_name, character.only = TRUE)) {
    install.packages(pkg_name)
    library(pkg_name, character.only = TRUE)
  }
}))
# Install
#if(!require(devtools)) install.packages("devtools")
#devtools::install_github("kassambara/ggcorrplot")
@ -188,3 +192,17 @@ map(paste0(func_path, source_files), source) # source all your R scripts!
# Directory holding the plotting scripts; the trailing slash is kept because
# callers build file paths from it with paste0().
plot_script_path <- "~/git/LSHTM_analysis/scripts/plotting/"
##################################################
# Function name clashes with plyr and dplyr
# # loading dplyr after plyr causes issues
# if("dplyr" %in% (.packages())){
# detach("package:dplyr", unload=TRUE)
# detach("package:plyr", unload=TRUE)
# }
# library(plyr)
# library(dplyr)
# another solution is to requireNamespace() instead of library()
# so its function names don't collide with dplyr's.

View file

@ -2,6 +2,7 @@
#########################################################
# TASK: Script to format data for corr plots
#########################################################
#library(dplyr)
#=================================================
# Data for Corrplots
@ -12,6 +13,10 @@ cat("\n=========================================="
# use data
#merged_df2
# Gene groups, as lower-case gene names. They are compared against
# tolower(gene) to decide which extra columns are selected for the corr plots.
geneL_normal <- "pnca"
geneL_na_dy <- "gid"
geneL_na <- "rpob"
geneL_ppi2 <- c("alr", "embb", "katg", "rpob")
#----------------------------
# columns for corr plots:PS
@ -19,11 +24,55 @@ cat("\n=========================================="
# NOTE: you can add mcsm_ppi column as well, and it will only select what it can find!
# One-column data frame listing every column name available in merged_df2.
big_df_colnames <- data.frame(names(merged_df2))

# Full column set for the corr plots; `drug` contributes the name of the
# drug column held in that variable.
corr_cols_select <- c(
  "mutationinformation", drug, "mutation_info_labels",
  "duet_stability_change", "ligand_affinity_change", "ddg_foldx",
  "asa", "rsa",
  "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher", "af",
  "deepddg", "ddg_dynamut", "ddg_dynamut2", "mcsm_na_affinity",
  "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet",
  "ligand_distance"
)
# Columns extracted for every gene. `drug` contributes the name of the drug
# column held in that variable.
core_cols <- c(
  "mutationinformation", drug, "mutation_info_labels",
  "duet_stability_change", "ligand_affinity_change", "ddg_foldx",
  "asa", "rsa",
  "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher", "af",
  "deepddg", "ddg_dynamut2",
  "consurf_score",
  #, "consurf_scaled"
  "snap2_score",
  #, "snap2_scaled", "snap2_accuracy_pc"
  "ligand_distance"
)

# Gene-specific additions are ACCUMULATED rather than overwritten: a gene can
# belong to more than one group ("rpob" is in both geneL_na and geneL_ppi2),
# and a later group must not discard the columns added by an earlier one.
additional_cols <- character(0)

if (tolower(gene) %in% geneL_na_dy) {
  additional_cols <- c(
    additional_cols,
    "mcsm_na_affinity",
    "ddg_dynamut",
    "ddg_encom", "dds_encom",
    "ddg_mcsm", "ddg_sdm",
    "ddg_duet"
    #, "mcsm_na_scaled"
    #, "ddg_dynamut_scaled"
    #, "ddg_encom_scaled", "dds_encom_scaled"
    #, "ddg_mcsm_scaled", "ddg_sdm_scaled"
    #, "ddg_duet_scaled"
  )
}

if (tolower(gene) %in% geneL_na) {
  additional_cols <- c(
    additional_cols,
    "mcsm_na_affinity"
    #, "mcsm_na_scaled"
  )
}

if (tolower(gene) %in% geneL_ppi2) {
  additional_cols <- c(additional_cols, "mcsm_ppi2_affinity")
}

# Only (re)assign the selection for a recognised gene, so any value of
# corr_cols_select set earlier is left alone for genes in none of the groups.
if (tolower(gene) %in% c(geneL_normal, geneL_na_dy, geneL_na, geneL_ppi2)) {
  corr_cols_select <- c(core_cols, unique(additional_cols))
}
# corr_cols_select <- c("mutationinformation", drug, "mutation_info_labels"
# , "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa"
# , "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af"
# , "deepddg", "ddg_dynamut", "ddg_dynamut2", "mcsm_na_affinity"
# , "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet", "ligand_distance")
#===========================
# Corr data for plots: PS
@ -36,9 +85,8 @@ corr_df_m2 = merged_df2[,colnames(merged_df2)%in%corr_cols_select]
# formatting: some cols
# Add pretty colnames
#-----------------------
corr_df_m2_f <- corr_df_m2 %>%
rename(
DUET = duet_stability_change
corr_df_m2_f <- corr_df_m2 %>% dplyr::rename(
'DUET' = duet_stability_change
, 'mCSM-lig' = ligand_affinity_change
, FoldX = ddg_foldx
, DeepDDG = deepddg

View file

@ -124,31 +124,41 @@ cat(s1)
####################################################################
# Data for DM OM Plots: Long format dfs
####################################################################
#source("other_plots_data.R")
# source(paste0(plot_script_path, "dm_om_data.R"))
#
# s2 = c("\nSuccessfully sourced other_plots_data.R")
# cat(s2)
#source(paste0(plot_script_path, "dm_om_data.R"))
#s2 = c("\nSuccessfully sourced other_plots_data.R")
#cat(s2)
####################################################################
# Data for Lineage barplots: WF and LF dfs
####################################################################
#
# source(paste0(plot_script_path, "lineage_data.R"))
#
# s3 = c("\nSuccessfully sourced lineage_data.R")
# cat(s3)
# Run lineage_data.R from the plotting script directory, then report success.
source(paste0(plot_script_path, "lineage_data.R"))

s3 <- "\nSuccessfully sourced lineage_data.R"
cat(s3)
####################################################################
# Data for corr plots:
####################################################################
# make sure the above script works because merged_df2_combined is needed
# source(paste0(plot_script_path, "corr_data.R"))
#
# s4 = c("\nSuccessfully sourced corr_data.R")
# cat(s4)
#source(paste0(plot_script_path, "corr_data.R"))
#s4 = c("\nSuccessfully sourced corr_data.R")
#cat(s4)
# Moved "logo_data.R" to redundant/
#-----------------------------------------
# Replaced above with my function
# corr_data_extract()
# Build the corr-plot data frames from merged_df3 and merged_df2 using
# corr_data_extract(). FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned, whereas FALSE is a reserved word.
corr_df_m3_f <- corr_data_extract(merged_df3, extract_scaled_cols = FALSE)
head(corr_df_m3_f)

corr_df_m2_f <- corr_data_extract(merged_df2, extract_scaled_cols = FALSE)
head(corr_df_m2_f)
########################################################################
# End of script
@ -158,13 +168,13 @@ cat(s1)
# "\n##################################################"
# , "\nSuccessful: get_plotting_dfs.R worked!"
# , "\n###################################################\n")
# } else {
# } else {
# cat(
# "\n#################################################"
# , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
# , "\n###################################################\n" )
# }
#
# }
########################################################################
# clear excess variables: from the global environment

View file

@ -6,7 +6,7 @@
#=======================================================================
# working dir and loading libraries
# Query the working directory before it is changed.
getwd()
# Switch to the plotting script directory; the relative source() call further
# down in this script ("combining_dfs_plotting.R") resolves against it.
setwd("~/git/LSHTM_analysis/scripts/plotting")
#setwd("~/git/LSHTM_analysis/scripts/plotting")
# Query the working directory again after the change.
getwd()
#source("~/git/LSHTM_analysis/scripts/Header_TT.R")
@ -14,11 +14,11 @@ library(ggplot2)
library(data.table)
library(dplyr)
library(tidyverse)
# Run combining_dfs_plotting.R; the bare file name is resolved against the
# current working directory.
source("combining_dfs_plotting.R")
#source("combining_dfs_plotting.R")

# Remove the listed data frames from the current environment.
rm(list = c(
  "merged_df2", "merged_df2_comp", "merged_df2_lig", "merged_df2_comp_lig",
  "merged_df3_comp", "merged_df3_comp_lig",
  "my_df_u", "my_df_u_lig"
))
#rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
# , merged_df3_comp, merged_df3_comp_lig
# , my_df_u, my_df_u_lig)
cols_to_select = c("mutation", "mutationinformation"