repurposing corr_data.R into a function to allow required params to be passed in

This commit is contained in:
Tanushree Tunstall 2022-01-29 17:24:15 +00:00
parent 7317156bba
commit 5346431256
4 changed files with 126 additions and 50 deletions

View file

@ -8,11 +8,35 @@
require("getopt", quietly = TRUE) # cmd parse arguments require("getopt", quietly = TRUE) # cmd parse arguments
if (!require ("DT")){
install.packages("DT")
library(DT)
}
if (!require ("plyr")){
install.packages("plyr")
library(plyr)
}
if (!require("tidyverse")) { if (!require("tidyverse")) {
install.packages("tidyverse", dependencies = TRUE) install.packages("tidyverse", dependencies = TRUE)
library(tidyverse) library(tidyverse)
} }
#---------------------------
# covered by tidyverse
# if (!require("ggplot2")) {
# install.packages("ggplot2", dependencies = TRUE)
# library(ggplot2)
# }
# if (!require ("dplyr")){
# install.packages("dplyr")
# library(dplyr)
# }
#-----------------------------
if (!require("shiny")) { if (!require("shiny")) {
install.packages("shiny", dependencies = TRUE) install.packages("shiny", dependencies = TRUE)
library(shiny) library(shiny)
@ -33,26 +57,6 @@ if (!require("ggridges")) {
library(ggridges) library(ggridges)
} }
# if (!require("ggplot2")) {
# install.packages("ggplot2", dependencies = TRUE)
# library(ggplot2)
# }
# if (!require ("dplyr")){
# install.packages("dplyr")
# library(dplyr)
# }
if (!require ("DT")){
install.packages("DT")
library(DT)
}
if (!require ("plyr")){
install.packages("plyr")
library(plyr)
}
# Install # Install
#if(!require(devtools)) install.packages("devtools") #if(!require(devtools)) install.packages("devtools")
#devtools::install_github("kassambara/ggcorrplot") #devtools::install_github("kassambara/ggcorrplot")
@ -188,3 +192,17 @@ map(paste0(func_path, source_files), source) # source all your R scripts!
# set plot script dir # set plot script dir
plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/" plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/"
##################################################
# Function name clashes with plyr and dplyr
# # loading plyr after dplyr causes issues (plyr must be loaded first)
# if("dplyr" %in% (.packages())){
# detach("package:dplyr", unload=TRUE)
# detach("package:plyr", unload=TRUE)
# }
# library(plyr)
# library(dplyr)
# Another solution is to use requireNamespace("plyr") instead of library(plyr),
# so that plyr's function names don't collide with dplyr's.

View file

@ -2,6 +2,7 @@
######################################################### #########################################################
# TASK: Script to format data for corr plots # TASK: Script to format data for corr plots
######################################################### #########################################################
#library(dplyr)
#================================================= #=================================================
# Data for Corrplots # Data for Corrplots
@ -12,6 +13,10 @@ cat("\n=========================================="
# use data # use data
#merged_df2 #merged_df2
geneL_normal = c("pnca")
geneL_na_dy = c("gid")
geneL_na = c("rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
#---------------------------- #----------------------------
# columns for corr plots:PS # columns for corr plots:PS
@ -19,11 +24,55 @@ cat("\n=========================================="
# NOTE: you can add mcsm_ppi column as well, and it will only select what it can find! # NOTE: you can add mcsm_ppi column as well, and it will only select what it can find!
big_df_colnames = data.frame(names(merged_df2)) big_df_colnames = data.frame(names(merged_df2))
corr_cols_select <- c("mutationinformation", drug, "mutation_info_labels" core_cols = c("mutationinformation", drug, "mutation_info_labels"
, "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa" , "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa"
, "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af" , "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af"
, "deepddg", "ddg_dynamut", "ddg_dynamut2", "mcsm_na_affinity" , "deepddg" , "ddg_dynamut2"
, "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet", "ligand_distance") , "consurf_score"
#, "consurf_scaled"
, "snap2_score"
#, "snap2_scaled", "snap2_accuracy_pc"
, "ligand_distance")
if (tolower(gene)%in%geneL_normal){
corr_cols_select = core_cols
}
if (tolower(gene)%in%geneL_na_dy){
additional_cols = c("mcsm_na_affinity"
, "ddg_dynamut"
, "ddg_encom", "dds_encom"
, "ddg_mcsm", "ddg_sdm"
, "ddg_duet"
#, "mcsm_na_scaled"
#, "ddg_dynamut_scaled"
#, "ddg_encom_scaled", "dds_encom_scaled"
#, "ddg_mcsm_scaled", "ddg_sdm_scaled"
#, "ddg_duet_scaled"
)
corr_cols_select = c(core_cols, additional_cols)
}
if (tolower(gene)%in%geneL_na){
additional_cols = c("mcsm_na_affinity"
#, "mcsm_na_scaled"
)
corr_cols_select = c(core_cols, additional_cols)
}
if (tolower(gene)%in%geneL_ppi2){
additional_cols = c("mcsm_ppi2_affinity")
corr_cols_select = c(core_cols, additional_cols)
}
# corr_cols_select <- c("mutationinformation", drug, "mutation_info_labels"
# , "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa"
# , "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af"
# , "deepddg", "ddg_dynamut", "ddg_dynamut2", "mcsm_na_affinity"
# , "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet", "ligand_distance")
#=========================== #===========================
# Corr data for plots: PS # Corr data for plots: PS
@ -36,9 +85,8 @@ corr_df_m2 = merged_df2[,colnames(merged_df2)%in%corr_cols_select]
# formatting: some cols # formatting: some cols
# Add pretty colnames # Add pretty colnames
#----------------------- #-----------------------
corr_df_m2_f <- corr_df_m2 %>% corr_df_m2_f <- corr_df_m2 %>% dplyr::rename(
rename( 'DUET' = duet_stability_change
DUET = duet_stability_change
, 'mCSM-lig' = ligand_affinity_change , 'mCSM-lig' = ligand_affinity_change
, FoldX = ddg_foldx , FoldX = ddg_foldx
, DeepDDG = deepddg , DeepDDG = deepddg

View file

@ -124,31 +124,41 @@ cat(s1)
#################################################################### ####################################################################
# Data for DM OM Plots: Long format dfs # Data for DM OM Plots: Long format dfs
#################################################################### ####################################################################
#source("other_plots_data.R") #source("other_plots_data.R")
# source(paste0(plot_script_path, "dm_om_data.R")) #source(paste0(plot_script_path, "dm_om_data.R"))
#
# s2 = c("\nSuccessfully sourced other_plots_data.R") #s2 = c("\nSuccessfully sourced other_plots_data.R")
# cat(s2) #cat(s2)
#################################################################### ####################################################################
# Data for Lineage barplots: WF and LF dfs # Data for Lineage barplots: WF and LF dfs
#################################################################### ####################################################################
#
# source(paste0(plot_script_path, "lineage_data.R")) source(paste0(plot_script_path, "lineage_data.R"))
#
# s3 = c("\nSuccessfully sourced lineage_data.R") s3 = c("\nSuccessfully sourced lineage_data.R")
# cat(s3) cat(s3)
#################################################################### ####################################################################
# Data for corr plots: # Data for corr plots:
#################################################################### ####################################################################
# make sure the above script works because merged_df2_combined is needed # make sure the above script works because merged_df2_combined is needed
# source(paste0(plot_script_path, "corr_data.R")) #source(paste0(plot_script_path, "corr_data.R"))
#
# s4 = c("\nSuccessfully sourced corr_data.R") #s4 = c("\nSuccessfully sourced corr_data.R")
# cat(s4) #cat(s4)
# Moved "logo_data.R" to redundant/
#-----------------------------------------
# Replaced above with my function
# corr_data_extract()
corr_df_m3_f = corr_data_extract(merged_df3, extract_scaled_cols = F)
head(corr_df_m3_f)
corr_df_m2_f = corr_data_extract(merged_df2, extract_scaled_cols = F)
head(corr_df_m2_f)
######################################################################## ########################################################################
# End of script # End of script
@ -164,7 +174,7 @@ cat(s1)
# , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check" # , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
# , "\n###################################################\n" ) # , "\n###################################################\n" )
# } # }
#
######################################################################## ########################################################################
# clear excess variables: from the global environment

View file

@ -6,7 +6,7 @@
#======================================================================= #=======================================================================
# working dir and loading libraries # working dir and loading libraries
getwd() getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting") #setwd("~/git/LSHTM_analysis/scripts/plotting")
getwd() getwd()
#source("~/git/LSHTM_analysis/scripts/Header_TT.R") #source("~/git/LSHTM_analysis/scripts/Header_TT.R")
@ -14,11 +14,11 @@ library(ggplot2)
library(data.table) library(data.table)
library(dplyr) library(dplyr)
library(tidyverse) library(tidyverse)
source("combining_dfs_plotting.R") #source("combining_dfs_plotting.R")
rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig #rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
, merged_df3_comp, merged_df3_comp_lig # , merged_df3_comp, merged_df3_comp_lig
, my_df_u, my_df_u_lig) # , my_df_u, my_df_u_lig)
cols_to_select = c("mutation", "mutationinformation" cols_to_select = c("mutation", "mutationinformation"