repurposing corr_data.R into a function to allow required params to be passed in
This commit is contained in:
parent
7317156bba
commit
5346431256
4 changed files with 126 additions and 50 deletions
|
@ -8,11 +8,35 @@
|
||||||
|
|
||||||
require("getopt", quietly = TRUE) # cmd parse arguments
|
require("getopt", quietly = TRUE) # cmd parse arguments
|
||||||
|
|
||||||
|
if (!require ("DT")){
|
||||||
|
install.packages("DT")
|
||||||
|
library(DT)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!require ("plyr")){
|
||||||
|
install.packages("plyr")
|
||||||
|
library(plyr)
|
||||||
|
}
|
||||||
|
|
||||||
if (!require("tidyverse")) {
|
if (!require("tidyverse")) {
|
||||||
install.packages("tidyverse", dependencies = TRUE)
|
install.packages("tidyverse", dependencies = TRUE)
|
||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#---------------------------
|
||||||
|
# covered by tidyverse
|
||||||
|
|
||||||
|
# if (!require("ggplot2")) {
|
||||||
|
# install.packages("ggplot2", dependencies = TRUE)
|
||||||
|
# library(ggplot2)
|
||||||
|
# }
|
||||||
|
|
||||||
|
# if (!require ("dplyr")){
|
||||||
|
# install.packages("dplyr")
|
||||||
|
# library(dplyr)
|
||||||
|
# }
|
||||||
|
#-----------------------------
|
||||||
|
|
||||||
if (!require("shiny")) {
|
if (!require("shiny")) {
|
||||||
install.packages("shiny", dependencies = TRUE)
|
install.packages("shiny", dependencies = TRUE)
|
||||||
library(shiny)
|
library(shiny)
|
||||||
|
@ -33,26 +57,6 @@ if (!require("ggridges")) {
|
||||||
library(ggridges)
|
library(ggridges)
|
||||||
}
|
}
|
||||||
|
|
||||||
# if (!require("ggplot2")) {
|
|
||||||
# install.packages("ggplot2", dependencies = TRUE)
|
|
||||||
# library(ggplot2)
|
|
||||||
# }
|
|
||||||
|
|
||||||
# if (!require ("dplyr")){
|
|
||||||
# install.packages("dplyr")
|
|
||||||
# library(dplyr)
|
|
||||||
# }
|
|
||||||
|
|
||||||
if (!require ("DT")){
|
|
||||||
install.packages("DT")
|
|
||||||
library(DT)
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!require ("plyr")){
|
|
||||||
install.packages("plyr")
|
|
||||||
library(plyr)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Install
|
# Install
|
||||||
#if(!require(devtools)) install.packages("devtools")
|
#if(!require(devtools)) install.packages("devtools")
|
||||||
#devtools::install_github("kassambara/ggcorrplot")
|
#devtools::install_github("kassambara/ggcorrplot")
|
||||||
|
@ -188,3 +192,17 @@ map(paste0(func_path, source_files), source) # source all your R scripts!
|
||||||
|
|
||||||
# set plot script dir
|
# set plot script dir
|
||||||
plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/"
|
plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/"
|
||||||
|
|
||||||
|
##################################################
|
||||||
|
|
||||||
|
# Function name clashes between plyr and dplyr
|
||||||
|
# # loading dplyr after plyr causes issues
|
||||||
|
# if("dplyr" %in% (.packages())){
|
||||||
|
# detach("package:dplyr", unload=TRUE)
|
||||||
|
# detach("package:plyr", unload=TRUE)
|
||||||
|
# }
|
||||||
|
# library(plyr)
|
||||||
|
# library(dplyr)
|
||||||
|
|
||||||
|
# another solution is to use requireNamespace() instead of library()
|
||||||
|
# so its function names don't collide with dplyr's.
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
#########################################################
|
#########################################################
|
||||||
# TASK: Script to format data for corr plots
|
# TASK: Script to format data for corr plots
|
||||||
#########################################################
|
#########################################################
|
||||||
|
#library(dplyr)
|
||||||
|
|
||||||
#=================================================
|
#=================================================
|
||||||
# Data for Corrplots
|
# Data for Corrplots
|
||||||
|
@ -12,6 +13,10 @@ cat("\n=========================================="
|
||||||
|
|
||||||
# use data
|
# use data
|
||||||
#merged_df2
|
#merged_df2
|
||||||
|
geneL_normal = c("pnca")
|
||||||
|
geneL_na_dy = c("gid")
|
||||||
|
geneL_na = c("rpob")
|
||||||
|
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||||
|
|
||||||
#----------------------------
|
#----------------------------
|
||||||
# columns for corr plots:PS
|
# columns for corr plots:PS
|
||||||
|
@ -19,11 +24,55 @@ cat("\n=========================================="
|
||||||
# NOTE: you can add mcsm_ppi column as well, and it will only select what it can find!
|
# NOTE: you can add mcsm_ppi column as well, and it will only select what it can find!
|
||||||
big_df_colnames = data.frame(names(merged_df2))
|
big_df_colnames = data.frame(names(merged_df2))
|
||||||
|
|
||||||
corr_cols_select <- c("mutationinformation", drug, "mutation_info_labels"
|
core_cols = c("mutationinformation", drug, "mutation_info_labels"
|
||||||
, "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa"
|
, "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa"
|
||||||
, "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af"
|
, "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af"
|
||||||
, "deepddg", "ddg_dynamut", "ddg_dynamut2", "mcsm_na_affinity"
|
, "deepddg" , "ddg_dynamut2"
|
||||||
, "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet", "ligand_distance")
|
, "consurf_score"
|
||||||
|
#, "consurf_scaled"
|
||||||
|
, "snap2_score"
|
||||||
|
#, "snap2_scaled", "snap2_accuracy_pc"
|
||||||
|
, "ligand_distance")
|
||||||
|
|
||||||
|
if (tolower(gene)%in%geneL_normal){
|
||||||
|
corr_cols_select = core_cols
|
||||||
|
}
|
||||||
|
if (tolower(gene)%in%geneL_na_dy){
|
||||||
|
additional_cols = c("mcsm_na_affinity"
|
||||||
|
, "ddg_dynamut"
|
||||||
|
, "ddg_encom", "dds_encom"
|
||||||
|
, "ddg_mcsm", "ddg_sdm"
|
||||||
|
, "ddg_duet"
|
||||||
|
#, "mcsm_na_scaled"
|
||||||
|
#, "ddg_dynamut_scaled"
|
||||||
|
#, "ddg_encom_scaled", "dds_encom_scaled"
|
||||||
|
#, "ddg_mcsm_scaled", "ddg_sdm_scaled"
|
||||||
|
#, "ddg_duet_scaled"
|
||||||
|
)
|
||||||
|
|
||||||
|
corr_cols_select = c(core_cols, additional_cols)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tolower(gene)%in%geneL_na){
|
||||||
|
additional_cols = c("mcsm_na_affinity"
|
||||||
|
#, "mcsm_na_scaled"
|
||||||
|
)
|
||||||
|
|
||||||
|
corr_cols_select = c(core_cols, additional_cols)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tolower(gene)%in%geneL_ppi2){
|
||||||
|
additional_cols = c("mcsm_ppi2_affinity")
|
||||||
|
corr_cols_select = c(core_cols, additional_cols)
|
||||||
|
}
|
||||||
|
|
||||||
|
# corr_cols_select <- c("mutationinformation", drug, "mutation_info_labels"
|
||||||
|
# , "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa"
|
||||||
|
# , "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af"
|
||||||
|
# , "deepddg", "ddg_dynamut", "ddg_dynamut2", "mcsm_na_affinity"
|
||||||
|
# , "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet", "ligand_distance")
|
||||||
|
|
||||||
#===========================
|
#===========================
|
||||||
# Corr data for plots: PS
|
# Corr data for plots: PS
|
||||||
|
@ -36,9 +85,8 @@ corr_df_m2 = merged_df2[,colnames(merged_df2)%in%corr_cols_select]
|
||||||
# formatting: some cols
|
# formatting: some cols
|
||||||
# Add pretty colnames
|
# Add pretty colnames
|
||||||
#-----------------------
|
#-----------------------
|
||||||
corr_df_m2_f <- corr_df_m2 %>%
|
corr_df_m2_f <- corr_df_m2 %>% dplyr::rename(
|
||||||
rename(
|
'DUET' = duet_stability_change
|
||||||
DUET = duet_stability_change
|
|
||||||
, 'mCSM-lig' = ligand_affinity_change
|
, 'mCSM-lig' = ligand_affinity_change
|
||||||
, FoldX = ddg_foldx
|
, FoldX = ddg_foldx
|
||||||
, DeepDDG = deepddg
|
, DeepDDG = deepddg
|
||||||
|
|
|
@ -124,32 +124,42 @@ cat(s1)
|
||||||
####################################################################
|
####################################################################
|
||||||
# Data for DM OM Plots: Long format dfs
|
# Data for DM OM Plots: Long format dfs
|
||||||
####################################################################
|
####################################################################
|
||||||
|
|
||||||
#source("other_plots_data.R")
|
#source("other_plots_data.R")
|
||||||
|
|
||||||
#source(paste0(plot_script_path, "dm_om_data.R"))
|
#source(paste0(plot_script_path, "dm_om_data.R"))
|
||||||
#
|
|
||||||
#s2 = c("\nSuccessfully sourced other_plots_data.R")
|
#s2 = c("\nSuccessfully sourced other_plots_data.R")
|
||||||
#cat(s2)
|
#cat(s2)
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# Data for Lineage barplots: WF and LF dfs
|
# Data for Lineage barplots: WF and LF dfs
|
||||||
####################################################################
|
####################################################################
|
||||||
#
|
|
||||||
# source(paste0(plot_script_path, "lineage_data.R"))
|
source(paste0(plot_script_path, "lineage_data.R"))
|
||||||
#
|
|
||||||
# s3 = c("\nSuccessfully sourced lineage_data.R")
|
s3 = c("\nSuccessfully sourced lineage_data.R")
|
||||||
# cat(s3)
|
cat(s3)
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# Data for corr plots:
|
# Data for corr plots:
|
||||||
####################################################################
|
####################################################################
|
||||||
# make sure the above script works because merged_df2_combined is needed
|
# make sure the above script works because merged_df2_combined is needed
|
||||||
#source(paste0(plot_script_path, "corr_data.R"))
|
#source(paste0(plot_script_path, "corr_data.R"))
|
||||||
#
|
|
||||||
#s4 = c("\nSuccessfully sourced corr_data.R")
|
#s4 = c("\nSuccessfully sourced corr_data.R")
|
||||||
#cat(s4)
|
#cat(s4)
|
||||||
|
|
||||||
|
# Moved "logo_data.R" to redundant/
|
||||||
|
#-----------------------------------------
|
||||||
|
# Replaced above with my function
|
||||||
|
# corr_data_extract()
|
||||||
|
|
||||||
|
corr_df_m3_f = corr_data_extract(merged_df3, extract_scaled_cols = F)
|
||||||
|
head(corr_df_m3_f)
|
||||||
|
|
||||||
|
corr_df_m2_f = corr_data_extract(merged_df2, extract_scaled_cols = F)
|
||||||
|
head(corr_df_m2_f)
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
# End of script
|
# End of script
|
||||||
########################################################################
|
########################################################################
|
||||||
|
@ -164,7 +174,7 @@ cat(s1)
|
||||||
# , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
|
# , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
|
||||||
# , "\n###################################################\n" )
|
# , "\n###################################################\n" )
|
||||||
# }
|
# }
|
||||||
#
|
|
||||||
########################################################################
|
########################################################################
|
||||||
# clear excess variables from the global environment
|
# clear excess variables from the global environment
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
# working dir and loading libraries
|
# working dir and loading libraries
|
||||||
getwd()
|
getwd()
|
||||||
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
#setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||||
getwd()
|
getwd()
|
||||||
|
|
||||||
#source("~/git/LSHTM_analysis/scripts/Header_TT.R")
|
#source("~/git/LSHTM_analysis/scripts/Header_TT.R")
|
||||||
|
@ -14,11 +14,11 @@ library(ggplot2)
|
||||||
library(data.table)
|
library(data.table)
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
source("combining_dfs_plotting.R")
|
#source("combining_dfs_plotting.R")
|
||||||
|
|
||||||
rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
|
#rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
|
||||||
, merged_df3_comp, merged_df3_comp_lig
|
# , merged_df3_comp, merged_df3_comp_lig
|
||||||
, my_df_u, my_df_u_lig)
|
# , my_df_u, my_df_u_lig)
|
||||||
|
|
||||||
|
|
||||||
cols_to_select = c("mutation", "mutationinformation"
|
cols_to_select = c("mutation", "mutationinformation"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue