repurposing corr_data.R into a function to allow required params to be passed in
This commit is contained in:
parent
7317156bba
commit
5346431256
4 changed files with 126 additions and 50 deletions
|
@ -8,11 +8,35 @@
|
|||
|
||||
require("getopt", quietly = TRUE) # cmd parse arguments
|
||||
|
||||
if (!require ("DT")){
|
||||
install.packages("DT")
|
||||
library(DT)
|
||||
}
|
||||
|
||||
if (!require ("plyr")){
|
||||
install.packages("plyr")
|
||||
library(plyr)
|
||||
}
|
||||
|
||||
if (!require("tidyverse")) {
|
||||
install.packages("tidyverse", dependencies = TRUE)
|
||||
library(tidyverse)
|
||||
}
|
||||
|
||||
#---------------------------
|
||||
# covered by tidyverse
|
||||
|
||||
# if (!require("ggplot2")) {
|
||||
# install.packages("ggplot2", dependencies = TRUE)
|
||||
# library(ggplot2)
|
||||
# }
|
||||
|
||||
# if (!require ("dplyr")){
|
||||
# install.packages("dplyr")
|
||||
# library(dplyr)
|
||||
# }
|
||||
#-----------------------------
|
||||
|
||||
if (!require("shiny")) {
|
||||
install.packages("shiny", dependencies = TRUE)
|
||||
library(shiny)
|
||||
|
@ -33,26 +57,6 @@ if (!require("ggridges")) {
|
|||
library(ggridges)
|
||||
}
|
||||
|
||||
# if (!require("ggplot2")) {
|
||||
# install.packages("ggplot2", dependencies = TRUE)
|
||||
# library(ggplot2)
|
||||
# }
|
||||
|
||||
# if (!require ("dplyr")){
|
||||
# install.packages("dplyr")
|
||||
# library(dplyr)
|
||||
# }
|
||||
|
||||
if (!require ("DT")){
|
||||
install.packages("DT")
|
||||
library(DT)
|
||||
}
|
||||
|
||||
if (!require ("plyr")){
|
||||
install.packages("plyr")
|
||||
library(plyr)
|
||||
}
|
||||
|
||||
# Install
|
||||
#if(!require(devtools)) install.packages("devtools")
|
||||
#devtools::install_github("kassambara/ggcorrplot")
|
||||
|
@ -188,3 +192,17 @@ map(paste0(func_path, source_files), source) # source all your R scripts!
|
|||
|
||||
# set plot script dir
|
||||
plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/"
|
||||
|
||||
##################################################
|
||||
|
||||
# Function name clashes with plyr and dplyr
|
||||
# # loading dplyr after plyr causes issues
|
||||
# if("dplyr" %in% (.packages())){
|
||||
# detach("package:dplyr", unload=TRUE)
|
||||
# detach("package:plyr", unload=TRUE)
|
||||
# }
|
||||
# library(plyr)
|
||||
# library(dplyr)
|
||||
|
||||
# another solution is to call requireNamespace() instead of library()
|
||||
# so that plyr's function names don't collide with dplyr's.
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#########################################################
|
||||
# TASK: Script to format data for corr plots
|
||||
#########################################################
|
||||
#library(dplyr)
|
||||
|
||||
#=================================================
|
||||
# Data for Corrplots
|
||||
|
@ -12,6 +13,10 @@ cat("\n=========================================="
|
|||
|
||||
# use data
|
||||
#merged_df2
|
||||
geneL_normal = c("pnca")
|
||||
geneL_na_dy = c("gid")
|
||||
geneL_na = c("rpob")
|
||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
|
||||
#----------------------------
|
||||
# columns for corr plots:PS
|
||||
|
@ -19,11 +24,55 @@ cat("\n=========================================="
|
|||
# NOTE: you can add mcsm_ppi column as well, and it will only select what it can find!
|
||||
big_df_colnames = data.frame(names(merged_df2))
|
||||
|
||||
corr_cols_select <- c("mutationinformation", drug, "mutation_info_labels"
|
||||
, "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa"
|
||||
, "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af"
|
||||
, "deepddg", "ddg_dynamut", "ddg_dynamut2", "mcsm_na_affinity"
|
||||
, "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet", "ligand_distance")
|
||||
core_cols = c("mutationinformation", drug, "mutation_info_labels"
|
||||
, "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa"
|
||||
, "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af"
|
||||
, "deepddg" , "ddg_dynamut2"
|
||||
, "consurf_score"
|
||||
#, "consurf_scaled"
|
||||
, "snap2_score"
|
||||
#, "snap2_scaled", "snap2_accuracy_pc"
|
||||
, "ligand_distance")
|
||||
|
||||
if (tolower(gene)%in%geneL_normal){
|
||||
corr_cols_select = core_cols
|
||||
}
|
||||
if (tolower(gene)%in%geneL_na_dy){
|
||||
additional_cols = c("mcsm_na_affinity"
|
||||
, "ddg_dynamut"
|
||||
, "ddg_encom", "dds_encom"
|
||||
, "ddg_mcsm", "ddg_sdm"
|
||||
, "ddg_duet"
|
||||
#, "mcsm_na_scaled"
|
||||
#, "ddg_dynamut_scaled"
|
||||
#, "ddg_encom_scaled", "dds_encom_scaled"
|
||||
#, "ddg_mcsm_scaled", "ddg_sdm_scaled"
|
||||
#, "ddg_duet_scaled"
|
||||
)
|
||||
|
||||
corr_cols_select = c(core_cols, additional_cols)
|
||||
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
additional_cols = c("mcsm_na_affinity"
|
||||
#, "mcsm_na_scaled"
|
||||
)
|
||||
|
||||
corr_cols_select = c(core_cols, additional_cols)
|
||||
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
additional_cols = c("mcsm_ppi2_affinity")
|
||||
corr_cols_select = c(core_cols, additional_cols)
|
||||
}
|
||||
|
||||
# corr_cols_select <- c("mutationinformation", drug, "mutation_info_labels"
|
||||
# , "duet_stability_change", "ligand_affinity_change", "ddg_foldx", "asa", "rsa"
|
||||
# , "rd_values", "kd_values", "log10_or_mychisq", "neglog_pval_fisher","af"
|
||||
# , "deepddg", "ddg_dynamut", "ddg_dynamut2", "mcsm_na_affinity"
|
||||
# , "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet", "ligand_distance")
|
||||
|
||||
#===========================
|
||||
# Corr data for plots: PS
|
||||
|
@ -36,9 +85,8 @@ corr_df_m2 = merged_df2[,colnames(merged_df2)%in%corr_cols_select]
|
|||
# formatting: some cols
|
||||
# Add pretty colnames
|
||||
#-----------------------
|
||||
corr_df_m2_f <- corr_df_m2 %>%
|
||||
rename(
|
||||
DUET = duet_stability_change
|
||||
corr_df_m2_f <- corr_df_m2 %>% dplyr::rename(
|
||||
'DUET' = duet_stability_change
|
||||
, 'mCSM-lig' = ligand_affinity_change
|
||||
, FoldX = ddg_foldx
|
||||
, DeepDDG = deepddg
|
||||
|
|
|
@ -124,31 +124,41 @@ cat(s1)
|
|||
####################################################################
|
||||
# Data for DM OM Plots: Long format dfs
|
||||
####################################################################
|
||||
|
||||
#source("other_plots_data.R")
|
||||
|
||||
# source(paste0(plot_script_path, "dm_om_data.R"))
|
||||
#
|
||||
# s2 = c("\nSuccessfully sourced other_plots_data.R")
|
||||
# cat(s2)
|
||||
#source(paste0(plot_script_path, "dm_om_data.R"))
|
||||
|
||||
#s2 = c("\nSuccessfully sourced other_plots_data.R")
|
||||
#cat(s2)
|
||||
|
||||
####################################################################
|
||||
# Data for Lineage barplots: WF and LF dfs
|
||||
####################################################################
|
||||
#
|
||||
# source(paste0(plot_script_path, "lineage_data.R"))
|
||||
#
|
||||
# s3 = c("\nSuccessfully sourced lineage_data.R")
|
||||
# cat(s3)
|
||||
|
||||
source(paste0(plot_script_path, "lineage_data.R"))
|
||||
|
||||
s3 = c("\nSuccessfully sourced lineage_data.R")
|
||||
cat(s3)
|
||||
|
||||
####################################################################
|
||||
# Data for corr plots:
|
||||
####################################################################
|
||||
# make sure the above script works because merged_df2_combined is needed
|
||||
# source(paste0(plot_script_path, "corr_data.R"))
|
||||
#
|
||||
# s4 = c("\nSuccessfully sourced corr_data.R")
|
||||
# cat(s4)
|
||||
#source(paste0(plot_script_path, "corr_data.R"))
|
||||
|
||||
#s4 = c("\nSuccessfully sourced corr_data.R")
|
||||
#cat(s4)
|
||||
|
||||
# Moved "logo_data.R" to redundant/
|
||||
#-----------------------------------------
|
||||
# Replaced above with my function
|
||||
# corr_data_extract()
|
||||
|
||||
corr_df_m3_f = corr_data_extract(merged_df3, extract_scaled_cols = F)
|
||||
head(corr_df_m3_f)
|
||||
|
||||
corr_df_m2_f = corr_data_extract(merged_df2, extract_scaled_cols = F)
|
||||
head(corr_df_m2_f)
|
||||
|
||||
########################################################################
|
||||
# End of script
|
||||
|
@ -158,13 +168,13 @@ cat(s1)
|
|||
# "\n##################################################"
|
||||
# , "\nSuccessful: get_plotting_dfs.R worked!"
|
||||
# , "\n###################################################\n")
|
||||
# } else {
|
||||
# } else {
|
||||
# cat(
|
||||
# "\n#################################################"
|
||||
# , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
|
||||
# , "\n###################################################\n" )
|
||||
# }
|
||||
#
|
||||
# }
|
||||
|
||||
########################################################################
|
||||
# clear excess variables: from the global environment
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#=======================================================================
|
||||
# working dir and loading libraries
|
||||
getwd()
|
||||
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||
#setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||
getwd()
|
||||
|
||||
#source("~/git/LSHTM_analysis/scripts/Header_TT.R")
|
||||
|
@ -14,11 +14,11 @@ library(ggplot2)
|
|||
library(data.table)
|
||||
library(dplyr)
|
||||
library(tidyverse)
|
||||
source("combining_dfs_plotting.R")
|
||||
#source("combining_dfs_plotting.R")
|
||||
|
||||
rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
|
||||
, merged_df3_comp, merged_df3_comp_lig
|
||||
, my_df_u, my_df_u_lig)
|
||||
#rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
|
||||
# , merged_df3_comp, merged_df3_comp_lig
|
||||
# , my_df_u, my_df_u_lig)
|
||||
|
||||
|
||||
cols_to_select = c("mutation", "mutationinformation"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue