#!/usr/bin/env Rscript
#########################################################
# TASK: Get formatted data for plots
#########################################################

# Working directory and shared header.
# The bare getwd() calls are intentional: when the script is run with
# top-level auto-printing (e.g. via Rscript) they show the directory
# before and after the switch.
getwd()
setwd(dir = "~/git/LSHTM_analysis/scripts/plotting")
getwd()

# Header_TT.R is resolved relative to the directory set above.
# NOTE(review): presumably this loads the libraries and helper functions
# used below (import_dirs, plotting_data, ...) -- confirm.
source(file = "Header_TT.R")

#********************
# cmd args passed
# in from other scripts
# to call this
#********************

#====================
# variables for lig
#====================
# LigDist_colname and LigDist_cutoff are used below but not assigned in this
# script; they must already exist when it runs. The former defaults are kept
# here, commented out, for reference.
#LigDist_colname = "ligand_distance"
#LigDist_cutoff = 10

#===========
# input
#===========
#---------------------
# call: import_dirs()
#---------------------
# `drug` and `gene` are not assigned in this script and must be supplied by
# the caller.
# NOTE(review): import_dirs() presumably sets `outdir` (used below) -- confirm.
import_dirs(drug, gene)

#---------------------------
# call: plotting_data()
#---------------------------
# If the caller did not provide `infile_params`, fall back to
# <outdir>/<gene>_all_params.csv.
if (!exists("infile_params") && exists("gene")) {
  #if (!is.character(infile_params) && exists("gene")){ # when running as cmd
  in_filename_params <- paste0(tolower(gene), "_all_params.csv") # for pncA (and for gid finally) 10/09/21
  #in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid
  infile_params <- file.path(outdir, in_filename_params)
  cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n")
}

# Input 1: read the combined params file (default: <gene>_all_params.csv)
cat("\nReading mcsm combined data file: ", infile_params)
mcsm_df <- read.csv(infile_params, header = TRUE)

pd_df <- plotting_data(
  mcsm_df,
  lig_dist_colname = LigDist_colname,
  lig_dist_cutoff = LigDist_cutoff
)

my_df <- pd_df[[1]]
my_df_u <- pd_df[[2]] # this forms one of the input for combining_dfs_plotting()

# Extract the distance column as a vector with [[ ]] before aggregating,
# rather than calling max()/min() on a one-column data frame.
max_ang <- round(max(my_df_u[[LigDist_colname]]))
min_ang <- round(min(my_df_u[[LigDist_colname]]))

cat(
  "\nLigand distance cut off, colname:", LigDist_colname,
  "\nThe max distance", gene, "structure df", ":", max_ang, "\u212b",
  "\nThe min distance", gene, "structure df", ":", min_ang, "\u212b"
)

#--------------------------------
# call: combining_dfs_plotting()
#--------------------------------
# If the caller did not provide `infile_metadata`, fall back to
# <outdir>/<gene>_metadata.csv. `outdir` and `gene` are not assigned in this
# script and must already exist.
if (!exists("infile_metadata") && exists("gene")) {
  #if (!is.character(infile_metadata) && exists("gene")){ # when running as cmd
  in_filename_metadata <- paste0(tolower(gene), "_metadata.csv") # part combined for gid
  infile_metadata <- file.path(outdir, in_filename_metadata)
  cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n")
}

# Input 2: read <gene>_metadata.csv
cat("\nReading meta data file: ", infile_metadata)

# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
gene_metadata <- read.csv(
  infile_metadata,
  stringsAsFactors = FALSE,
  header = TRUE
)

# my_df_u is the second element returned by plotting_data() earlier in this
# script.
all_plot_dfs <- combining_dfs_plotting(
  my_df_u,
  gene_metadata,
  lig_dist_colname = LigDist_colname,
  lig_dist_cutoff = LigDist_cutoff
)

# Unpack the four data frames by position.
merged_df2 <- all_plot_dfs[[1]]
merged_df3 <- all_plot_dfs[[2]]
merged_df2_comp <- all_plot_dfs[[3]]
merged_df3_comp <- all_plot_dfs[[4]]

#======================================================================
#TODO: Think! MOVE TO COMBINE or singular file for deepddg

#============================
# adding deepddg scaled values
# scale data between -1 and 1
#============================
# n = which(colnames(merged_df3) == "deepddg"); n
#
# my_min = min(merged_df3[,n]); my_min
# my_max = max(merged_df3[,n]); my_max
#
# merged_df3$deepddg_scaled = ifelse(merged_df3[,n] < 0
#                                    , merged_df3[,n]/abs(my_min)
#                                    , merged_df3[,n]/my_max)
# # sanity check
# my_min = min(merged_df3$deepddg_scaled); my_min
# my_max = max(merged_df3$deepddg_scaled); my_max
#
# if (my_min == -1 && my_max == 1){
#   cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1"
#       #, "\nProceeding with assigning deep outcome category")
#       , "\n")
# }else{
#   cat("\nFAIL: could not scale DeepDDG ddg values"
#       , "Aborting!")
# }
#

####################################################################
# Data for combining other dfs
####################################################################

#source("other_dfs_data.R")
# Fixed this at source, i.e. in the python script
# Moved "other_dfs_data.R" to redundant/

####################################################################
# Data for subcols barplot (~heatmap)
####################################################################

#source("coloured_bp_data.R")
# Repurposed function so that params can be passed instead to generate
# data required for plotting.
# Moved "coloured_bp_data.R" to redundant/

####################################################################
# Data for logoplots
####################################################################

# An error raised inside logo_data.R propagates out of source(), so the
# message below is only printed after the file has run to completion.
source(file = "logo_data.R")

# s1 is kept in the workspace; the end-of-script status check reads it.
s1 <- "\nSuccessfully sourced logo_data.R"
cat(s1)

####################################################################
# Data for DM OM Plots: Long format dfs
####################################################################

# NOTE(review): other_plots_data.R appears to have been superseded by
# dm_om_data.R -- confirm before deleting the commented-out line.
#source("other_plots_data.R")

source("dm_om_data.R")

# The message now names the file that is actually sourced above; it
# previously reported "other_plots_data.R".
# s2 is kept in the workspace; the end-of-script status check reads it.
s2 <- "\nSuccessfully sourced dm_om_data.R"
cat(s2)

####################################################################
# Data for Lineage barplots: WF and LF dfs
####################################################################

# An error raised inside lineage_data.R propagates out of source(), so the
# message below is only printed after the file has run to completion.
source(file = "lineage_data.R")

# s3 is kept in the workspace; the end-of-script status check reads it.
s3 <- "\nSuccessfully sourced lineage_data.R"
cat(s3)

####################################################################
# Data for corr plots:
####################################################################
# Author's note: the preceding script must have run successfully, because
# merged_df2_combined is needed here.
# NOTE(review): merged_df2_combined is not created in this file; presumably
# one of the sourced scripts defines it -- confirm.
source(file = "corr_data.R")

# s4 is kept in the workspace; the end-of-script status check reads it.
s4 <- "\nSuccessfully sourced corr_data.R"
cat(s4)

########################################################################
# End of script
########################################################################
# s1..s4 are the status messages assigned after each source() call in this
# script. The run is reported as successful only if every one of them exists
# and is non-empty.
#
# The previous condition, all(length(s1), ...) > 0, compared the single
# logical returned by all() against 0 instead of testing each length, and
# raised an "object not found" error (rather than reaching the FAIL branch)
# when a message was missing.
if (all(vapply(
  c("s1", "s2", "s3", "s4"),
  function(nm) exists(nm) && length(get(nm)) > 0,
  logical(1)
))) {
  cat(
    "\n##################################################",
    "\nSuccessful: get_plotting_dfs.R worked!",
    "\n###################################################\n"
  )
} else {
  cat(
    "\n#################################################",
    "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check",
    "\n###################################################\n"
  )
}

########################################################################
# clear excess variables
# None of the names removed here, apart from infile_metadata and
# infile_params, are assigned in this file.
# NOTE(review): presumably the sourced scripts create them -- confirm.
# rm() warns (it does not error) for any name that does not exist.

# Counters, checks, column selections and the two input file paths.
rm(list = c(
  "c1", "c2", "c3", "c4", "check1",
  "curr_count", "curr_total",
  "cols_check",
  "cols_to_select",
  paste0(
    "cols_to_select_",
    c(
      "deepddg", "duet", "dynamut", "dynamut2", "encomddg",
      "encomdds", "mcsm", "mcsm_na", "sdm"
    )
  ),
  "infile_metadata",
  "infile_params"
  #, "infilename_dynamut"
  #, "infilename_dynamut2"
  #, "infilename_mcsm_f_snps"
  #, "infilename_mcsm_na"
))

# Pivot column vectors.
rm(list = c(
  "pivot_cols",
  paste0(
    "pivot_cols_",
    c(
      "deepddg", "duet", "dynamut", "dynamut2", "encomddg",
      "encomdds", "foldx", "mcsm", "mcsm_na", "n", "sdm"
    )
  )
))

# Expected-dimension checks and factor column names.
rm(list = c(
  "expected_cols",
  "expected_ncols",
  "expected_rows",
  "expected_rows_lf",
  "fact_cols"
))