LSHTM_analysis/scripts/plotting/get_plotting_dfs.R

186 lines
6.8 KiB
R

#!/usr/bin/env Rscript
#########################################################
# TASK: Get formatted data for plots
#########################################################
# Working directory check, then the shared project header
# (the header is where libraries get loaded, per the original note).
getwd()
source("~/git/LSHTM_analysis/scripts/Header_TT.R")

#********************
# `drug` and `gene` are NOT defined in this script: they are expected to be
# set already by whichever script sources/calls this one.
#********************

#==========================================
# Ligand globals (LigDist_colname, LigDist_cutoff):
# defined in functions/plotting_globals.R
#==========================================
cat(
  "\nGlobal variables for Ligand:",
  "\nligand distance colname:", LigDist_colname,
  "\nligand distance cut off:", LigDist_cutoff
)

#===========
# input
#===========
#--------------------------------------------
# import_dirs(): defined in functions/plotting_globals.R
# NOTE(review): presumably this is what sets `outdir` and `plot_script_path`
# used further down -- confirm against plotting_globals.R
#--------------------------------------------
import_dirs(drug, gene)
#---------------------------
# call: plotting_data()
#---------------------------
# When the caller has not supplied `infile_params`, fall back to the
# conventional file name <gene>_all_params.csv inside `outdir`.
if (!exists("infile_params") && exists("gene")) {
  #if (!is.character(infile_params) && exists("gene")){ # when running as cmd
  in_filename_params <- paste0(tolower(gene), "_all_params.csv")
  infile_params <- paste0(outdir, "/", in_filename_params)
  cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n")
}

# Input 1: read <gene>_all_params.csv (or the caller-supplied file)
cat("\nReading mcsm combined data file: ", infile_params)
# TRUE rather than T: T is an ordinary variable and can be reassigned
mcsm_df <- read.csv(infile_params, header = TRUE)

# plotting_data() returns a list; only its first two elements are used here.
pd_df <- plotting_data(
  mcsm_df,
  lig_dist_colname = LigDist_colname,
  lig_dist_cutoff = LigDist_cutoff
)
my_df <- pd_df[[1]]
my_df_u <- pd_df[[2]] # this forms one of the input for combining_dfs_plotting()

# Extract the distance column as a vector with [[ ]] (rather than taking
# max/min over a one-column data frame), then round to whole units.
# NOTE(review): NAs in the column propagate to max_ang/min_ang, as before.
lig_dist_values <- my_df_u[[LigDist_colname]]
max_ang <- round(max(lig_dist_values))
min_ang <- round(min(lig_dist_values))
rm(lig_dist_values) # do not leave the temporary behind in the workspace

cat(
  "\nLigand distance colname:", LigDist_colname,
  "\nThe max distance", gene, "structure df", ":", max_ang, "\u212b",
  "\nThe min distance", gene, "structure df", ":", min_ang, "\u212b"
)
#--------------------------------
# call: combining_dfs_plotting()
#--------------------------------
# When the caller has not supplied `infile_metadata`, fall back to the
# conventional file name <gene>_metadata.csv inside `outdir`.
if (!exists("infile_metadata") && exists("gene")) {
  #if (!is.character(infile_metadata) && exists("gene")){ # when running as cmd
  in_filename_metadata <- paste0(tolower(gene), "_metadata.csv") # part combined for gid
  infile_metadata <- paste0(outdir, "/", in_filename_metadata)
  cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n")
}

# Input 2: read <gene>_metadata.csv (or the caller-supplied file)
cat("\nReading meta data file: ", infile_metadata)
# TRUE/FALSE spelled out: T and F are ordinary variables and can be reassigned
gene_metadata <- read.csv(
  infile_metadata,
  stringsAsFactors = FALSE,
  header = TRUE
)
cat("\nDim of meta data file: ", dim(gene_metadata))

# combining_dfs_plotting() returns a list; its first four elements are
# unpacked below into the merged data frames used by the plotting scripts.
all_plot_dfs <- combining_dfs_plotting(
  my_df_u,
  gene_metadata,
  lig_dist_colname = LigDist_colname,
  lig_dist_cutoff = LigDist_cutoff
)
merged_df2 <- all_plot_dfs[[1]]
merged_df3 <- all_plot_dfs[[2]]
merged_df2_comp <- all_plot_dfs[[3]]
merged_df3_comp <- all_plot_dfs[[4]]
#======================================================================
####################################################################
# Data for combining other dfs
####################################################################
#source("other_dfs_data.R")
# Fixed this at source i.e python script
# Moved: "other_dfs_data.R" to redundant/
####################################################################
# Data for subcols barplot (~heatmap)
####################################################################
#source("coloured_bp_data.R")
# Repurposed function so that params can be passed instead to generate
# data required for plotting.
# Moved "coloured_bp_data.R" to redundant/
####################################################################
# Data for logoplots
####################################################################
#source(paste0(plot_script_path, "logo_data.R"))
#s1 = c("\nSuccessfully sourced logo_data.R")
#cat(s1)
# input data is merged_df3
# so repurposed it into a function so params can be passed instead to generate
# data required for plotting.
# Moved "logo_data.R" to redundant/
# NOTE: plot_script_path is concatenated directly with the file name, so it
# must already end with a path separator.
source(paste0(plot_script_path, "logo_data_msa.R"))
s1 <- "\nSuccessfully sourced logo_data_msa.R"
cat(s1)
####################################################################
# Data for DM OM Plots: WF and LF dfs
# My function: dm_om_wf_lf_data()
####################################################################
#source("other_plots_data.R")
# converted to a function
# moved old one to redundant. Added suffix to filename i.e. _nf(non-function)
source(paste0(plot_script_path, "dm_om_data.R"))
# Status message now names the file actually sourced (dm_om_data.R); it
# previously reported the retired other_plots_data.R.
s2 <- "\nSuccessfully sourced dm_om_data.R"
cat(s2)
####################################################################
# Data for Lineage barplots: WF and LF dfs
####################################################################
source(paste0(plot_script_path, "lineage_data.R"))
s3 <- "\nSuccessfully sourced lineage_data.R"
cat(s3)
####################################################################
# Data for corr plots:
# My function: corr_data_extract()
####################################################################
# Relies on merged_df2 and merged_df3 created by the
# combining_dfs_plotting() step earlier in this script.
# FALSE spelled out: F is an ordinary variable and can be reassigned.
corr_df_m3_f <- corr_data_extract(merged_df3, extract_scaled_cols = FALSE)
head(corr_df_m3_f) # preview; auto-prints only when run at top level (e.g. Rscript)

corr_df_m2_f <- corr_data_extract(merged_df2, extract_scaled_cols = FALSE)
head(corr_df_m2_f)
########################################################################
# End of script
########################################################################
# if ( all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){
# cat(
# "\n##################################################"
# , "\nSuccessful: get_plotting_dfs.R worked!"
# , "\n###################################################\n")
# } else {
# cat(
# "\n#################################################"
# , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
# , "\n###################################################\n" )
# }
########################################################################
# clear excess variables: from the global environment
#
# Collect the names of global objects matching each pattern, then remove the
# objects those names refer to. rm() needs `list =` for this: passing the
# character vectors themselves (rm(vars0, ...)) only deletes the vectors and
# leaves the matched objects in place.
#
# NOTE(review): the patterns are regular expressions, not globs. A trailing
# `*` makes the preceding character optional, so e.g. "curr_*" matches any
# name containing "curr". Patterns are kept unchanged here; tighten them
# (e.g. "^curr_") if that is broader than intended.
global_names <- ls(envir = .GlobalEnv)
vars0 <- grep("curr_*", global_names, value = TRUE)
vars1 <- grep("^cols_to*", global_names, value = TRUE)
vars2 <- grep("pivot_cols_*", global_names, value = TRUE)
vars3 <- grep("expected_*", global_names, value = TRUE)

excess_vars <- unique(c(
  "fact_cols",
  "infile_metadata",
  "infile_params",
  vars0, vars1, vars2, vars3
))

# Restrict to names that currently exist so rm() does not warn when, for
# example, fact_cols was never created.
rm(list = intersect(excess_vars, global_names), envir = .GlobalEnv)

# Finally drop the bookkeeping vectors used by this cleanup step.
rm(global_names, vars0, vars1, vars2, vars3, excess_vars)