#!/usr/bin/env Rscript
#########################################################
# TASK: Get formatted data for plots
#
# Reads the combined mCSM parameters file and the gene metadata file and
# builds the data frames used by the plotting scripts. Main objects created:
#   my_df, my_df_u             : elements 1 and 2 of plotting_data()'s result
#   max_ang, min_ang           : rounded max/min of the ligand distance column
#   merged_df2, merged_df3,
#   merged_df2_comp,
#   merged_df3_comp            : elements 1-4 of combining_dfs_plotting()'s
#                                result
#########################################################

# working dir and loading libraries
getwd()
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/Header_TT.R")

#********************
# cmd args passed
# in from other scripts
# to call this
#********************
# set drug and gene name

# Fail early with a clear message: both names are used below
# (import_dirs(), default file names, log output).
if (!exists("drug") || !exists("gene")) {
  stop("'drug' and 'gene' must be defined before running this script",
       call. = FALSE)
}

#==========================================
# variables for lig:
# comes from functions/plotting_globals.R
#==========================================

cat("\nGlobal variables for Ligand:",
    "\nligand distance colname:", LigDist_colname,
    "\nligand distance cut off:", LigDist_cutoff)

#===========
# input
#===========
#--------------------------------------------
# call: import_dirs()
# comes from functions/plotting_globals.R
#--------------------------------------------
import_dirs(drug, gene)

#---------------------------
# call: plotting_data()
#---------------------------
# Fall back to the conventional file name when the caller did not supply one.
if (!exists("infile_params")) {
  #if (!is.character(infile_params) && exists("gene")){ # when running as cmd
  in_filename_params <- paste0(tolower(gene), "_all_params.csv")
  infile_params <- file.path(outdir, in_filename_params)
  cat("\nInput file for mcsm comb data not specified, assuming filename: ",
      infile_params, "\n")
}

# Input 1: read <gene>_all_params.csv
cat("\nReading mcsm combined data file: ", infile_params)
mcsm_df <- read.csv(infile_params, header = TRUE)

pd_df <- plotting_data(mcsm_df,
                       lig_dist_colname = LigDist_colname,
                       lig_dist_cutoff = LigDist_cutoff)

my_df <- pd_df[[1]]
my_df_u <- pd_df[[2]] # this forms one of the input for combining_dfs_plotting()

# Extract the column as a vector ([[ ]]) and ignore missing distances so a
# single NA does not turn the reported range into NA.
max_ang <- round(max(my_df_u[[LigDist_colname]], na.rm = TRUE))
min_ang <- round(min(my_df_u[[LigDist_colname]], na.rm = TRUE))

cat("\nLigand distance colname:", LigDist_colname,
    "\nThe max distance", gene, "structure df", ":", max_ang, "\u212b",
    "\nThe min distance", gene, "structure df", ":", min_ang, "\u212b")

#--------------------------------
# call: combining_dfs_plotting()
#--------------------------------
# Fall back to the conventional file name when the caller did not supply one.
if (!exists("infile_metadata")) {
  #if (!is.character(infile_metadata) && exists("gene")){ # when running as cmd
  in_filename_metadata <- paste0(tolower(gene), "_metadata.csv") # part combined for gid
  infile_metadata <- file.path(outdir, in_filename_metadata)
  cat("\nInput file for gene metadata not specified, assuming filename: ",
      infile_metadata, "\n")
}

# Input 2: read <gene>_metadata.csv
cat("\nReading meta data file: ", infile_metadata)

gene_metadata <- read.csv(infile_metadata,
                          stringsAsFactors = FALSE,
                          header = TRUE)

cat("\nDim of meta data file: ", dim(gene_metadata))

all_plot_dfs <- combining_dfs_plotting(my_df_u,
                                       gene_metadata,
                                       lig_dist_colname = LigDist_colname,
                                       lig_dist_cutoff = LigDist_cutoff)

merged_df2 <- all_plot_dfs[[1]]
merged_df3 <- all_plot_dfs[[2]]
merged_df2_comp <- all_plot_dfs[[3]]
merged_df3_comp <- all_plot_dfs[[4]]
#======================================================================

####################################################################
# Data for combining other dfs
####################################################################
#source("other_dfs_data.R")
# Fixed this at source i.e python script
# Moved: "other_dfs_data.R" to redundant/

####################################################################
# Data for subcols barplot (~heatmap)
####################################################################
#source("coloured_bp_data.R")
# Repurposed function so that params can be passed instead to generate
# data required for plotting.
# Moved "coloured_bp_data.R" to redundant/

####################################################################
# Data for logoplots
####################################################################
#source(paste0(plot_script_path, "logo_data.R"))
#s1 = c("\nSuccessfully sourced logo_data.R")
#cat(s1)
# input data is merged_df3
# so repurposed it into a function so params can be passed instead to generate
# data required for plotting.
# Moved "logo_data.R" to redundant/

source(paste0(plot_script_path, "logo_data_msa.R"))
s1 <- "\nSuccessfully sourced logo_data_msa.R"
cat(s1)

####################################################################
# Data for DM OM Plots: Long format dfs
####################################################################
#source("other_plots_data.R")

# source(paste0(plot_script_path, "dm_om_data.R"))
#
# s2 = c("\nSuccessfully sourced other_plots_data.R")
# cat(s2)

####################################################################
# Data for Lineage barplots: WF and LF dfs
####################################################################
#
# source(paste0(plot_script_path, "lineage_data.R"))
#
# s3 = c("\nSuccessfully sourced lineage_data.R")
# cat(s3)

####################################################################
# Data for corr plots:
####################################################################
# make sure the above script works because merged_df2_combined is needed
# source(paste0(plot_script_path, "corr_data.R"))
#
# s4 = c("\nSuccessfully sourced corr_data.R")
# cat(s4)

########################################################################
# End of script
########################################################################
# if ( all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){
#   cat(
#     "\n##################################################"
#     , "\nSuccessful: get_plotting_dfs.R worked!"
#     , "\n###################################################\n")
# } else {
#   cat(
#     "\n#################################################"
#     , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
#     , "\n###################################################\n" )
# }
#
########################################################################
# clear excess variables: from the global environment
#
# The names are collected as strings, so they must be handed to rm() through
# its `list` argument; passing the name vectors as bare symbols would delete
# only the vectors themselves and leave the matched variables in place.
# Only names that currently exist are removed, so rm() does not warn about
# missing objects. Wrapped in local() so the helper variables used here do
# not leak into the global environment.
local({
  global_names <- ls(envir = .GlobalEnv)

  # Scratch-variable name patterns (regular expressions, not globs).
  scratch_patterns <- c("curr_", "^cols_to", "pivot_cols_", "expected_")
  is_scratch <- grepl(paste(scratch_patterns, collapse = "|"), global_names)

  excess_names <- c("c1",
                    "fact_cols",
                    "infile_metadata",
                    "infile_params",
                    global_names[is_scratch])

  rm(list = intersect(excess_names, global_names), envir = .GlobalEnv)
})