#!/usr/bin/env Rscript
#########################################################
# TASK: Get formatted data for plots
#
# Reads the combined parameters file and the gene metadata
# file, passes them through plotting_data() and
# combining_dfs_plotting(), then sources the per-plot data
# scripts (logo, DM/OM, lineage, correlation).
#
# Variables that must already exist when this script runs
# (they are used below but never assigned here):
#   drug, gene       : passed to import_dirs()
#   LigDist_colname  : name of the ligand distance column
#   LigDist_cutoff   : passed on as lig_dist_cutoff
# Optional (derived from gene and outdir when absent):
#   infile_params, infile_metadata
# NOTE(review): outdir is not assigned in this script either;
# presumably import_dirs() or Header_TT.R provides it -- confirm.
#########################################################
# working dir and loading libraries
getwd()
#setwd("~/git/LSHTM_analysis/scripts/plotting")

# NOTE(review): absolute, user-specific path; this script only runs
# unchanged on a machine with this exact directory layout.
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/Header_TT.R")

#********************
# cmd args passed
# in from other scripts
# to call this
#********************

#====================
# variables for lig
#====================
#LigDist_colname = "ligand_distance"
#LigDist_cutoff = 10

#===========
# input
#===========
#---------------------
# call: import_dirs()
#---------------------
import_dirs(drug, gene)

# Fail early with a clear message: both names are dereferenced
# unconditionally further down, so a missing one would otherwise surface
# as an opaque "object not found" error part-way through the run.
if (!exists("gene") || !exists("LigDist_colname")) {
  stop("get_plotting_dfs.R: 'gene' and 'LigDist_colname' must be defined "
       , "before this script is run"
       , call. = FALSE)
}

#---------------------------
# call: plotting_data()
#---------------------------
if (!exists("infile_params") && exists("gene")) {
  #if (!is.character(infile_params) && exists("gene")){ # when running as cmd
  in_filename_params <- paste0(tolower(gene), "_all_params.csv")
  infile_params <- paste0(outdir, "/", in_filename_params)
  cat("\nInput file for mcsm comb data not specified, assuming filename: "
      , infile_params, "\n")
}

# Input 1: read the combined parameters file
# (default name: <gene>_all_params.csv, see above)
cat("\nReading mcsm combined data file: ", infile_params)
mcsm_df <- read.csv(infile_params, header = TRUE)

pd_df <- plotting_data(mcsm_df
                       , lig_dist_colname = LigDist_colname
                       , lig_dist_cutoff = LigDist_cutoff)

my_df <- pd_df[[1]]
my_df_u <- pd_df[[2]] # this forms one of the inputs for combining_dfs_plotting()

# Rounded extremes of the ligand distance column, reported below.
# [[ ]] extracts the column as a vector rather than a one-column data frame.
max_ang <- round(max(my_df_u[[LigDist_colname]]))
min_ang <- round(min(my_df_u[[LigDist_colname]]))

cat("\nLigand distance cut off, colname:", LigDist_colname
    , "\nThe max distance", gene, "structure df", ":", max_ang, "\u212b"
    , "\nThe min distance", gene, "structure df", ":", min_ang, "\u212b")

#--------------------------------
# call: combining_dfs_plotting()
#--------------------------------
if (!exists("infile_metadata") && exists("gene")) {
  #if (!is.character(infile_metadata) && exists("gene")){ # when running as cmd
  in_filename_metadata <- paste0(tolower(gene), "_metadata.csv") # part combined for gid
  infile_metadata <- paste0(outdir, "/", in_filename_metadata)
  cat("\nInput file for gene metadata not specified, assuming filename: "
      , infile_metadata, "\n")
}

# Input 2: read <gene>_metadata.csv
cat("\nReading meta data file: ", infile_metadata)
gene_metadata <- read.csv(infile_metadata
                          , stringsAsFactors = FALSE
                          , header = TRUE)

all_plot_dfs <- combining_dfs_plotting(my_df_u
                                       , gene_metadata
                                       , lig_dist_colname = LigDist_colname
                                       , lig_dist_cutoff = LigDist_cutoff)

# The four data frames are taken by position from the returned list.
merged_df2 <- all_plot_dfs[[1]]
merged_df3 <- all_plot_dfs[[2]]
merged_df2_comp <- all_plot_dfs[[3]]
merged_df3_comp <- all_plot_dfs[[4]]

#======================================================================
#TODO: Think! MOVE TO COMBINE or singular file for deepddg

#============================
# adding deepddg scaled values
# scale data b/w -1 and 1
#============================
# n = which(colnames(merged_df3) == "deepddg"); n
#
# my_min = min(merged_df3[,n]); my_min
# my_max = max(merged_df3[,n]); my_max
#
# merged_df3$deepddg_scaled = ifelse(merged_df3[,n] < 0
#                                    , merged_df3[,n]/abs(my_min)
#                                    , merged_df3[,n]/my_max)
# # sanity check
# my_min = min(merged_df3$deepddg_scaled); my_min
# my_max = max(merged_df3$deepddg_scaled); my_max
#
# if (my_min == -1 && my_max == 1){
#   cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1"
#       #, "\nProceeding with assigning deep outcome category")
#       , "\n")
# }else{
#   cat("\nFAIL: could not scale DeepDDG ddg values"
#       , "Aborting!")
# }
#
####################################################################
# Data for combining other dfs
####################################################################
#source("other_dfs_data.R")
# Fixed this at source i.e python script
# Moved: "other_dfs_data.R" to redundant/

####################################################################
# Data for subcols barplot (~heatmap)
####################################################################
#source("coloured_bp_data.R")
# Repurposed function so that params can be passed instead to generate
# data required for plotting.
# Moved "coloured_bp_data.R" to redundant/

####################################################################
# Data for logoplots
####################################################################
# NOTE(review): the source() calls below use relative paths, so they are
# resolved against the current working directory.
source("logo_data.R")

s1 <- c("\nSuccessfully sourced logo_data.R")
cat(s1)

####################################################################
# Data for DM OM Plots: Long format dfs
####################################################################
#source("other_plots_data.R")
source("dm_om_data.R")

# Message names the file that is actually sourced above
# (previously it still said other_plots_data.R).
s2 <- c("\nSuccessfully sourced dm_om_data.R")
cat(s2)

####################################################################
# Data for Lineage barplots: WF and LF dfs
####################################################################
source("lineage_data.R")

s3 <- c("\nSuccessfully sourced lineage_data.R")
cat(s3)

####################################################################
# Data for corr plots:
####################################################################
# make sure the above script works because merged_df2_combined is needed
source("corr_data.R")

s4 <- c("\nSuccessfully sourced corr_data.R")
cat(s4)

########################################################################
# End of script
########################################################################
# Compare each length with 0 first, then reduce with all(); passing the
# raw integer lengths to all() coerces them to logical with a warning.
if (all(c(length(s1), length(s2), length(s3), length(s4)) > 0)) {
  cat("\n##################################################"
      , "\nSuccessful: get_plotting_dfs.R worked!"
      , "\n###################################################\n")
} else {
  cat("\n#################################################"
      , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
      , "\n###################################################\n")
}

########################################################################
# clear excess variables
# Only names that currently exist are removed, so a variable that was
# never created does not trigger an "object not found" warning.
excess_vars <- c("c1", "c2", "c3", "c4", "check1"
                 , "curr_count", "curr_total"
                 , "cols_check"
                 , "cols_to_select"
                 , "cols_to_select_deepddg"
                 , "cols_to_select_duet"
                 , "cols_to_select_dynamut"
                 , "cols_to_select_dynamut2"
                 , "cols_to_select_encomddg"
                 , "cols_to_select_encomdds"
                 , "cols_to_select_mcsm"
                 , "cols_to_select_mcsm_na"
                 , "cols_to_select_sdm"
                 , "infile_metadata"
                 , "infile_params"
                 #, "infilename_dynamut"
                 #, "infilename_dynamut2"
                 #, "infilename_mcsm_f_snps"
                 #, "infilename_mcsm_na"
                 , "pivot_cols"
                 , "pivot_cols_deepddg"
                 , "pivot_cols_duet"
                 , "pivot_cols_dynamut"
                 , "pivot_cols_dynamut2"
                 , "pivot_cols_encomddg"
                 , "pivot_cols_encomdds"
                 , "pivot_cols_foldx"
                 , "pivot_cols_mcsm"
                 , "pivot_cols_mcsm_na"
                 , "pivot_cols_n"
                 , "pivot_cols_sdm"
                 , "expected_cols"
                 , "expected_ncols"
                 , "expected_rows"
                 , "expected_rows_lf"
                 , "fact_cols")

# excess_vars itself is removed in the same call so no helper is left behind.
rm(list = c(intersect(excess_vars, ls()), "excess_vars"))