#!/usr/bin/env Rscript
#########################################################
# TASK: Get formatted data for plots
#
# Objects created in the calling environment:
#   mcsm_df, gene_metadata  : raw inputs read from csv
#   my_df, my_df_u          : elements 1 and 2 of plotting_data()
#   min_ang, max_ang        : rounded min/max of the ligand distance column
#   merged_df2, merged_df3  : elements 1 and 2 of combining_dfs_plotting()
#
# Names used but not defined here (must already exist when this runs):
#   drug, gene              : set by the calling script
#   LigDist_colname, LigDist_cutoff, ppi2Dist_colname, naDist_colname,
#   DistCutOff              : from functions/plotting_globals.R
#   outdir                  : presumably set by import_dirs() -- TODO confirm
# Optional overrides: infile_params, infile_metadata (full paths to the csvs).
#########################################################
# working dir and loading libraries
getwd()
source("~/git/LSHTM_analysis/scripts/Header_TT.R")

# cmd args passed in from other scripts to call this:
# set drug and gene name

#==========================================
# variables for affinity:
# comes from functions/plotting_globals.R
#==========================================
cat("\nGlobal variables for Ligand:"
    , "\nligand distance colname:", LigDist_colname
    , "\nligand distance cut off:", LigDist_cutoff)

cat("\nGlobal variables for mCSM-PPI2 affinity:"
    , "\nPPI2 distance colname:", ppi2Dist_colname
    , "\nPPI2 cut off:", DistCutOff)

# These labels used to read "ligand distance ..." (copied from the ligand
# block above) although the values printed are the mCSM-NA ones.
cat("\nGlobal variables for mCSM-NA affinity:"
    , "\nmCSM-NA distance colname:", naDist_colname
    , "\nmCSM-NA distance cut off:", DistCutOff)

#===========
# input
#===========
#--------------------------------------------
# call: import_dirs()
# comes from functions/plotting_globals.R
#--------------------------------------------
import_dirs(drug, gene)

#---------------------------
# call: plotting_data()
#---------------------------
# When no explicit path was supplied (e.g. running as cmd), fall back to
# <outdir>/<gene>_all_params.csv.
#if (!is.character(infile_params) && exists("gene")){
if (!exists("infile_params")) {
  if (!exists("gene")) {
    # Fail here with a clear message instead of an "object not found"
    # error from read.csv() further down.
    stop("Cannot locate mcsm combined data: neither 'infile_params' nor 'gene' is set"
         , call. = FALSE)
  }
  in_filename_params <- paste0(tolower(gene), "_all_params.csv")
  infile_params <- file.path(outdir, in_filename_params)
  cat("\nInput file for mcsm comb data not specified, assuming filename: "
      , infile_params, "\n")
}

# Input 1: read <gene>_all_params.csv (or the user supplied infile_params)
cat("\nReading mcsm combined data file: ", infile_params)
mcsm_df <- read.csv(infile_params, header = TRUE)

pd_df <- plotting_data(mcsm_df
                       , lig_dist_colname = LigDist_colname
                       , lig_dist_cutoff = LigDist_cutoff)

my_df <- pd_df[[1]]
my_df_u <- pd_df[[2]] # this forms one of the input for combining_dfs_plotting()

# Extract the column as a vector ([[ ]]) and ignore missing distances so a
# single NA does not turn both summary values into NA.
max_ang <- round(max(my_df_u[[LigDist_colname]], na.rm = TRUE))
min_ang <- round(min(my_df_u[[LigDist_colname]], na.rm = TRUE))

cat("\nLigand distance colname:", LigDist_colname
    , "\nThe max distance", gene, "structure df" , ":", max_ang, "\u212b"
    , "\nThe min distance", gene, "structure df" , ":", min_ang, "\u212b")

#--------------------------------
# call: combining_dfs_plotting()
#--------------------------------
# Same fallback as above, for <outdir>/<gene>_metadata.csv.
#if (!is.character(infile_metadata) && exists("gene")){
if (!exists("infile_metadata")) {
  if (!exists("gene")) {
    stop("Cannot locate gene metadata: neither 'infile_metadata' nor 'gene' is set"
         , call. = FALSE)
  }
  in_filename_metadata <- paste0(tolower(gene), "_metadata.csv") # part combined for gid
  infile_metadata <- file.path(outdir, in_filename_metadata)
  cat("\nInput file for gene metadata not specified, assuming filename: "
      , infile_metadata, "\n")
}

# Input 2: read _meta data.csv
cat("\nReading meta data file: ", infile_metadata)

gene_metadata <- read.csv(infile_metadata
                          , stringsAsFactors = FALSE
                          , header = TRUE)

cat("\nDim of meta data file: ", dim(gene_metadata))

all_plot_dfs <- combining_dfs_plotting(my_df_u
                                       , gene_metadata
                                       , lig_dist_colname = LigDist_colname
                                       , lig_dist_cutoff = LigDist_cutoff)

merged_df2 <- all_plot_dfs[[1]]
merged_df3 <- all_plot_dfs[[2]]
#merged_df2_comp = all_plot_dfs[[3]]
#merged_df3_comp = all_plot_dfs[[4]]
#======================================================================
####################################################################
# Data for subcols barplot (~heatmap)
####################################################################
#source("coloured_bp_data.R")
# Repurposed function so that params can be passed instead to generate
# data required for plotting.
# Moved "coloured_bp_data.R" to redundant/
####################################################################
# Data for logoplots
####################################################################
#
# source(paste0(plot_script_path, "logo_data_msa.R"))
# s1 = c("\nSuccessfully sourced logo_data_msa.R")
# cat(s1)
#
# ####################################################################
# # Data for DM OM Plots: WF and LF dfs
# # My function: dm_om_wf_lf_data()
# # location: scripts/functions/dm_om_data.R
# #source("other_plots_data.R")
# ####################################################################
#
# #source(paste0(plot_script_path, "dm_om_data.R")) # calling the function directly instead
# geneL_normal = c("pnca")
# geneL_na = c("gid", "rpob")
# geneL_ppi2 = c("alr", "embb", "katg", "rpob")
#
# all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
#
# wf_duet = all_dm_om_df[['wf_duet']]
# lf_duet = all_dm_om_df[['lf_duet']]
#
# wf_mcsm_lig = all_dm_om_df[['wf_mcsm_lig']]
# lf_mcsm_lig = all_dm_om_df[['lf_mcsm_lig']]
#
# wf_foldx = all_dm_om_df[['wf_foldx']]
# lf_foldx = all_dm_om_df[['lf_foldx']]
#
# wf_deepddg = all_dm_om_df[['wf_deepddg']]
# lf_deepddg = all_dm_om_df[['lf_deepddg']]
#
# wf_dynamut2 = all_dm_om_df[['wf_dynamut2']]
# lf_dynamut2 = all_dm_om_df[['lf_dynamut2']]
#
# wf_consurf = all_dm_om_df[['wf_consurf']]
# lf_consurf = all_dm_om_df[['lf_consurf']]
#
# wf_snap2 = all_dm_om_df[['wf_snap2']]
# lf_snap2 = all_dm_om_df[['lf_snap2']]
#
# wf_provean = all_dm_om_df[['wf_provean']]
# lf_provean = all_dm_om_df[['lf_provean']]
#
# # NEW
# wf_dist_gen = all_dm_om_df[['wf_dist_gen']]
# lf_dist_gen = all_dm_om_df[['lf_dist_gen']]
#
# if (tolower(gene)%in%geneL_na){
#   wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
#   lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]
# }
#
# if (tolower(gene)%in%geneL_ppi2){
#   wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
#   lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
# }
#
# s2 = c("\nSuccessfully sourced other_plots_data.R")
# cat(s2)
#
# ####################################################################
# # Data for Lineage barplots: WF and LF dfs
# # My function: lineage_plot_data()
# # location: scripts/functions/lineage_plot_data.R
# ####################################################################
#
# #source(paste0(plot_script_path, "lineage_data.R"))
# # converted to a function. Moved lineage_data.R to redundant/
# lineage_dfL = lineage_plot_data(merged_df2
#                                 , lineage_column_name = "lineage"
#                                 , remove_empty_lineage = F
#                                 , lineage_label_col_name = "lineage_labels"
#                                 , id_colname = "id"
#                                 , snp_colname = "mutationinformation"
# )
#
# lin_wf = lineage_dfL[['lin_wf']]
# lin_lf = lineage_dfL[['lin_lf']]
#
# s3 = c("\nSuccessfully sourced lineage_data.R")
# cat(s3)
#
# ####################################################################
# # Data for corr plots:
# # My function: corr_data_extract()
# # location: scripts/functions/corr_plot_data.R
# ####################################################################
# # make sure the above script works because merged_df2_combined is needed
# merged_df3 = as.data.frame(merged_df3)
#
# corr_df_m3_f = corr_data_extract(merged_df3
#                                  , gene = gene
#                                  , drug = drug
#                                  , extract_scaled_cols = F)
# head(corr_df_m3_f)
#
# # corr_df_m2_f = corr_data_extract(merged_df2
# #                                  , gene = gene
# #                                  , drug = drug
# #                                  , extract_scaled_cols = F)
# # head(corr_df_m2_f)
#
# s4 = c("\nSuccessfully sourced Corr_data.R")
# cat(s4)
#
# ########################################################################
# # End of script
# ########################################################################
# if ( all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){
#   cat(
#     "\n##################################################"
#     , "\nSuccessful: get_plotting_dfs.R worked!"
#     , "\n###################################################\n")
# } else {
#   cat(
#     "\n#################################################"
#     , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
#     , "\n###################################################\n" )
# }
#
# ########################################################################
# # clear excess variables: from the global environment
#
# vars0 = ls(envir = .GlobalEnv)[grepl("curr_*", ls(envir = .GlobalEnv))]
# vars1 = ls(envir = .GlobalEnv)[grepl("^cols_to*", ls(envir = .GlobalEnv))]
# vars2 = ls(envir = .GlobalEnv)[grepl("pivot_cols_*", ls(envir = .GlobalEnv))]
# vars3 = ls(envir = .GlobalEnv)[grepl("expected_*", ls(envir = .GlobalEnv))]
#
# rm( infile_metadata
#     , infile_params
#     , vars0
#     , vars1
#     , vars2
#     , vars3)