diff --git a/scripts/plotting/basic_barplots_PS.R b/scripts/plotting/basic_barplots_PS.R
index 738318d..4e9d0f9 100644
--- a/scripts/plotting/basic_barplots_PS.R
+++ b/scripts/plotting/basic_barplots_PS.R
@@ -23,18 +23,19 @@ source("plotting_data.R")
 cat(paste0("Directories imported:"
            , "\ndatadir:", datadir
            , "\nindir:", indir
-           , "\noutdir:", outdir))
+           , "\noutdir:", outdir
+           , "\nplotdir:", plotdir))
 
 #=======
 # output
 #=======
 # plot 1
 basic_bp_duet = "basic_barplot_PS.svg"
-plot_basic_bp_duet = paste0(outdir, "/plots/", basic_bp_duet)
+plot_basic_bp_duet = paste0(plotdir, "/", basic_bp_duet)
 
 # plot 2
 pos_count_duet = "position_count_PS.svg"
-plot_pos_count_duet = paste0(outdir, "/plots/", pos_count_duet)
+plot_pos_count_duet = paste0(plotdir, "/", pos_count_duet)
 
 #%%===============================================================
 #================
diff --git a/scripts/plotting/plotting_data.R b/scripts/plotting/plotting_data.R
index 8c6feb1..b1b7a2c 100644
--- a/scripts/plotting/plotting_data.R
+++ b/scripts/plotting/plotting_data.R
@@ -39,7 +39,7 @@ cat(gene_match)
 datadir = paste0("~/git/Data")
 indir = paste0(datadir, "/", drug, "/input")
 outdir = paste0("~/git/Data", "/", drug, "/output")
-
+plotdir = paste0(outdir, "/plots")
 #======
 # input
 #======
@@ -56,15 +56,19 @@ cat("Reading struct params including mcsm:", in_filename_params)
 
 my_df = read.csv(infile_params, header = T)
 
-cat("Input dimensions:", dim(my_df))
+cat("\nInput dimensions:", dim(my_df))
 
 # quick checks
 #colnames(my_df)
 #str(my_df)
 
+###########################
+# extract unique mutations
+###########################
+
 # check for duplicate mutations
 if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformation)){
-  cat(paste0("CAUTION:", " Duplicate mutations identified"
+  cat(paste0("\nCAUTION:", " Duplicate mutations identified"
              , "\nExtracting these..."))
   dup_muts = my_df[duplicated(my_df$mutationinformation),]
   dup_muts_nu = length(unique(dup_muts$mutationinformation))
@@ -73,14 +77,26 @@ if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformati
              , "\n\nExtracting df with unique mutations only"))
   my_df_u = my_df[!duplicated(my_df$mutationinformation),]
 }else{
-  cat(paste0("No duplicate mutations detected"))
+  cat(paste0("\nNo duplicate mutations detected"))
   my_df_u = my_df
 }
 
 upos = unique(my_df_u$position)
-cat("Dim of clean df:"); cat(dim(my_df_u))
+cat("\nDim of clean df:"); cat(dim(my_df_u))
 cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n")
 
+
+###########################
+# extract mutations <10Angstroms
+###########################
+# print() explicitly: top-level values are not auto-printed when this file is source()d
+print(table(my_df_u$ligand_distance < 10, useNA = "ifany"))
+
+# which() drops NA distances; a bare logical index would return an all-NA row for each NA
+my_df_u_lig = my_df_u[which(my_df_u$ligand_distance < 10), ]
+angstroms_symbol = "\u212b"
+cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))
+
 ########################################################################
 # end of data extraction and cleaning for plots                        #
 ########################################################################