diff --git a/scripts/plotting/barplots_subcolours_aa_PS.R b/scripts/plotting/barplots_subcolours_aa_PS.R index f29c872..f6444ce 100644 --- a/scripts/plotting/barplots_subcolours_aa_PS.R +++ b/scripts/plotting/barplots_subcolours_aa_PS.R @@ -3,52 +3,92 @@ setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() ######################################################### -# TASK: - +# TASK: output barplot by position with each bar coloured by +# its stability value and active site positions indicated +# according to colour specified in "subcols_axis_PS.R" ######################################################### +#======================================================================= + ############################################################ # 1: Installing and loading required packages and functions ############################################################ #source("Header_TT.R") +library(ggplot2) +library(data.table) source("barplot_colour_function.R") - -############################################################ -# 2: Read file: struct params data with columns containing -# colours for axis labels -############################################################ #source("subcols_axis.R") source("subcols_axis_PS.R") -# this should return +# should return the following dfs, directories and variables # mut_pos_cols # my_df -# my_df_u: df with unique mutations +# my_df_u +# my_df_u_lig +# dup_muts + +cat(paste0("Directories imported:" + , "\ndatadir:", datadir + , "\nindir:", indir + , "\noutdir:", outdir + , "\nplotdir:", plotdir)) + +cat(paste0("Variables imported:" + , "\ndrug:", drug + , "\ngene:", gene + , "\ngene_match:", gene_match + , "\nLength of upos:", length(upos) + , "\nAngstrom symbol:", angstroms_symbol)) # clear excess variable -# "mut_pos_cols" is just for inspection in case you need to cross check +rm(my_df, upos, dup_muts, my_df_u_lig) +#======================================================================= +#================ +# Inspecting 
mut_pos_cols # position numbers and colours -# open file from deskptop ("sample_axis_cols") for cross checking +# open file from desktop ("sample_axis_cols") for cross checking +#================ table(mut_pos_cols$lab_bg) -sum( table(mut_pos_cols$lab_bg) ) == nrow(mut_pos_cols) # should be True - +check_lab_bg = sum( table(mut_pos_cols$lab_bg) ) == nrow(mut_pos_cols) # should be True +check_lab_bg + table(mut_pos_cols$lab_bg2) -sum( table(mut_pos_cols$lab_bg2) ) == nrow(mut_pos_cols) # should be True +check_lab_bg2 = sum( table(mut_pos_cols$lab_bg2) ) == nrow(mut_pos_cols) # should be True +check_lab_bg2 table(mut_pos_cols$lab_fg) -sum( table(mut_pos_cols$lab_fg) ) == nrow(mut_pos_cols) # should be True +check_lab_fg = sum( table(mut_pos_cols$lab_fg) ) == nrow(mut_pos_cols) # should be True +check_lab_fg + +# sanity checks: +if (check_lab_bg && check_lab_bg2 && check_lab_fg) { + print("PASS: No. of assigned colours match length") +}else{ + print("FAIL: length of assigned colours mismatch") + quit() +} # very important! my_axis_colours = mut_pos_cols$lab_fg # now clear mut_pos_cols -rm(mut_pos_cols) +rm(mut_pos_cols) +#======================================================================= +#================ +# Data for plots +#================ +# REASSIGNMENT as necessary +df = my_df_u + +# sanity checks +str(df) ########################### # 2: Plot: DUET scores ########################### + #========================== # Plot 2: Barplot with scores (unordered) # corresponds to duet_outcome @@ -62,20 +102,12 @@ rm(mut_pos_cols) # will require generating the colour scale separately. 
#============================ # sanity checks -upos = unique(my_df$position) +upos = unique(df$position) -table(my_df$duet_outcome) -table(my_df_u$duet_outcome) +table(df$duet_outcome) +table(df$duet_outcome) -#=========================== -# Data preparation for plots -#=========================== -# REASSIGNMENT as necessary -df <- my_df_u -rm(my_df, my_df_u) - -# add frequency of positions -library(data.table) +# add frequency of positions (from lib data.table) setDT(df)[, pos_count := .N, by = .(position)] # this is cummulative @@ -93,8 +125,8 @@ snp_count = sort(unique(snpsBYpos_df$snpsBYpos)) # sanity checks # should be a factor +df$duet_outcome = as.factor(df$duet_outcome) is.factor(df$duet_outcome) -#TRUE table(df$duet_outcome) @@ -116,13 +148,14 @@ tapply(df$duet_scaled, df$duet_outcome, max) # check unique values in normalised data u = unique(df$duet_scaled) +cat("No. of unique values in normalised data:", length(u)) #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Run this section if rounding is to be used # specify number for rounding -n = 3 -df$duet_scaledR = round(df$duet_scaled, n) -ur = unique(df$duet_scaledR) +#n = 3 +#df$duet_scaledR = round(df$duet_scaled, n) +#ur = unique(df$duet_scaledR) # create an extra column called group which contains the "gp name and score" # so colours can be generated for each unique values in this column @@ -158,7 +191,8 @@ my_yats = 18 #****************** # plot name and location # outdir/ (should be imported from reading file) -print(paste0("plot will be in:", outdir)) +plotdir = paste0(outdir, "/", "plots") #should be imported from reading file +print(paste0("plot will be in:", plotdir)) bp_aa_subcols_duet = "barplot_acoloured_PS.svg" plot_bp_aa_subcols_duet = paste0(outdir, "/plots/", bp_aa_subcols_duet) diff --git a/scripts/plotting/basic_barplots_PS.R b/scripts/plotting/basic_barplots_PS.R index 4e9d0f9..c28332b 100644 --- a/scripts/plotting/basic_barplots_PS.R +++ b/scripts/plotting/basic_barplots_PS.R @@ -4,6 +4,7 
@@ # basic barplots with count of mutations # basic barplots with frequency of count of mutations ######################################################### +#======================================================================= # working dir and loading libraries getwd() setwd("~/git/LSHTM_analysis/scripts/plotting") @@ -14,18 +15,30 @@ library(ggplot2) library(data.table) library(dplyr) source("plotting_data.R") - # should return - #my_df - #my_df_u - #dup_muts -#======================================================== +# should return the following dfs, directories and variables +# my_df +# my_df_u +# my_df_u_lig +# dup_muts + cat(paste0("Directories imported:" , "\ndatadir:", datadir , "\nindir:", indir , "\noutdir:", outdir , "\nplotdir:", plotdir)) + + cat(paste0("Variables imported:" + , "\ndrug:", drug + , "\ngene:", gene + , "\ngene_match:", gene_match + , "\nLength of upos:", length(upos) + , "\nAngstrom symbol:", angstroms_symbol)) + +# clear excess variable +rm(my_df, upos, dup_muts, my_df_u_lig) +#======================================================================= #======= # output #======= @@ -37,17 +50,16 @@ plot_basic_bp_duet = paste0(plotdir,"/", basic_bp_duet) pos_count_duet = "position_count_PS.svg" plot_pos_count_duet = paste0(plotdir, "/", pos_count_duet) -#%%=============================================================== +#======================================================================= #================ # Data for plots #================ # REASSIGNMENT as necessary df = my_df_u -rm(my_df, upos, dup_muts) # sanity checks str(df) -#%%======================================================================= +#======================================================================= #**************** # Plot 1:Count of stabilising and destabilsing muts #**************** @@ -89,7 +101,9 @@ outPlot = g + geom_bar(aes(fill = duet_outcome) print(outPlot) dev.off() -#%%======================================================================= + 
+table(df$duet_outcome) +#======================================================================= #**************** # Plot 2: frequency of positions #**************** @@ -173,6 +187,6 @@ outPlot_pos = g + geom_bar(aes (alpha = 0.5) print(outPlot_pos) dev.off() ######################################################################## -# end of DUET barplots +# end of Ligand barplots ######################################################################## diff --git a/scripts/plotting/plotting_data.R b/scripts/plotting/plotting_data.R index b1b7a2c..b9f2197 100644 --- a/scripts/plotting/plotting_data.R +++ b/scripts/plotting/plotting_data.R @@ -11,6 +11,10 @@ setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() #source("Header_TT.R") +library(ggplot2) +library(data.table) +library(dplyr) + require("getopt", quietly = TRUE) #cmd parse arguments #======================================================== # command line args diff --git a/scripts/plotting/subcols_axis_PS.R b/scripts/plotting/subcols_axis_PS.R index 5b9b0f4..5fe0ab3 100644 --- a/scripts/plotting/subcols_axis_PS.R +++ b/scripts/plotting/subcols_axis_PS.R @@ -1,52 +1,21 @@ +######################################################### +# TASK: Adding colours to positions labels according to +# active site residues. This is so these can be seen promptly +# when visualising the barplot. 
+######################################################### +#======================================================================= getwd() setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() -######################################################### -# TASK: +source("plotting_data.R") + # should return the following dfs and directories + # my_df + # my_df_u + # my_df_u_lig + # dup_muts -######################################################### - -######################################################################## -# Installing and loading required packages and functions # -######################################################################## - -#source("Header_TT.R") -#source("barplot_colour_function.R") - -######################################################################## -# Read file: call script for combining df for PS # -######################################################################## -#????????????? -# -######################################################## -#%% variable assignment: input and output paths & filenames -drug = "pyrazinamide" -gene = "pncA" -gene_match = paste0(gene,"_p.") -cat(gene_match) - -#============= -# directories -#============= -datadir = paste0("~/git/Data") -indir = paste0(datadir, "/", drug, "/input") -outdir = paste0("~/git/Data", "/", drug, "/output") - -#====== -# input -#====== -#in_filename = "mcsm_complex1_normalised.csv" -in_filename_params = paste0(tolower(gene), "_all_params.csv") -infile_params = paste0(outdir, "/", in_filename_params) -cat(paste0("Input file:", infile_params) ) - -#======= -# output -#======= - - -#%%=============================================================== +#======================================================================= ########################### # Read file: struct params ########################### @@ -83,7 +52,7 @@ if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformati upos = unique(my_df_u$position) cat("Dim of clean df:"); 
cat(dim(my_df_u)) cat("\nNo. of unique mutational positions:"); cat(length(upos)) -#====================================================== +#======================================================================= # create a new df with unique position numbers and cols position = unique(my_df$position) #130 position_cols = as.data.frame(position) @@ -235,6 +204,5 @@ rm(aa_cols_ref , lab_bg , lab_bg2 , lab_fg - , position - , dup_muts) + , position)