From 12c24d974e2d5b556df593da004f8bb955d36add Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 31 Jan 2020 16:39:22 +0000 Subject: [PATCH] added script for coloured axis for ligand affinity --- .../plotting/barplots_subcolours_aa_LIG.R | 296 ++++++++++++++++++ 1 file changed, 296 insertions(+) create mode 100644 mcsm_analysis/pyrazinamide/scripts/plotting/barplots_subcolours_aa_LIG.R diff --git a/mcsm_analysis/pyrazinamide/scripts/plotting/barplots_subcolours_aa_LIG.R b/mcsm_analysis/pyrazinamide/scripts/plotting/barplots_subcolours_aa_LIG.R new file mode 100644 index 0000000..432749e --- /dev/null +++ b/mcsm_analysis/pyrazinamide/scripts/plotting/barplots_subcolours_aa_LIG.R @@ -0,0 +1,296 @@ +getwd() +setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") +getwd() + +############################################################ +# 1: Installing and loading required packages and functions +############################################################ + +source("../Header_TT.R") +source("../barplot_colour_function.R") + +############################################################ +# Output dir for plots +############################################################ +out_dir = "~/git/Data/pyrazinamide/output/plots" + +############################################################ +# 2: call script the prepares the data with columns containing +# colours for axis labels +############################################################ + +source("subcols_axis_LIG.R") + +# this should return +#mut_pos_cols: 52, 4 +#my_df: 169, 39 + +# clear excess variable +# "mut_pos_cols" is just for inspection in case you need to cross check +# position numbers and colours +# open file from deskptop ("sample_axis_cols") for cross checking + +table(mut_pos_cols$lab_bg) + +sum( table(mut_pos_cols$lab_bg) ) == nrow(mut_pos_cols) # should be True + +table(mut_pos_cols$lab_bg2) + +sum( table(mut_pos_cols$lab_bg2) ) == nrow(mut_pos_cols) # should be True + +table(mut_pos_cols$lab_fg) + +sum( table(mut_pos_cols$lab_fg) ) == nrow(mut_pos_cols) # should be True + +# very important!: should be the length of the unique positions +my_axis_colours = mut_pos_cols$lab_fg + +# now clear mut_pos_cols +rm(mut_pos_cols) + +########################### +# 2: Plot: Lig scores +########################### +#========================== +# Plot 2: Barplot with scores (unordered) +# corresponds to Lig_outcome +# Stacked Barplot with colours: Lig_outcome @ position coloured by +# stability scores. This is a barplot where each bar corresponds +# to a SNP and is coloured by its corresponding PredAff stability value. +# Normalised values (range between -1 and 1 ) to aid visualisation +# NOTE: since barplot plots discrete values, colour = score, so number of +# colours will be equal to the no. of unique normalised scores +# rather than a continuous scale +# will require generating the colour scale separately. +#============================ +# sanity checks +upos = unique(my_df$Position) + +str(my_df$Lig_outcome) + +colnames(my_df) + +#=========================== +# Data preparation for plots +#=========================== +#!!!!!!!!!!!!!!!!! +# REASSIGNMENT +df <- my_df +#!!!!!!!!!!!!!!!!! + +rm(my_df) + +# sanity checks +# should be a factor +is.factor(df$Lig_outcome); +#FALSE + +df$Lig_outcome = as.factor(df$Lig_outcome) +is.factor(df$Lig_outcome); +#TRUE + +table(df$Lig_outcome) + +# check the range +min(df$ratioPredAff) +max(df$ratioPredAff) + +# sanity checks +# very important!!!! +tapply(df$ratioPredAff, df$Lig_outcome, min) + +tapply(df$ratioPredAff, df$Lig_outcome, max) + +# My colour FUNCTION: based on group and subgroup +# in my case; +# df = df +# group = Lig_outcome +# subgroup = normalised score i.e ratioPredAff + +# Prepare data: round off ratioPredAff scores +# round off to 3 significant digits: +# 323 if no rounding is performed: used to generate the original graph +# 287 if rounded to 3 places +# FIXME: check if reducing precicion creates any ML prob + +# check unique values in normalised data +u = unique(df$ratioPredAff) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Run this section if rounding is to be used +# specify number for rounding +n = 3 +df$ratioPredAffR = round(df$ratioPredAff, n) +u = unique(df$ratioPredAffR) + +# create an extra column called group which contains the "gp name and score" +# so colours can be generated for each unique values in this column +my_grp = df$ratioPredAffR +df$group <- paste0(df$Lig_outcome, "_", my_grp, sep = "") + +# ELSE +# uncomment the below if rounding is not required + +#my_grp = df$ratioPredAff +#df$group <- paste0(df$Lig_outcome, "_", my_grp, sep = "") + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#****************** +# generate plot +#****************** + +# Call the function to create the palette based on the group defined above +colours <- ColourPalleteMulti(df, "Lig_outcome", "my_grp") +my_title = "Ligand Affinity" +library(ggplot2) + +# axis label size +my_xaxls = 13 +my_yaxls = 15 + +# axes text size +my_xaxts = 15 +my_yaxts = 15 + +# no ordering of x-axis according to frequency +g = ggplot(df, aes(factor(Position, ordered = T))) +g + + geom_bar(aes(fill = group), colour = "grey") + + scale_fill_manual( values = colours + , guide = 'none') + + theme( axis.text.x = element_text(size = my_xaxls + , angle = 90 + , hjust = 1 + , vjust = 0.4) + , axis.text.y = element_text(size = my_yaxls + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = my_xaxts) + , axis.title.y = element_text(size = my_yaxts ) ) + + labs(title = my_title + , x = "Position" + , y = "Frequency") + +#======================== +# plot with axis colours +#======================== +class(df$lab_bg) +# make this a named vector + +# define cartesian coord +my_xlim = length(unique(df$Position)); my_xlim + +# axis label size +my_xals = 15 +my_yals = 15 + +# axes text size +my_xats = 15 +my_yats = 18 + +# using geom_tile +g = ggplot(df, aes(factor(Position, ordered = T))) +g + + coord_cartesian(xlim = c(1, my_xlim) + , ylim = c(0, 6) + , clip = "off") + + + geom_bar(aes(fill = group), colour = "grey") + + scale_fill_manual( values = colours + , guide = 'none') + + geom_tile(aes(,-0.8, width = 0.95, height = 0.85) + , fill = df$lab_bg) + + geom_tile(aes(,-1.2, width = 0.95, height = -0.2) + , fill = df$lab_bg2) + + + # Here it's important to specify that your axis goes from 1 to max number of levels + theme( axis.text.x = element_text(size = my_xats + , angle = 90 + , hjust = 1 + , vjust = 0.4 + , colour = my_axis_colours) + , axis.text.y = element_text(size = my_yats + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = my_xals) + , axis.title.y = element_text(size = my_yals ) + , axis.ticks.x = element_blank() + ) + + labs(title = my_title + , x = "Position" + , y = "Frequency") + +#======================== +# output plot as svg/png +#======================== +class(df$lab_bg) +# make this a named vector + +# define cartesian coord +my_xlim = length(unique(df$Position)); my_xlim + +# axis label size +my_xals = 18 +my_yals = 18 + +# axes text size +my_xats = 16 #14 in PS +my_yats = 18 + +# set output dir for plots +#getwd() +#setwd("~/git/Data/pyrazinamide/output/plots") +#getwd() + +plot_name = "barplot_LIG_acoloured.svg" +my_plot_name = paste0(out_dir, "/", plot_name); my_plot_name + +svg(my_plot_name, width = 26, height = 4) + +g = ggplot(df, aes(factor(Position, ordered = T))) + +outFile = g + + coord_cartesian(xlim = c(1, my_xlim) + , ylim = c(0, 6) + , clip = "off" + ) + + + geom_bar(aes(fill = group), colour = "grey") + + scale_fill_manual( values = colours + , guide = 'none') + +# geom_tile(aes(,-0.6, width = 0.9, height = 0.7) +# , fill = df$lab_bg) + +# geom_tile(aes(,-1, width = 0.9, height = 0.3) +# , fill = df$lab_bg2) + + geom_tile(aes(,-0.8, width = 0.95, height = 0.85) + , fill = df$lab_bg) + + geom_tile(aes(,-1.2, width = 0.95, height = -0.2) + , fill = df$lab_bg2) + + +# Here it's important to specify that your axis goes from 1 to max number of levels + theme( axis.text.x = element_text(size = my_xats + , angle = 90 + , hjust = 1 + , vjust = 0.4 + , colour = my_axis_colours) + , axis.text.y = element_text(size = my_yats + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = my_xals) + , axis.title.y = element_text(size = my_yals ) + , axis.ticks.x = element_blank() + ) + + labs(title = "" + , x = "Position" + , y = "Frequency") + + +print(outFile) +dev.off() + +# for sanity and good practice +#rm(df)