getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting")
getwd()

#########################################################
# TASK: output a barplot by position, with each bar segment coloured by
# its DUET stability value and NO coloured positions indicated on the axis.
#
# Output: <plotdir>/barplot_coloured_PS.svg
#########################################################

############################################################
# 1: Load required packages and helper scripts
############################################################

#source("~/git/LSHTM_analysis/scripts/Header_TT.R")
library(ggplot2)
library(data.table)

# Both scripts are resolved relative to the working directory set above.
source("barplot_colour_function.R")
source("plotting_data.R")

# After sourcing, the following are expected to exist (they are used or
# removed below but are not defined in this file):
#   dfs:         my_df, my_df_u, my_df_u_lig, dup_muts, mut_pos_cols
#   directories: datadir, indir, outdir, plotdir
#   variables:   drug, gene, gene_match, upos, angstroms_symbol

cat(paste0("Directories imported:"
           , "\ndatadir:", datadir
           , "\nindir:", indir
           , "\noutdir:", outdir
           , "\nplotdir:", plotdir))

cat(paste0("Variables imported:"
           , "\ndrug:", drug
           , "\ngene:", gene
           , "\ngene_match:", gene_match
           , "\nLength of upos:", length(upos)
           , "\nAngstrom symbol:", angstroms_symbol))

# Drop the imported objects this script does not need.
rm(my_df, upos, dup_muts, my_df_u_lig)

#=======
# output
#=======
print(paste0("plot will be in:", plotdir))
bp_subcols_duet <- "barplot_coloured_PS.svg"
plot_bp_subcols_duet <- file.path(plotdir, bp_subcols_duet)

#===================
# Data for plots
#===================
# Work on a local copy so the imported data frame is left untouched.
df <- my_df_u

# sanity checks
str(df)
upos <- unique(df$position)

# duet_outcome must be a factor; convert it if it is not already one.
if (is.factor(df$duet_outcome)) {
  print("duet_outcome is factor")
} else {
  print("convert duet_outcome to factor")
  df$duet_outcome <- as.factor(df$duet_outcome)
}

is.factor(df$duet_outcome)

table(df$duet_outcome)

# Overall range of the scaled score: should be -1 and 1.
min(df$duet_scaled)
max(df$duet_scaled)

# Range of the scaled score within each outcome class.
tapply(df$duet_scaled, df$duet_outcome, min)
tapply(df$duet_scaled, df$duet_outcome, max)

#******************
# generate plot
#******************

#==========================
# Stacked barplot (unordered) of counts per position. Each record in df
# contributes one bar segment, filled according to its duet_outcome and
# its scaled DUET stability score.
#
# Scores are normalised (range between -1 and 1) to aid visualisation.
#
# NOTE: the fill is a discrete scale, so there is one colour per unique
# outcome/score combination rather than a continuous gradient. The
# palette therefore has to be generated separately (see below).
#==========================

# Colour function inputs, in this script:
#   df       = df
#   group    = duet_outcome
#   subgroup = normalised score, i.e. duet_scaled

# unique values in the normalised data (for inspection)
u <- unique(df$duet_scaled)

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Optional rounding: reduces the number of distinct scores and hence the
# number of colours. Switch my_grp to the rounded column to use it.
n <- 3
df$duet_scaledR <- round(df$duet_scaled, n)
ur <- unique(df$duet_scaledR)

#my_grp <- df$duet_scaledR # rounding
my_grp <- df$duet_scaled # no rounding
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# "group" combines the outcome name and the score, so that a colour can be
# assigned to every unique value of this column.
df$group <- paste0(df$duet_outcome, "_", my_grp)

# Build the palette for the groups defined above.
# NOTE(review): "my_grp" is the name of the script-level vector above, not
# a column of df. ColourPalleteMulti comes from barplot_colour_function.R
# (not visible here) and presumably resolves the name itself -- confirm
# before renaming or relocating my_grp.
colours <- ColourPalleteMulti(df, "duet_outcome", "my_grp")
print(paste0("Colour palette generated for: ", length(colours), " colours"))

my_title <- "Protein stability (DUET)"

# sizes passed to axis.text.{x,y} in theme() below
my_xaxls <- 13
my_yaxls <- 15

# sizes passed to axis.title.{x,y} in theme() below
my_xaxts <- 15
my_yaxts <- 15

#******************
# generate plot: NO axis colours
#******************
print(paste0("plot name:", plot_bp_subcols_duet))

g <- ggplot(df, aes(factor(position, ordered = TRUE)))

outPlot <- g +
  geom_bar(aes(fill = group), colour = "grey") +
  scale_fill_manual(values = colours
                    , guide = "none") +
  theme(axis.text.x = element_text(size = my_xaxls
                                   , angle = 90
                                   , hjust = 1
                                   , vjust = 0.4)
        , axis.text.y = element_text(size = my_yaxls
                                     , angle = 0
                                     , hjust = 1
                                     , vjust = 0)
        , axis.title.x = element_text(size = my_xaxts)
        , axis.title.y = element_text(size = my_yaxts)) +
  labs(title = ""
       #title = my_title
       , x = "position"
       , y = "Frequency")

svg(plot_bp_subcols_duet, width = 26, height = 4)

# ggplot objects are only rendered when printed, so this is where plotting
# errors surface. Always close the svg device, even on failure, so that a
# broken run does not leave a graphics device open; the error itself is
# still propagated to the caller.
tryCatch(
  print(outPlot),
  finally = dev.off()
)

# for sanity and good practice
rm(df)
#======================= end of plot

# axis colours labels
# https://stackoverflow.com/questions/38862303/customize-ggplot2-axis-labels-with-different-colors
# https://stackoverflow.com/questions/56543485/plot-coloured-boxes-around-axis-label