# Show the working directory, move to the plotting scripts folder (the
# relative source() calls below depend on it), then show it again.
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting")
getwd()

#########################################################
# TASK:
#########################################################

############################################################
# 1: Installing and loading required packages and functions
############################################################

#source("Header_TT.R")
source("barplot_colour_function.R")

############################################################
# 2: Read file: struct params data with columns containing
# colours for axis labels
############################################################

#source("subcols_axis.R")
source("subcols_axis_PS.R")

# The sourced script is expected to leave these objects in the workspace:
#   mut_pos_cols
#   my_df
#   my_df_u: df with unique mutations

# "mut_pos_cols" is only kept for inspection, in case position numbers and
# colours need cross checking (compare with the "sample_axis_cols" file on
# the desktop). It is removed once the axis colours have been extracted.

# Each colour column should account for every row of mut_pos_cols.
table(mut_pos_cols$lab_bg)
nrow(mut_pos_cols) == sum(table(mut_pos_cols$lab_bg)) # should be TRUE

table(mut_pos_cols$lab_bg2)
nrow(mut_pos_cols) == sum(table(mut_pos_cols$lab_bg2)) # should be TRUE

table(mut_pos_cols$lab_fg)
nrow(mut_pos_cols) == sum(table(mut_pos_cols$lab_fg)) # should be TRUE

# very important: these colours are used for the x-axis tick labels
my_axis_colours <- mut_pos_cols$lab_fg

# now clear mut_pos_cols
rm(mut_pos_cols)

###########################
# 3: Plot: DUET scores
###########################
#==========================
# Plot 2: Barplot with scores (unordered)
# corresponds to duet_outcome
# Stacked barplot with colours: duet_outcome @ position coloured by
# stability scores. Each bar segment corresponds to a SNP and is coloured
# by its DUET stability value.
# Normalised values (range between -1 and 1) are used to aid visualisation.
# NOTE: a barplot fills by discrete values, so colour = score and the
# number of colours will be equal to the no.
# of unique normalised scores rather than a continuous scale, so the
# colour scale has to be generated separately.
#============================

# sanity checks
upos <- unique(my_df$position)
table(my_df$duet_outcome)
table(my_df_u$duet_outcome)

#===========================
# Data preparation for plots
#===========================
# REASSIGNMENT as necessary: work on the unique-mutation data only
df <- my_df_u
rm(my_df, my_df_u)

# add frequency of positions: every row gets the number of rows that share
# its position
library(data.table)
setDT(df)[, pos_count := .N, by = .(position)]

# this is cumulative: the table counts rows, so a position with k rows
# contributes k entries to the count for k
table(df$pos_count)

# use group by on this: pos_count is constant within a position, so the
# mean is simply that position's count (one row per position)
library(dplyr)
snpsBYpos_df <- df %>%
  group_by(position) %>%
  summarize(snpsBYpos = mean(pos_count))

table(snpsBYpos_df$snpsBYpos)

# distinct per-position counts, ascending; the maximum sets the plot's ylim
snp_count <- sort(unique(snpsBYpos_df$snpsBYpos))

# sanity checks
# should be a factor
is.factor(df$duet_outcome)
#TRUE

table(df$duet_outcome)

# should be -1 and 1
min(df$duet_scaled)
max(df$duet_scaled)

# sanity checks
# very important!!!! range of the scaled score within each outcome
tapply(df$duet_scaled, df$duet_outcome, min)
tapply(df$duet_scaled, df$duet_outcome, max)

# My colour FUNCTION: based on group and subgroup
# in my case;
# df = df
# group = duet_outcome
# subgroup = normalised score i.e duet_scaled

# check unique values in normalised data
u <- unique(df$duet_scaled)

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Run this section if rounding is to be used
# specify number for rounding
n <- 3
df$duet_scaledR <- round(df$duet_scaled, n)
ur <- unique(df$duet_scaledR)

# create an extra column called group which contains the "gp name and score"
# so colours can be generated for each unique value in this column
#my_grp <- df$duet_scaledR # rounding
my_grp <- df$duet_scaled # no rounding
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# paste0() has no `sep` argument (a stray sep = "" is just pasted on as an
# extra empty string), so it is not passed here; the labels are unchanged.
df$group <- paste0(df$duet_outcome, "_", my_grp)

# Call the function to create the palette based on the group defined above.
# NOTE(review): "my_grp" is not a column of df; this call presumably relies
# on ColourPalleteMulti resolving the name to the global vector my_grp
# defined above -- confirm against barplot_colour_function.R.
colours <- ColourPalleteMulti(df, "duet_outcome", "my_grp")
print(paste0("Colour palette generated for: ", length(colours), " colours"))

my_title <- "Protein stability (DUET)"
#========================
# plot with axis colours
#========================
class(df$lab_bg)

# define cartesian coord: one x slot per unique position
my_xlim <- length(unique(df$position))
my_xlim

# axis label size
my_xals <- 18
my_yals <- 18

# axes text size
my_xats <- 14
my_yats <- 18

#******************
# generate plot: with axis colours
#******************
# plot name and location
# outdir/ (should be imported from reading file)
print(paste0("plot will be in:", outdir))

bp_aa_subcols_duet <- "barplot_acoloured_PS.svg"
plot_bp_aa_subcols_duet <- paste0(outdir, "/plots/", bp_aa_subcols_duet)

print(paste0("plot name:", plot_bp_aa_subcols_duet))

svg(plot_bp_aa_subcols_duet, width = 26, height = 4)

# Build and draw the plot inside tryCatch() so the svg device opened above
# is always closed, even if constructing or printing the plot fails.
tryCatch({
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  g <- ggplot(df, aes(factor(position, ordered = TRUE)))

  outPlot <- g +
    # ylim starts at 0, but clip = "off" lets the tiles below be drawn
    # underneath the bars, outside the panel
    coord_cartesian(xlim = c(1, my_xlim)
                    #, ylim = c(0, 6)
                    , ylim = c(0, max(snp_count))
                    , clip = "off") +
    # one bar per position, stacked by group; fill colours come from the
    # generated palette and the legend is suppressed
    geom_bar(aes(fill = group), colour = "grey") +
    scale_fill_manual(values = colours
                      , guide = "none") +
    # two rows of tiles at negative y, coloured from lab_bg and lab_bg2
    geom_tile(aes(y = -0.8, width = 0.95, height = 0.85)
              , fill = df$lab_bg) +
    geom_tile(aes(y = -1.2, width = 0.95, height = -0.2)
              , fill = df$lab_bg2) +
    # Here it's important to specify that your axis goes from 1 to max
    # number of levels
    theme(axis.text.x = element_text(size = my_xats
                                     , angle = 90
                                     , hjust = 1
                                     , vjust = 0.4
                                     , colour = my_axis_colours)
          , axis.text.y = element_text(size = my_yats
                                       , angle = 0
                                       , hjust = 1
                                       , vjust = 0)
          , axis.title.x = element_text(size = my_xals)
          , axis.title.y = element_text(size = my_yals)
          , axis.ticks.x = element_blank()) +
    labs(title = ""
         , x = "position"
         , y = "Frequency")

  print(outPlot)
}, finally = {
  dev.off()
})

#!!!!!!!!!!!!!!!!
# Warning message (caused by passing the my_axis_colours vector as `colour`):
# Vectorized input to `element_text()` is not officially supported.
# Results may be unexpected or may change in future versions of ggplot2.
#!!!!!!!!!!!!!!!!!

# for sanity and good practice
#rm(df)