#!/usr/bin/env Rscript ######################################################### # TASK: AF, OR and stability plots: PS # Output: 1 SVG ######################################################### # Installing and loading required packages ########################################################## getwd() setwd("~/git/LSHTM_analysis/scripts/plotting/") getwd() source("Header_TT.R") library(ggplot2) library(data.table) #source("combining_dfs_plotting.R") source("barplot_colour_function.R") source("subcols_axis.R") ########################### # Data for PS plots # you need merged_df3_comp/merged_df_comp # since these have unique SNPs ########################### no_or_index = which(is.na(my_df_u_cols$or_mychisq)) my_df_u_cols_clean = my_df_u_cols[-no_or_index,] #%%%%%%%%%%%%%%%%%%%%%%%% # REASSIGNMENT df = my_df_u_cols_clean #%%%%%%%%%%%%%%%%%%%%%%%%% cols_to_select = colnames(mut_pos_cols) mut_pos_cols_clean = df[colnames(df)%in%cols_to_select] mut_pos_cols_clean = unique(mut_pos_cols_clean[, 1:length(mut_pos_cols_clean)]) ######################################################################## # end of data extraction and cleaning for plots # ######################################################################## #=========================== # Barplot with axis colours: #=========================== #================ # Inspecting mut_pos_cols # position numbers and colours and assigning axis colours based on lab_fg # of the correct df # open file from desktop ("sample_axis_cols") for cross checking #================ # very important! #my_axis_colours = mut_pos_cols$lab_fg if (nrow(mut_pos_cols_clean) == length(unique(df$position)) ){ print("PASS: lengths checked, assigning axis colours") my_axis_colours = mut_pos_cols_clean$lab_fg cat("length of axis colours:", length(my_axis_colours) , "\nwhich corresponds to the number of positions on the x-axis of the plot") }else{ print("FAIL:lengths mismatch, could not assign axis colours") quit() } # unique positions upos = unique(df$position); length(upos) table(df$duet_outcome) # add frequency of positions (from lib data.table) setDT(df)[, pos_count := .N, by = .(position)] # this is cummulative table(df$pos_count) # use group by on this library(dplyr) snpsBYpos_df <- df %>% group_by(position) %>% summarize(snpsBYpos = mean(pos_count)) table(snpsBYpos_df$snpsBYpos) snp_count = sort(unique(snpsBYpos_df$snpsBYpos)) # sanity checks # should be a factor if (is.factor(df$duet_outcome)){ print("duet_outcome is factor") }else{ print("converting duet_outcome to factor") df$duet_outcome = as.factor(df$duet_outcome) } is.factor(df$duet_outcome) # should be -1 and 1 min(df$duet_scaled) max(df$duet_scaled) # sanity checks tapply(df$duet_scaled, df$duet_outcome, min) tapply(df$duet_scaled, df$duet_outcome, max) # My colour FUNCTION: based on group and subgroup # in my case; # df = df # group = duet_outcome # subgroup = normalised score i.e duet_scaled # check unique values in normalised data u = unique(df$duet_scaled) cat("No. of unique values in normalised data:", length(u)) # Define group # Create an extra column called group which contains the "gp name and score" # so colours can be generated for each unique values in this column my_grp = df$duet_scaled # no rounding df$group <- paste0(df$duet_outcome, "_", my_grp, sep = "") # Call the function to create the palette based on the group defined above colours <- ColourPalleteMulti(df, "duet_outcome", "my_grp") print(paste0("Colour palette generated for: ", length(colours), " colours")) my_title = "Protein stability (DUET)" cat("No. of axis colours: ", length(my_axis_colours)) #****************** # generate plot: barplot unordered by frequency with axis colours #****************** class(df$lab_bg) # define cartesian coord my_xlim = length(unique(df$position)); my_xlim # axis label size my_xals = 18 my_yals = 18 # axes text size my_xats = 14 my_yats = 18 g3 = ggplot(df, aes(factor(position, ordered = T))) p3 = g3 + coord_cartesian(xlim = c(1, my_xlim) #, ylim = c(0, 6) , ylim = c(0, max(snp_count)) , clip = "off") + geom_bar(aes(fill = group), colour = "grey") + scale_fill_manual(values = colours , guide = "none") + geom_tile(aes(,-0.8, width = 0.95, height = 0.85) , fill = df$lab_bg) + geom_tile(aes(,-1.2, width = 0.95, height = -0.2) , fill = df$lab_bg2) + # Here it"s important to specify that your axis goes from 1 to max number of levels theme(axis.text.x = element_text(size = my_xats , angle = 90 , hjust = 1 , vjust = 0.4 , colour = my_axis_colours) , axis.text.y = element_text(size = my_yats , angle = 0 , hjust = 1 , vjust = 0) , axis.title.x = element_text(size = my_xals) , axis.title.y = element_text(size = my_yals+2 ) , axis.ticks.x = element_blank()) + labs(title = "" #title = my_title , x = "Position" , y = "Frequency") p3 #================= # generate plot: AF by position unordered, not coloured #================= my_ats = 20 # axis text size my_als = 22 # axis label size g1 = ggplot(df) p1 = g1 + geom_bar(aes(x = factor(position, ordered = T) , y = af*100 #, fill = duet_outcome ) , color = "black" , fill = "lightgrey" , stat = "identity") + theme(axis.text.x = element_blank() , axis.text.y = element_text(size = my_ats , angle = 0 , hjust = 1 , vjust = 0) , axis.title.x = element_blank() , axis.title.y = element_text(size = my_als) ) + labs(title = "" #, size = 100 #, x = "Wild-type position" , y = "AF(%)") p1 #================= # generate plot: OR by position unordered #================= my_ats = 20 # axis text size my_als = 22 # axis label size g2 = ggplot(df) p2 = g2 + geom_bar(aes(x = factor(position, ordered = T) , y = or_mychisq #, fill = duet_outcome ) , colour = "black" , fill = "lightgray" , stat = "identity") + scale_y_continuous(limits = c(0, 450))+ theme(axis.text.x = element_blank() , axis.text.y = element_text(size = my_ats , angle = 0 , hjust = 1 , vjust = 0) , axis.title.x = element_blank() , axis.title.y = element_text(size = my_als) ) + labs(#title = "OR by position" #, x = "Wild-type position" y = "OR") p2 ######################################################################## # end of DUET barplots # ######################################################################## #============================ # combined plot 1: UNlabelled #============================ ps_combined = "af_or_combined_PS.svg" plot_ps_combined = paste0(plotdir,"/", ps_combined) cat("combined plot Unlabelled:", plot_ps_combined) svg(plot_ps_combined , width = 26, height = 12) OutPlot_combined = cowplot::plot_grid(p1, p2, p3 , ncol = 1 , align = 'v') OutPlot_combined dev.off() #============================ # combined plot 2: labelled #============================ ps_combined_labelled = "af_or_combined_PS_labelled.svg" plot_ps_combined_labelled = paste0(plotdir,"/", ps_combined_labelled) cat("combined plot Labelled:", plot_ps_combined_labelled) svg(plot_ps_combined_labelled , width = 26, height = 12) OutPlot_combined_labelled = cowplot::plot_grid(p1, p2, p3 , ncol = 1 , labels = c("(a)", "(b)", "(c)") , label_size = 21 , align = 'v' , hjust = -0.4) OutPlot_combined_labelled dev.off()