From 41ff051dc9f40bdf0f7e1395208febf8ef20fb33 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Wed, 9 Sep 2020 18:57:28 +0100 Subject: [PATCH] script to generate combined ps plot wit af and or --- scripts/plotting/ps_plots_combined.R | 251 +++++++++++++++++++++++++++ 1 file changed, 251 insertions(+) create mode 100644 scripts/plotting/ps_plots_combined.R diff --git a/scripts/plotting/ps_plots_combined.R b/scripts/plotting/ps_plots_combined.R new file mode 100644 index 0000000..afcdc25 --- /dev/null +++ b/scripts/plotting/ps_plots_combined.R @@ -0,0 +1,251 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: AF, OR and stability plots: PS +# Output: 1 SVG + +######################################################### +# Installing and loading required packages +########################################################## +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + +source("Header_TT.R") +library(ggplot2) +library(data.table) +#source("combining_dfs_plotting.R") +source("barplot_colour_function.R") +source("subcols_axis.R") + +########################### +# Data for PS plots +# you need merged_df3_comp/merged_df_comp +# since these have unique SNPs +########################### + +no_or_index = which(is.na(my_df_u_cols$or_mychisq)) + +my_df_u_cols_clean = my_df_u_cols[-no_or_index,] + +#%%%%%%%%%%%%%%%%%%%%%%%% +# REASSIGNMENT +df = my_df_u_cols_clean +#%%%%%%%%%%%%%%%%%%%%%%%%% +cols_to_select = colnames(mut_pos_cols) + +mut_pos_cols_clean = df[colnames(df)%in%cols_to_select] +mut_pos_cols_clean = unique(mut_pos_cols_clean[, 1:length(mut_pos_cols_clean)]) + +######################################################################## +# end of data extraction and cleaning for plots # +######################################################################## + +#=========================== +# Barplot with axis colours: +#=========================== +#================ +# Inspecting mut_pos_cols +# position numbers and colours and assigning axis colours based on lab_fg +# of the correct df +# open file from desktop ("sample_axis_cols") for cross checking +#================ +# very important! +#my_axis_colours = mut_pos_cols$lab_fg + +if (nrow(mut_pos_cols_clean) == length(unique(df$position)) ){ + print("PASS: lengths checked, assigning axis colours") + my_axis_colours = mut_pos_cols_clean$lab_fg + cat("length of axis colours:", length(my_axis_colours) + , "\nwhich corresponds to the number of positions on the x-axis of the plot") +}else{ + print("FAIL:lengths mismatch, could not assign axis colours") + quit() +} + +# unique positions +upos = unique(df$position); length(upos) + +table(df$duet_outcome) + +# add frequency of positions (from lib data.table) +setDT(df)[, pos_count := .N, by = .(position)] + +# this is cummulative +table(df$pos_count) + +# use group by on this +library(dplyr) +snpsBYpos_df <- df %>% + group_by(position) %>% + summarize(snpsBYpos = mean(pos_count)) + +table(snpsBYpos_df$snpsBYpos) + +snp_count = sort(unique(snpsBYpos_df$snpsBYpos)) + +# sanity checks +# should be a factor +if (is.factor(df$duet_outcome)){ + print("duet_outcome is factor") +}else{ + print("converting duet_outcome to factor") + df$duet_outcome = as.factor(df$duet_outcome) +} + +is.factor(df$duet_outcome) + +# should be -1 and 1 +min(df$duet_scaled) +max(df$duet_scaled) + +# sanity checks +tapply(df$duet_scaled, df$duet_outcome, min) +tapply(df$duet_scaled, df$duet_outcome, max) + +# My colour FUNCTION: based on group and subgroup +# in my case; +# df = df +# group = duet_outcome +# subgroup = normalised score i.e duet_scaled + +# check unique values in normalised data +u = unique(df$duet_scaled) +cat("No. of unique values in normalised data:", length(u)) + +# Define group +# Create an extra column called group which contains the "gp name and score" +# so colours can be generated for each unique values in this column +my_grp = df$duet_scaled # no rounding +df$group <- paste0(df$duet_outcome, "_", my_grp, sep = "") + +# Call the function to create the palette based on the group defined above +colours <- ColourPalleteMulti(df, "duet_outcome", "my_grp") +print(paste0("Colour palette generated for: ", length(colours), " colours")) +my_title = "Protein stability (DUET)" +cat("No. of axis colours: ", length(my_axis_colours)) + +#****************** +# generate plot: barplot unordered by frequency with axis colours +#****************** +class(df$lab_bg) + +# define cartesian coord +my_xlim = length(unique(df$position)); my_xlim + +# axis label size +my_xals = 18 +my_yals = 18 + +# axes text size +my_xats = 14 +my_yats = 18 + +g3 = ggplot(df, aes(factor(position, ordered = T))) + +p3 = g3 + + coord_cartesian(xlim = c(1, my_xlim) + #, ylim = c(0, 6) + , ylim = c(0, max(snp_count)) + , clip = "off") + + geom_bar(aes(fill = group), colour = "grey") + + scale_fill_manual(values = colours + , guide = "none") + + geom_tile(aes(,-0.8, width = 0.95, height = 0.85) + , fill = df$lab_bg) + + geom_tile(aes(,-1.2, width = 0.95, height = -0.2) + , fill = df$lab_bg2) + + + # Here it"s important to specify that your axis goes from 1 to max number of levels + theme(axis.text.x = element_text(size = my_xats + , angle = 90 + , hjust = 1 + , vjust = 0.4 + , colour = my_axis_colours) + , axis.text.y = element_text(size = my_yats + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = my_xals) + , axis.title.y = element_text(size = my_yals+2 ) + , axis.ticks.x = element_blank()) + + labs(title = "" + #title = my_title + , x = "Position" + , y = "Frequency") + +p3 +#================= +# generate plot: AF by position unordered, not coloured +#================= +my_ats = 20 # axis text size +my_als = 22 # axis label size + +g1 = ggplot(df) +p1 = g1 + + geom_bar(aes(x = factor(position, ordered = T) + , y = af*100 + #, fill = duet_outcome + ) + , color = "black" + , fill = "lightgrey" + , stat = "identity") + + theme(axis.text.x = element_blank() + , axis.text.y = element_text(size = my_ats + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_blank() + , axis.title.y = element_text(size = my_als) ) + + labs(title = "" + #, size = 100 + #, x = "Wild-type position" + , y = "AF(%)") + +p1 +#================= +# generate plot: OR by position unordered +#================= +my_ats = 20 # axis text size +my_als = 22 # axis label size + + +g2 = ggplot(df) + +p2 = g2 + + geom_bar(aes(x = factor(position, ordered = T) + , y = or_mychisq + #, fill = duet_outcome + ) + , colour = "black" + , fill = "lightgray" + , stat = "identity") + + theme(axis.text.x = element_blank() + , axis.text.y = element_text(size = my_ats + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_blank() + , axis.title.y = element_text(size = my_als) ) + + labs(#title = "OR by position" + #, x = "Wild-type position" + y = "OR") + +p2 + +######################################################################## +# end of DUET barplots # +######################################################################## +#======= +# output +#======= +ps_plots_combined = "af_or_combined_PS.svg" +plot_ps_plots_combined = paste0(plotdir,"/", ps_plots_combined ) + +svg(plot_ps_plots_combined , width = 26, height = 12) + +#theme_set(theme_gray()) # to preserve default theme +printFile = cowplot::plot_grid(p1, p2, p3 + , ncol = 1 + , align = 'v') +printFile +dev.off()