From db66fdb84460aacd1c596226aa649bf33eb3631c Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 29 Jun 2021 14:00:10 +0100 Subject: [PATCH] added barplots_subcolours.R that generates heatmap style barplots --- scripts/plotting/barplots_subcolours.R | 237 +++++++++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100755 scripts/plotting/barplots_subcolours.R diff --git a/scripts/plotting/barplots_subcolours.R b/scripts/plotting/barplots_subcolours.R new file mode 100755 index 0000000..a1f7b18 --- /dev/null +++ b/scripts/plotting/barplots_subcolours.R @@ -0,0 +1,237 @@ +#!/usr/bin/env Rscript +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting") +getwd() +source("Header_TT.R") + +drug = 'streptomycin' +gene = 'gid' + +spec = matrix(c( + "drug" , "d", 1, "character", + "gene" , "g", 1, "character", + "data_file1" , "fa", 2, "character", + "data_file2" , "fb", 2, "character" +), byrow = TRUE, ncol = 4) + +opt = getopt(spec) + +drug = opt$drug +gene = opt$gene +infile_params = opt$data_file1 +infile_metadata = opt$data_file2 + +if(is.null(drug)|is.null(gene)) { + stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)") +} + +#=========== +# Input +#=========== +source("../functions/barplot_colour_function.R") + +source("get_plotting_dfs.R") + +#=========== +# output +#=========== +# PS +bp_subcols_duet = "barplot_coloured_PS.svg" +plot_bp_subcols_duet = paste0(plotdir, "/", bp_subcols_duet) + +# LIG +bp_subcols_lig = "barplot_coloured_LIG.svg" +plot_bp_subcols_lig = paste0(plotdir, "/", bp_subcols_lig) + +############################################################################## +#==================== +# Data for plots: PS +#==================== +# sanity checks +str(my_df_u) +upos = unique(my_df_u$position) + +# should be a factor +if (is.factor(my_df_u$duet_outcome)){ + print("duet_outcome is factor") +}else{ + print("convert duet_outcome to factor") + my_df_u$duet_outcome = as.factor(my_df_u$duet_outcome) +} + +is.factor(my_df_u$duet_outcome) +table(my_df_u$duet_outcome) + +# should be -1 and 1 +min(my_df_u$duet_scaled) +max(my_df_u$duet_scaled) + +tapply(my_df_u$duet_scaled, my_df_u$duet_outcome, min) +tapply(my_df_u$duet_scaled, my_df_u$duet_outcome, max) + +#======================================================= +# Barplot (unordered) colour each nsSNP by stability +## My colour FUNCTION: based on group and subgroup +# in my case; +# df = df +# group = duet_outcome +# subgroup = normalised score i.e duet_scaled +#======================================================== +# check unique values in normalised data +u = unique(my_df_u$duet_scaled) +my_grp = my_df_u$duet_scaled #no rounding + +#------------------------------------------------- +# Run this section if rounding is to be used +#n = 3 +#my_df_u$duet_scaledR = round(my_df_u$duet_scaled, n) +#ur = unique(my_df_u$duet_scaledR) +#my_grp = my_df_u$duet_scaledR # rounding +#--------------------------------------------------- + +my_df_u$group <- paste0(my_df_u$duet_outcome, "_", my_grp, sep = "") + +# Call the function to create the palette based on the group defined above +colours <- ColourPalleteMulti(my_df_u, "duet_outcome", "my_grp") +print(paste0("Colour palette generated for: ", length(colours), " colours")) +my_title = "Protein stability (DUET)" + +# axis label size +my_xaxls = 12 +my_yaxls = 20 + +# axes text size +my_xaxts = 18 +my_yaxts = 20 + +#******************** +# generate plot: PS +# NO axis colours +#******************** +print(paste0("plot name:", plot_bp_subcols_duet)) +svg(plot_bp_subcols_duet, width = 26, height = 4) + +g = ggplot(my_df_u, aes(factor(position, ordered = T))) +outPlot_bp_ps = g + + geom_bar(aes(fill = group), colour = "grey") + + scale_fill_manual( values = colours + , guide = "none") + + theme( axis.text.x = element_text(size = my_xaxls + , angle = 90 + , hjust = 1 + , vjust = 0.4) + , axis.text.y = element_text(size = my_yaxls + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = my_xaxts) + , axis.title.y = element_text(size = my_yaxts ) ) + + labs(title = "" + #title = my_title + , x = "Position" + , y = "Frequency") + +print(outPlot_bp_ps) +dev.off() + +#################################################### +#==================== +# Data for plots: LIG +#==================== +# sanity checks +str(my_df_u_lig) +upos = unique(my_df_u_lig$position) + +# should be a factor +if (is.factor(my_df_u_lig$ligand_outcome)){ + print("ligand_outcome is factor") +}else{ + print("convert ligand_outcome to factor") + my_df_u_lig$ligand_outcome = as.factor(my_df_u_lig$ligand_outcome) +} + +is.factor(my_df_u_lig$ligand_outcome) +table(my_df_u_lig$ligand_outcome) + +# should be -1 and 1 +min(my_df_u_lig$affinity_scaled) +max(my_df_u_lig$affinity_scaled) + +tapply(my_df_u_lig$affinity_scaled, my_df_u_lig$ligand_outcome, min) +tapply(my_df_u_lig$affinity_scaled, my_df_u_lig$ligand_outcome, max) + +#======================================================= +# Barplot (unordered) colour each nsSNP by stability +## My colour FUNCTION: based on group and subgroup +# in my case; +# df = my_df_u_lig +# group = ligand_outcome +# subgroup = normalised score i.e affinity_scaled +#======================================================== +# check unique values in normalised data +u_lig = unique(my_df_u_lig$affinity_scaled) +my_grp_lig = my_df_u_lig$affinity_scaled #no rounding + +#------------------------------------------------- +# Run this section if rounding is to be used +#n = 3 +#my_df_u_lig$affinity_scaledR = round(my_df_u_lig$affinity_scaled, n) +#ur_lig = unique(my_df_u_lig$affinity_scaledR) +#my_grp_lig = my_df_u_lig$affinity_scaledR # rounding +#--------------------------------------------------- + +my_df_u_lig$group_lig <- paste0(my_df_u_lig$ligand_outcome, "_" + , my_grp_lig + , sep = "") + +# Call the function to create the palette based on the group defined above +colours_lig <- ColourPalleteMulti(my_df_u_lig + , "ligand_outcome" + , "my_grp_lig") + +print(paste0("Colour palette generated for: " + , length(colours_lig) + , " colours_lig")) + +my_title_lig = "Ligand Affinity" + +# axis label size +my_xaxls = 12 +my_yaxls = 20 + +# axes text size +my_xaxts = 18 +my_yaxts = 20 + +#****************** +# generate plot: LIG +# NO axis colours +#****************** +print(paste0("plot name:", plot_bp_subcols_lig)) +svg(plot_bp_subcols_lig, width = 26, height = 4) + +g2 = ggplot(my_df_u_lig, aes(factor(position, ordered = T))) +outPlot_bp_lig = g2 + + geom_bar(aes(fill = group_lig), colour = "grey") + + scale_fill_manual( values = colours_lig + , guide = "none") + + theme( axis.text.x = element_text(size = my_xaxls + , angle = 90 + , hjust = 1 + , vjust = 0.4) + , axis.text.y = element_text(size = my_yaxls + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = my_xaxts) + , axis.title.y = element_text(size = my_yaxts ) ) + + labs(title = "" + #title = my_title_lig + , x = "Position" + , y = "Frequency") + +print(outPlot_bp_lig) +dev.off() +######################################################################= +# End of script +######################################################################= \ No newline at end of file