From 8de26864010e7cee91cf5ee60982f5f6826bf9c9 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 24 Jun 2021 17:34:53 +0100 Subject: [PATCH] added logo_plots.R that now produces all logo plots while sourcing the get_plotting_df.R script --- scripts/plotting/get_plotting_dfs.R | 20 ++- scripts/plotting/logo_combined.R | 2 +- scripts/plotting/logo_plots.R | 253 ++++++++++++++++++++++++++++ 3 files changed, 271 insertions(+), 4 deletions(-) create mode 100755 scripts/plotting/logo_plots.R diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index 893a929..537b84c 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -108,8 +108,9 @@ cat("No. of rows in my_data:", nrow(logo_data) , "\nDistinct positions corresponding to snps:", length(c1) , "\n===========================================================") #======================================================================= -#%% logo plots from dataframe - +#================== +# logo data: OR +#================== foo = logo_data[, c("position" , "mutant_type","duet_scaled", "or_mychisq" , "mut_prop_polarity", "mut_prop_water")] @@ -132,7 +133,6 @@ position_or = as.numeric(colnames(wide_df_or)) #================== # logo data: logOR #================== -# extracting data with log10R logo_data_plot_logor = logo_data[, c("position", "mutant_type", "log10or")] wide_df_logor <- logo_data_plot_logor %>% spread(position, log10or, fill = 0.0) @@ -217,6 +217,20 @@ rownames(tab_wt) identical(colnames(tab_mt), colnames(tab_wt)) identical(ncol(tab_mt), ncol(tab_wt)) +#---------------------------------- +# logo data OR: multiple nsSNPs (>1) +#---------------------------------- +logo_data_or_mult = my_data_snp[, c("position", "mutant_type", "or_mychisq")] +#wide_df_or <- logo_data_or %>% spread(position, or_mychisq, fill = 0.0) +wide_df_or_mult <- logo_data_or_mult %>% spread(position, or_mychisq, fill = NA) + +wide_df_or_mult = as.matrix(wide_df_or_mult) +rownames(wide_df_or_mult) = wide_df_or_mult[,1] +wide_df_or_mult = wide_df_or_mult[,-1] +str(wide_df_or_mult) + +position_or_mult = as.numeric(colnames(wide_df_or_mult)) + ######################################################################## # End of script ######################################################################## \ No newline at end of file diff --git a/scripts/plotting/logo_combined.R b/scripts/plotting/logo_combined.R index 925d1ec..7a9cb6a 100755 --- a/scripts/plotting/logo_combined.R +++ b/scripts/plotting/logo_combined.R @@ -254,4 +254,4 @@ OutPlot2 = cowplot::plot_grid(logo_or, p1, p3 , label_size = 25) print(OutPlot2) -#dev.off() +#dev.off() \ No newline at end of file diff --git a/scripts/plotting/logo_plots.R b/scripts/plotting/logo_plots.R new file mode 100755 index 0000000..8b2e856 --- /dev/null +++ b/scripts/plotting/logo_plots.R @@ -0,0 +1,253 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: producing logo-type plot showing + ## OR + ## Log OR +# multiple muts (>1 per position) coloured by aa property + ## mutant_type + ## wild_type + +# Note: Replaced scripts: + ## logo_plot.R + ## logo_multiple_muts.R + ## logo_combined.R +######################################################### +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting") +getwd() +source("Header_TT.R") + +spec = matrix(c( + "drug" , "d", 1, "character", + "gene" , "g", 1, "character", + "data_file1" , "fa", 2, "character", + "data_file2" , "fb", 2, "character" +), byrow = TRUE, ncol = 4) + +opt = getopt(spec) + +drug = opt$drug +gene = opt$gene +infile_params = opt$data_file1 +infile_metadata = opt$data_file2 + +if(is.null(drug)|is.null(gene)) { + stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)") +} + +#=========== +# Input +#=========== +source("get_plotting_dfs.R") + +#=========== +# output +#=========== +logo_or_plotname = "logo_or_plot.svg" +plot_logo_or = paste0(plotdir,"/", logo_or_plotname) + +logo_logOR_plotname = "logo_logOR_plot.svg" +plot_logo_logOR = paste0(plotdir,"/", logo_logOR_plotname) + +logo_multiple_muts = "logo_multiple_muts.svg" +plot_logo_multiple_muts = paste0(plotdir,"/", logo_multiple_muts) + +logo_combined_labelled = "logo_combined_labelled.svg" +plot_logo_combined_labelled = paste0(plotdir,"/", logo_combined_labelled) + +######################################################### + +#================================== +# Output +# Logo plot OR: custom height (OR) +#================================== +cat("Logo plot with OR as y axis:", plot_logo_or) +svg(plot_logo_or, width = 30 , height = 6) + +logo_or = ggseqlogo(wide_df_or + , method = "custom" + , seq_type = "aa") + ylab("my custom height") + + theme( axis.text.x = element_text(size = 12 + , angle = 90 + , hjust = 1 + , vjust = 0.4) + , axis.text.y = element_text(size = 22 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.y = element_text(size = 25) + , axis.title.x = element_text(size = 20) + #, legend.position = "bottom") + + , legend.position = "none")+ + #, legend.text = element_text(size = 15) + #, legend.title = element_text(size = 15))+ + scale_x_discrete("Position" + #, breaks + , labels = position_or + , limits = factor(1:length(position_or))) + + ylab("Odds Ratio") + +print(logo_or) +#dev.off() + +#============================================= +# Output +# Logo plot LOG OR: custom height (Log10 OR) +#============================================= +cat("Logo plot with log10 OR as y axis:", plot_logo_logOR) +svg(plot_logo_logOR, width = 30 , height = 6) + +logo_logOR = ggseqlogo(wide_df_logor_m + , method = "custom" + , seq_type="aa") + ylab("my custom height") + + theme(legend.position = "bottom" + , axis.text.x = element_text(size = 13 + , angle = 90 + , hjust = 1 + , vjust = 0.4) + , axis.text.y = element_text(size = 20 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.y = element_text(size = 25) + , axis.title.x = element_text(size = 20))+ + scale_x_discrete("Position" + #, breaks + , labels = position_logor + , limits = factor(1:length(position_logor)))+ + ylab("Log (Odds Ratio)") + + scale_y_continuous(limits = c(0, 9)) + +print(logo_logOR) +#dev.off() + +#***************************** +# Mutant logo plot: >1 nsSNP +#****************************** +p0 = ggseqlogo(tab_mt + , method = 'custom' + , seq_type = 'aa') + + #ylab('my custom height') + + theme(axis.text.x = element_blank()) + + theme(text=element_text(family="FreeSans"))+ + theme_logo()+ + scale_x_continuous(breaks = 1:ncol(tab_mt) + , labels = colnames(tab_mt))+ + scale_y_continuous( breaks = 1:max_mult_mut + , limits = c(0, max_mult_mut)) + +#p0 +cat('\nDone: p0') + +# further customisation +mut_logo_p = p0 + theme(legend.position = "none" + , legend.title = element_blank() + , legend.text = element_text(size = 20) + , axis.text.x = element_text(size = 14, angle = 90) + , axis.text.y = element_blank()) +#mut_logo_p +cat('\nDone: p0+mut_logo_p') + +#************************* +# Wild logo plot: >1 nsSNP +#************************* +p2 = ggseqlogo(tab_wt + , method = 'custom' + , seq_type = 'aa' + #, col_scheme = "taylor" + #, col_scheme = chemistry2 +) + + #ylab('my custom height') + + theme(text=element_text(family="FreeSans"))+ + theme(axis.text.x = element_blank() + , axis.text.y = element_blank()) + + theme_logo() + + scale_x_continuous(breaks = 1:ncol(tab_wt) + , labels = colnames(tab_wt)) +#p2 +cat('\nDone: p2') + +# further customise +wt_logo_p = p2 + + theme(legend.position = "bottom" + #, legend.title = element_blank() + , legend.title = element_text("Amino acid properties", size = 20) + , legend.text = element_text(size = 20) + , axis.text.x = element_text(size = 14, angle = 90) + , axis.text.y = element_blank() + , axis.title.x = element_text(size = 22))+ + + labs(x= "Position") + +#wt_logo_p +cat('\nDone: wt_logo_p') + +#*********************** +# Logo OR >1 nsSNP +#*********************** +logo_or_mult_p = ggseqlogo(wide_df_or_mult, method="custom", seq_type="aa") + ylab("my custom height") + + theme(axis.text.x = element_text(size = 14 + , angle = 90 + , hjust = 1 + , vjust = 0.4) + , axis.text.y = element_text(size = 18 + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.y = element_text(size = 18) + , axis.title.x = element_blank() + , legend.position = "none")+ + scale_x_discrete( labels = position_or_mult + , limits = factor(1:length(position_or_mult))) + + scale_y_discrete(breaks = c(50, 150, 250, 350) + , labels = c(50, 150, 250, 350) + , limits = c(50, 150, 250, 350) + ) + + xlab("Position") + + ylab("Odds Ratio") + +#logo_or_mult_p +cat('\nDone: logo_or_mult_p') + +#========================================= +# Output +# Combined plot: logo_mutliple_muts.svg +#========================================= +#suppressMessages( require(cowplot) ) +cat('\nDone: wt_logo_p') + +#plot_grid(p1, p3, ncol = 1, align = 'v') +cat('\nDone: mut_logo_p + wt_logo_p') + +# colour scheme: https://rdrr.io/cran/ggseqlogo/src/R/col_schemes.r +cat("\nOutput plot:", plot_logo_multiple_muts, "\n") +svg(plot_logo_multiple_muts, width = 32, height = 10) + +mult_muts_combined = cowplot::plot_grid(mut_logo_p, wt_logo_p + , nrow = 2 + , align = "v" + , rel_heights = c(3/4, 1/4)) + +print(mult_muts_combined) +#dev.off() + +#====================================================== +# Output +# Combined ALL logo plots: logOR, mut_logo and mut_logo +#===================================================== +cat("Output plot:", plot_logo_combined_labelled) +svg(plot_logo_combined_labelled, width = 25, height = 10) + +all_logo_plots = cowplot::plot_grid(logo_or_mult_p + , mut_logo_p + , wt_logo_p + , nrow = 3 + , align = "hv" + #, labels = c("(a)","(b)", "(c)") + , labels = "AUTO" + , rel_heights = c(3/8, 3/8, 1.5/8) + , rel_widths = c(0.85, 1, 1) + , label_size = 25) + +print(all_logo_plots) +#dev.off() \ No newline at end of file