diff --git a/scripts/plotting/logo_plot.R b/scripts/plotting/logo_plot.R index a11b2ca..7c2d66f 100755 --- a/scripts/plotting/logo_plot.R +++ b/scripts/plotting/logo_plot.R @@ -1,28 +1,9 @@ -#!/usr/bin/env Rscript -######################################################### -# TASK: producing logoplot -# from data and/or from sequence -######################################################### -# working dir and loading libraries +#!/usr/bin/env Rscript getwd() setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() - source("Header_TT.R") -source("../functions/plotting_globals.R") -source("../functions/plotting_data.R") -source("../functions/combining_dfs_plotting.R") -########################################################### -# command line args -#******************** -#drug = 'streptomycin' -#gene = 'gid' -#******************** -# !!!FUTURE TODO!!! -# Can pass additional params of output/plot dir by user. -# Not strictly required for my workflow since it is optimised -# to have a streamlined input/output flow without filename worries. -#******************** + spec = matrix(c( "drug" , "d", 1, "character", "gene" , "g", 1, "character", @@ -32,158 +13,41 @@ spec = matrix(c( opt = getopt(spec) -#FIXME: detect if script running from cmd, then set these drug = opt$drug gene = opt$gene infile_params = opt$data_file1 infile_metadata = opt$data_file2 -# hardcoding when not using cmd -#drug = "streptomycin" -#gene = "gid" - if(is.null(drug)|is.null(gene)) { stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)") } + #=========== -# input +# Input #=========== -#--------------------- -# call: import_dirs() -#--------------------- -import_dirs(drug, gene) -#--------------------------- -# call: plotting_data() -#--------------------------- -#if (!exists("infile_params") && exists("gene")){ -if (!is.character(infile_params) && exists("gene")){ # when running as cmd - #in_filename_params = paste0(tolower(gene), "_all_params.csv") - in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid - infile_params = paste0(outdir, "/", in_filename_params) - cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n") -} - -# Input 1: read _comb_afor.csv -cat("\nReading mcsm combined data file: ", infile_params) -mcsm_df = read.csv(infile_params, header = T) -pd_df = plotting_data(mcsm_df) -my_df_u = pd_df[[2]] # this forms one of the input for combining_dfs_plotting() - -#-------------------------------- -# call: combining_dfs_plotting() -#-------------------------------- -#if (!exists("infile_metadata") && exists("gene")){ -if (!is.character(infile_metadata) && exists("gene")){ # when running as cmd - in_filename_metadata = paste0(tolower(gene), "_metadata.csv") # part combined for gid - infile_metadata = paste0(outdir, "/", in_filename_metadata) - cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n") -} - -# Input 2: read _meta data.csv -cat("\nReading meta data file: ", infile_metadata) - -gene_metadata <- read.csv(infile_metadata - , stringsAsFactors = F - , header = T) - -all_plot_dfs = combining_dfs_plotting(my_df_u - , gene_metadata - , lig_dist_colname = 'ligand_distance' - , lig_dist_cutoff = 10) - -merged_df3 = all_plot_dfs[[2]] +source("get_plotting_dfs.R") #=========== # output #=========== -logo_plot = "logo_plot.svg" -plot_logo_plot = paste0(plotdir,"/", logo_plot) +logo_or_plotname = "logo_or_plot.svg" +plot_logo_or = paste0(plotdir,"/", logo_or_plotname) -###################################################################### -# Data for plots -# you need merged_df2 or merged_df2_comp -# since this is one-many relationship -# i.e the same SNP can belong to multiple lineages -# using the _comp dataset means -# we lose some muts and at this level, we should use -# as much info as available, hence use df with NA +logo_logOR_plotname = "logo_logOR_plot.svg" +plot_logo_logOR = paste0(plotdir,"/", logo_logOR_plotname) -# This will the first plotting df -# Then subset this to extract dr muts only (second plottig df) -#################################################################### +############################################################################## -#%%%%%%%%%%%%%%%%%%%%%%%%% -# uncomment as necessary -# REASSIGNMENT -#my_data = merged_df2 -#my_data = merged_df2_comp -my_data = merged_df3 -#my_data = merged_df3_comp -#%%%%%%%%%%%%%%%%%%%%%%%%%% +#================================ +# Logo plot: custom height (OR) +#================================ +cat("Logo plot with OR as y axis:", plot_logo_or) +svg(plot_logo_or, width = 30 , height = 6) -# quick checks -colnames(my_data) -str(my_data) - -c1 = unique(my_data$position) -nrow(my_data) -cat("No. of rows in my_data:", nrow(my_data) - , "\nDistinct positions corresponding to snps:", length(c1) - , "\n===========================================================") - -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -# FIXME: Think and decide what you want to remove -# mut_pos_occurence < 1 or sample_pos_occurrence <1 - -# get freq count of positions so you can subset freq<1 -#require(data.table) -#setDT(my_data)[, mut_pos_occurrence := .N, by = .(position)] #265, 14 - -#extract freq_pos>1 -#my_data_snp = my_data[my_data$occurrence!=1,] - -#u = unique(my_data_snp$position) #73 -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# REASSIGNMENT to prevent changing code -my_data_snp = my_data -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#======================================================================= -#%% logo plots from dataframe - -############# -# PLOTS: ggseqlogo with custom height -# https://omarwagih.github.io/ggseqlogo/ -############# -foo = my_data_snp[, c("position" - , "mutant_type","duet_scaled", "or_mychisq" - , "mut_prop_polarity", "mut_prop_water")] - -my_data_snp$log10or = log10(my_data_snp$or_mychisq) -logo_data = my_data_snp[, c("position" - , "mutant_type", "or_mychisq", "log10or")] - -logo_data_or = my_data_snp[, c("position", "mutant_type", "or_mychisq")] -wide_df_or <- logo_data_or %>% spread(position, or_mychisq, fill = 0.0) - -wide_df_or = as.matrix(wide_df_or) -rownames(wide_df_or) = wide_df_or[,1] -dim(wide_df_or) -wide_df_or = wide_df_or[,-1] -str(wide_df_or) - -position_or = as.numeric(colnames(wide_df_or)) - -#=========================================== -#custom height (OR) logo plot: CORRECT x-axis labelling -#============================================ -# custom height (OR) logo plot: yayy works -cat("Logo plot with OR as y axis:", plot_logo_plot) -svg(plot_logo_plot, width = 30 , height = 6) - -logo_or = ggseqlogo(wide_df_or, method="custom", seq_type="aa") + ylab("my custom height") + +logo_or = ggseqlogo(wide_df_or + , method = "custom" + , seq_type = "aa") + ylab("my custom height") + theme( axis.text.x = element_text(size = 12 , angle = 90 , hjust = 1 @@ -206,46 +70,34 @@ logo_or = ggseqlogo(wide_df_or, method="custom", seq_type="aa") + ylab("my custo print(logo_or) dev.off() -#%% end of logo plot with OR as height -#======================================================================= -# extracting data with log10R -logo_data_logor = my_data_snp[, c("position", "mutant_type", "log10or")] -wide_df_logor <- logo_data_logor %>% spread(position, log10or, fill = 0.0) +#======================================= +# Logo plot: custom height (Log10 OR) +#======================================= +cat("Logo plot with log10 OR as y axis:", plot_logo_logOR) +svg(plot_logo_logOR, width = 30 , height = 6) -wide_df_logor = as.matrix(wide_df_logor) - -rownames(wide_df_logor) = wide_df_logor[,1] -wide_df_logor = subset(wide_df_logor, select = -c(1) ) -colnames(wide_df_logor) -wide_df_logor_m = data.matrix(wide_df_logor) - -rownames(wide_df_logor_m) -colnames(wide_df_logor_m) - -# FIXME -#my_ylim_up = as.numeric(max(wide_df_logor_m)) * 5 -#my_ylim_low = as.numeric(min(wide_df_logor_m)) - -position_logor = as.numeric(colnames(wide_df_logor_m)) - -# custom height (log10OR) logo plot: yayy works -ggseqlogo(wide_df_logor_m, method="custom", seq_type="aa") + ylab("my custom height") + +ggseqlogo(wide_df_logor_m + , method = "custom" + , seq_type="aa") + ylab("my custom height") + theme(legend.position = "bottom" - , axis.text.x = element_text(size = 11 + , axis.text.x = element_text(size = 13 , angle = 90 , hjust = 1 , vjust = 0.4) - , axis.text.y = element_text(size = 15 + , axis.text.y = element_text(size = 20 , angle = 0 , hjust = 1 - , vjust = 0))+ + , vjust = 0) + , axis.title.y = element_text(size = 25) + , axis.title.x = element_text(size = 20))+ scale_x_discrete("Position" #, breaks , labels = position_logor , limits = factor(1:length(position_logor)))+ ylab("Log (Odds Ratio)") + scale_y_continuous(limits = c(0, 9)) -#======================================================================= -#%% end of script -#======================================================================= +dev.off() +######################################################################= +# End of script +######################################################################= diff --git a/scripts/plotting/running_plotting_scripts.txt b/scripts/plotting/running_plotting_scripts.txt index 060ab99..8e380f3 100644 --- a/scripts/plotting/running_plotting_scripts.txt +++ b/scripts/plotting/running_plotting_scripts.txt @@ -41,9 +41,7 @@ sources: -fb flag has default if not supplied #======================== -# ./logo_multiple_muts.R : FIXME, -Error in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : polygon edge not found (new) -https://stackoverflow.com/questions/34220883/error-in-grid-calll-textbounds-as-graphicsannotxlabel-xx-xy-polygon +# ./logo_multiple_muts.R #======================== ./logo_multiple_muts.R -d streptomycin -g gid @@ -56,13 +54,12 @@ sources: ## 1 svg of multiple muts for all positions > 1 muts (mutations + wt) note: - -fa flag has default if not supplied - -fb flag has default if not supplied + - fa flag has default if not supplied + - fb flag has default if not supplied + - Error in grid.Call fixed by commenting out image rendering on console #======================== -# ./logo_combined.R : FIXME, -Error in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : polygon edge not found (new) -https://stackoverflow.com/questions/34220883/error-in-grid-calll-textbounds-as-graphicsannotxlabel-xx-xy-polygon +# ./logo_combined.R #======================== sources: @@ -75,8 +72,9 @@ sources: 'logo_plots.R' + 'logo_multiple_muts.R' note: - -fa flag has default if not supplied - -fb flag has default if not supplied + - fa flag has default if not supplied + - fb flag has default if not supplied + - Error in grid.Call fixed by commenting out image rendering on console ######################################################################## # TODO