From f640087922953ffda9cda524237c7e9cb8aa79f7 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 14 Jan 2022 16:09:57 +0000 Subject: [PATCH] added logoP_snp.R and renamed logo_plots_func.R to logoP.R --- .../functions/{logo_plots_func.R => logoP.R} | 13 +- scripts/functions/logoP_snp.R | 244 ++++++++++++++++++ .../functions/tests/test_logo_plots_func.R | 28 -- 3 files changed, 253 insertions(+), 32 deletions(-) rename scripts/functions/{logo_plots_func.R => logoP.R} (93%) create mode 100644 scripts/functions/logoP_snp.R delete mode 100644 scripts/functions/tests/test_logo_plots_func.R diff --git a/scripts/functions/logo_plots_func.R b/scripts/functions/logoP.R similarity index 93% rename from scripts/functions/logo_plots_func.R rename to scripts/functions/logoP.R index f28d434..1a18282 100644 --- a/scripts/functions/logo_plots_func.R +++ b/scripts/functions/logoP.R @@ -141,7 +141,8 @@ LogoPlotCustomH <- function(plot_df LogoPlot = ggseqlogo(logo_dfP_wf , method = "custom" , col_scheme = my_logo_col - , seq_type = "aa") + ylab("my custom height") + + , seq_type = "aa") + + #ylab("my custom height") + theme(axis.text.x = element_text(size = x_ats , angle = x_tangle , hjust = 1 @@ -156,11 +157,15 @@ LogoPlotCustomH <- function(plot_df , colour = xtt_col) , axis.title.y = element_text(size = y_tts , colour = ytt_col) + , legend.title = element_text(size = y_tts + , colour = ytt_col) + , legend.text = element_text(size = leg_ts) + , legend.position = leg_pos , legend.direction = leg_dir - , plot.background = element_rect(fill = theme_bgc))+ - #, legend.text = element_text(size = leg_ts) - #, legend.title = element_text(size = leg_tts))+ + , plot.background = element_rect(fill = theme_bgc) + , legend.text = element_text(size = leg_ts) + , legend.title = element_text(size = leg_tts))+ scale_x_discrete(x_lab #, breaks , labels = position_or diff --git a/scripts/functions/logoP_snp.R b/scripts/functions/logoP_snp.R new file mode 100644 index 0000000..df31db0 --- /dev/null +++ b/scripts/functions/logoP_snp.R @@ -0,0 +1,244 @@ +#logo plots + +# one for multiple muts + # --> select/drop down option to filter count of nsSNPs + # --> select/drop down option for colour + # --> should include WT + +# Data used + +#tab_mt # mutant logo plot +#tab_wt # wt logo plot + +# Make it hover over position and then get the corresponding data table! +#%%====================================================================== + +#================== +# logo data: OR +#================== +# NOTE: my_logo_col + +LogoPlotSnps <- function(plot_df + , x_axis_colname = "position" + , symbol_mut_colname = "mutant_type" + , symbol_wt_colname = "mutant_type" + , omit_snp_count = c(0) # can be 1, 2, etc. + + , my_logo_col = "chemistry" + , x_lab = "Position" + , y_lab = "Count" + , x_ats = 14 # text size + , x_tangle = 90 # text angle + , y_ats = 22 + , y_tangle = 0 + , x_tts = 20 # title size + , y_tts = 23 + , leg_pos = "none" # can be top, left, right and bottom or c(0.8, 0.9) + , leg_dir = "horizontal" #can be vertical or horizontal + , leg_ts = 20 # leg text size + , leg_tts = 16 # leg title size + ) + +{ + ############################################ + # Data processing for logo plot for nsSNPS + ############################################ + + setDT(plot_df)[, mut_pos_occurrence := .N, by = .(eval(parse(text=x_axis_colname)))] + + table(plot_df[[x_axis_colname]]) + table(plot_df$mut_pos_occurrence) + + max_mut = max(table(plot_df[[x_axis_colname]])) + + # Subset Data as specified by user + pos_freqC = c(1:max_mut) + + cat("\nDisplaying nsSNP position frequency:\n") + print(table(plot_df$mut_pos_occurrence)) + + if ( (length(omit_snp_count) ==1) && (omit_snp_count == 0) ){ + my_data_snp = plot_df + u = unique(my_data_snp[[x_axis_colname]]) + max_mult_mut = max(table(my_data_snp[[x_axis_colname]])) + + cat("\nNo filtering requested:" + , "\nTotal no. of nsSNPs:", sum(table(plot_df$mut_pos_occurrence)) + , "\nTotal no. of nsSNPs omitted:", sum(table(plot_df$mut_pos_occurrence)[omit_snp_count]) + , "\nDim of data:", dim(my_data_snp) + , "\nNo. of positions:", length(u) + , "\nMax no. of muts at any position:", max_mult_mut) + } else { + + my_data_snp = subset(plot_df, !(mut_pos_occurrence%in%omit_snp_count) ) + + exp_nrows = sum(table(plot_df$mut_pos_occurrence)) - sum(table(plot_df$mut_pos_occurrence)[omit_snp_count]) + got_rows = sum(table(my_data_snp$mut_pos_occurrence)) + u = unique(my_data_snp[[x_axis_colname]]) + max_mult_mut = max(table(my_data_snp[[x_axis_colname]])) + + if (got_rows == exp_nrows) { + cat("\nPass: Position with the stated nsSNP frequency filtered:", omit_snp_count + , "\nTotal no. of nsSNPs:", sum(table(plot_df$mut_pos_occurrence)) + , "\nTotal no. of nsSNPs omitted:", sum(table(plot_df$mut_pos_occurrence)[omit_snp_count]) + , "\nDim of subsetted data:", dim(my_data_snp) + , "\nNo. of positions:", length(u) + , "\nMax no. of muts at any position:", max_mult_mut) + } else { + + cat("\nFAIL:Position with the stated nsSNP frequency COULD NOT be filtered..." + , "\nExpected:",exp_nrows + , "\nGot:", got_rows ) + } + } + + #-------------------------------------- + # matrix for mutant type + # frequency of mutant type by position + #--------------------------------------- + table(my_data_snp[[symbol_mut_colname]], my_data_snp[[x_axis_colname]]) + tab_mt = table(my_data_snp[[symbol_mut_colname]], my_data_snp[[x_axis_colname]]) + class(tab_mt) + + # unclass to convert to matrix + tab_mt = unclass(tab_mt) + + if (is.matrix(tab_mt)){ + cat("\nPASS: Mutant matrix successfully created..." + , "\nRownames of mutant matrix:", rownames(tab_mt) + , "\nColnames of mutant matrix:", colnames(tab_mt)) + } else{ + tab_mt = as.matrix(tab_mt, rownames = T) + if (is.matrix(tab_mt)){ + cat("\nCreating mutant matrix..." + , "\nRownames of mutant matrix:", rownames(tab_mt) + , "\nColnames of mutant matrix:", colnames(tab_mt)) + } + } + + #------------------------------------- + # matrix for wild type + # frequency of wild type by position + #------------------------------------- + tab_wt = table(my_data_snp[[symbol_wt_colname]], my_data_snp[[x_axis_colname]]); tab_wt + tab_wt = unclass(tab_wt) + + # Important: remove wt duplicates + #wt = my_data_snp[, c("position", "wild_type")] + wt = my_data_snp %>% + select(x_axis_colname, symbol_wt_colname) + + wt = wt[!duplicated(wt),] + wt + + tab_wt = table(wt[[symbol_wt_colname]], wt[[x_axis_colname]]); tab_wt # should all be 1 + + if ( identical(colnames(tab_mt), colnames(tab_wt) ) && identical(ncol(tab_mt), ncol(tab_wt)) ){ + + cat("\nPASS: Wild type matrix successfully created" + , "\nDim of wt matrix:", dim(tab_wt) + , "\nDim of mutant matrix:", dim(tab_mt) + , "\n" + , "\nRownames of mutant matrix:", rownames(tab_wt) + , "\nColnames of mutant matrix:", colnames(tab_wt)) + } + + + ###################################### + # Generating plots with given y_axis + ##################################### + + if (my_logo_col %in% c('clustalx','taylor')) { + cat("\nSelected colour scheme:", my_logo_col + , "\nUsing black theme\n") + + theme_bgc = "black" + xfont_bgc = "white" + yfont_bgc = "white" + xtt_col = "white" + ytt_col = "white" + } + + if (my_logo_col %in% c('chemistry', 'hydrophobicity')) { + cat('\nSelected colour scheme:', my_logo_col + , "\nUsing grey theme") + + theme_bgc = "grey" + xfont_bgc = "black" + yfont_bgc = "black" + xtt_col = "black" + ytt_col = "black" + } + #LogoSnps_P + mut_logo_p = ggseqlogo(tab_mt + , method = "custom" + , col_scheme = my_logo_col) + + + theme( + # panel.grid = element_blank(), + + # axis.text.x = element_text(size = x_ats + # , angle = x_tangle + # , hjust = 1 + # , vjust = 0.4 + # , colour = xfont_bgc) + + # , axis.text.y = element_text(size = y_ats + # , angle = y_tangle + # , hjust = 1 + # , vjust = 0 + # , colour = yfont_bgc) + + , axis.title.x = element_text(size = x_tts + , colour = xtt_col) + + , axis.title.y = element_text(size = y_tts + , colour = ytt_col) + , legend.title = element_text(size = y_tts + , colour = ytt_col) + , legend.text = element_text(size = leg_ts) + , legend.position = leg_pos + , legend.direction = leg_dir + , plot.background = element_rect(fill = theme_bgc))+ + + #theme(text = element_text(family = "FreeSans"))+ + # theme_logo(base_size = 18)+ + + # scale_x_continuous(breaks = 1:ncol(tab_mt) + # , labels = colnames(tab_mt))+ + # scale_y_continuous(breaks = 1:max_mult_mut + # , limits = c(0, max_mult_mut)) + + scale_x_discrete(x_lab + , labels = colnames(tab_mt) + , limits = factor(1:ncol(tab_mt)))+ + scale_y_continuous(y_lab + , breaks = 1:max_mult_mut + , limits = c(0, max_mult_mut))#+ + # ylab(y_lab)+xlab(x_lab) + + + return(mut_logo_p) + +} + + +LogoPlotSnps(plot_df = merged_df3 + , x_axis_colname = "position" + , symbol_mut_colname = "mutant_type" + , symbol_wt_colname = "mutant_type" + , omit_snp_count = c(0)# can be 1, 2, etc. + + , my_logo_col = "chemistry" + , x_lab = "Position" + , y_lab = "Count" + , x_ats = 1 # text size + , x_tangle = 90 # text angle + , y_ats = 1 + , y_tangle = 0 + , x_tts = 20 # title size + , y_tts = 20 + , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) + , leg_dir = "horizontal" #can be vertical or horizontal + , leg_ts = 20 # leg text size + , leg_tts = 16 # leg title size +) diff --git a/scripts/functions/tests/test_logo_plots_func.R b/scripts/functions/tests/test_logo_plots_func.R deleted file mode 100644 index b75ca9e..0000000 --- a/scripts/functions/tests/test_logo_plots_func.R +++ /dev/null @@ -1,28 +0,0 @@ - -#source("~/git/LSHTM_analysis/config/gid.R") -#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") - - -LogoPlotCustomH (plot_df = merged_df3 - , x_axis_colname = "position" - , y_axis_colname = "or_mychisq" - , symbol_colname = "mutant_type" - , y_axis_log = F - , log_value = log10 - , rm_empty_pos = F - , my_logo_col = 'hydrophobicity' - , x_lab = "Position" - , y_lab = "Odds Ratio" - , x_ats = 12 # text size - , x_tangle = 90 # text angle - , y_ats = 22 - , y_tangle = 0 - , x_tts = 19 # title size - , y_tts = 22 - #, leg_pos = c(0.05,-0.12) - , leg_pos = "top" - , leg_dir = "horizontal" - , leg_ts = 15 # leg text size - , leg_tts = 16 # leg title size -) -