diff --git a/scripts/functions/logoP_msa_raw.R b/scripts/functions/logoP_msa_raw.R deleted file mode 100644 index 0cbb7c5..0000000 --- a/scripts/functions/logoP_msa_raw.R +++ /dev/null @@ -1,319 +0,0 @@ -##################################################################################### -# LogoPlotMSA(): -# Input: -# Data: -# msaSeq_mut: MSA chr vector for muts -# msaSeq_wt [Optional]: MSA chr vector for wt - -# Others params: -# plot_positions: can choose what positions to plot -# msa_method : can be "bits" or "probability" -# my_logo_col : can be "chemistry", "hydrophobicity", "taylor" or "clustalx" - -# Returns data LogoPlot from MSA - -#... - -# TODO: SHINY -# drop down: my_logo_col i.e the 4 colour choices -# drop down: for DataED_PFM(), ED score options: - # c("log", log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL") -# drop down/enter field: for DataED_PFM(), background probability -# Make it hover over position and then get the corresponding data table! -################################################################################### - -#================== -# logo data: OR -#================== -LogoPlotMSA <- function(msaSeq_mut - , msaSeq_wt - , plot_positions - , msa_method = 'bits' # or probability - , my_logo_col = "chemistry" - , x_lab = "Wild-type position" - , y_lab = "" - , x_ats = 13 # text size - , x_tangle = 90 # text angle - , x_axis_offset = 0.07 # dist b/w y-axis and plot start - , y_ats = 13 - , y_tangle = 0 - , x_tts = 13 # title size - , y_tts = 13 - , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) - , leg_dir = "horizontal" #can be vertical or horizontal - , leg_ts = 16 # leg text size - , leg_tts = 16 # leg title size - ) - -{ - - ############################################ - # Data processing for logo plot for nsSNPS - ########################################### - cat("\nLength of MSA", length(msaSeq_mut) - , "\nlength of WT seq:", length(msaSeq_wt)) - - if(missing(plot_positions)){ - #if(is.null(plot_positions)){ - cat("\n=======================" - , "\nPlotting entire MSA" - , "\n========================") - msa_seq_plot = msaSeq_mut - msa_all_interim = sapply(msa_seq_plot, function(x) unlist(strsplit(x,""))) - msa_all_interimDF = data.frame(msa_all_interim) - msa_all_pos = as.numeric(rownames(msa_all_interimDF)) - - wt_seq_plot = msaSeq_wt - wt_all_interim = sapply(wt_seq_plot, function(x) unlist(strsplit(x,""))) - wt_all_interimDF = data.frame(wt_all_interim) - wt_all_pos = as.numeric(rownames(wt_all_interimDF)) - - - } else { - cat("\nUser specified plotting positions for MSA:" - , "\nThese are:\n", plot_positions - , "\nSorting plot positions...") - - plot_positions = sort(plot_positions) - - cat("\nPlotting positions sorted:\n" - , plot_positions) - - #----------- - # MSA: mut - #----------- - cat("\n===========================================" - , "\nGenerating MSA: filtered positions" - , "\n===========================================") - - msa_interim = sapply(msaSeq_mut, function(x) unlist(strsplit(x,""))) - msa_interimDF = data.frame(msa_interim) - msa_pos = as.numeric(rownames(msa_interimDF)) - - if (all(plot_positions%in%msa_pos)){ - cat("\nAll positions within range" - , "\nProceeding with generating requested position MSA seqs..." - , "\nNo. of positions in plot:", length(plot_positions)) - i_extract = plot_positions - dfP1 = msa_interimDF[i_extract,] - - }else{ - cat("\nNo. of positions selected:", length(plot_positions)) - i_ofr = plot_positions[!plot_positions%in%msa_pos] - cat("\n1 or more plot_positions out of range..." - , "\nThese are:\n", i_ofr - , "\nQuitting! Resubmit with correct plot_positions") - #i_extract = plot_positions[plot_positions%in%msa_pos] - #cat("\nFinal no. of positions being plottted:", length(i_extract) - # , "\nNo. of positions dropped from request:", length(i_ofr)) - quit() - } - - #matP1 = msa_interim[i_extract, 1:ncol(msa_interim)] - #dfP1 = msa_interimDF[i_extract,] - dfP1 = data.frame(t(dfP1)) - names(dfP1) = i_extract - cols_to_paste = names(dfP1) - dfP1['chosen_seq'] = apply(dfP1[ , cols_to_paste] - , 1 - , paste, sep = '' - , collapse = "") - - msa_seq_plot = dfP1$chosen_seq - - #----------- - # WT: fasta - #----------- - cat("\n=========================================" - , "\nGenerating WT fasta: filtered positions" - ,"\n===========================================") - wt_interim = sapply(msaSeq_wt, function(x) unlist(strsplit(x,""))) - wt_interimDF = data.frame(wt_interim) - wt_pos = as.numeric(rownames(wt_interimDF)) - - if (all(plot_positions%in%wt_pos)){ - cat("\nAll positions within range" - , "\nProceeding with generating requested position MSA seqs..." - , "\nplot positions:", length(plot_positions)) - i2_extract = plot_positions - }else{ - cat("\nNo. of positions selected:", length(plot_positions)) - i2_ofr = plot_positions[!plot_positions%in%wt_pos] - cat("\n1 or more plot_positions out of range..." - , "\nThese are:\n", i_ofr - , "\nQuitting! Resubmit with correct plot_positions") - #i2_extract = plot_positions[plot_positions%in%wt_pos] - #cat("\nFinal no. of positions being plottted:", length(i2_extract) - # , "\nNo. of positions dropped from request:", length(i2_ofr)) - quit() - } - - #matP1 = msa_interim[i_extract, 1:ncol(msa_interim)] - dfP2 = wt_interimDF[i2_extract,] - dfP2 = data.frame(t(dfP2)) - names(dfP2) = i2_extract - cols_to_paste2 = names(dfP2) - dfP2['chosen_seq'] = apply( dfP2[ , cols_to_paste2] - , 1 - , paste, sep = '' - , collapse = "") - - wt_seq_plot = dfP2$chosen_seq -} - - ###################################### - # Generating plots for muts and wt - ##################################### - - if (my_logo_col %in% c('clustalx','taylor')) { - cat("\nSelected colour scheme:", my_logo_col - , "\nUsing black theme\n") - - theme_bgc = "black" - xfont_bgc = "white" - yfont_bgc = "white" - xtt_col = "white" - ytt_col = "white" - } - - if (my_logo_col %in% c('chemistry', 'hydrophobicity')) { - cat("\nstart of MSA" - , '\nSelected colour scheme:', my_logo_col - , "\nUsing grey theme") - - theme_bgc = "grey" - xfont_bgc = "black" - yfont_bgc = "black" - xtt_col = "black" - ytt_col = "black" - } - - ##################################### - # Generating logo plots for nsSNPs - ##################################### - LogoPlotMSAL <- list() - - #------------------- - # Mutant logo plot - #------------------- - p0 = ggseqlogo(msa_seq_plot - , facet = "grid" - , method = msa_method - , col_scheme = my_logo_col - , seq_type = 'aa') + - theme(legend.position = leg_pos - , legend.direction = leg_dir - #, legend.title = element_blank() - , legend.title = element_text(size = leg_tts - , colour = ytt_col) - , legend.text = element_text(size = leg_ts) - - , axis.text.x = element_text(size = x_ats - , angle = x_tangle - , hjust = 1 - , vjust = 0.4 - , colour = xfont_bgc) - #, axis.text.y = element_blank() - , axis.text.y = element_text(size = y_ats - , angle = y_tangle - , hjust = 1 - , vjust = -1.0 - , colour = yfont_bgc) - , axis.title.x = element_text(size = x_tts - , colour = xtt_col) - , axis.title.y = element_text(size = y_tts - , colour = ytt_col) - , plot.background = element_rect(fill = theme_bgc))+ - xlab(x_lab) - - if (missing(plot_positions)){ - msa_mut_logo_P = p0 + - scale_x_discrete(breaks = msa_all_pos - , expand = c(0.02,0) - , labels = msa_all_pos - , limits = factor(msa_all_pos)) - - }else{ - msa_mut_logo_P = p0 + - scale_y_continuous(expand = c(0,0.09)) + - scale_x_discrete(breaks = i_extract - , expand = c(x_axis_offset,0) - , labels = i_extract - , limits = factor(i_extract)) - } - - cat('\nDone: MSA plot for mutations') - #return(msa_mut_logoP) - LogoPlotMSAL[['msa_mut_logoP']] <- msa_mut_logo_P - - #--------------------------------- - # Wild-type MSA: gene_fasta file - #--------------------------------- - p1 = ggseqlogo(wt_seq_plot - , facet = "grid" - , method = msa_method - , col_scheme = my_logo_col - , seq_type = 'aa') + - - theme(legend.position = "none" - , legend.direction = leg_dir - #, legend.title = element_blank() - , legend.title = element_text(size = leg_tts - , colour = ytt_col) - , legend.text = element_text(size = leg_ts) - - , axis.text.x = element_text(size = x_ats - , angle = x_tangle - , hjust = 1 - , vjust = 0.4 - , colour = xfont_bgc) - , axis.text.y = element_blank() - - , axis.title.x = element_text(size = x_tts - , colour = xtt_col) - , axis.title.y = element_text(size = y_tts - , colour = ytt_col) - - , plot.background = element_rect(fill = theme_bgc)) + - ylab("") + xlab("Wild-type position") - - if (missing(plot_positions)){ - msa_wt_logo_P = p1 + - scale_x_discrete(breaks = wt_all_pos - , expand = c(0.02,0) - , labels = wt_all_pos - , limits = factor(wt_all_pos) ) - - }else{ - msa_wt_logo_P = p1 + - scale_y_continuous(expand = c(0,0.09)) + - scale_x_discrete(breaks = i2_extract - , expand = c(x_axis_offset, 0) - , labels = i2_extract - , limits = factor(i2_extract)) - } - - cat('\nDone: MSA plot for WT') - #return(msa_wt_logoP) - LogoPlotMSAL[['msa_wt_logoP']] <- msa_wt_logo_P - - #========================================= - # Output - # Combined plot: logo_MSA - #========================================= - - cat('\nDone: msa_mut_logoP + msa_wt_logoP') - - # colour scheme: https://rdrr.io/cran/ggseqlogo/src/R/col_schemes.r - #cat("\nOutput plot:", LogoSNPs_comb, "\n") - #svg(LogoSNPs_combined, width = 32, height = 10) - - LogoMSA_comb = cowplot::plot_grid(LogoPlotMSAL[['msa_mut_logoP']] - , LogoPlotMSAL[['msa_wt_logoP']] - , nrow = 2 - , align = "v" - , rel_heights = c(3/4, 1/4)) - - return(LogoMSA_comb) - -}