########################################### LogoPlotMSA <- function(msaSeq_mut # chr vector , msaSeq_wt # chr vector #, msa_method = c("custom") # can be "bits", "probability" or "custom" , logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # can be "bits", "probability" or "custom" , EDScore_type = c("log") # see if this relevant, or source function should have it! , bg_prob = NULL , my_logo_col = "chemistry" , plot_positions , y_breaks , x_lab_mut = "nsSNP-position" , y_lab_mut , x_ats = 13 # text size , x_tangle = 90 # text angle , x_axis_offset = 0.05 # dist b/w y-axis and plot start , x_axis_offset_filtered = 0.2 , y_axis_offset = 0.05 , y_ats = 13 , y_tangle = 0 , x_tts = 13 # title size , y_tts = 13 , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) , leg_dir = "horizontal" #can be vertical or horizontal , leg_ts = 16 # leg text size , leg_tts = 16 # leg title size ) { # Get PFM matrix for mut and wt MSA provided data_ed = DataED_PFM(msaSeq_mut , msaSeq_wt , ED_score = EDScore_type) names(data_ed) #"pfm_mutM" "pfm_mut_scaledM" "combED_mutM" "pfm_wtM" "pfm_wt_scaledM" "combED_wtM" if (logo_type == "EDLogo"){ msa_method = "custom" y_label = "Enrichment Score" data_logo_mut = data_ed[['combED_mutM']] data_logo_wt = data_ed[['combED_wtM']] msa_pos = as.numeric(colnames(data_logo_mut)) wt_pos = as.numeric(colnames(data_logo_wt)) # Construct Y-axis for MSA mut plot: cat("\nCalculating y-axis for MSA mut plot") if ( missing(y_breaks) ){ # Y-axis: Calculating cat("\n----------------------------------------" , "\nY-axis being generated from data" , "\n-----------------------------------------") ylim_low <- floor(min(data_logo_mut)); ylim_low if( ylim_low == 0){ ylim_low = ylim_low cat("\nY-axis lower limit:", ylim_low) y_rlow = seq(0, ylim_low, length.out = 3); y_rlow ylim_up <- ceiling(max(data_logo_mut)) + 5; ylim_up cat("\nY-axis upper limit:", ylim_up) y_rup = seq(0, ylim_up, by = 2); y_rup }else{ ylim_low = ylim_low + (-0.5) cat("\nY-axis lower limit is <0:", ylim_low) y_rlow = seq(0, ylim_low, length.out = 3); y_rlow ylim_up <- ceiling(max(data_logo_mut)) + 3; ylim_up cat("\nY-axis upper limit:", ylim_up) y_rup = seq(0, ylim_up, by = 3); y_rup } #ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale cat("\nY-axis generated: see below\n" , ylim_scale) }else{ # Y-axis: User provided cat("\n--------------------------------" , "\nUsing y-axis:: User provided" ,"\n---------------------------------") ylim_scale = sort(y_breaks) ylim_low = min(ylim_scale); ylim_low ylim_up = max(ylim_scale); ylim_up } } if (logo_type == "bits_pfm"){ msa_method = "bits" y_label = "Bits (PFM)" data_logo_mut = data_ed[['pfm_mut_scaledM']] data_logo_wt = data_ed[['pfm_wtM']] msa_pos = as.numeric(colnames(data_logo_mut)) wt_pos = as.numeric(colnames(data_logo_wt)) } if (logo_type == "probability_pfm"){ msa_method = "probability" y_label = "Probability (PFM)" data_logo_mut = data_ed[['pfm_mut_scaledM']] data_logo_wt = data_ed[['pfm_wtM']] msa_pos = as.numeric(colnames(data_logo_mut)) wt_pos = as.numeric(colnames(data_logo_wt)) } if (logo_type == "bits_raw"){ msa_method = "bits" y_label = "Bits" data_logo_mut = msaSeq_mut msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,""))) msa_interimDF = data.frame(msa_interim) msa_pos = as.numeric(rownames(msa_interimDF)) data_logo_wt = msaSeq_wt wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,""))) wt_interimDF = data.frame(wt_interim) wt_pos = as.numeric(rownames(wt_interimDF)) } if (logo_type == "probability_raw"){ msa_method = "probability" y_label = "Probability" data_logo_mut = msaSeq_mut msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,""))) msa_interimDF = data.frame(msa_interim) msa_pos = as.numeric(rownames(msa_interimDF)) data_logo_wt = msaSeq_wt wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,""))) wt_interimDF = data.frame(wt_interim) wt_pos = as.numeric(rownames(wt_interimDF)) } ################################################################################# # param: plot_position ################################################################################# if(missing(plot_positions)){ #================================ # NO filtering of positions #================================ #--------- # MSA mut #--------- cat("\n===========================================" , "\nGenerated PFM mut: No filtering" , "\n===========================================") plot_mut_edM = data_logo_mut #--------- # MSA WT #--------- cat("\n===========================================" , "\nGenerated PFM WT: No filtering" , "\n===========================================") plot_wt_edM = data_logo_wt }else{ #================================ # Filtering of positions #================================ cat("\n===========================================" , "\nGenerating PFM MSA: filtered positions" , "\n===========================================" , "\nUser specified plotting positions for MSA:" , "\nThese are:\n", plot_positions , "\nSorting plot positions...") plot_positions = sort(plot_positions) cat("\nPlotting positions sorted:\n" , plot_positions) if ( all(plot_positions%in%msa_pos) && all(plot_positions%in%wt_pos) ){ cat("\nAll positions within range" , "\nFiltering positions as specified..." , "\nNo. of positions in plot:", length(plot_positions)) i_extract = plot_positions #----------------- # PFM: mut + wt #------------------ if (logo_type%in%c("EDLogo", "bits_pfm", "probability_pfm")){ plot_mut_edM = data_logo_mut[, i_extract] plot_wt_edM = data_logo_wt[, i_extract] } if (logo_type%in%c("bits_raw", "probability_raw")){ #-------- # Mut #-------- dfP1 = msa_interimDF[i_extract,] dfP1 = data.frame(t(dfP1)) names(dfP1) = i_extract cols_to_paste = names(dfP1) dfP1['chosen_seq'] = apply(dfP1[, cols_to_paste] , 1 , paste, sep = '' , collapse = "") plot_mut_edM = dfP1$chosen_seq #-------- # WT #-------- dfP2 = wt_interimDF[i_extract,] dfP2 = data.frame(t(dfP2)) names(dfP2) = i_extract cols_to_paste2 = names(dfP2) dfP2['chosen_seq'] = apply( dfP2[, cols_to_paste2] , 1 , paste, sep = '' , collapse = "") plot_wt_edM = dfP2$chosen_seq } }else{ cat("\nNo. of positions selected:", length(plot_positions)) i_ofr = plot_positions[!plot_positions%in%msa_pos] cat("\n1 or more plot_positions out of range..." , "\nThese are:\n", i_ofr , "\nQuitting! Resubmit with correct plot_positions") quit() } } ###################################### # Generating plots for muts and wt ##################################### if (my_logo_col %in% c('clustalx','taylor')) { cat("\nSelected colour scheme:", my_logo_col , "\nUsing black theme\n") theme_bgc = "black" xfont_bgc = "white" yfont_bgc = "white" xtt_col = "white" ytt_col = "white" } if (my_logo_col %in% c('chemistry', 'hydrophobicity')) { cat("\nstart of MSA" , '\nSelected colour scheme:', my_logo_col , "\nUsing grey theme") theme_bgc = "grey" xfont_bgc = "black" yfont_bgc = "black" xtt_col = "black" ytt_col = "black" } ##################################### # Generating logo plots for nsSNPs ##################################### PlotlogolasL <- list() #------------------- # Mutant logo plot #------------------- p0 = ggseqlogo(plot_mut_edM , method = msa_method , col_scheme = my_logo_col , seq_type = 'auto') + theme(legend.position = leg_pos , legend.direction = leg_dir #, legend.title = element_blank() , legend.title = element_text(size = leg_tts , colour = ytt_col) , legend.text = element_text(size = leg_ts) , axis.text.x = element_text(size = x_ats , angle = x_tangle , hjust = 1 , vjust = 0.4 , colour = xfont_bgc) #, axis.text.y = element_blank() , axis.text.y = element_text(size = y_ats , angle = y_tangle , hjust = 1 , vjust = -1.0 , colour = yfont_bgc) , axis.title.x = element_text(size = x_tts , colour = xtt_col) , axis.title.y = element_text(size = y_tts , colour = ytt_col) , plot.background = element_rect(fill = theme_bgc)) + xlab(x_lab_mut) if (missing(plot_positions)){ ed_mut_logo_P = p0 + scale_x_discrete(breaks = msa_pos , expand = c(x_axis_offset, 0) , labels = msa_pos , limits = factor(msa_pos)) }else{ ed_mut_logo_P = p0 + scale_x_discrete(breaks = i_extract , expand = c(x_axis_offset_filtered, 0) , labels = i_extract , limits = factor(i_extract)) } if (logo_type == "EDLogo"){ ed_mut_logo_P = ed_mut_logo_P + scale_y_continuous(limits = c(ylim_low, ylim_up) , breaks = ylim_scale , expand = c(0, y_axis_offset)) + geom_hline(yintercept = 0 , linetype = "solid" , color = "grey" , size = 1) } if (missing(y_lab_mut)){ ed_mut_logo_P = ed_mut_logo_P + ylab(y_label) } else{ ed_mut_logo_P = ed_mut_logo_P + ylab(y_lab_mut) } cat('\nDone: MSA plot for mutations') #return(msa_mut_logoP) PlotlogolasL[['ed_mut_logoP']] <- ed_mut_logo_P #--------------------------------- # Wild-type MSA: gene_fasta file #--------------------------------- p1 = ggseqlogo(plot_wt_edM #, facet = "grid" , method = msa_method , col_scheme = my_logo_col , seq_type = 'aa') + theme(legend.position = "none" , legend.direction = leg_dir #, legend.title = element_blank() , legend.title = element_text(size = leg_tts , colour = ytt_col) , legend.text = element_text(size = leg_ts) , axis.text.x = element_text(size = x_ats , angle = x_tangle , hjust = 1 , vjust = 0.4 , colour = xfont_bgc) , axis.text.y = element_blank() , axis.title.x = element_text(size = x_tts , colour = xtt_col) , axis.title.y = element_text(size = y_tts , colour = ytt_col) , plot.background = element_rect(fill = theme_bgc)) + ylab("") + xlab("Wild-type position") if (missing(plot_positions)){ # No y-axis needed ed_wt_logo_P = p1 + scale_x_discrete(breaks = wt_pos , expand = c(x_axis_offset, 0) , labels = wt_pos , limits = factor(wt_pos)) }else{ ed_wt_logo_P = p1 + scale_x_discrete(breaks = i_extract , expand = c(x_axis_offset_filtered, 0) , labels = i_extract , limits = factor(i_extract)) } cat('\nDone: MSA plot for WT') #return(msa_wt_logoP) PlotlogolasL[['ed_wt_logoP']] <- ed_wt_logo_P #========================================= # Output # Combined plot: logo ED/other logo plot # customised for ggseqlogo #========================================= cat('\nDone: ed_mut_logoP + ed_wt_logoP') # colour scheme: https://rdrr.io/cran/ggseqlogo/src/R/col_schemes.r #cat("\nOutput plot:", LogoSNPs_comb, "\n") #svg(LogoSNPs_combined, width = 32, height = 10) LogoED_comb = cowplot::plot_grid(PlotlogolasL[['ed_mut_logoP']] , PlotlogolasL[['ed_wt_logoP']] , nrow = 2 , align = "v" , rel_heights = c(3/4, 1/4)) return(LogoED_comb) }