##################################################################################### # LogoPlotMSA(): # Input: # Data: # msaSeq_mut: MSA chr vector for muts # msaSeq_wt: MSA chr vector for wt # Logo type params: # logo_type = c("EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # EDLogo: calculated from the Logolas package based on PFM matrix (scaled). #The required content from the package is sourced locally within 'my_logolas.R' # bits_pfm: Information Content based on PFM scaled matrix (my_logolas.R) # probability_pfm: Probability based on PFM scaled matrix (my_logolas.R) # bits_raw: Information Content based on Raw MSA (ggseqlogo) # probability_raw: Probability based on Raw MSA (ggseqlogo) # EDScore_type = c("log", log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL") # bg_prob: background probability, default is equal i.e NULL. # This is used by the internal call to DataED_PFM(). This func takes thse args. I have used it here for # completeness and allow nuanced plot control # my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor") # --> if clustalx and taylor, set variable to black bg + white font # --> if chemistry and hydrophobicity, then grey bg + black font # ...other params # Returns: Logo plots from MSA both mutant and wt (for comparability) # For my case, I always use it as it helps see what is at the wild-type already! # TODO: SHINY # drop down: logo_type # drop down: ED score type # drop down/enter field : bg probability (in the actual plot function!) # drop down: my_logo_col # Make it hover over position and then get the corresponding data table! ################################################################################### ########################################### #LogoPlotMSA <- function(msaSeq_mut # chr vector # , msaSeq_wt # chr vector LogoPlotMSA <- function(unified_msa , logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw") , EDScore_type = c("log") # see if this relevant, or source function should have it! , bg_prob = NULL , my_logo_col = "chemistry" , plot_positions , y_breaks , x_lab_mut = "nsSNP position" , y_lab_mut , x_ats = 13 # text size , x_tangle = 90 # text angle , x_axis_offset = 0 # dist b/w y-axis and plot start , x_axis_offset_filtered = 0 , y_axis_offset = 0 , y_axis_increment = 1 , y_ats = 13 , y_tangle = 0 , x_tts = 13 # title size , y_tts = 13 , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) , leg_dir = "horizontal" #can be vertical or horizontal , leg_ts = 16 # leg text size , leg_tts = 16 # leg title size ) { # FIXME: Hack! # msaSeq_mut=unified_msa[[1]] # msaSeq_wt=unified_msa[[2]] msaSeq_mut=unified_msa[['msa_seq']] msaSeq_wt=unified_msa[['wt_seq']] # Get PFM matrix for mut and wt MSA provided data_ed = DataED_PFM(msaSeq_mut , msaSeq_wt , ED_score = EDScore_type) names(data_ed) #"pfm_mutM" "pfm_mut_scaledM" "combED_mutM" "pfm_wtM" "pfm_wt_scaledM" "combED_wtM" if (logo_type == "EDLogo"){ msa_method = "custom" y_label = "Enrichment Score" data_logo_mut = data_ed[['combED_mutM']] data_logo_wt = data_ed[['combED_wtM']] msa_pos = as.numeric(colnames(data_logo_mut)) wt_pos = as.numeric(colnames(data_logo_wt)) # Construct Y-axis for MSA mut plot: cat("\nCalculating y-axis for MSA mut plot") if ( missing(y_breaks) ){ # Y-axis: Calculating cat("\n----------------------------------------" , "\nY-axis being generated from data" , "\n-----------------------------------------") ylim_low <- floor(min(data_logo_mut)); ylim_low if( ylim_low == 0){ ylim_low = ylim_low cat("\nY-axis lower limit:", ylim_low) y_rlow = seq(0, ylim_low, length.out = 3); y_rlow ylim_up <- ceiling(max(data_logo_mut)) + 5; ylim_up cat("\nY-axis upper limit:", ylim_up) y_rup = seq(0, ylim_up, by = 2); y_rup }else{ ylim_low = ylim_low + (-0.5) cat("\nY-axis lower limit is <0:", ylim_low) y_rlow = seq(0, ylim_low, length.out = 3); y_rlow ylim_up <- ceiling(max(data_logo_mut)) + 3; ylim_up cat("\nY-axis upper limit:", ylim_up) y_rup = seq(0, ylim_up, by = 3); y_rup } #ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale cat("\nY-axis generated: see below\n" , ylim_scale) }else{ # Y-axis: User provided cat("\n--------------------------------" , "\nUsing y-axis:: User provided" ,"\n---------------------------------") ylim_scale = sort(y_breaks) ylim_low = min(ylim_scale); ylim_low ylim_up = max(ylim_scale); ylim_up } } if (logo_type == "bits_pfm"){ msa_method = "bits" y_label = "Bits (PFM)" data_logo_mut = data_ed[['pfm_mut_scaledM']] data_logo_wt = data_ed[['pfm_wtM']] msa_pos = as.numeric(colnames(data_logo_mut)) wt_pos = as.numeric(colnames(data_logo_wt)) } if (logo_type == "probability_pfm"){ msa_method = "probability" y_label = "Probability (PFM)" data_logo_mut = data_ed[['pfm_mut_scaledM']] data_logo_wt = data_ed[['pfm_wtM']] msa_pos = as.numeric(colnames(data_logo_mut)) wt_pos = as.numeric(colnames(data_logo_wt)) } if (logo_type == "bits_raw"){ msa_method = "bits" y_label = "Bits" data_logo_mut = msaSeq_mut msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,""))) msa_interimDF = data.frame(msa_interim) msa_pos = as.numeric(rownames(msa_interimDF)) data_logo_wt = msaSeq_wt wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,""))) wt_interimDF = data.frame(wt_interim) wt_pos = as.numeric(rownames(wt_interimDF)) } if (logo_type == "probability_raw"){ msa_method = "probability" y_label = "Probability" data_logo_mut = msaSeq_mut msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,""))) msa_interimDF = data.frame(msa_interim) msa_pos = as.numeric(rownames(msa_interimDF)) data_logo_wt = msaSeq_wt wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,""))) wt_interimDF = data.frame(wt_interim) wt_pos = as.numeric(rownames(wt_interimDF)) } ################################################################################# # param: plot_position ################################################################################# if(missing(plot_positions)){ #================================ # NO filtering of positions #================================ #--------- # MSA mut #--------- cat("\n===========================================" , "\nGenerated PFM mut: No filtering" , "\n===========================================") plot_mut_edM = data_logo_mut #--------- # MSA WT #--------- cat("\n===========================================" , "\nGenerated PFM WT: No filtering" , "\n===========================================") plot_wt_edM = data_logo_wt }else{ #================================ # Filtering of positions #================================ cat("\n===========================================" , "\nGenerating PFM MSA: filtered positions" , "\n===========================================" , "\nUser specified plotting positions for MSA:" , "\nThese are:\n", plot_positions , "\nSorting plot positions...") plot_positions = sort(plot_positions) cat("\nPlotting positions sorted:\n" , plot_positions) if ( all(plot_positions%in%msa_pos) && all(plot_positions%in%wt_pos) ){ cat("\nAll positions within range" , "\nFiltering positions as specified..." , "\nNo. of positions in plot:", length(plot_positions)) i_extract = plot_positions #----------------- # PFM: mut + wt #------------------ if (logo_type%in%c("EDLogo", "bits_pfm", "probability_pfm")){ plot_mut_edM = data_logo_mut[, i_extract] plot_wt_edM = data_logo_wt[, i_extract] } if (logo_type%in%c("bits_raw", "probability_raw")){ #-------- # Mut #-------- dfP1 = msa_interimDF[i_extract,] dfP1 = data.frame(t(dfP1)) names(dfP1) = i_extract cols_to_paste = names(dfP1) dfP1['chosen_seq'] = apply(dfP1[, cols_to_paste] , 1 , paste, sep = '' , collapse = "") plot_mut_edM = dfP1$chosen_seq #-------- # WT #-------- dfP2 = wt_interimDF[i_extract,] dfP2 = data.frame(t(dfP2)) names(dfP2) = i_extract cols_to_paste2 = names(dfP2) dfP2['chosen_seq'] = apply( dfP2[, cols_to_paste2] , 1 , paste, sep = '' , collapse = "") plot_wt_edM = dfP2$chosen_seq } }else{ cat("\nNo. of positions selected:", length(plot_positions)) i_ofr = plot_positions[!plot_positions%in%msa_pos] cat("\n1 or more plot_positions out of range..." , "\nThese are:\n", i_ofr , "\nQuitting! Resubmit with correct plot_positions") quit() } } ###################################### # Generating plots for muts and wt ##################################### if (my_logo_col %in% c('clustalx','taylor')) { cat("\nSelected colour scheme:", my_logo_col , "\nUsing black theme\n") theme_bgc = "black" xfont_bgc = "white" yfont_bgc = "white" xtt_col = "white" ytt_col = "white" } if (my_logo_col %in% c('chemistry', 'hydrophobicity')) { cat("\nstart of MSA" , '\nSelected colour scheme:', my_logo_col , "\nUsing grey theme") theme_bgc = "white" xfont_bgc = "black" yfont_bgc = "black" xtt_col = "black" ytt_col = "black" } ##################################### # Generating logo plots for nsSNPs ##################################### PlotlogolasL <- list() #------------------- # Mutant logo plot #------------------- p0 = ggplot() + geom_logo(plot_mut_edM , method = msa_method , col_scheme = my_logo_col , seq_type = 'auto') + theme(legend.position = leg_pos , legend.direction = leg_dir #, legend.title = element_blank() , legend.title = element_text(size = leg_tts , colour = ytt_col) , legend.text = element_text(size = leg_ts) , axis.text.x = element_text(size = x_ats , angle = x_tangle , hjust = 1 , vjust = 0.4 , colour = xfont_bgc) #, axis.text.y = element_blank() , axis.ticks=element_blank() , axis.text.y = element_text(size = y_ats , angle = y_tangle , hjust = 1 , vjust = -1.0 , colour = yfont_bgc) , axis.title.x = element_text(size = x_tts , colour = xtt_col) , axis.title.y = element_text(size = y_tts , colour = ytt_col) , panel.grid=element_blank() , plot.background = element_rect(fill = theme_bgc, colour=NA) , panel.background = element_rect(fill = "transparent", colour=NA) ) + labs(y=y_label) + xlab(x_lab_mut) if (missing(plot_positions)){ ed_mut_logo_P = p0 + scale_y_continuous( expand = c(0,0), breaks = seq( 0, (y_lim), by = y_axis_increment ) ) + scale_x_discrete(breaks = msa_pos , expand = c(x_axis_offset, 0) , labels = msa_pos , limits = factor(msa_pos)) }else{ ed_mut_logo_P = p0 + scale_y_continuous( expand = c(0,0)#, # breaks = seq( # 0, # (y_lim), # by = y_axis_increment #) ) + # scale_x_continuous(expand = c(0,0)) #+ scale_x_discrete(breaks = i_extract , expand = c(x_axis_offset_filtered, 0) , labels = i_extract , limits = factor(i_extract)) } cat('\nDone: MSA plot for mutations') #### Wild-type MSA: gene_fasta file #### p1 = ggplot() + geom_logo(plot_wt_edM #, facet = "grid" , method = msa_method , col_scheme = my_logo_col , seq_type = 'aa') + theme(legend.position = "none" , legend.direction = leg_dir , legend.title = element_text(size = leg_tts , colour = ytt_col) , legend.text = element_text(size = leg_ts) , axis.text.x = element_blank() , axis.ticks=element_blank() , axis.text.y = element_blank() , axis.title.x = element_text(size = x_tts , colour = xtt_col) , axis.title.y = element_text(size = y_tts , colour = ytt_col) , panel.grid=element_blank() , plot.background = element_rect(fill = theme_bgc, colour=NA) , panel.background = element_rect(fill = "transparent", colour=NA) , plot.margin = margin(r=0,l=0, unit="pt") ) + scale_y_discrete(expand = c(0,0)) + ylab("") + xlab("Wild-type position") if (missing(plot_positions)){ # No y-axis needed ed_wt_logo_P = p1# + }else{ ed_wt_logo_P = p1 + scale_x_discrete(expand = c(0, 0), breaks = i_extract, #labels = i_extract, limits = factor(i_extract) ) } cowplot::plot_grid(ed_mut_logo_P , ed_wt_logo_P , nrow = 2 , align = "v" #, axis='lr' , rel_heights = c(3/4, 1/4)) } #LogoPlotMSA(unified_msa)