##################################################################################### # LogoPlotMSA(): # Input: # Data: # msaSeq_mut: MSA chr vector for muts # msaSeq_wt: MSA chr vector for wt # Logo type params: # logo_type = c("EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # EDLogo: calculated from the Logolas package based on PFM matrix (scaled). #The required content from the package is sourced locally within 'my_logolas.R' # bits_pfm: Information Content based on PFM scaled matrix (my_logolas.R) # probability_pfm: Probability based on PFM scaled matrix (my_logolas.R) # bits_raw: Information Content based on Raw MSA (ggseqlogo) # probability_raw: Probability based on Raw MSA (ggseqlogo) # EDScore_type = c("log", log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL") # bg_prob: background probability, default is equal i.e NULL. # This is used by the internal call to DataED_PFM(). This func takes thse args. I have used it here for # completeness and allow nuanced plot control # my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor") # --> if clustalx and taylor, set variable to black bg + white font # --> if chemistry and hydrophobicity, then grey bg + black font # ...other params # Returns: Logo plots from MSA both mutant and wt (for comparability) # For my case, I always use it as it helps see what is at the wild-type already! # TODO: SHINY # drop down: logo_type # drop down: ED score type # drop down/enter field : bg probability (in the actual plot function!) # drop down: my_logo_col # Make it hover over position and then get the corresponding data table! ################################################################################### ########################################### #LogoPlotMSA <- function(msaSeq_mut # chr vector # , msaSeq_wt # chr vector LogoPlotMSA <- function(# unified_msa # <- not needed any more because we have 'target' now target = 'embb' , logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw") , EDScore_type = c("log") # see if this relevant, or source function should have it! , bg_prob = NULL , my_logo_col = "chemistry" , plot_positions , y_breaks , x_lab_mut = "" , y_lab_mut , x_ats = 10 # text size , x_tangle = 90 # text angle , x_axis_offset = 0 # dist b/w y-axis and plot start , x_axis_offset_filtered = 0 , y_axis_offset = 0 , y_axis_increment = 1 , y_ats = 10 , y_tangle = 0 , x_tts = 10 # title size , y_tts = 10 , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) , leg_dir = "horizontal" #can be vertical or horizontal , leg_ts = 14 # leg text size , leg_tts = 14 # leg title size , ... ) { # FIXME: Hack! # msaSeq_mut=unified_msa[[1]] # msaSeq_wt=unified_msa[[2]] unified_msa = get(paste0(target, "_unified_msa")) msaSeq_mut=unified_msa[['msa_seq']] msaSeq_wt=unified_msa[['wt_seq']] # Get PFM matrix for mut and wt MSA provided data_ed = DataED_PFM(msaSeq_mut , msaSeq_wt , ED_score = EDScore_type) names(data_ed) #"pfm_mutM" "pfm_mut_scaledM" "combED_mutM" "pfm_wtM" "pfm_wt_scaledM" "combED_wtM" #merged_df3 for current target (unfortunatly i can't think of an easy way to get this from unified_msa) contig_df=data.frame(position=1:max(nchar(unified_msa$wt_seq))) plot_df = get(paste0(target, "_merged_df3")) # generate the tile columns #plot_df=cbind(embb_merged_df3) plot_df$col_aa = ifelse(plot_df[["position"]]%in%active_aa_pos, "transparent", "transparent") plot_df$bg_all = plot_df$col_aa plot_df$bg_all = ifelse(plot_df[["position"]]%in%aa_pos_drug, "drug", plot_df$bg_all) plot_df$col_bg1 = plot_df$bg_all plot_df$col_bg1 = ifelse(plot_df[["position"]]%in%aa_pos_lig1, "lig1", plot_df$col_bg1) plot_df$col_bg2 = plot_df$col_bg1 plot_df$col_bg2 = ifelse(plot_df[["position"]]%in%aa_pos_lig2, "lig2", plot_df$col_bg2) plot_df$col_bg3 = plot_df$col_bg2 plot_df$col_bg3 = ifelse(plot_df[["position"]]%in%aa_pos_lig3 , "lig3", plot_df$col_bg3) plot_df = generate_distance_colour_map(plot_df, debug=FALSE) # copy only the tile columns to the contiguous DF contig_df$ligand_distance = plot_df$ligand_distance[match(contig_df$position, plot_df$position)] contig_df_map = generate_distance_colour_map(contig_df, debug=TRUE) contig_df$ligD_colours = contig_df_map$ligD_colours[match(contig_df$position, contig_df_map$position)] #contig_df$ligD_colours = plot_df$ligD_colours[match(contig_df$position, plot_df$position)] contig_df$bg_all = plot_df$bg_all[match(contig_df$position, plot_df$position)] contig_df$col_bg1 = plot_df$col_bg1[match(contig_df$position, plot_df$position)] contig_df$col_bg2 = plot_df$col_bg2[match(contig_df$position, plot_df$position)] contig_df$col_bg3 = plot_df$col_bg3[match(contig_df$position, plot_df$position)] contig_df=replace_na( contig_df, list( ligD_colours='transparent', bg_all = 'transparent', col_bg1 = 'transparent', col_bg2 = 'transparent', col_bg3 = 'transparent' ) ) if (logo_type == "EDLogo"){ msa_method = "custom" y_label = "Enrichment Score" data_logo_mut = data_ed[['combED_mutM']] data_logo_wt = data_ed[['combED_wtM']] msa_pos = as.numeric(colnames(data_logo_mut)) wt_pos = as.numeric(colnames(data_logo_wt)) # Construct Y-axis for MSA mut plot: cat("\nCalculating y-axis for MSA mut plot") if ( missing(y_breaks) ){ # Y-axis: Calculating cat("\n----------------------------------------" , "\nY-axis being generated from data" , "\n-----------------------------------------") ylim_low <- floor(min(data_logo_mut)); ylim_low if( ylim_low == 0){ ylim_low = ylim_low cat("\nY-axis lower limit:", ylim_low) y_rlow = seq(0, ylim_low, length.out = 3); y_rlow ylim_up <- ceiling(max(data_logo_mut)) + 5; ylim_up cat("\nY-axis upper limit:", ylim_up) y_rup = seq(0, ylim_up, by = 2); y_rup }else{ ylim_low = ylim_low + (-0.5) cat("\nY-axis lower limit is <0:", ylim_low) y_rlow = seq(0, ylim_low, length.out = 3); y_rlow ylim_up <- ceiling(max(data_logo_mut)) + 3; ylim_up cat("\nY-axis upper limit:", ylim_up) y_rup = seq(0, ylim_up, by = 3); y_rup } #ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale cat("\nY-axis generated: see below\n" , ylim_scale) }else{ # Y-axis: User provided cat("\n--------------------------------" , "\nUsing y-axis:: User provided" ,"\n---------------------------------") ylim_scale = sort(y_breaks) ylim_low = min(ylim_scale); ylim_low ylim_up = max(ylim_scale); ylim_up } } if (logo_type == "bits_pfm"){ msa_method = "bits" y_label = "Bits (PFM)" data_logo_mut = data_ed[['pfm_mut_scaledM']] data_logo_wt = data_ed[['pfm_wtM']] msa_pos = as.numeric(colnames(data_logo_mut)) wt_pos = as.numeric(colnames(data_logo_wt)) } if (logo_type == "probability_pfm"){ msa_method = "probability" y_label = "Probability (PFM)" data_logo_mut = data_ed[['pfm_mut_scaledM']] data_logo_wt = data_ed[['pfm_wtM']] msa_pos = as.numeric(colnames(data_logo_mut)) wt_pos = as.numeric(colnames(data_logo_wt)) } if (logo_type == "bits_raw"){ msa_method = "bits" y_label = "Bits" data_logo_mut = msaSeq_mut msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,""))) msa_interimDF = data.frame(msa_interim) msa_pos = as.numeric(rownames(msa_interimDF)) data_logo_wt = msaSeq_wt wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,""))) wt_interimDF = data.frame(wt_interim) wt_pos = as.numeric(rownames(wt_interimDF)) } if (logo_type == "probability_raw"){ msa_method = "probability" y_label = "Probability" data_logo_mut = msaSeq_mut msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,""))) msa_interimDF = data.frame(msa_interim) msa_pos = as.numeric(rownames(msa_interimDF)) data_logo_wt = msaSeq_wt wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,""))) wt_interimDF = data.frame(wt_interim) wt_pos = as.numeric(rownames(wt_interimDF)) } ################################################################################# # param: plot_position ################################################################################# if(missing(plot_positions)){ #================================ # NO filtering of positions #================================ #--------- # MSA mut #--------- cat("\n===========================================" , "\nGenerated PFM mut: No filtering" , "\n===========================================") plot_mut_edM = data_logo_mut #--------- # MSA WT #--------- cat("\n===========================================" , "\nGenerated PFM WT: No filtering" , "\n===========================================") plot_wt_edM = data_logo_wt }else{ #================================ # Filtering of positions #================================ cat("\n===========================================" , "\nGenerating PFM MSA: filtered positions" , "\n===========================================" , "\nUser specified plotting positions for MSA:" , "\nThese are:\n", plot_positions , "\nSorting plot positions...") plot_positions = sort(plot_positions) cat("\nPlotting positions sorted:\n" , plot_positions) if ( all(plot_positions%in%msa_pos) && all(plot_positions%in%wt_pos) ){ cat("\nAll positions within range" , "\nFiltering positions as specified..." , "\nNo. of positions in plot:", length(plot_positions)) i_extract = plot_positions #----------------- # PFM: mut + wt #------------------ if (logo_type%in%c("EDLogo", "bits_pfm", "probability_pfm")){ plot_mut_edM = data_logo_mut[, i_extract] plot_wt_edM = data_logo_wt[, i_extract] } if (logo_type%in%c("bits_raw", "probability_raw")){ #-------- # Mut #-------- dfP1 = msa_interimDF[i_extract,] dfP1 = data.frame(t(dfP1)) names(dfP1) = i_extract cols_to_paste = names(dfP1) dfP1['chosen_seq'] = apply(dfP1[, cols_to_paste] , 1 , paste, sep = '' , collapse = "") plot_mut_edM = dfP1$chosen_seq #-------- # WT #-------- dfP2 = wt_interimDF[i_extract,] dfP2 = data.frame(t(dfP2)) names(dfP2) = i_extract cols_to_paste2 = names(dfP2) dfP2['chosen_seq'] = apply( dfP2[, cols_to_paste2] , 1 , paste, sep = '' , collapse = "") plot_wt_edM = dfP2$chosen_seq } }else{ cat("\nNo. of positions selected:", length(plot_positions)) i_ofr = plot_positions[!plot_positions%in%msa_pos] cat("\n1 or more plot_positions out of range..." , "\nThese are:\n", i_ofr , "\nQuitting! Resubmit with correct plot_positions") quit() } } ###################################### # Generating plots for muts and wt ##################################### if (my_logo_col %in% c('clustalx','taylor')) { cat("\nSelected colour scheme:", my_logo_col , "\nUsing black theme\n") theme_bgc = "black" xfont_bgc = "white" yfont_bgc = "white" xtt_col = "white" ytt_col = "white" } if (my_logo_col %in% c('chemistry', 'hydrophobicity')) { cat("\nstart of MSA" , '\nSelected colour scheme:', my_logo_col , "\nUsing grey theme") theme_bgc = "white" xfont_bgc = "black" yfont_bgc = "black" xtt_col = "black" ytt_col = "black" } ##################################### # Generating logo plots for nsSNPs ##################################### PlotlogolasL <- list() #------------------- # Mutant logo plot #------------------- p0 = ggplot() + geom_logo(plot_mut_edM , method = msa_method , col_scheme = my_logo_col , seq_type = 'auto') + theme(legend.position = leg_pos , legend.direction = leg_dir #, legend.title = element_blank() , legend.title = element_text(size = leg_tts , colour = ytt_col) , legend.text = element_text(size = leg_ts) , axis.text.x = element_text(size = x_ats , angle = x_tangle , hjust = 1 , vjust = 0.4 , colour = xfont_bgc) #, axis.text.y = element_blank() , axis.ticks=element_blank() , axis.text.y = element_text(size = y_ats , angle = y_tangle , hjust = 1 , vjust = -1.0 , colour = yfont_bgc) , axis.title.x = element_text(size = x_tts , colour = xtt_col) , axis.title.y = element_text(size = y_tts , colour = ytt_col) , panel.grid=element_blank() , plot.background = element_rect(fill = theme_bgc, colour=NA) , panel.background = element_rect(fill = "transparent", colour=NA) ) + labs(y=y_label) + xlab(x_lab_mut) if (missing(plot_positions)){ ed_mut_logo_P = p0 + scale_y_continuous( expand = c(0,0), breaks = seq( 0, (y_lim), by = y_axis_increment ) ) + scale_x_discrete(breaks = msa_pos , expand = c(x_axis_offset, 0) , labels = msa_pos , limits = factor(msa_pos)) }else{ ed_mut_logo_P = p0 + scale_y_continuous( expand = c(0,0)#, # breaks = seq( # 0, # (y_lim), # by = y_axis_increment #) ) + # scale_x_continuous(expand = c(0,0)) #+ scale_x_discrete(breaks = i_extract , expand = c(x_axis_offset_filtered, 0) , labels = i_extract , limits = factor(i_extract)) } cat('\nDone: MSA plot for mutations') #### Wild-type MSA: gene_fasta file #### p1 = ggplot() + geom_logo(plot_wt_edM #, facet = "grid" , method = msa_method , col_scheme = my_logo_col , seq_type = 'aa') + theme(legend.position = "none" , legend.direction = leg_dir , legend.title = element_text(size = leg_tts , colour = ytt_col) , legend.text = element_text(size = leg_ts) , axis.text.x = element_blank() , axis.ticks=element_blank() , axis.text.y = element_blank() , axis.title.x = element_text(size = x_tts , colour = xtt_col) , axis.title.y = element_text(size = y_tts , colour = ytt_col) , panel.grid=element_blank() , plot.background = element_rect(fill = theme_bgc, colour=NA) , panel.background = element_rect(fill = "transparent", colour=NA) , plot.margin = margin(r=0,l=0, unit="pt") ) + scale_y_discrete(expand = c(0,0)) + ylab("") + xlab("") if (missing(plot_positions)){ # No y-axis needed ed_wt_logo_P = p1# + } else { ed_wt_logo_P = p1 + scale_x_discrete(expand = c(0, 0), breaks = i_extract, #labels = i_extract, limits = factor(i_extract) ) #plot_df=plot_df[plot_df$position %in% plot_positions,] contig_df=contig_df[contig_df$position %in% plot_positions,] anno_bar = position_annotation( contig_df, aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, generate_colours = FALSE ) } cowplot::plot_grid(ed_mut_logo_P , ed_wt_logo_P , anno_bar , ncol = 1 , align = "v" #, axis='lr' , rel_heights = c(3/4, 1/4,1/10)) } #LogoPlotMSA(unified_msa)