library(Logolas) library(ggseqlogo) source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R") # data msa: mut my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only msaSeq_mut = my_data$V1 # data msa: wt gene = "pncA" drug = "pyrazinamide" indir = paste0("~/git/Data/", drug , "/input/") in_filename_fasta = paste0(tolower(gene), "2_f2.fasta") infile_fasta = paste0(indir, in_filename_fasta) cat("\nInput fasta file for WT: ", infile_fasta, "\n") msa2 = read.csv(infile_fasta, header = F) head(msa2) cat("\nLength of WT fasta:", nrow(msa2)) #wt_seq = msa2$V1 #head(wt_seq) msaSeq_wt = msa2$V1 ########################################### PlotLogolasMSA <- function(msaSeq_mut # chr vector , msaSeq_wt # chr vector , msa_method = c("custom") # will be c("EDLogo", "Logo)# , EDLogo_score = c("log")# can be: "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL" , bg_prob = NULL , my_logo_col = "chemistry" , plot_positions = c(1, 10, 14, 8) , y_breaks , x_lab = "Wild-type position" , y_lab = "" , x_ats = 13 # text size , x_tangle = 90 # text angle , x_axis_offset = 0.05 # dist b/w y-axis and plot start , x_axis_offset_filtered = 0.2 , y_axis_offset = 0.05 , y_ats = 13 , y_tangle = 0 , x_tts = 13 # title size , y_tts = 13 , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) , leg_dir = "horizontal" #can be vertical or horizontal , leg_ts = 16 # leg text size , leg_tts = 16 # leg title size ) { dash_control = list() dash_control_default <- list(concentration = NULL, mode = NULL, optmethod = "mixEM", sample_weights = NULL, verbose = FALSE, bf = TRUE, pi_init = NULL, squarem_control = list(), dash_control = list(), reportcov = FALSE) dash_control <- modifyList(dash_control_default, dash_control) ############################################ # Data processing for logo plot for nsSNPS ########################################### cat("\nLength of MSA", length(msaSeq_mut) , "\nlength of WT seq:", length(msaSeq_wt)) cat("\n=======================" , "\nPlotting entire MSA" , "\n========================") #-------------------------- # Getting PFM: mutant MSA #-------------------------- pfm_mut <- Biostrings::consensusMatrix(msaSeq_mut) colnames(pfm_mut) <- 1:dim(pfm_mut)[2] pfm_mut_scaled <- do.call(dash, append(list(comp_data = pfm_mut), dash_control))$posmean logo_mut_h = get_logo_heights(pfm_mut_scaled , bg = bg_prob , score = EDLogo_score) logo_mut_h$pos_ic logo_mut_h$neg_ic # TODO: Add sanity check! #<... #...> pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM pos_mutS = logo_mut_h$pos_ic; pos_mutS pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1) neg_mutS = logo_mut_h$neg_ic; neg_mutS neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED combED_mutM = pos_mutED + neg_mutED # Construct the x-axis: mutant MSA msa_all_pos = as.numeric(colnames(combED_mutM)) #--------------------- # Getting PFM: WT #--------------------- pfm_wt <- Biostrings::consensusMatrix(msaSeq_wt) colnames(pfm_wt) <- 1:dim(pfm_wt)[2] pfm_wt_scaled <- do.call(dash, append(list(comp_data = pfm_wt), dash_control))$posmean logo_wt_h = get_logo_heights(pfm_wt_scaled , bg = bg_prob , score = EDLogo_score) logo_wt_h$pos_ic logo_wt_h$neg_ic pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM pos_wtS = logo_wt_h$pos_ic; pos_wtS pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1) neg_wtS = logo_wt_h$neg_ic; neg_wtS neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED combED_wtM = pos_wtED + neg_wtED # Construct the x-axis: mutant MSA wt_all_pos = as.numeric(colnames(combED_wtM)) if(missing(plot_positions)){ #------------------------------ # MSA mut: All, no filtering #------------------------------- cat("\n===========================================" , "\nGenerated PFM mut: No filtering" , "\n===========================================") plot_mut_edM = combED_mutM #------------------------------ # MSA WT: All, no filtering #------------------------------- cat("\n===========================================" , "\nGenerated PFM WT: No filtering" , "\n===========================================") plot_wt_edM = combED_wtM }else{ #------------------------------ # PFM mut: Filtered positions #------------------------------- cat("\n===========================================" , "\nGenerating PFM MSA: filtered positions" , "\n===========================================" , "\nUser specified plotting positions for MSA:" , "\nThese are:\n", plot_positions , "\nSorting plot positions...") plot_positions = sort(plot_positions) cat("\nPlotting positions sorted:\n" , plot_positions) if ( all(plot_positions%in%msa_all_pos) && all(plot_positions%in%wt_all_pos) ){ cat("\nAll positions within range" , "\nFiltering positions as specified..." , "\nNo. of positions in plot:", length(plot_positions)) i_extract = plot_positions plot_mut_edM = combED_mutM[, i_extract] plot_wt_edM = combED_wtM[, i_extract] }else{ cat("\nNo. of positions selected:", length(plot_positions)) i_ofr = plot_positions[!plot_positions%in%msa_all_pos] cat("\n1 or more plot_positions out of range..." , "\nThese are:\n", i_ofr , "\nQuitting! Resubmit with correct plot_positions") quit() } } # Construct the y-axis: Calculating cat("\n-------------------------" , "\nConstructing y-axis:" , "\nUser did not provide" ,"\n--------------------------") if (missing(y_breaks)){ ylim_low <- floor(min(combED_mutM)); ylim_low if( ylim_low == 0){ ylim_low = ylim_low cat("\nY-axis lower limit:", ylim_low) y_rlow = seq(0, ylim_low, length.out = 3); y_rlow ylim_up <- ceiling(max(combED_mutM)) + 4; ylim_up cat("\nY-axis upper limit:", ylim_up) y_rup = seq(0, ylim_up, by = 2); y_rup }else{ ylim_low = ylim_low + (-0.5) cat("\nY-axis lower limit is <0:", ylim_low) y_rlow = seq(0, ylim_low, length.out = 3); y_rlow ylim_up <- ceiling(max(combED_mutM)) + 3; ylim_up cat("\nY-axis upper limit:", ylim_up) y_rup = seq(0, ylim_up, by = 3); y_rup } #ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale cat("\nY-axis generated: see below" , "\n" , ylim_scale) }else{ # Construct the y-axis: User provided cat("\n-------------------------" , "\nConstructing y-axis:" , "\nUser provided" ,"\n--------------------------") ylim_scale = sort(y_breaks) ylim_low = min(ylim_scale); ylim_low ylim_up = max(ylim_scale); ylim_up } else { } ###################################### # Generating plots for muts and wt ##################################### if (my_logo_col %in% c('clustalx','taylor')) { cat("\nSelected colour scheme:", my_logo_col , "\nUsing black theme\n") theme_bgc = "black" xfont_bgc = "white" yfont_bgc = "white" xtt_col = "white" ytt_col = "white" } if (my_logo_col %in% c('chemistry', 'hydrophobicity')) { cat("\nstart of MSA" , '\nSelected colour scheme:', my_logo_col , "\nUsing grey theme") theme_bgc = "grey" xfont_bgc = "black" yfont_bgc = "black" xtt_col = "black" ytt_col = "black" } ##################################### # Generating logo plots for nsSNPs ##################################### PlotlogolasL <- list() #------------------- # Mutant logo plot #------------------- p0 = ggseqlogo(plot_mut_edM #, facet = "grid" , method = msa_method , col_scheme = my_logo_col , seq_type = 'auto') + theme(legend.position = leg_pos , legend.direction = leg_dir #, legend.title = element_blank() , legend.title = element_text(size = leg_tts , colour = ytt_col) , legend.text = element_text(size = leg_ts) , axis.text.x = element_text(size = x_ats , angle = x_tangle , hjust = 1 , vjust = 0.4 , colour = xfont_bgc) #, axis.text.y = element_blank() , axis.text.y = element_text(size = y_ats , angle = y_tangle , hjust = 1 , vjust = -1.0 , colour = yfont_bgc) , axis.title.x = element_text(size = x_tts , colour = xtt_col) , axis.title.y = element_text(size = y_tts , colour = ytt_col) , plot.background = element_rect(fill = theme_bgc))+ xlab(x_lab) if (missing(plot_positions)){ ed_mut_logo_P = p0 + scale_x_discrete(breaks = msa_all_pos , expand = c(x_axis_offset,0) , labels = msa_all_pos , limits = factor(msa_all_pos))+ scale_y_continuous(limits = c(ylim_low, ylim_up) , breaks = ylim_scale , expand = c(0,y_axis_offset)) + geom_hline(yintercept = 0 , linetype = "solid" , color = "grey" , size = 1) }else{ ed_mut_logo_P = p0 + #scale_y_continuous(expand = c(0,0.09)) + scale_y_continuous(limits = c(ylim_low, ylim_up) , breaks = ylim_scale , expand = c(0,y_axis_offset))+ scale_x_discrete(breaks = i_extract , expand = c(x_axis_offset_filtered,0) , labels = i_extract , limits = factor(i_extract)) + geom_hline(yintercept = 0 , linetype = "solid" , color = "grey" , size = 1) } cat('\nDone: MSA plot for mutations') #return(msa_mut_logoP) PlotlogolasL[['ed_mut_logoP']] <- ed_mut_logo_P #--------------------------------- # Wild-type MSA: gene_fasta file #--------------------------------- p1 = ggseqlogo(plot_wt_edM #, facet = "grid" , method = msa_method , col_scheme = my_logo_col , seq_type = 'aa') + theme(legend.position = "none" , legend.direction = leg_dir #, legend.title = element_blank() , legend.title = element_text(size = leg_tts , colour = ytt_col) , legend.text = element_text(size = leg_ts) , axis.text.x = element_text(size = x_ats , angle = x_tangle , hjust = 1 , vjust = 0.4 , colour = xfont_bgc) , axis.text.y = element_blank() , axis.title.x = element_text(size = x_tts , colour = xtt_col) , axis.title.y = element_text(size = y_tts , colour = ytt_col) , plot.background = element_rect(fill = theme_bgc)) + ylab("") + xlab("Wild-type position") if (missing(plot_positions)){ ed_wt_logo_P = p1 + scale_x_discrete(breaks = wt_all_pos , expand = c(x_axis_offset,0) , labels = wt_all_pos , limits = factor(wt_all_pos)) }else{ ed_wt_logo_P = p1 + scale_y_continuous(expand = c(0,0.09)) + scale_x_discrete(breaks = i_extract , expand = c(x_axis_offset_filtered, 0) , labels = i_extract , limits = factor(i_extract)) } cat('\nDone: MSA plot for WT') #return(msa_wt_logoP) PlotlogolasL[['ed_wt_logoP']] <- ed_wt_logo_P #========================================= # Output # Combined plot: logo ED plot # customised for ggseqlogo #========================================= cat('\nDone: ed_mut_logoP + ed_wt_logoP') # colour scheme: https://rdrr.io/cran/ggseqlogo/src/R/col_schemes.r #cat("\nOutput plot:", LogoSNPs_comb, "\n") #svg(LogoSNPs_combined, width = 32, height = 10) LogoED_comb = cowplot::plot_grid(PlotlogolasL[['ed_mut_logoP']] , PlotlogolasL[['ed_wt_logoP']] , nrow = 2 , align = "v" , rel_heights = c(3/4, 1/4)) return(LogoED_comb) }