From 6a9f4a0cab3f95558e0b9af52e96946902fa9b82 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 24 Jan 2022 17:23:32 +0000 Subject: [PATCH] added logoP_logolas.R to plot logolas like plot to show ED regions --- scripts/functions/logoP_logolas.R | 405 ++++++++++++++++++++++ scripts/functions/logoP_msa.R | 2 +- scripts/functions/tests/test_logo_plots.R | 41 +-- scripts/plotting/logo_data_msa.R | 2 +- 4 files changed, 418 insertions(+), 32 deletions(-) create mode 100644 scripts/functions/logoP_logolas.R diff --git a/scripts/functions/logoP_logolas.R b/scripts/functions/logoP_logolas.R new file mode 100644 index 0000000..91c0af5 --- /dev/null +++ b/scripts/functions/logoP_logolas.R @@ -0,0 +1,405 @@ +library(Logolas) +library(ggseqlogo) +source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R") + +# data msa: mut +my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only +msaSeq_mut = my_data$V1 + +# data msa: wt +gene = "pncA" +drug = "pyrazinamide" +indir = paste0("~/git/Data/", drug , "/input/") + +in_filename_fasta = paste0(tolower(gene), "2_f2.fasta") +infile_fasta = paste0(indir, in_filename_fasta) +cat("\nInput fasta file for WT: ", infile_fasta, "\n") + +msa2 = read.csv(infile_fasta, header = F) +head(msa2) +cat("\nLength of WT fasta:", nrow(msa2)) +#wt_seq = msa2$V1 +#head(wt_seq) +msaSeq_wt = msa2$V1 + +########################################### + +PlotLogolasMSA <- function(msaSeq_mut # chr vector + , msaSeq_wt # chr vector + , msa_method = c("custom") # will be c("EDLogo", "Logo)# + , EDLogo_score = c("log")# can be: "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL" + , bg_prob = NULL + , my_logo_col = "chemistry" + , plot_positions = c(1, 10, 14, 8) + , y_breaks + , x_lab = "Wild-type position" + , y_lab = "" + , x_ats = 13 # text size + , x_tangle = 90 # text angle + , x_axis_offset = 0.05 # dist b/w y-axis and plot start + , x_axis_offset_filtered = 0.2 + , y_axis_offset = 0.05 + , y_ats = 13 + , y_tangle = 0 + , x_tts = 13 # title size + , y_tts = 13 + , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) + , leg_dir = "horizontal" #can be vertical or horizontal + , leg_ts = 16 # leg text size + , leg_tts = 16 # leg title size + ) + +{ + dash_control = list() + dash_control_default <- list(concentration = NULL, mode = NULL, + optmethod = "mixEM", sample_weights = NULL, verbose = FALSE, + bf = TRUE, pi_init = NULL, squarem_control = list(), + dash_control = list(), reportcov = FALSE) + + dash_control <- modifyList(dash_control_default, dash_control) + + + ############################################ + # Data processing for logo plot for nsSNPS + ########################################### + cat("\nLength of MSA", length(msaSeq_mut) + , "\nlength of WT seq:", length(msaSeq_wt)) + + cat("\n=======================" + , "\nPlotting entire MSA" + , "\n========================") + + #-------------------------- + # Getting PFM: mutant MSA + #-------------------------- + pfm_mut <- Biostrings::consensusMatrix(msaSeq_mut) + colnames(pfm_mut) <- 1:dim(pfm_mut)[2] + pfm_mut_scaled <- do.call(dash, append(list(comp_data = pfm_mut), + dash_control))$posmean + + logo_mut_h = get_logo_heights(pfm_mut_scaled + , bg = bg_prob + , score = EDLogo_score) + logo_mut_h$pos_ic + logo_mut_h$neg_ic + + # TODO: Add sanity check! + #<... + #...> + + pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM + pos_mutS = logo_mut_h$pos_ic; pos_mutS + pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED + + neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1) + neg_mutS = logo_mut_h$neg_ic; neg_mutS + neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED + + combED_mutM = pos_mutED + neg_mutED + + # Construct the x-axis: mutant MSA + msa_all_pos = as.numeric(colnames(combED_mutM)) + + #--------------------- + # Getting PFM: WT + #--------------------- + pfm_wt <- Biostrings::consensusMatrix(msaSeq_wt) + colnames(pfm_wt) <- 1:dim(pfm_wt)[2] + pfm_wt_scaled <- do.call(dash, append(list(comp_data = pfm_wt), + dash_control))$posmean + + logo_wt_h = get_logo_heights(pfm_wt_scaled + , bg = bg_prob + , score = EDLogo_score) + logo_wt_h$pos_ic + logo_wt_h$neg_ic + + pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM + pos_wtS = logo_wt_h$pos_ic; pos_wtS + pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED + + neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1) + neg_wtS = logo_wt_h$neg_ic; neg_wtS + neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED + + combED_wtM = pos_wtED + neg_wtED + + # Construct the x-axis: mutant MSA + wt_all_pos = as.numeric(colnames(combED_wtM)) + + + if(missing(plot_positions)){ + + #------------------------------ + # MSA mut: All, no filtering + #------------------------------- + cat("\n===========================================" + , "\nGenerated PFM mut: No filtering" + , "\n===========================================") + plot_mut_edM = combED_mutM + + #------------------------------ + # MSA WT: All, no filtering + #------------------------------- + cat("\n===========================================" + , "\nGenerated PFM WT: No filtering" + , "\n===========================================") + plot_wt_edM = combED_wtM + + }else{ + + #------------------------------ + # PFM mut: Filtered positions + #------------------------------- + cat("\n===========================================" + , "\nGenerating PFM MSA: filtered positions" + , "\n===========================================" + , "\nUser specified plotting positions for MSA:" + , "\nThese are:\n", plot_positions + , "\nSorting plot positions...") + + plot_positions = sort(plot_positions) + + cat("\nPlotting positions sorted:\n" + , plot_positions) + + if ( all(plot_positions%in%msa_all_pos) && all(plot_positions%in%wt_all_pos) ){ + cat("\nAll positions within range" + , "\nFiltering positions as specified..." + , "\nNo. of positions in plot:", length(plot_positions)) + i_extract = plot_positions + plot_mut_edM = combED_mutM[, i_extract] + plot_wt_edM = combED_wtM[, i_extract] + + }else{ + cat("\nNo. of positions selected:", length(plot_positions)) + i_ofr = plot_positions[!plot_positions%in%msa_all_pos] + cat("\n1 or more plot_positions out of range..." + , "\nThese are:\n", i_ofr + , "\nQuitting! Resubmit with correct plot_positions") + quit() + } + } + + + # Construct the y-axis: Calculating + cat("\n-------------------------" + , "\nConstructing y-axis:" + , "\nUser did not provide" + ,"\n--------------------------") + if (missing(y_breaks)){ + + ylim_low <- floor(min(combED_mutM)); ylim_low + + if( ylim_low == 0){ + ylim_low = ylim_low + cat("\nY-axis lower limit:", ylim_low) + y_rlow = seq(0, ylim_low, length.out = 3); y_rlow + + ylim_up <- ceiling(max(combED_mutM)) + 4; ylim_up + cat("\nY-axis upper limit:", ylim_up) + y_rup = seq(0, ylim_up, by = 2); y_rup + + }else{ + ylim_low = ylim_low + (-0.5) + cat("\nY-axis lower limit is <0:", ylim_low) + y_rlow = seq(0, ylim_low, length.out = 3); y_rlow + + ylim_up <- ceiling(max(combED_mutM)) + 3; ylim_up + cat("\nY-axis upper limit:", ylim_up) + y_rup = seq(0, ylim_up, by = 3); y_rup + } + + #ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale + ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale + + cat("\nY-axis generated: see below" + , "\n" + , ylim_scale) + + }else{ + # Construct the y-axis: User provided + cat("\n-------------------------" + , "\nConstructing y-axis:" + , "\nUser provided" + ,"\n--------------------------") + ylim_scale = sort(y_breaks) + ylim_low = min(ylim_scale); ylim_low + ylim_up = max(ylim_scale); ylim_up + } + +else { + + + } + ###################################### + # Generating plots for muts and wt + ##################################### + + if (my_logo_col %in% c('clustalx','taylor')) { + cat("\nSelected colour scheme:", my_logo_col + , "\nUsing black theme\n") + + theme_bgc = "black" + xfont_bgc = "white" + yfont_bgc = "white" + xtt_col = "white" + ytt_col = "white" + } + + if (my_logo_col %in% c('chemistry', 'hydrophobicity')) { + cat("\nstart of MSA" + , '\nSelected colour scheme:', my_logo_col + , "\nUsing grey theme") + + theme_bgc = "grey" + xfont_bgc = "black" + yfont_bgc = "black" + xtt_col = "black" + ytt_col = "black" + } + + ##################################### + # Generating logo plots for nsSNPs + ##################################### + PlotlogolasL <- list() + + #------------------- + # Mutant logo plot + #------------------- + p0 = ggseqlogo(plot_mut_edM + #, facet = "grid" + , method = msa_method + , col_scheme = my_logo_col + , seq_type = 'auto') + + theme(legend.position = leg_pos + , legend.direction = leg_dir + #, legend.title = element_blank() + , legend.title = element_text(size = leg_tts + , colour = ytt_col) + , legend.text = element_text(size = leg_ts) + + , axis.text.x = element_text(size = x_ats + , angle = x_tangle + , hjust = 1 + , vjust = 0.4 + , colour = xfont_bgc) + #, axis.text.y = element_blank() + , axis.text.y = element_text(size = y_ats + , angle = y_tangle + , hjust = 1 + , vjust = -1.0 + , colour = yfont_bgc) + , axis.title.x = element_text(size = x_tts + , colour = xtt_col) + , axis.title.y = element_text(size = y_tts + , colour = ytt_col) + , plot.background = element_rect(fill = theme_bgc))+ + xlab(x_lab) + + if (missing(plot_positions)){ + ed_mut_logo_P = p0 + + scale_x_discrete(breaks = msa_all_pos + , expand = c(x_axis_offset,0) + , labels = msa_all_pos + , limits = factor(msa_all_pos))+ + scale_y_continuous(limits = c(ylim_low, ylim_up) + , breaks = ylim_scale + , expand = c(0,y_axis_offset)) + + geom_hline(yintercept = 0 + , linetype = "solid" + , color = "grey" + , size = 1) + + }else{ + ed_mut_logo_P = p0 + + #scale_y_continuous(expand = c(0,0.09)) + + scale_y_continuous(limits = c(ylim_low, ylim_up) + , breaks = ylim_scale + , expand = c(0,y_axis_offset))+ + scale_x_discrete(breaks = i_extract + , expand = c(x_axis_offset_filtered,0) + , labels = i_extract + , limits = factor(i_extract)) + + geom_hline(yintercept = 0 + , linetype = "solid" + , color = "grey" + , size = 1) + + } + + cat('\nDone: MSA plot for mutations') + #return(msa_mut_logoP) + PlotlogolasL[['ed_mut_logoP']] <- ed_mut_logo_P + + #--------------------------------- + # Wild-type MSA: gene_fasta file + #--------------------------------- + p1 = ggseqlogo(plot_wt_edM + #, facet = "grid" + , method = msa_method + , col_scheme = my_logo_col + , seq_type = 'aa') + + + theme(legend.position = "none" + , legend.direction = leg_dir + #, legend.title = element_blank() + , legend.title = element_text(size = leg_tts + , colour = ytt_col) + , legend.text = element_text(size = leg_ts) + + , axis.text.x = element_text(size = x_ats + , angle = x_tangle + , hjust = 1 + , vjust = 0.4 + , colour = xfont_bgc) + , axis.text.y = element_blank() + + , axis.title.x = element_text(size = x_tts + , colour = xtt_col) + , axis.title.y = element_text(size = y_tts + , colour = ytt_col) + + , plot.background = element_rect(fill = theme_bgc)) + + ylab("") + xlab("Wild-type position") + + if (missing(plot_positions)){ + ed_wt_logo_P = p1 + + scale_x_discrete(breaks = wt_all_pos + , expand = c(x_axis_offset,0) + , labels = wt_all_pos + , limits = factor(wt_all_pos)) + }else{ + ed_wt_logo_P = p1 + + scale_y_continuous(expand = c(0,0.09)) + + scale_x_discrete(breaks = i_extract + , expand = c(x_axis_offset_filtered, 0) + , labels = i_extract + , limits = factor(i_extract)) + } + + cat('\nDone: MSA plot for WT') + #return(msa_wt_logoP) + PlotlogolasL[['ed_wt_logoP']] <- ed_wt_logo_P + + #========================================= + # Output + # Combined plot: logo ED plot + # customised for ggseqlogo + #========================================= + + cat('\nDone: ed_mut_logoP + ed_wt_logoP') + + # colour scheme: https://rdrr.io/cran/ggseqlogo/src/R/col_schemes.r + #cat("\nOutput plot:", LogoSNPs_comb, "\n") + #svg(LogoSNPs_combined, width = 32, height = 10) + + LogoED_comb = cowplot::plot_grid(PlotlogolasL[['ed_mut_logoP']] + , PlotlogolasL[['ed_wt_logoP']] + , nrow = 2 + , align = "v" + , rel_heights = c(3/4, 1/4)) + + return(LogoED_comb) + +} diff --git a/scripts/functions/logoP_msa.R b/scripts/functions/logoP_msa.R index ef88b60..e31b9b7 100644 --- a/scripts/functions/logoP_msa.R +++ b/scripts/functions/logoP_msa.R @@ -157,7 +157,6 @@ LogoPlotMSA <- function(msaSeq_mut ###################################### # Generating plots for muts and wt ##################################### - LogoPlotMSAL <- list() if (my_logo_col %in% c('clustalx','taylor')) { cat("\nSelected colour scheme:", my_logo_col @@ -185,6 +184,7 @@ LogoPlotMSA <- function(msaSeq_mut ##################################### # Generating logo plots for nsSNPs ##################################### + LogoPlotMSAL <- list() #------------------- # Mutant logo plot diff --git a/scripts/functions/tests/test_logo_plots.R b/scripts/functions/tests/test_logo_plots.R index 4480d2b..3d3f475 100644 --- a/scripts/functions/tests/test_logo_plots.R +++ b/scripts/functions/tests/test_logo_plots.R @@ -1,5 +1,5 @@ #source("~/git/LSHTM_analysis/config/gid.R") -source("~/git/LSHTM_analysis/config/pnca.R") # YES +source("~/git/LSHTM_analysis/config/pnca.R") #source("~/git/LSHTM_analysis/config/embb.R") #source("~/git/LSHTM_analysis/config/katg.R") #source("~/git/LSHTM_analysis/config/alr.R") @@ -72,37 +72,12 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") # To plot entire MSA, simply don't specify {plot_positions} # script: logoP_msa.R # TODO perhaps: ED logo from Logolas +# TODO: Add scaled data option ######################################## -# LogoPlotMSA(msaSeq_mut = msa_seq -# , msaSeq_wt = wt_seq -# , msa_method = 'bits' # or probability -# , my_logo_col = "taylor" -# , plot_positions = active_aa_pos -# , x_lab = "nsSNP position" -# , y_lab = "" -# , x_ats = 10 # text size -# , x_tangle = 90 # text angle -# , x_axis_offset = 0.05 -# , y_ats = 15 -# , y_tangle = 0 -# , x_tts = 13 # title size -# , y_tts = 15 -# , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) -# , leg_dir = "horizontal" #can be vertical or horizontal -# , leg_ts = 16 # leg text size -# , leg_tts = 16 # leg title size -# ) - - - -######################################## -# ED Logo plot MSA -# Mutant and wild-type -######################################## -LogoPlotED(msaSeq_mut = msa_seq +LogoPlotMSA(msaSeq_mut = msa_seq , msaSeq_wt = wt_seq , msa_method = 'bits' # or probability - , my_logo_col = "taylor" + , my_logo_col = "taylor" , plot_positions = active_aa_pos , x_lab = "nsSNP position" , y_lab = "" @@ -117,4 +92,10 @@ LogoPlotED(msaSeq_mut = msa_seq , leg_dir = "horizontal" #can be vertical or horizontal , leg_ts = 16 # leg text size , leg_tts = 16 # leg title size -) \ No newline at end of file +) + +######################################## +# ED Logo plot MSA +# Mutant and wild-type +######################################## + diff --git a/scripts/plotting/logo_data_msa.R b/scripts/plotting/logo_data_msa.R index 4651477..3ed6b5a 100644 --- a/scripts/plotting/logo_data_msa.R +++ b/scripts/plotting/logo_data_msa.R @@ -39,7 +39,7 @@ head(msa_seq) #================ #in_filename_fasta = paste0(tolower(gene), ".1fasta") -in_filename_fasta = paste0(tolower(gene), "2_f2.fasta") # gid +in_filename_fasta = paste0(tolower(gene), "2_f2.fasta") infile_fasta = paste0(indir, "/", in_filename_fasta) cat("\nInput fasta file for WT: ", infile_fasta, "\n")