added logoP_logolas.R to plot logolas like plot to show ED regions

This commit is contained in:
Tanushree Tunstall 2022-01-24 17:23:32 +00:00
parent 9aa62b33b1
commit 6a9f4a0cab
4 changed files with 418 additions and 32 deletions

View file

@ -0,0 +1,405 @@
library(Logolas)
library(ggseqlogo)
source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
# data msa: mut
my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only
msaSeq_mut = my_data$V1
# data msa: wt
gene = "pncA"
drug = "pyrazinamide"
indir = paste0("~/git/Data/", drug , "/input/")
in_filename_fasta = paste0(tolower(gene), "2_f2.fasta")
infile_fasta = paste0(indir, in_filename_fasta)
cat("\nInput fasta file for WT: ", infile_fasta, "\n")
msa2 = read.csv(infile_fasta, header = F)
head(msa2)
cat("\nLength of WT fasta:", nrow(msa2))
#wt_seq = msa2$V1
#head(wt_seq)
msaSeq_wt = msa2$V1
###########################################
PlotLogolasMSA <- function(msaSeq_mut # chr vector
, msaSeq_wt # chr vector
, msa_method = c("custom") # will be c("EDLogo", "Logo)#
, EDLogo_score = c("log")# can be: "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL"
, bg_prob = NULL
, my_logo_col = "chemistry"
, plot_positions = c(1, 10, 14, 8)
, y_breaks
, x_lab = "Wild-type position"
, y_lab = ""
, x_ats = 13 # text size
, x_tangle = 90 # text angle
, x_axis_offset = 0.05 # dist b/w y-axis and plot start
, x_axis_offset_filtered = 0.2
, y_axis_offset = 0.05
, y_ats = 13
, y_tangle = 0
, x_tts = 13 # title size
, y_tts = 13
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
, leg_dir = "horizontal" #can be vertical or horizontal
, leg_ts = 16 # leg text size
, leg_tts = 16 # leg title size
)
{
dash_control = list()
dash_control_default <- list(concentration = NULL, mode = NULL,
optmethod = "mixEM", sample_weights = NULL, verbose = FALSE,
bf = TRUE, pi_init = NULL, squarem_control = list(),
dash_control = list(), reportcov = FALSE)
dash_control <- modifyList(dash_control_default, dash_control)
############################################
# Data processing for logo plot for nsSNPS
###########################################
cat("\nLength of MSA", length(msaSeq_mut)
, "\nlength of WT seq:", length(msaSeq_wt))
cat("\n======================="
, "\nPlotting entire MSA"
, "\n========================")
#--------------------------
# Getting PFM: mutant MSA
#--------------------------
pfm_mut <- Biostrings::consensusMatrix(msaSeq_mut)
colnames(pfm_mut) <- 1:dim(pfm_mut)[2]
pfm_mut_scaled <- do.call(dash, append(list(comp_data = pfm_mut),
dash_control))$posmean
logo_mut_h = get_logo_heights(pfm_mut_scaled
, bg = bg_prob
, score = EDLogo_score)
logo_mut_h$pos_ic
logo_mut_h$neg_ic
# TODO: Add sanity check!
#<...
#...>
pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM
pos_mutS = logo_mut_h$pos_ic; pos_mutS
pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED
neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1)
neg_mutS = logo_mut_h$neg_ic; neg_mutS
neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED
combED_mutM = pos_mutED + neg_mutED
# Construct the x-axis: mutant MSA
msa_all_pos = as.numeric(colnames(combED_mutM))
#---------------------
# Getting PFM: WT
#---------------------
pfm_wt <- Biostrings::consensusMatrix(msaSeq_wt)
colnames(pfm_wt) <- 1:dim(pfm_wt)[2]
pfm_wt_scaled <- do.call(dash, append(list(comp_data = pfm_wt),
dash_control))$posmean
logo_wt_h = get_logo_heights(pfm_wt_scaled
, bg = bg_prob
, score = EDLogo_score)
logo_wt_h$pos_ic
logo_wt_h$neg_ic
pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM
pos_wtS = logo_wt_h$pos_ic; pos_wtS
pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED
neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1)
neg_wtS = logo_wt_h$neg_ic; neg_wtS
neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED
combED_wtM = pos_wtED + neg_wtED
# Construct the x-axis: mutant MSA
wt_all_pos = as.numeric(colnames(combED_wtM))
if(missing(plot_positions)){
#------------------------------
# MSA mut: All, no filtering
#-------------------------------
cat("\n==========================================="
, "\nGenerated PFM mut: No filtering"
, "\n===========================================")
plot_mut_edM = combED_mutM
#------------------------------
# MSA WT: All, no filtering
#-------------------------------
cat("\n==========================================="
, "\nGenerated PFM WT: No filtering"
, "\n===========================================")
plot_wt_edM = combED_wtM
}else{
#------------------------------
# PFM mut: Filtered positions
#-------------------------------
cat("\n==========================================="
, "\nGenerating PFM MSA: filtered positions"
, "\n==========================================="
, "\nUser specified plotting positions for MSA:"
, "\nThese are:\n", plot_positions
, "\nSorting plot positions...")
plot_positions = sort(plot_positions)
cat("\nPlotting positions sorted:\n"
, plot_positions)
if ( all(plot_positions%in%msa_all_pos) && all(plot_positions%in%wt_all_pos) ){
cat("\nAll positions within range"
, "\nFiltering positions as specified..."
, "\nNo. of positions in plot:", length(plot_positions))
i_extract = plot_positions
plot_mut_edM = combED_mutM[, i_extract]
plot_wt_edM = combED_wtM[, i_extract]
}else{
cat("\nNo. of positions selected:", length(plot_positions))
i_ofr = plot_positions[!plot_positions%in%msa_all_pos]
cat("\n1 or more plot_positions out of range..."
, "\nThese are:\n", i_ofr
, "\nQuitting! Resubmit with correct plot_positions")
quit()
}
}
# Construct the y-axis: Calculating
cat("\n-------------------------"
, "\nConstructing y-axis:"
, "\nUser did not provide"
,"\n--------------------------")
if (missing(y_breaks)){
ylim_low <- floor(min(combED_mutM)); ylim_low
if( ylim_low == 0){
ylim_low = ylim_low
cat("\nY-axis lower limit:", ylim_low)
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
ylim_up <- ceiling(max(combED_mutM)) + 4; ylim_up
cat("\nY-axis upper limit:", ylim_up)
y_rup = seq(0, ylim_up, by = 2); y_rup
}else{
ylim_low = ylim_low + (-0.5)
cat("\nY-axis lower limit is <0:", ylim_low)
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
ylim_up <- ceiling(max(combED_mutM)) + 3; ylim_up
cat("\nY-axis upper limit:", ylim_up)
y_rup = seq(0, ylim_up, by = 3); y_rup
}
#ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale
ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale
cat("\nY-axis generated: see below"
, "\n"
, ylim_scale)
}else{
# Construct the y-axis: User provided
cat("\n-------------------------"
, "\nConstructing y-axis:"
, "\nUser provided"
,"\n--------------------------")
ylim_scale = sort(y_breaks)
ylim_low = min(ylim_scale); ylim_low
ylim_up = max(ylim_scale); ylim_up
}
else {
}
######################################
# Generating plots for muts and wt
#####################################
if (my_logo_col %in% c('clustalx','taylor')) {
cat("\nSelected colour scheme:", my_logo_col
, "\nUsing black theme\n")
theme_bgc = "black"
xfont_bgc = "white"
yfont_bgc = "white"
xtt_col = "white"
ytt_col = "white"
}
if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
cat("\nstart of MSA"
, '\nSelected colour scheme:', my_logo_col
, "\nUsing grey theme")
theme_bgc = "grey"
xfont_bgc = "black"
yfont_bgc = "black"
xtt_col = "black"
ytt_col = "black"
}
#####################################
# Generating logo plots for nsSNPs
#####################################
PlotlogolasL <- list()
#-------------------
# Mutant logo plot
#-------------------
p0 = ggseqlogo(plot_mut_edM
#, facet = "grid"
, method = msa_method
, col_scheme = my_logo_col
, seq_type = 'auto') +
theme(legend.position = leg_pos
, legend.direction = leg_dir
#, legend.title = element_blank()
, legend.title = element_text(size = leg_tts
, colour = ytt_col)
, legend.text = element_text(size = leg_ts)
, axis.text.x = element_text(size = x_ats
, angle = x_tangle
, hjust = 1
, vjust = 0.4
, colour = xfont_bgc)
#, axis.text.y = element_blank()
, axis.text.y = element_text(size = y_ats
, angle = y_tangle
, hjust = 1
, vjust = -1.0
, colour = yfont_bgc)
, axis.title.x = element_text(size = x_tts
, colour = xtt_col)
, axis.title.y = element_text(size = y_tts
, colour = ytt_col)
, plot.background = element_rect(fill = theme_bgc))+
xlab(x_lab)
if (missing(plot_positions)){
ed_mut_logo_P = p0 +
scale_x_discrete(breaks = msa_all_pos
, expand = c(x_axis_offset,0)
, labels = msa_all_pos
, limits = factor(msa_all_pos))+
scale_y_continuous(limits = c(ylim_low, ylim_up)
, breaks = ylim_scale
, expand = c(0,y_axis_offset)) +
geom_hline(yintercept = 0
, linetype = "solid"
, color = "grey"
, size = 1)
}else{
ed_mut_logo_P = p0 +
#scale_y_continuous(expand = c(0,0.09)) +
scale_y_continuous(limits = c(ylim_low, ylim_up)
, breaks = ylim_scale
, expand = c(0,y_axis_offset))+
scale_x_discrete(breaks = i_extract
, expand = c(x_axis_offset_filtered,0)
, labels = i_extract
, limits = factor(i_extract)) +
geom_hline(yintercept = 0
, linetype = "solid"
, color = "grey"
, size = 1)
}
cat('\nDone: MSA plot for mutations')
#return(msa_mut_logoP)
PlotlogolasL[['ed_mut_logoP']] <- ed_mut_logo_P
#---------------------------------
# Wild-type MSA: gene_fasta file
#---------------------------------
p1 = ggseqlogo(plot_wt_edM
#, facet = "grid"
, method = msa_method
, col_scheme = my_logo_col
, seq_type = 'aa') +
theme(legend.position = "none"
, legend.direction = leg_dir
#, legend.title = element_blank()
, legend.title = element_text(size = leg_tts
, colour = ytt_col)
, legend.text = element_text(size = leg_ts)
, axis.text.x = element_text(size = x_ats
, angle = x_tangle
, hjust = 1
, vjust = 0.4
, colour = xfont_bgc)
, axis.text.y = element_blank()
, axis.title.x = element_text(size = x_tts
, colour = xtt_col)
, axis.title.y = element_text(size = y_tts
, colour = ytt_col)
, plot.background = element_rect(fill = theme_bgc)) +
ylab("") + xlab("Wild-type position")
if (missing(plot_positions)){
ed_wt_logo_P = p1 +
scale_x_discrete(breaks = wt_all_pos
, expand = c(x_axis_offset,0)
, labels = wt_all_pos
, limits = factor(wt_all_pos))
}else{
ed_wt_logo_P = p1 +
scale_y_continuous(expand = c(0,0.09)) +
scale_x_discrete(breaks = i_extract
, expand = c(x_axis_offset_filtered, 0)
, labels = i_extract
, limits = factor(i_extract))
}
cat('\nDone: MSA plot for WT')
#return(msa_wt_logoP)
PlotlogolasL[['ed_wt_logoP']] <- ed_wt_logo_P
#=========================================
# Output
# Combined plot: logo ED plot
# customised for ggseqlogo
#=========================================
cat('\nDone: ed_mut_logoP + ed_wt_logoP')
# colour scheme: https://rdrr.io/cran/ggseqlogo/src/R/col_schemes.r
#cat("\nOutput plot:", LogoSNPs_comb, "\n")
#svg(LogoSNPs_combined, width = 32, height = 10)
LogoED_comb = cowplot::plot_grid(PlotlogolasL[['ed_mut_logoP']]
, PlotlogolasL[['ed_wt_logoP']]
, nrow = 2
, align = "v"
, rel_heights = c(3/4, 1/4))
return(LogoED_comb)
}

View file

@ -157,7 +157,6 @@ LogoPlotMSA <- function(msaSeq_mut
######################################
# Generating plots for muts and wt
#####################################
LogoPlotMSAL <- list()
if (my_logo_col %in% c('clustalx','taylor')) {
cat("\nSelected colour scheme:", my_logo_col
@ -185,6 +184,7 @@ LogoPlotMSA <- function(msaSeq_mut
#####################################
# Generating logo plots for nsSNPs
#####################################
LogoPlotMSAL <- list()
#-------------------
# Mutant logo plot

View file

@ -1,5 +1,5 @@
#source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/config/pnca.R") # YES
source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/alr.R")
@ -72,37 +72,12 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# To plot entire MSA, simply don't specify {plot_positions}
# script: logoP_msa.R
# TODO perhaps: ED logo from Logolas
# TODO: Add scaled data option
########################################
# LogoPlotMSA(msaSeq_mut = msa_seq
# , msaSeq_wt = wt_seq
# , msa_method = 'bits' # or probability
# , my_logo_col = "taylor"
# , plot_positions = active_aa_pos
# , x_lab = "nsSNP position"
# , y_lab = ""
# , x_ats = 10 # text size
# , x_tangle = 90 # text angle
# , x_axis_offset = 0.05
# , y_ats = 15
# , y_tangle = 0
# , x_tts = 13 # title size
# , y_tts = 15
# , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
# , leg_dir = "horizontal" #can be vertical or horizontal
# , leg_ts = 16 # leg text size
# , leg_tts = 16 # leg title size
# )
########################################
# ED Logo plot MSA
# Mutant and wild-type
########################################
LogoPlotED(msaSeq_mut = msa_seq
LogoPlotMSA(msaSeq_mut = msa_seq
, msaSeq_wt = wt_seq
, msa_method = 'bits' # or probability
, my_logo_col = "taylor"
, my_logo_col = "taylor"
, plot_positions = active_aa_pos
, x_lab = "nsSNP position"
, y_lab = ""
@ -117,4 +92,10 @@ LogoPlotED(msaSeq_mut = msa_seq
, leg_dir = "horizontal" #can be vertical or horizontal
, leg_ts = 16 # leg text size
, leg_tts = 16 # leg title size
)
)
########################################
# ED Logo plot MSA
# Mutant and wild-type
########################################