LSHTM_analysis/scripts/functions/logoP_msa_raw.R

319 lines
12 KiB
R

#####################################################################################
# LogoPlotMSA():
# Input:
# Data:
# msaSeq_mut: MSA chr vector for muts
# msaSeq_wt [Optional]: MSA chr vector for wt
# Others params:
# plot_positions: can choose what positions to plot
# msa_method : can be "bits" or "probability"
# my_logo_col : can be "chemistry", "hydrophobicity", "taylor" or "clustalx"
# Returns data LogoPlot from MSA
#...
# TODO: SHINY
# drop down: my_logo_col i.e the 4 colour choices
# drop down: for DataED_PFM(), ED score options:
# c("log", log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
# drop down/enter field: for DataED_PFM(), background probability
# Make it hover over position and then get the corresponding data table!
###################################################################################
#==================
# logo data: OR
#==================
LogoPlotMSA <- function(msaSeq_mut
, msaSeq_wt
, plot_positions
, msa_method = 'bits' # or probability
, my_logo_col = "chemistry"
, x_lab = "Wild-type position"
, y_lab = ""
, x_ats = 13 # text size
, x_tangle = 90 # text angle
, x_axis_offset = 0.07 # dist b/w y-axis and plot start
, y_ats = 13
, y_tangle = 0
, x_tts = 13 # title size
, y_tts = 13
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
, leg_dir = "horizontal" #can be vertical or horizontal
, leg_ts = 16 # leg text size
, leg_tts = 16 # leg title size
)
{
############################################
# Data processing for logo plot for nsSNPS
###########################################
cat("\nLength of MSA", length(msaSeq_mut)
, "\nlength of WT seq:", length(msaSeq_wt))
if(missing(plot_positions)){
#if(is.null(plot_positions)){
cat("\n======================="
, "\nPlotting entire MSA"
, "\n========================")
msa_seq_plot = msaSeq_mut
msa_all_interim = sapply(msa_seq_plot, function(x) unlist(strsplit(x,"")))
msa_all_interimDF = data.frame(msa_all_interim)
msa_all_pos = as.numeric(rownames(msa_all_interimDF))
wt_seq_plot = msaSeq_wt
wt_all_interim = sapply(wt_seq_plot, function(x) unlist(strsplit(x,"")))
wt_all_interimDF = data.frame(wt_all_interim)
wt_all_pos = as.numeric(rownames(wt_all_interimDF))
} else {
cat("\nUser specified plotting positions for MSA:"
, "\nThese are:\n", plot_positions
, "\nSorting plot positions...")
plot_positions = sort(plot_positions)
cat("\nPlotting positions sorted:\n"
, plot_positions)
#-----------
# MSA: mut
#-----------
cat("\n==========================================="
, "\nGenerating MSA: filtered positions"
, "\n===========================================")
msa_interim = sapply(msaSeq_mut, function(x) unlist(strsplit(x,"")))
msa_interimDF = data.frame(msa_interim)
msa_pos = as.numeric(rownames(msa_interimDF))
if (all(plot_positions%in%msa_pos)){
cat("\nAll positions within range"
, "\nProceeding with generating requested position MSA seqs..."
, "\nNo. of positions in plot:", length(plot_positions))
i_extract = plot_positions
dfP1 = msa_interimDF[i_extract,]
}else{
cat("\nNo. of positions selected:", length(plot_positions))
i_ofr = plot_positions[!plot_positions%in%msa_pos]
cat("\n1 or more plot_positions out of range..."
, "\nThese are:\n", i_ofr
, "\nQuitting! Resubmit with correct plot_positions")
#i_extract = plot_positions[plot_positions%in%msa_pos]
#cat("\nFinal no. of positions being plottted:", length(i_extract)
# , "\nNo. of positions dropped from request:", length(i_ofr))
quit()
}
#matP1 = msa_interim[i_extract, 1:ncol(msa_interim)]
#dfP1 = msa_interimDF[i_extract,]
dfP1 = data.frame(t(dfP1))
names(dfP1) = i_extract
cols_to_paste = names(dfP1)
dfP1['chosen_seq'] = apply(dfP1[ , cols_to_paste]
, 1
, paste, sep = ''
, collapse = "")
msa_seq_plot = dfP1$chosen_seq
#-----------
# WT: fasta
#-----------
cat("\n========================================="
, "\nGenerating WT fasta: filtered positions"
,"\n===========================================")
wt_interim = sapply(msaSeq_wt, function(x) unlist(strsplit(x,"")))
wt_interimDF = data.frame(wt_interim)
wt_pos = as.numeric(rownames(wt_interimDF))
if (all(plot_positions%in%wt_pos)){
cat("\nAll positions within range"
, "\nProceeding with generating requested position MSA seqs..."
, "\nplot positions:", length(plot_positions))
i2_extract = plot_positions
}else{
cat("\nNo. of positions selected:", length(plot_positions))
i2_ofr = plot_positions[!plot_positions%in%wt_pos]
cat("\n1 or more plot_positions out of range..."
, "\nThese are:\n", i_ofr
, "\nQuitting! Resubmit with correct plot_positions")
#i2_extract = plot_positions[plot_positions%in%wt_pos]
#cat("\nFinal no. of positions being plottted:", length(i2_extract)
# , "\nNo. of positions dropped from request:", length(i2_ofr))
quit()
}
#matP1 = msa_interim[i_extract, 1:ncol(msa_interim)]
dfP2 = wt_interimDF[i2_extract,]
dfP2 = data.frame(t(dfP2))
names(dfP2) = i2_extract
cols_to_paste2 = names(dfP2)
dfP2['chosen_seq'] = apply( dfP2[ , cols_to_paste2]
, 1
, paste, sep = ''
, collapse = "")
wt_seq_plot = dfP2$chosen_seq
}
######################################
# Generating plots for muts and wt
#####################################
if (my_logo_col %in% c('clustalx','taylor')) {
cat("\nSelected colour scheme:", my_logo_col
, "\nUsing black theme\n")
theme_bgc = "black"
xfont_bgc = "white"
yfont_bgc = "white"
xtt_col = "white"
ytt_col = "white"
}
if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
cat("\nstart of MSA"
, '\nSelected colour scheme:', my_logo_col
, "\nUsing grey theme")
theme_bgc = "grey"
xfont_bgc = "black"
yfont_bgc = "black"
xtt_col = "black"
ytt_col = "black"
}
#####################################
# Generating logo plots for nsSNPs
#####################################
LogoPlotMSAL <- list()
#-------------------
# Mutant logo plot
#-------------------
p0 = ggseqlogo(msa_seq_plot
, facet = "grid"
, method = msa_method
, col_scheme = my_logo_col
, seq_type = 'aa') +
theme(legend.position = leg_pos
, legend.direction = leg_dir
#, legend.title = element_blank()
, legend.title = element_text(size = leg_tts
, colour = ytt_col)
, legend.text = element_text(size = leg_ts)
, axis.text.x = element_text(size = x_ats
, angle = x_tangle
, hjust = 1
, vjust = 0.4
, colour = xfont_bgc)
#, axis.text.y = element_blank()
, axis.text.y = element_text(size = y_ats
, angle = y_tangle
, hjust = 1
, vjust = -1.0
, colour = yfont_bgc)
, axis.title.x = element_text(size = x_tts
, colour = xtt_col)
, axis.title.y = element_text(size = y_tts
, colour = ytt_col)
, plot.background = element_rect(fill = theme_bgc))+
xlab(x_lab)
if (missing(plot_positions)){
msa_mut_logo_P = p0 +
scale_x_discrete(breaks = msa_all_pos
, expand = c(0.02,0)
, labels = msa_all_pos
, limits = factor(msa_all_pos))
}else{
msa_mut_logo_P = p0 +
scale_y_continuous(expand = c(0,0.09)) +
scale_x_discrete(breaks = i_extract
, expand = c(x_axis_offset,0)
, labels = i_extract
, limits = factor(i_extract))
}
cat('\nDone: MSA plot for mutations')
#return(msa_mut_logoP)
LogoPlotMSAL[['msa_mut_logoP']] <- msa_mut_logo_P
#---------------------------------
# Wild-type MSA: gene_fasta file
#---------------------------------
p1 = ggseqlogo(wt_seq_plot
, facet = "grid"
, method = msa_method
, col_scheme = my_logo_col
, seq_type = 'aa') +
theme(legend.position = "none"
, legend.direction = leg_dir
#, legend.title = element_blank()
, legend.title = element_text(size = leg_tts
, colour = ytt_col)
, legend.text = element_text(size = leg_ts)
, axis.text.x = element_text(size = x_ats
, angle = x_tangle
, hjust = 1
, vjust = 0.4
, colour = xfont_bgc)
, axis.text.y = element_blank()
, axis.title.x = element_text(size = x_tts
, colour = xtt_col)
, axis.title.y = element_text(size = y_tts
, colour = ytt_col)
, plot.background = element_rect(fill = theme_bgc)) +
ylab("") + xlab("Wild-type position")
if (missing(plot_positions)){
msa_wt_logo_P = p1 +
scale_x_discrete(breaks = wt_all_pos
, expand = c(0.02,0)
, labels = wt_all_pos
, limits = factor(wt_all_pos) )
}else{
msa_wt_logo_P = p1 +
scale_y_continuous(expand = c(0,0.09)) +
scale_x_discrete(breaks = i2_extract
, expand = c(x_axis_offset, 0)
, labels = i2_extract
, limits = factor(i2_extract))
}
cat('\nDone: MSA plot for WT')
#return(msa_wt_logoP)
LogoPlotMSAL[['msa_wt_logoP']] <- msa_wt_logo_P
#=========================================
# Output
# Combined plot: logo_MSA
#=========================================
cat('\nDone: msa_mut_logoP + msa_wt_logoP')
# colour scheme: https://rdrr.io/cran/ggseqlogo/src/R/col_schemes.r
#cat("\nOutput plot:", LogoSNPs_comb, "\n")
#svg(LogoSNPs_combined, width = 32, height = 10)
LogoMSA_comb = cowplot::plot_grid(LogoPlotMSAL[['msa_mut_logoP']]
, LogoPlotMSAL[['msa_wt_logoP']]
, nrow = 2
, align = "v"
, rel_heights = c(3/4, 1/4))
return(LogoMSA_comb)
}