# logo plots

# one for multiple muts
# --> select/drop down option to filter count of nsSNPs
# --> select/drop down option for colour
# --> should include WT

# Data used

#tab_mt # mutant logo plot
#tab_wt # wt logo plot

# Make it hover over position and then get the corresponding data table!
#%%======================================================================

#==================
# logo data: OR
#==================
# NOTE: my_logo_col

# LogoPlotMSA(): build a two-panel sequence logo figure.
#
# The top panel is a logo of the mutant MSA (msaSeq_mut), the bottom panel a
# logo of the wild-type sequence (msaSeq_wt). Both are drawn with ggseqlogo()
# and stacked with cowplot::plot_grid() (heights 3/4 and 1/4).
#
# Arguments:
#   msaSeq_mut     sequences for the mutant logo; each element is split into
#                  single characters, so a character vector (or list of
#                  strings) is expected. Assumed to be aligned, i.e. all of
#                  equal length -- TODO confirm against callers.
#   msaSeq_wt      sequence(s) for the wild-type logo, same format.
#   plot_positions optional numeric vector of 1-based positions to plot. When
#                  missing, the full sequences are plotted. Positions outside
#                  a sequence are reported and dropped (separately for the
#                  mutant MSA and the wild-type sequence).
#   msa_method     passed to ggseqlogo(method = ); 'bits' or 'probability'.
#   my_logo_col    passed to ggseqlogo(col_scheme = ). 'clustalx' and 'taylor'
#                  get a black background with white text; every other value
#                  gets a grey background with black text.
#   x_lab          x-axis title of the mutant panel (the wild-type panel is
#                  always titled "Wild-type position").
#   y_lab          NOTE(review): accepted but currently not applied to either
#                  panel; kept for interface compatibility.
#   x_ats, y_ats   axis text sizes; x_tangle, y_tangle axis text angles.
#   x_tts, y_tts   axis title sizes.
#   leg_pos        legend position of the mutant panel (the wild-type panel
#                  never shows a legend); leg_dir legend direction.
#   leg_ts, leg_tts legend text and legend title sizes.
#
# Returns: the combined plot object produced by cowplot::plot_grid().
# Side effects: progress messages are written with cat().
LogoPlotMSA <- function(msaSeq_mut
                        , msaSeq_wt
                        , plot_positions
                        , msa_method = 'bits' # or probability
                        , my_logo_col = "chemistry"
                        , x_lab = "Wild-type position"
                        , y_lab = ""
                        , x_ats = 13 # text size
                        , x_tangle = 90 # text angle
                        , y_ats = 13
                        , y_tangle = 0
                        , x_tts = 13 # title size
                        , y_tts = 13
                        , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
                        , leg_dir = "horizontal" #can be vertical or horizontal
                        , leg_ts = 16 # leg text size
                        , leg_tts = 16 # leg title size
)
{
  # missing() must be evaluated in this frame, so capture it once up front.
  plot_all <- missing(plot_positions)

  # Restrict every sequence in `seqs` to the requested `positions`.
  # Returns list(positions = <positions actually kept>, seqs = <one string per
  # input sequence made of the residues at the kept positions>).
  extract_positions <- function(seqs, positions) {
    seqs <- as.character(seqs)
    if (length(seqs) == 0L) {
      stop("Cannot filter positions: no sequences supplied", call. = FALSE)
    }

    residues <- strsplit(seqs, "", fixed = TRUE)

    # A position is valid only if it exists in every sequence, so the shortest
    # sequence defines the upper bound. Checking against the length (rather
    # than linearly indexing a position x sequence matrix) means an oversized
    # position can never silently pick a residue from another sequence.
    max_pos <- min(lengths(residues))
    in_range <- !is.na(positions) & positions >= 1 & positions <= max_pos

    if (all(in_range)) {
      cat("\nAll positions within range"
          , "\nProceeing with generating requested position MSA seqs...")
    } else {
      dropped <- positions[!in_range]
      cat("Plot_positions selected:", length(positions))
      cat("\nIndex out of range: 1 or more"
          , "\nThese are:", dropped
          , "\nOmitting these and proceeding...")
      cat("\nFinal positions being plottted:", sum(in_range)
          , "\nNo. of positions dropped from request:", length(dropped))
    }

    # Keep the *positions* (not the residues found there): they are used both
    # to subset the sequences and to label the x axis.
    kept <- positions[in_range]
    if (length(kept) == 0L) {
      stop("None of the requested plot_positions are within the sequence range"
           , call. = FALSE)
    }

    # Per-sequence extraction works for one or many positions and for one or
    # many sequences, with no matrix dimension dropping to worry about.
    chosen <- vapply(residues
                     , function(res) paste(res[kept], collapse = "")
                     , character(1)
                     , USE.NAMES = FALSE)

    list(positions = kept, seqs = chosen)
  }

  ############################################
  # Data processing for logo plot for nsSNPS
  ###########################################
  cat("\nLength of MSA", length(msaSeq_mut)
      , "\nlength of WT seq:", length(msaSeq_wt))

  if (plot_all) {
    cat("\nPlotting entire MSA")
    msa_seq_plot <- msaSeq_mut
    wt_seq_plot <- msaSeq_wt

  } else {
    cat("\nUser specified plotting positions for MSA:"
        , "These are:", plot_positions)

    #-----------
    # MSA: mut
    #-----------
    cat("\nGenerating MSA: filtered positions")
    mut_subset <- extract_positions(msaSeq_mut, plot_positions)
    i_extract <- mut_subset$positions
    msa_seq_plot <- mut_subset$seqs

    #-----------
    # WT: fasta
    #-----------
    cat("\nGenerating WT fasta: filtered positions")
    # The wild-type sequence is filtered with its own in-range positions, so
    # its residues and its axis labels always refer to the same positions.
    wt_subset <- extract_positions(msaSeq_wt, plot_positions)
    i2_extract <- wt_subset$positions
    wt_seq_plot <- wt_subset$seqs
  }

  ######################################
  # Theme colours for the chosen scheme
  #####################################
  # Only a single scheme name can be matched against the known names; anything
  # else (e.g. a custom colour scheme object) falls through to the grey theme
  # so the theme variables are always defined.
  scheme_name <- if (is.character(my_logo_col) && length(my_logo_col) == 1L) {
    my_logo_col
  } else {
    NA_character_
  }

  if (scheme_name %in% c('clustalx','taylor')) {
    cat("\nSelected colour scheme:", my_logo_col
        , "\nUsing black theme\n")

    theme_bgc <- "black"
    font_col <- "white"

  } else {
    if (scheme_name %in% c('chemistry', 'hydrophobicity')) {
      cat('\nSelected colour scheme:', my_logo_col
          , "\nUsing grey theme")
    } else {
      cat("\nSelected colour scheme has no dedicated theme"
          , "\nUsing grey theme")
    }

    theme_bgc <- "grey"
    font_col <- "black"
  }

  xfont_bgc <- font_col
  yfont_bgc <- font_col
  xtt_col <- font_col
  ytt_col <- font_col

  # Theme settings shared by the mutant and the wild-type panel.
  shared_theme <- theme(legend.direction = leg_dir
                        , legend.title = element_text(size = leg_tts
                                                      , colour = ytt_col)
                        , legend.text = element_text(size = leg_ts)
                        , axis.text.x = element_text(size = x_ats
                                                     , angle = x_tangle
                                                     , hjust = 1
                                                     , vjust = 0.4
                                                     , colour = xfont_bgc)
                        , axis.title.x = element_text(size = x_tts
                                                      , colour = xtt_col)
                        , axis.title.y = element_text(size = y_tts
                                                      , colour = ytt_col)
                        , plot.background = element_rect(fill = theme_bgc))

  # When specific positions were requested, relabel the x axis with the
  # original sequence positions instead of 1..n.
  add_position_scales <- function(logo_plot, positions) {
    logo_plot +
      scale_y_continuous(expand = c(0,0.09)) +
      scale_x_discrete(breaks = positions
                       , expand = c(0.09,0)
                       , labels = positions
                       , limits = factor(positions))
  }

  #-------------------
  # Mutant logo plot
  #-------------------
  msa_mut_logo_P <- ggseqlogo(msa_seq_plot
                              , facet = "grid"
                              , method = msa_method
                              , col_scheme = my_logo_col
                              , seq_type = 'aa') +
    shared_theme +
    theme(legend.position = leg_pos
          , axis.text.y = element_text(size = y_ats
                                       , angle = y_tangle
                                       , hjust = 1
                                       , vjust = -1.0
                                       , colour = yfont_bgc)) +
    xlab(x_lab)

  if (!plot_all) {
    msa_mut_logo_P <- add_position_scales(msa_mut_logo_P, i_extract)
  }

  cat('\nDone: msa_mut_logo_P')

  #---------------------------------
  # Wild-type MSA: gene_fasta file
  #---------------------------------
  msa_wt_logo_P <- ggseqlogo(wt_seq_plot
                             , facet = "grid"
                             , method = msa_method
                             , col_scheme = my_logo_col
                             , seq_type = 'aa') +
    shared_theme +
    theme(legend.position = "none"
          , axis.text.y = element_blank()) +
    ylab("") + xlab("Wild-type position")

  if (!plot_all) {
    msa_wt_logo_P <- add_position_scales(msa_wt_logo_P, i2_extract)
  }

  cat('\nDone: msa_wt_logo_P')

  #=========================================
  # Output
  # Combined plot: logo_MSA
  #=========================================
  cat('\nDone: msa_mut_logoP + msa_wt_logoP')

  # colour scheme: https://rdrr.io/cran/ggseqlogo/src/R/col_schemes.r
  LogoMSA_comb <- cowplot::plot_grid(msa_mut_logo_P
                                     , msa_wt_logo_P
                                     , nrow = 2
                                     , align = "v"
                                     , rel_heights = c(3/4, 1/4))

  return(LogoMSA_comb)
}