506 lines
17 KiB
R
506 lines
17 KiB
R
#####################################################################################
|
|
# LogoPlotMSA():
|
|
# Input:
|
|
# Data:
|
|
# msaSeq_mut: MSA chr vector for muts
|
|
# msaSeq_wt: MSA chr vector for wt
|
|
|
|
# Logo type params:
|
|
# logo_type = c("EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw")
|
|
# EDLogo: calculated from the Logolas package based on PFM matrix (scaled).
|
|
#The required content from the package is sourced locally within 'my_logolas.R'
|
|
# bits_pfm: Information Content based on PFM scaled matrix (my_logolas.R)
|
|
# probability_pfm: Probability based on PFM scaled matrix (my_logolas.R)
|
|
# bits_raw: Information Content based on Raw MSA (ggseqlogo)
|
|
# probability_raw: Probability based on Raw MSA (ggseqlogo)
|
|
|
|
# EDScore_type = c("log", log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
|
|
# bg_prob: background probability, default is equal i.e NULL.
|
|
# This is used by the internal call to DataED_PFM(). This func takes thse args. I have used it here for
|
|
# completeness and allow nuanced plot control
|
|
|
|
# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
|
|
# --> if clustalx and taylor, set variable to black bg + white font
|
|
# --> if chemistry and hydrophobicity, then grey bg + black font
|
|
|
|
# ...other params
|
|
|
|
# Returns: Logo plots from MSA both mutant and wt (for comparability)
|
|
# For my case, I always use it as it helps see what is at the wild-type already!
|
|
|
|
# TODO: SHINY
|
|
# drop down: logo_type
|
|
# drop down: ED score type
|
|
# drop down/enter field : bg probability (in the actual plot function!)
|
|
# drop down: my_logo_col
|
|
# Make it hover over position and then get the corresponding data table!
|
|
###################################################################################
|
|
|
|
|
|
###########################################
|
|
#LogoPlotMSA <- function(msaSeq_mut # chr vector
|
|
# , msaSeq_wt # chr vector
|
|
LogoPlotMSA <- function(# unified_msa # <- not needed any more because we have 'target' now
|
|
target = 'embb'
|
|
, logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw")
|
|
, EDScore_type = c("log") # see if this relevant, or source function should have it!
|
|
, bg_prob = NULL
|
|
, my_logo_col = "chemistry"
|
|
, plot_positions
|
|
, y_breaks
|
|
, x_lab_mut = ""
|
|
, y_lab_mut
|
|
, x_ats = 10 # text size
|
|
, x_tangle = 90 # text angle
|
|
, x_axis_offset = 0 # dist b/w y-axis and plot start
|
|
, x_axis_offset_filtered = 0
|
|
, y_axis_offset = 0
|
|
, y_axis_increment = 1
|
|
, y_ats = 10
|
|
, y_tangle = 0
|
|
, x_tts = 10 # title size
|
|
, y_tts = 10
|
|
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
|
|
, leg_dir = "horizontal" #can be vertical or horizontal
|
|
, leg_ts = 14 # leg text size
|
|
, leg_tts = 14 # leg title size
|
|
, ...
|
|
)
|
|
|
|
{
|
|
# FIXME: Hack!
|
|
# msaSeq_mut=unified_msa[[1]]
|
|
# msaSeq_wt=unified_msa[[2]]
|
|
|
|
unified_msa = get(paste0(target, "_unified_msa"))
|
|
|
|
msaSeq_mut=unified_msa[['msa_seq']]
|
|
msaSeq_wt=unified_msa[['wt_seq']]
|
|
|
|
# Get PFM matrix for mut and wt MSA provided
|
|
data_ed = DataED_PFM(msaSeq_mut
|
|
, msaSeq_wt
|
|
, ED_score = EDScore_type)
|
|
names(data_ed)
|
|
#"pfm_mutM" "pfm_mut_scaledM" "combED_mutM" "pfm_wtM" "pfm_wt_scaledM" "combED_wtM"
|
|
|
|
#merged_df3 for current target (unfortunatly i can't think of an easy way to get this from unified_msa)
|
|
contig_df=data.frame(position=1:max(nchar(unified_msa$wt_seq)))
|
|
plot_df = get(paste0(target, "_merged_df3"))
|
|
|
|
# generate the tile columns
|
|
#plot_df=cbind(embb_merged_df3)
|
|
plot_df$col_aa = ifelse(plot_df[["position"]]%in%active_aa_pos,
|
|
"transparent", "transparent")
|
|
plot_df$bg_all = plot_df$col_aa
|
|
plot_df$bg_all = ifelse(plot_df[["position"]]%in%aa_pos_drug,
|
|
"drug", plot_df$bg_all)
|
|
plot_df$col_bg1 = plot_df$bg_all
|
|
plot_df$col_bg1 = ifelse(plot_df[["position"]]%in%aa_pos_lig1,
|
|
"lig1", plot_df$col_bg1)
|
|
plot_df$col_bg2 = plot_df$col_bg1
|
|
plot_df$col_bg2 = ifelse(plot_df[["position"]]%in%aa_pos_lig2,
|
|
"lig2", plot_df$col_bg2)
|
|
plot_df$col_bg3 = plot_df$col_bg2
|
|
plot_df$col_bg3 = ifelse(plot_df[["position"]]%in%aa_pos_lig3
|
|
, "lig3", plot_df$col_bg3)
|
|
|
|
plot_df = generate_distance_colour_map(plot_df, debug=FALSE)
|
|
|
|
# copy only the tile columns to the contiguous DF
|
|
|
|
contig_df$ligand_distance = plot_df$ligand_distance[match(contig_df$position, plot_df$position)]
|
|
contig_df_map = generate_distance_colour_map(contig_df, debug=TRUE)
|
|
contig_df$ligD_colours = contig_df_map$ligD_colours[match(contig_df$position, contig_df_map$position)]
|
|
|
|
#contig_df$ligD_colours = plot_df$ligD_colours[match(contig_df$position, plot_df$position)]
|
|
contig_df$bg_all = plot_df$bg_all[match(contig_df$position, plot_df$position)]
|
|
contig_df$col_bg1 = plot_df$col_bg1[match(contig_df$position, plot_df$position)]
|
|
contig_df$col_bg2 = plot_df$col_bg2[match(contig_df$position, plot_df$position)]
|
|
contig_df$col_bg3 = plot_df$col_bg3[match(contig_df$position, plot_df$position)]
|
|
contig_df=replace_na(
|
|
contig_df,
|
|
list(
|
|
ligD_colours='transparent',
|
|
bg_all = 'transparent',
|
|
col_bg1 = 'transparent',
|
|
col_bg2 = 'transparent',
|
|
col_bg3 = 'transparent'
|
|
)
|
|
)
|
|
|
|
if (logo_type == "EDLogo"){
|
|
msa_method = "custom"
|
|
y_label = "Enrichment Score"
|
|
data_logo_mut = data_ed[['combED_mutM']]
|
|
data_logo_wt = data_ed[['combED_wtM']]
|
|
|
|
msa_pos = as.numeric(colnames(data_logo_mut))
|
|
wt_pos = as.numeric(colnames(data_logo_wt))
|
|
|
|
# Construct Y-axis for MSA mut plot:
|
|
cat("\nCalculating y-axis for MSA mut plot")
|
|
|
|
if ( missing(y_breaks) ){
|
|
# Y-axis: Calculating
|
|
cat("\n----------------------------------------"
|
|
, "\nY-axis being generated from data"
|
|
, "\n-----------------------------------------")
|
|
ylim_low <- floor(min(data_logo_mut)); ylim_low
|
|
if( ylim_low == 0){
|
|
ylim_low = ylim_low
|
|
cat("\nY-axis lower limit:", ylim_low)
|
|
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
|
|
|
|
ylim_up <- ceiling(max(data_logo_mut)) + 5; ylim_up
|
|
cat("\nY-axis upper limit:", ylim_up)
|
|
y_rup = seq(0, ylim_up, by = 2); y_rup
|
|
}else{
|
|
ylim_low = ylim_low + (-0.5)
|
|
cat("\nY-axis lower limit is <0:", ylim_low)
|
|
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
|
|
|
|
ylim_up <- ceiling(max(data_logo_mut)) + 3; ylim_up
|
|
cat("\nY-axis upper limit:", ylim_up)
|
|
y_rup = seq(0, ylim_up, by = 3); y_rup
|
|
}
|
|
#ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale
|
|
ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale
|
|
cat("\nY-axis generated: see below\n"
|
|
, ylim_scale)
|
|
}else{
|
|
# Y-axis: User provided
|
|
cat("\n--------------------------------"
|
|
, "\nUsing y-axis:: User provided"
|
|
,"\n---------------------------------")
|
|
ylim_scale = sort(y_breaks)
|
|
ylim_low = min(ylim_scale); ylim_low
|
|
ylim_up = max(ylim_scale); ylim_up
|
|
}
|
|
|
|
}
|
|
|
|
if (logo_type == "bits_pfm"){
|
|
msa_method = "bits"
|
|
y_label = "Bits (PFM)"
|
|
data_logo_mut = data_ed[['pfm_mut_scaledM']]
|
|
data_logo_wt = data_ed[['pfm_wtM']]
|
|
|
|
msa_pos = as.numeric(colnames(data_logo_mut))
|
|
wt_pos = as.numeric(colnames(data_logo_wt))
|
|
}
|
|
|
|
if (logo_type == "probability_pfm"){
|
|
msa_method = "probability"
|
|
y_label = "Probability (PFM)"
|
|
data_logo_mut = data_ed[['pfm_mut_scaledM']]
|
|
data_logo_wt = data_ed[['pfm_wtM']]
|
|
|
|
msa_pos = as.numeric(colnames(data_logo_mut))
|
|
wt_pos = as.numeric(colnames(data_logo_wt))
|
|
}
|
|
|
|
if (logo_type == "bits_raw"){
|
|
msa_method = "bits"
|
|
y_label = "Bits"
|
|
|
|
data_logo_mut = msaSeq_mut
|
|
msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,"")))
|
|
msa_interimDF = data.frame(msa_interim)
|
|
msa_pos = as.numeric(rownames(msa_interimDF))
|
|
|
|
data_logo_wt = msaSeq_wt
|
|
wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,"")))
|
|
wt_interimDF = data.frame(wt_interim)
|
|
wt_pos = as.numeric(rownames(wt_interimDF))
|
|
|
|
}
|
|
|
|
if (logo_type == "probability_raw"){
|
|
msa_method = "probability"
|
|
y_label = "Probability"
|
|
|
|
data_logo_mut = msaSeq_mut
|
|
msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,"")))
|
|
msa_interimDF = data.frame(msa_interim)
|
|
msa_pos = as.numeric(rownames(msa_interimDF))
|
|
|
|
data_logo_wt = msaSeq_wt
|
|
wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,"")))
|
|
wt_interimDF = data.frame(wt_interim)
|
|
wt_pos = as.numeric(rownames(wt_interimDF))
|
|
}
|
|
|
|
#################################################################################
|
|
# param: plot_position
|
|
#################################################################################
|
|
|
|
if(missing(plot_positions)){
|
|
|
|
#================================
|
|
# NO filtering of positions
|
|
#================================
|
|
#---------
|
|
# MSA mut
|
|
#---------
|
|
cat("\n==========================================="
|
|
, "\nGenerated PFM mut: No filtering"
|
|
, "\n===========================================")
|
|
|
|
plot_mut_edM = data_logo_mut
|
|
|
|
#---------
|
|
# MSA WT
|
|
#---------
|
|
cat("\n==========================================="
|
|
, "\nGenerated PFM WT: No filtering"
|
|
, "\n===========================================")
|
|
|
|
plot_wt_edM = data_logo_wt
|
|
|
|
}else{
|
|
|
|
#================================
|
|
# Filtering of positions
|
|
#================================
|
|
cat("\n==========================================="
|
|
, "\nGenerating PFM MSA: filtered positions"
|
|
, "\n==========================================="
|
|
, "\nUser specified plotting positions for MSA:"
|
|
, "\nThese are:\n", plot_positions
|
|
, "\nSorting plot positions...")
|
|
|
|
plot_positions = sort(plot_positions)
|
|
|
|
cat("\nPlotting positions sorted:\n"
|
|
, plot_positions)
|
|
|
|
if ( all(plot_positions%in%msa_pos) && all(plot_positions%in%wt_pos) ){
|
|
cat("\nAll positions within range"
|
|
, "\nFiltering positions as specified..."
|
|
, "\nNo. of positions in plot:", length(plot_positions))
|
|
i_extract = plot_positions
|
|
|
|
#-----------------
|
|
# PFM: mut + wt
|
|
#------------------
|
|
if (logo_type%in%c("EDLogo", "bits_pfm", "probability_pfm")){
|
|
|
|
plot_mut_edM = data_logo_mut[, i_extract]
|
|
plot_wt_edM = data_logo_wt[, i_extract]
|
|
|
|
}
|
|
if (logo_type%in%c("bits_raw", "probability_raw")){
|
|
|
|
#--------
|
|
# Mut
|
|
#--------
|
|
dfP1 = msa_interimDF[i_extract,]
|
|
dfP1 = data.frame(t(dfP1))
|
|
names(dfP1) = i_extract
|
|
cols_to_paste = names(dfP1)
|
|
dfP1['chosen_seq'] = apply(dfP1[, cols_to_paste]
|
|
, 1
|
|
, paste, sep = ''
|
|
, collapse = "")
|
|
plot_mut_edM = dfP1$chosen_seq
|
|
|
|
#--------
|
|
# WT
|
|
#--------
|
|
dfP2 = wt_interimDF[i_extract,]
|
|
dfP2 = data.frame(t(dfP2))
|
|
names(dfP2) = i_extract
|
|
cols_to_paste2 = names(dfP2)
|
|
dfP2['chosen_seq'] = apply( dfP2[, cols_to_paste2]
|
|
, 1
|
|
, paste, sep = ''
|
|
, collapse = "")
|
|
|
|
plot_wt_edM = dfP2$chosen_seq
|
|
|
|
}
|
|
|
|
}else{
|
|
cat("\nNo. of positions selected:", length(plot_positions))
|
|
i_ofr = plot_positions[!plot_positions%in%msa_pos]
|
|
cat("\n1 or more plot_positions out of range..."
|
|
, "\nThese are:\n", i_ofr
|
|
, "\nQuitting! Resubmit with correct plot_positions")
|
|
quit()
|
|
}
|
|
}
|
|
|
|
|
|
######################################
|
|
# Generating plots for muts and wt
|
|
#####################################
|
|
if (my_logo_col %in% c('clustalx','taylor')) {
|
|
cat("\nSelected colour scheme:", my_logo_col
|
|
, "\nUsing black theme\n")
|
|
|
|
theme_bgc = "black"
|
|
xfont_bgc = "white"
|
|
yfont_bgc = "white"
|
|
xtt_col = "white"
|
|
ytt_col = "white"
|
|
|
|
}
|
|
|
|
if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
|
|
cat("\nstart of MSA"
|
|
, '\nSelected colour scheme:', my_logo_col
|
|
, "\nUsing grey theme")
|
|
|
|
theme_bgc = "white"
|
|
xfont_bgc = "black"
|
|
yfont_bgc = "black"
|
|
xtt_col = "black"
|
|
ytt_col = "black"
|
|
|
|
}
|
|
|
|
#####################################
|
|
# Generating logo plots for nsSNPs
|
|
#####################################
|
|
PlotlogolasL <- list()
|
|
|
|
#-------------------
|
|
# Mutant logo plot
|
|
#-------------------
|
|
p0 = ggplot() + geom_logo(plot_mut_edM
|
|
, method = msa_method
|
|
, col_scheme = my_logo_col
|
|
, seq_type = 'auto') +
|
|
|
|
theme(legend.position = leg_pos
|
|
, legend.direction = leg_dir
|
|
#, legend.title = element_blank()
|
|
, legend.title = element_text(size = leg_tts
|
|
, colour = ytt_col)
|
|
, legend.text = element_text(size = leg_ts)
|
|
|
|
, axis.text.x = element_text(size = x_ats
|
|
, angle = x_tangle
|
|
, hjust = 1
|
|
, vjust = 0.4
|
|
, colour = xfont_bgc)
|
|
#, axis.text.y = element_blank()
|
|
, axis.ticks=element_blank()
|
|
, axis.text.y = element_text(size = y_ats
|
|
, angle = y_tangle
|
|
, hjust = 1
|
|
, vjust = -1.0
|
|
, colour = yfont_bgc)
|
|
, axis.title.x = element_text(size = x_tts
|
|
, colour = xtt_col)
|
|
, axis.title.y = element_text(size = y_tts
|
|
, colour = ytt_col)
|
|
, panel.grid=element_blank()
|
|
, plot.background = element_rect(fill = theme_bgc, colour=NA)
|
|
, panel.background = element_rect(fill = "transparent", colour=NA)
|
|
|
|
) +
|
|
labs(y=y_label) +
|
|
xlab(x_lab_mut)
|
|
|
|
if (missing(plot_positions)){
|
|
ed_mut_logo_P = p0 +
|
|
scale_y_continuous(
|
|
expand = c(0,0),
|
|
breaks = seq(
|
|
0,
|
|
(y_lim),
|
|
by = y_axis_increment
|
|
)
|
|
) +
|
|
scale_x_discrete(breaks = msa_pos
|
|
, expand = c(x_axis_offset, 0)
|
|
, labels = msa_pos
|
|
, limits = factor(msa_pos))
|
|
|
|
}else{
|
|
ed_mut_logo_P = p0 +
|
|
scale_y_continuous(
|
|
expand = c(0,0)#,
|
|
# breaks = seq(
|
|
# 0,
|
|
# (y_lim),
|
|
# by = y_axis_increment
|
|
#)
|
|
) +
|
|
# scale_x_continuous(expand = c(0,0)) #+
|
|
|
|
scale_x_discrete(breaks = i_extract
|
|
, expand = c(x_axis_offset_filtered, 0)
|
|
, labels = i_extract
|
|
, limits = factor(i_extract))
|
|
}
|
|
|
|
cat('\nDone: MSA plot for mutations')
|
|
#### Wild-type MSA: gene_fasta file ####
|
|
p1 = ggplot() + geom_logo(plot_wt_edM
|
|
#, facet = "grid"
|
|
, method = msa_method
|
|
, col_scheme = my_logo_col
|
|
, seq_type = 'aa') +
|
|
|
|
theme(legend.position = "none"
|
|
, legend.direction = leg_dir
|
|
, legend.title = element_text(size = leg_tts
|
|
, colour = ytt_col)
|
|
, legend.text = element_text(size = leg_ts)
|
|
, axis.text.x = element_blank()
|
|
, axis.ticks=element_blank()
|
|
, axis.text.y = element_blank()
|
|
|
|
, axis.title.x = element_text(size = x_tts
|
|
, colour = xtt_col)
|
|
, axis.title.y = element_text(size = y_tts
|
|
, colour = ytt_col)
|
|
|
|
, panel.grid=element_blank()
|
|
, plot.background = element_rect(fill = theme_bgc, colour=NA)
|
|
, panel.background = element_rect(fill = "transparent", colour=NA)
|
|
, plot.margin = margin(r=0,l=0, unit="pt")
|
|
|
|
) +
|
|
scale_y_discrete(expand = c(0,0)) +
|
|
ylab("") + xlab("")
|
|
|
|
if (missing(plot_positions)){
|
|
|
|
# No y-axis needed
|
|
ed_wt_logo_P = p1# +
|
|
} else {
|
|
|
|
ed_wt_logo_P = p1 +
|
|
scale_x_discrete(expand = c(0, 0),
|
|
breaks = i_extract,
|
|
#labels = i_extract,
|
|
limits = factor(i_extract)
|
|
)
|
|
|
|
#plot_df=plot_df[plot_df$position %in% plot_positions,]
|
|
contig_df=contig_df[contig_df$position %in% plot_positions,]
|
|
anno_bar = position_annotation(
|
|
contig_df,
|
|
aa_pos_drug=aa_pos_drug,
|
|
active_aa_pos=active_aa_pos,
|
|
aa_pos_lig1=aa_pos_lig1,
|
|
aa_pos_lig2=aa_pos_lig2,
|
|
aa_pos_lig3=aa_pos_lig3,
|
|
generate_colours = FALSE
|
|
)
|
|
|
|
}
|
|
cowplot::plot_grid(ed_mut_logo_P
|
|
, ed_wt_logo_P
|
|
, anno_bar
|
|
, ncol = 1
|
|
, align = "v"
|
|
#, axis='lr'
|
|
, rel_heights = c(3/4, 1/4,1/10))
|
|
|
|
}
|
|
#LogoPlotMSA(unified_msa)
|