wideP_consurf3

This commit is contained in:
Tanushree Tunstall 2022-08-10 14:08:40 +01:00
parent 0bcbb44ae5
commit 3af11ec3d3

View file

@ -1,29 +1,29 @@
##################################################################################### #####################################################################################
# LogoPlotMSA(): # LogoPlotMSA():
# Input: # Input:
# Data: # Data:
# msaSeq_mut: MSA chr vector for muts # msaSeq_mut: MSA chr vector for muts
# msaSeq_wt: MSA chr vector for wt # msaSeq_wt: MSA chr vector for wt
# Logo type params: # Logo type params:
# logo_type = c("EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # logo_type = c("EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw")
# EDLogo: calculated from the Logolas package based on PFM matrix (scaled). # EDLogo: calculated from the Logolas package based on PFM matrix (scaled).
#The required content from the package is sourced locally within 'my_logolas.R' #The required content from the package is sourced locally within 'my_logolas.R'
# bits_pfm: Information Content based on PFM scaled matrix (my_logolas.R) # bits_pfm: Information Content based on PFM scaled matrix (my_logolas.R)
# probability_pfm: Probability based on PFM scaled matrix (my_logolas.R) # probability_pfm: Probability based on PFM scaled matrix (my_logolas.R)
# bits_raw: Information Content based on Raw MSA (ggseqlogo) # bits_raw: Information Content based on Raw MSA (ggseqlogo)
# probability_raw: Probability based on Raw MSA (ggseqlogo) # probability_raw: Probability based on Raw MSA (ggseqlogo)
# EDScore_type = c("log", log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
# bg_prob: background probability, default is equal i.e NULL.
# This is used by the internal call to DataED_PFM(). This func takes thse args. I have used it here for
# completeness and allow nuanced plot control
# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
# --> if clustalx and taylor, set variable to black bg + white font
# --> if chemistry and hydrophobicity, then grey bg + black font
# ...other params # EDScore_type = c("log", log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
# bg_prob: background probability, default is equal i.e NULL.
# This is used by the internal call to DataED_PFM(). This func takes thse args. I have used it here for
# completeness and allow nuanced plot control
# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
# --> if clustalx and taylor, set variable to black bg + white font
# --> if chemistry and hydrophobicity, then grey bg + black font
# ...other params
# Returns: Logo plots from MSA both mutant and wt (for comparability) # Returns: Logo plots from MSA both mutant and wt (for comparability)
# For my case, I always use it as it helps see what is at the wild-type already! # For my case, I always use it as it helps see what is at the wild-type already!
@ -62,7 +62,7 @@ LogoPlotMSA <- function(unified_msa
, leg_dir = "horizontal" #can be vertical or horizontal , leg_dir = "horizontal" #can be vertical or horizontal
, leg_ts = 16 # leg text size , leg_ts = 16 # leg text size
, leg_tts = 16 # leg title size , leg_tts = 16 # leg title size
) )
{ {
# FIXME: Hack! # FIXME: Hack!
@ -70,8 +70,8 @@ LogoPlotMSA <- function(unified_msa
# msaSeq_wt=unified_msa[[2]] # msaSeq_wt=unified_msa[[2]]
msaSeq_mut=unified_msa[['msa_seq']] msaSeq_mut=unified_msa[['msa_seq']]
msaSeq_wt=unified_msa[['wt_seq']] msaSeq_wt=unified_msa[['wt_seq']]
# Get PFM matrix for mut and wt MSA provided # Get PFM matrix for mut and wt MSA provided
data_ed = DataED_PFM(msaSeq_mut data_ed = DataED_PFM(msaSeq_mut
, msaSeq_wt , msaSeq_wt
, ED_score = EDScore_type) , ED_score = EDScore_type)
@ -126,7 +126,7 @@ LogoPlotMSA <- function(unified_msa
ylim_low = min(ylim_scale); ylim_low ylim_low = min(ylim_scale); ylim_low
ylim_up = max(ylim_scale); ylim_up ylim_up = max(ylim_scale); ylim_up
} }
} }
if (logo_type == "bits_pfm"){ if (logo_type == "bits_pfm"){
@ -163,7 +163,7 @@ LogoPlotMSA <- function(unified_msa
wt_interimDF = data.frame(wt_interim) wt_interimDF = data.frame(wt_interim)
wt_pos = as.numeric(rownames(wt_interimDF)) wt_pos = as.numeric(rownames(wt_interimDF))
} }
if (logo_type == "probability_raw"){ if (logo_type == "probability_raw"){
msa_method = "probability" msa_method = "probability"
@ -183,132 +183,132 @@ LogoPlotMSA <- function(unified_msa
################################################################################# #################################################################################
# param: plot_position # param: plot_position
################################################################################# #################################################################################
if(missing(plot_positions)){ if(missing(plot_positions)){
#================================
# NO filtering of positions
#================================
#---------
# MSA mut
#---------
cat("\n==========================================="
, "\nGenerated PFM mut: No filtering"
, "\n===========================================")
plot_mut_edM = data_logo_mut #================================
# NO filtering of positions
#--------- #================================
# MSA WT #---------
#--------- # MSA mut
cat("\n===========================================" #---------
, "\nGenerated PFM WT: No filtering" cat("\n==========================================="
, "\n===========================================") , "\nGenerated PFM mut: No filtering"
, "\n===========================================")
plot_wt_edM = data_logo_wt
plot_mut_edM = data_logo_mut
#---------
# MSA WT
#---------
cat("\n==========================================="
, "\nGenerated PFM WT: No filtering"
, "\n===========================================")
plot_wt_edM = data_logo_wt
}else{ }else{
#================================ #================================
# Filtering of positions # Filtering of positions
#================================ #================================
cat("\n===========================================" cat("\n==========================================="
, "\nGenerating PFM MSA: filtered positions" , "\nGenerating PFM MSA: filtered positions"
, "\n===========================================" , "\n==========================================="
, "\nUser specified plotting positions for MSA:" , "\nUser specified plotting positions for MSA:"
, "\nThese are:\n", plot_positions , "\nThese are:\n", plot_positions
, "\nSorting plot positions...") , "\nSorting plot positions...")
plot_positions = sort(plot_positions) plot_positions = sort(plot_positions)
cat("\nPlotting positions sorted:\n" cat("\nPlotting positions sorted:\n"
, plot_positions) , plot_positions)
if ( all(plot_positions%in%msa_pos) && all(plot_positions%in%wt_pos) ){ if ( all(plot_positions%in%msa_pos) && all(plot_positions%in%wt_pos) ){
cat("\nAll positions within range" cat("\nAll positions within range"
, "\nFiltering positions as specified..." , "\nFiltering positions as specified..."
, "\nNo. of positions in plot:", length(plot_positions)) , "\nNo. of positions in plot:", length(plot_positions))
i_extract = plot_positions i_extract = plot_positions
#-----------------
# PFM: mut + wt
#------------------
if (logo_type%in%c("EDLogo", "bits_pfm", "probability_pfm")){
plot_mut_edM = data_logo_mut[, i_extract] #-----------------
plot_wt_edM = data_logo_wt[, i_extract] # PFM: mut + wt
#------------------
if (logo_type%in%c("EDLogo", "bits_pfm", "probability_pfm")){
plot_mut_edM = data_logo_mut[, i_extract]
plot_wt_edM = data_logo_wt[, i_extract]
}
if (logo_type%in%c("bits_raw", "probability_raw")){
#--------
# Mut
#--------
dfP1 = msa_interimDF[i_extract,]
dfP1 = data.frame(t(dfP1))
names(dfP1) = i_extract
cols_to_paste = names(dfP1)
dfP1['chosen_seq'] = apply(dfP1[, cols_to_paste]
, 1
, paste, sep = ''
, collapse = "")
plot_mut_edM = dfP1$chosen_seq
#--------
# WT
#--------
dfP2 = wt_interimDF[i_extract,]
dfP2 = data.frame(t(dfP2))
names(dfP2) = i_extract
cols_to_paste2 = names(dfP2)
dfP2['chosen_seq'] = apply( dfP2[, cols_to_paste2]
, 1
, paste, sep = ''
, collapse = "")
plot_wt_edM = dfP2$chosen_seq
}
}else{
cat("\nNo. of positions selected:", length(plot_positions))
i_ofr = plot_positions[!plot_positions%in%msa_pos]
cat("\n1 or more plot_positions out of range..."
, "\nThese are:\n", i_ofr
, "\nQuitting! Resubmit with correct plot_positions")
quit()
} }
if (logo_type%in%c("bits_raw", "probability_raw")){
#--------
# Mut
#--------
dfP1 = msa_interimDF[i_extract,]
dfP1 = data.frame(t(dfP1))
names(dfP1) = i_extract
cols_to_paste = names(dfP1)
dfP1['chosen_seq'] = apply(dfP1[, cols_to_paste]
, 1
, paste, sep = ''
, collapse = "")
plot_mut_edM = dfP1$chosen_seq
#--------
# WT
#--------
dfP2 = wt_interimDF[i_extract,]
dfP2 = data.frame(t(dfP2))
names(dfP2) = i_extract
cols_to_paste2 = names(dfP2)
dfP2['chosen_seq'] = apply( dfP2[, cols_to_paste2]
, 1
, paste, sep = ''
, collapse = "")
plot_wt_edM = dfP2$chosen_seq
}
}else{
cat("\nNo. of positions selected:", length(plot_positions))
i_ofr = plot_positions[!plot_positions%in%msa_pos]
cat("\n1 or more plot_positions out of range..."
, "\nThese are:\n", i_ofr
, "\nQuitting! Resubmit with correct plot_positions")
quit()
} }
}
######################################
# Generating plots for muts and wt
#####################################
if (my_logo_col %in% c('clustalx','taylor')) {
cat("\nSelected colour scheme:", my_logo_col
, "\nUsing black theme\n")
theme_bgc = "black"
xfont_bgc = "white"
yfont_bgc = "white"
xtt_col = "white"
ytt_col = "white"
}
if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
cat("\nstart of MSA"
, '\nSelected colour scheme:', my_logo_col
, "\nUsing grey theme")
theme_bgc = "grey"
xfont_bgc = "black"
yfont_bgc = "black"
xtt_col = "black"
ytt_col = "black"
######################################
# Generating plots for muts and wt
#####################################
if (my_logo_col %in% c('clustalx','taylor')) {
cat("\nSelected colour scheme:", my_logo_col
, "\nUsing black theme\n")
theme_bgc = "black"
xfont_bgc = "white"
yfont_bgc = "white"
xtt_col = "white"
ytt_col = "white"
} }
if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
cat("\nstart of MSA"
, '\nSelected colour scheme:', my_logo_col
, "\nUsing grey theme")
theme_bgc = "grey"
xfont_bgc = "black"
yfont_bgc = "black"
xtt_col = "black"
ytt_col = "black"
}
##################################### #####################################
# Generating logo plots for nsSNPs # Generating logo plots for nsSNPs
##################################### #####################################
@ -328,18 +328,18 @@ LogoPlotMSA <- function(unified_msa
, legend.title = element_text(size = leg_tts , legend.title = element_text(size = leg_tts
, colour = ytt_col) , colour = ytt_col)
, legend.text = element_text(size = leg_ts) , legend.text = element_text(size = leg_ts)
, axis.text.x = element_text(size = x_ats , axis.text.x = element_text(size = x_ats
, angle = x_tangle , angle = x_tangle
, hjust = 1 , hjust = 1
, vjust = 0.4 , vjust = 0.4
, colour = xfont_bgc) , colour = xfont_bgc)
#, axis.text.y = element_blank() #, axis.text.y = element_blank()
, axis.text.y = element_text(size = y_ats , axis.text.y = element_text(size = y_ats
, angle = y_tangle , angle = y_tangle
, hjust = 1 , hjust = 1
, vjust = -1.0 , vjust = -1.0
, colour = yfont_bgc) , colour = yfont_bgc)
, axis.title.x = element_text(size = x_tts , axis.title.x = element_text(size = x_tts
, colour = xtt_col) , colour = xtt_col)
, axis.title.y = element_text(size = y_tts , axis.title.y = element_text(size = y_tts
@ -349,14 +349,14 @@ LogoPlotMSA <- function(unified_msa
xlab(x_lab_mut) xlab(x_lab_mut)
if (missing(plot_positions)){ if (missing(plot_positions)){
ed_mut_logo_P = p0 + ed_mut_logo_P = p0 +
scale_x_discrete(breaks = msa_pos scale_x_discrete(breaks = msa_pos
, expand = c(x_axis_offset, 0) , expand = c(x_axis_offset, 0)
, labels = msa_pos , labels = msa_pos
, limits = factor(msa_pos)) , limits = factor(msa_pos))
}else{ }else{
ed_mut_logo_P = p0 + ed_mut_logo_P = p0 +
scale_x_discrete(breaks = i_extract scale_x_discrete(breaks = i_extract
, expand = c(x_axis_offset_filtered, 0) , expand = c(x_axis_offset_filtered, 0)
, labels = i_extract , labels = i_extract
@ -366,25 +366,25 @@ LogoPlotMSA <- function(unified_msa
if (logo_type == "EDLogo"){ if (logo_type == "EDLogo"){
ed_mut_logo_P = ed_mut_logo_P + ed_mut_logo_P = ed_mut_logo_P +
scale_y_continuous(limits = c(ylim_low, ylim_up) scale_y_continuous(limits = c(ylim_low, ylim_up)
, breaks = ylim_scale , breaks = ylim_scale
, expand = c(0, y_axis_offset)) + , expand = c(0, y_axis_offset)) +
geom_hline(yintercept = 0 geom_hline(yintercept = 0
, linetype = "solid" , linetype = "solid"
, color = "grey" , color = "grey"
, size = 1) , size = 1)
} }
if (missing(y_lab_mut)){ if (missing(y_lab_mut)){
ed_mut_logo_P = ed_mut_logo_P + ylab(y_label) ed_mut_logo_P = ed_mut_logo_P + ylab(y_label)
} else{ } else{
ed_mut_logo_P = ed_mut_logo_P + ylab(y_lab_mut) ed_mut_logo_P = ed_mut_logo_P + ylab(y_lab_mut)
} }
cat('\nDone: MSA plot for mutations') cat('\nDone: MSA plot for mutations')
#return(msa_mut_logoP) #return(msa_mut_logoP)
PlotlogolasL[['ed_mut_logoP']] <- ed_mut_logo_P PlotlogolasL[['ed_mut_logoP']] <- ed_mut_logo_P
#--------------------------------- #---------------------------------
# Wild-type MSA: gene_fasta file # Wild-type MSA: gene_fasta file
#--------------------------------- #---------------------------------
@ -415,25 +415,25 @@ LogoPlotMSA <- function(unified_msa
, plot.background = element_rect(fill = theme_bgc)) + , plot.background = element_rect(fill = theme_bgc)) +
ylab("") + xlab("Wild-type position") ylab("") + xlab("Wild-type position")
if (missing(plot_positions)){ if (missing(plot_positions)){
# No y-axis needed
ed_wt_logo_P = p1 +
scale_x_discrete(breaks = wt_pos
, expand = c(x_axis_offset, 0)
, labels = wt_pos
, limits = factor(wt_pos))
}else{
ed_wt_logo_P = p1 +
scale_x_discrete(breaks = i_extract
, expand = c(x_axis_offset_filtered, 0)
, labels = i_extract
, limits = factor(i_extract))
}
# No y-axis needed
ed_wt_logo_P = p1 +
scale_x_discrete(breaks = wt_pos
, expand = c(x_axis_offset, 0)
, labels = wt_pos
, limits = factor(wt_pos))
}else{
ed_wt_logo_P = p1 +
scale_x_discrete(breaks = i_extract
, expand = c(x_axis_offset_filtered, 0)
, labels = i_extract
, limits = factor(i_extract))
}
cat('\nDone: MSA plot for WT') cat('\nDone: MSA plot for WT')
#return(msa_wt_logoP) #return(msa_wt_logoP)
PlotlogolasL[['ed_wt_logoP']] <- ed_wt_logo_P PlotlogolasL[['ed_wt_logoP']] <- ed_wt_logo_P
@ -443,20 +443,21 @@ LogoPlotMSA <- function(unified_msa
# Combined plot: logo ED/other logo plot # Combined plot: logo ED/other logo plot
# customised for ggseqlogo # customised for ggseqlogo
#========================================= #=========================================
cat('\nDone: ed_mut_logoP + ed_wt_logoP') cat('\nDone: ed_mut_logoP + ed_wt_logoP')
# colour scheme: https://rdrr.io/cran/ggseqlogo/src/R/col_schemes.r # colour scheme: https://rdrr.io/cran/ggseqlogo/src/R/col_schemes.r
#cat("\nOutput plot:", LogoSNPs_comb, "\n") #cat("\nOutput plot:", LogoSNPs_comb, "\n")
#svg(LogoSNPs_combined, width = 32, height = 10) #svg(LogoSNPs_combined, width = 32, height = 10)
LogoED_comb = cowplot::plot_grid(PlotlogolasL[['ed_mut_logoP']] LogoED_comb = cowplot::plot_grid(PlotlogolasL[['ed_mut_logoP']]
, PlotlogolasL[['ed_wt_logoP']] , PlotlogolasL[['ed_wt_logoP']]
, nrow = 2 , nrow = 2
, align = "v" , align = "v"
, rel_heights = c(3/4, 1/4)) , axis='lr'
, rel_heights = c(3/4, 1/4))
return(LogoED_comb) return(LogoED_comb)
} }
#LogoPlotMSA(unified_msa) #LogoPlotMSA(unified_msa)