combined logolas and raw data msa plots into 1 script and called it the same as before logoP_msa.R
This commit is contained in:
parent
6365fff858
commit
92af9fd565
1 changed files with 196 additions and 170 deletions
|
@ -1,14 +1,17 @@
|
|||
source("~/git/LSHTM_analysis/scripts/plotting/Header_TT.R")
|
||||
source("~/git/LSHTM_analysis/scripts/functions/ed_pfm_data.R")
|
||||
###########################################
|
||||
PlotLogolasMSA <- function(msaSeq_mut # chr vector
|
||||
, msaSeq_wt # chr vector
|
||||
, msa_method = c("custom") # will be c("EDLogo", "Logo)#
|
||||
, ED_score = c("log")# can be: "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL"
|
||||
#, msa_method = c("custom") # can be "bits", "probability" or "custom"
|
||||
, logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # can be "bits", "probability" or "custom"
|
||||
, EDScore_type = c("log") # see if this relevant, or source function should have it!
|
||||
, bg_prob = NULL
|
||||
, my_logo_col = "chemistry"
|
||||
, plot_positions
|
||||
, y_breaks
|
||||
, x_lab_mut = "nsSNP-position"
|
||||
, y_lab_mut = ""
|
||||
, y_lab_mut
|
||||
, x_ats = 13 # text size
|
||||
, x_tangle = 90 # text angle
|
||||
, x_axis_offset = 0.05 # dist b/w y-axis and plot start
|
||||
|
@ -25,115 +28,148 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
|
|||
)
|
||||
|
||||
{
|
||||
#''' Can be put into a separate EDData plot function'''
|
||||
dash_control = list()
|
||||
dash_control_default <- list(concentration = NULL, mode = NULL,
|
||||
optmethod = "mixEM", sample_weights = NULL, verbose = FALSE,
|
||||
bf = TRUE, pi_init = NULL, squarem_control = list(),
|
||||
dash_control = list(), reportcov = FALSE)
|
||||
|
||||
dash_control <- modifyList(dash_control_default, dash_control)
|
||||
# Get PFM matrix for mut and wt MSA provided
|
||||
data_ed = DataED_PFM(msaSeq_mut
|
||||
, msaSeq_wt
|
||||
, ED_score = EDScore_type)
|
||||
names(data_ed)
|
||||
#"pfm_mutM" "pfm_mut_scaledM" "combED_mutM" "pfm_wtM" "pfm_wt_scaledM" "combED_wtM"
|
||||
|
||||
if (logo_type == "EDLogo"){
|
||||
msa_method = "custom"
|
||||
y_label = "Enrichment Score"
|
||||
data_logo_mut = data_ed[['combED_mutM']]
|
||||
data_logo_wt = data_ed[['combED_wtM']]
|
||||
|
||||
############################################
|
||||
# Data processing for logo plot for nsSNPS
|
||||
###########################################
|
||||
cat("\nLength of MSA", length(msaSeq_mut)
|
||||
, "\nlength of WT seq:", length(msaSeq_wt))
|
||||
msa_pos = as.numeric(colnames(data_logo_mut))
|
||||
wt_pos = as.numeric(colnames(data_logo_wt))
|
||||
|
||||
cat("\n======================="
|
||||
, "\nPlotting entire MSA"
|
||||
, "\n========================")
|
||||
# Construct Y-axis for MSA mut plot:
|
||||
cat("\nCalculating y-axis for MSA mut plot")
|
||||
|
||||
#--------------------------
|
||||
# Getting PFM: mutant MSA
|
||||
#--------------------------
|
||||
pfm_mut <- Biostrings::consensusMatrix(msaSeq_mut)
|
||||
colnames(pfm_mut) <- 1:dim(pfm_mut)[2]
|
||||
pfm_mut_scaled <- do.call(dash, append(list(comp_data = pfm_mut),
|
||||
dash_control))$posmean
|
||||
if ( missing(y_breaks) ){
|
||||
# Y-axis: Calculating
|
||||
cat("\n----------------------------------------"
|
||||
, "\nY-axis being generated from data"
|
||||
, "\n-----------------------------------------")
|
||||
ylim_low <- floor(min(data_logo_mut)); ylim_low
|
||||
if( ylim_low == 0){
|
||||
ylim_low = ylim_low
|
||||
cat("\nY-axis lower limit:", ylim_low)
|
||||
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
|
||||
|
||||
logo_mut_h = get_logo_heights(pfm_mut_scaled
|
||||
, bg = bg_prob
|
||||
, score = ED_score)
|
||||
ylim_up <- ceiling(max(data_logo_mut)) + 4; ylim_up
|
||||
cat("\nY-axis upper limit:", ylim_up)
|
||||
y_rup = seq(0, ylim_up, by = 2); y_rup
|
||||
}else{
|
||||
ylim_low = ylim_low + (-0.5)
|
||||
cat("\nY-axis lower limit is <0:", ylim_low)
|
||||
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
|
||||
|
||||
cat("\nGetting logo_heights from Logolas package...")
|
||||
|
||||
pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM
|
||||
pos_mutS = logo_mut_h[['pos_ic']]; pos_mutS
|
||||
pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED
|
||||
|
||||
neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1)
|
||||
neg_mutS = logo_mut_h[['neg_ic']]; neg_mutS
|
||||
neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED
|
||||
|
||||
if (length(pos_mutS) && length(neg_mutS) == dim(pfm_mut)[2]){
|
||||
cat("\nPASS: pfm calculated successfully including scaled matrix"
|
||||
, "\nDim of pfm matrix:", dim(pfm_mut)[1], dim(pfm_mut)[2])
|
||||
ylim_up <- ceiling(max(data_logo_mut)) + 3; ylim_up
|
||||
cat("\nY-axis upper limit:", ylim_up)
|
||||
y_rup = seq(0, ylim_up, by = 3); y_rup
|
||||
}
|
||||
#ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale
|
||||
ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale
|
||||
cat("\nY-axis generated: see below\n"
|
||||
, ylim_scale)
|
||||
}else{
|
||||
# Y-axis: User provided
|
||||
cat("\n--------------------------------"
|
||||
, "\nUsing y-axis:: User provided"
|
||||
,"\n---------------------------------")
|
||||
ylim_scale = sort(y_breaks)
|
||||
ylim_low = min(ylim_scale); ylim_low
|
||||
ylim_up = max(ylim_scale); ylim_up
|
||||
}
|
||||
|
||||
combED_mutM = pos_mutED + neg_mutED
|
||||
|
||||
# Construct the x-axis: mutant MSA
|
||||
msa_all_pos = as.numeric(colnames(combED_mutM))
|
||||
|
||||
#---------------------
|
||||
# Getting PFM: WT
|
||||
#---------------------
|
||||
pfm_wt <- Biostrings::consensusMatrix(msaSeq_wt)
|
||||
colnames(pfm_wt) <- 1:dim(pfm_wt)[2]
|
||||
pfm_wt_scaled <- do.call(dash, append(list(comp_data = pfm_wt),
|
||||
dash_control))$posmean
|
||||
|
||||
logo_wt_h = get_logo_heights(pfm_wt_scaled
|
||||
, bg = bg_prob
|
||||
, score = ED_score)
|
||||
|
||||
pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM
|
||||
pos_wtS = logo_wt_h[['pos_ic']]; pos_wtS
|
||||
pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED
|
||||
|
||||
neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1)
|
||||
neg_wtS = logo_wt_h[['neg_ic']]; neg_wtS
|
||||
neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED
|
||||
|
||||
if (length(pos_wtS) && length(neg_wtS) == dim(pfm_wt)[2]){
|
||||
cat("\nPASS: pfm calculated successfully including scaled matrix"
|
||||
, "\nDim of pfm matrix:", dim(pfm_wt)[1], dim(pfm_wt)[2])
|
||||
}
|
||||
|
||||
combED_wtM = pos_wtED + neg_wtED
|
||||
if (logo_type == "bits_pfm"){
|
||||
msa_method = "bits"
|
||||
y_label = "Bits (PFM)"
|
||||
data_logo_mut = data_ed[['pfm_mut_scaledM']]
|
||||
data_logo_wt = data_ed[['pfm_wtM']]
|
||||
|
||||
# Construct the x-axis: mutant MSA
|
||||
wt_all_pos = as.numeric(colnames(combED_wtM))
|
||||
msa_pos = as.numeric(colnames(data_logo_mut))
|
||||
wt_pos = as.numeric(colnames(data_logo_wt))
|
||||
}
|
||||
|
||||
if (logo_type == "probability_pfm"){
|
||||
msa_method = "probability"
|
||||
y_label = "Probability (PFM)"
|
||||
data_logo_mut = data_ed[['pfm_mut_scaledM']]
|
||||
data_logo_wt = data_ed[['pfm_wtM']]
|
||||
|
||||
msa_pos = as.numeric(colnames(data_logo_mut))
|
||||
wt_pos = as.numeric(colnames(data_logo_wt))
|
||||
}
|
||||
|
||||
if (logo_type == "bits_raw"){
|
||||
msa_method = "bits"
|
||||
y_label = "Bits"
|
||||
|
||||
data_logo_mut = msaSeq_mut
|
||||
msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,"")))
|
||||
msa_interimDF = data.frame(msa_interim)
|
||||
msa_pos = as.numeric(rownames(msa_interimDF))
|
||||
|
||||
data_logo_wt = msaSeq_wt
|
||||
wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,"")))
|
||||
wt_interimDF = data.frame(wt_interim)
|
||||
wt_pos = as.numeric(rownames(wt_interimDF))
|
||||
|
||||
}
|
||||
|
||||
if (logo_type == "probability_raw"){
|
||||
msa_method = "probability"
|
||||
y_label = "Probability"
|
||||
|
||||
data_logo_mut = msaSeq_mut
|
||||
msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,"")))
|
||||
msa_interimDF = data.frame(msa_interim)
|
||||
msa_pos = as.numeric(rownames(msa_interimDF))
|
||||
|
||||
data_logo_wt = msaSeq_wt
|
||||
wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,"")))
|
||||
wt_interimDF = data.frame(wt_interim)
|
||||
wt_pos = as.numeric(rownames(wt_interimDF))
|
||||
}
|
||||
|
||||
#################################################################################
|
||||
# param: plot_position
|
||||
#################################################################################
|
||||
|
||||
if(missing(plot_positions)){
|
||||
|
||||
#------------------------------
|
||||
# MSA mut: All, no filtering
|
||||
#-------------------------------
|
||||
#================================
|
||||
# NO filtering of positions
|
||||
#================================
|
||||
#---------
|
||||
# MSA mut
|
||||
#---------
|
||||
cat("\n==========================================="
|
||||
, "\nGenerated PFM mut: No filtering"
|
||||
, "\n===========================================")
|
||||
|
||||
plot_mut_edM = combED_mutM
|
||||
plot_mut_edM = data_logo_mut
|
||||
|
||||
#------------------------------
|
||||
# MSA WT: All, no filtering
|
||||
#-------------------------------
|
||||
#---------
|
||||
# MSA WT
|
||||
#---------
|
||||
cat("\n==========================================="
|
||||
, "\nGenerated PFM WT: No filtering"
|
||||
, "\n===========================================")
|
||||
|
||||
plot_wt_edM = combED_wtM
|
||||
plot_wt_edM = data_logo_wt
|
||||
|
||||
}else{
|
||||
|
||||
#------------------------------
|
||||
# PFM mut: Filtered positions
|
||||
#-------------------------------
|
||||
|
||||
#================================
|
||||
# Filtering of positions
|
||||
#================================
|
||||
cat("\n==========================================="
|
||||
, "\nGenerating PFM MSA: filtered positions"
|
||||
, "\n==========================================="
|
||||
|
@ -146,17 +182,56 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
|
|||
cat("\nPlotting positions sorted:\n"
|
||||
, plot_positions)
|
||||
|
||||
if ( all(plot_positions%in%msa_all_pos) && all(plot_positions%in%wt_all_pos) ){
|
||||
if ( all(plot_positions%in%msa_pos) && all(plot_positions%in%wt_pos) ){
|
||||
cat("\nAll positions within range"
|
||||
, "\nFiltering positions as specified..."
|
||||
, "\nNo. of positions in plot:", length(plot_positions))
|
||||
i_extract = plot_positions
|
||||
plot_mut_edM = combED_mutM[, i_extract]
|
||||
plot_wt_edM = combED_wtM[, i_extract]
|
||||
#plot_mut_edM = data_logo_mut[, i_extract]
|
||||
#plot_wt_edM = data_logo_wt[, i_extract]
|
||||
|
||||
#-----------------
|
||||
# PFM: mut + wt
|
||||
#------------------
|
||||
if (logo_type%in%c("EDLogo", "bits_pfm", "probability_pfm")){
|
||||
|
||||
plot_mut_edM = data_logo_mut[, i_extract]
|
||||
plot_wt_edM = data_logo_wt[, i_extract]
|
||||
}
|
||||
if (logo_type%in%c("bits_raw", "probability_raw")){
|
||||
|
||||
#--------
|
||||
# Mut
|
||||
#--------
|
||||
dfP1 = msa_interimDF[i_extract,]
|
||||
dfP1 = data.frame(t(dfP1))
|
||||
names(dfP1) = i_extract
|
||||
cols_to_paste = names(dfP1)
|
||||
dfP1['chosen_seq'] = apply(dfP1[, cols_to_paste]
|
||||
, 1
|
||||
, paste, sep = ''
|
||||
, collapse = "")
|
||||
plot_mut_edM = dfP1$chosen_seq
|
||||
|
||||
#--------
|
||||
# WT
|
||||
#--------
|
||||
dfP2 = wt_interimDF[i_extract,]
|
||||
dfP2 = data.frame(t(dfP2))
|
||||
names(dfP2) = i_extract
|
||||
cols_to_paste2 = names(dfP2)
|
||||
dfP2['chosen_seq'] = apply( dfP2[, cols_to_paste2]
|
||||
, 1
|
||||
, paste, sep = ''
|
||||
, collapse = "")
|
||||
|
||||
plot_wt_edM = dfP2$chosen_seq
|
||||
|
||||
}
|
||||
|
||||
}else{
|
||||
cat("\nNo. of positions selected:", length(plot_positions))
|
||||
i_ofr = plot_positions[!plot_positions%in%msa_all_pos]
|
||||
i_ofr = plot_positions[!plot_positions%in%msa_pos]
|
||||
cat("\n1 or more plot_positions out of range..."
|
||||
, "\nThese are:\n", i_ofr
|
||||
, "\nQuitting! Resubmit with correct plot_positions")
|
||||
|
@ -164,60 +239,10 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
|
|||
}
|
||||
}
|
||||
|
||||
# Construct Y-axis for MSA mut plot:
|
||||
cat("\nCalculating y-axis for MSA mut plot")
|
||||
|
||||
if (missing(y_breaks)){
|
||||
|
||||
# Y-axis: Calculating
|
||||
cat("\n----------------------------------------"
|
||||
, "\nY-axis being generated from data"
|
||||
, "\n-----------------------------------------")
|
||||
|
||||
ylim_low <- floor(min(combED_mutM)); ylim_low
|
||||
|
||||
if( ylim_low == 0){
|
||||
|
||||
ylim_low = ylim_low
|
||||
cat("\nY-axis lower limit:", ylim_low)
|
||||
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
|
||||
|
||||
ylim_up <- ceiling(max(combED_mutM)) + 4; ylim_up
|
||||
cat("\nY-axis upper limit:", ylim_up)
|
||||
y_rup = seq(0, ylim_up, by = 2); y_rup
|
||||
|
||||
}else{
|
||||
|
||||
ylim_low = ylim_low + (-0.5)
|
||||
cat("\nY-axis lower limit is <0:", ylim_low)
|
||||
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
|
||||
|
||||
ylim_up <- ceiling(max(combED_mutM)) + 3; ylim_up
|
||||
cat("\nY-axis upper limit:", ylim_up)
|
||||
y_rup = seq(0, ylim_up, by = 3); y_rup
|
||||
}
|
||||
|
||||
#ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale
|
||||
ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale
|
||||
|
||||
cat("\nY-axis generated: see below\n"
|
||||
, ylim_scale)
|
||||
|
||||
}else{
|
||||
|
||||
# Y-axis: User provided
|
||||
cat("\n--------------------------------"
|
||||
, "\nUsing y-axis:: User provided"
|
||||
,"\n---------------------------------")
|
||||
ylim_scale = sort(y_breaks)
|
||||
ylim_low = min(ylim_scale); ylim_low
|
||||
ylim_up = max(ylim_scale); ylim_up
|
||||
}
|
||||
|
||||
######################################
|
||||
# Generating plots for muts and wt
|
||||
#####################################
|
||||
|
||||
if (my_logo_col %in% c('clustalx','taylor')) {
|
||||
cat("\nSelected colour scheme:", my_logo_col
|
||||
, "\nUsing black theme\n")
|
||||
|
@ -252,7 +277,6 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
|
|||
# Mutant logo plot
|
||||
#-------------------
|
||||
p0 = ggseqlogo(plot_mut_edM
|
||||
#, facet = "grid"
|
||||
, method = msa_method
|
||||
, col_scheme = my_logo_col
|
||||
, seq_type = 'auto') +
|
||||
|
@ -279,17 +303,28 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
|
|||
, colour = xtt_col)
|
||||
, axis.title.y = element_text(size = y_tts
|
||||
, colour = ytt_col)
|
||||
, plot.background = element_rect(fill = theme_bgc))+
|
||||
, plot.background = element_rect(fill = theme_bgc)) +
|
||||
|
||||
xlab(x_lab_mut) + ylab(y_lab_mut)
|
||||
xlab(x_lab_mut)
|
||||
|
||||
if (missing(plot_positions)){
|
||||
|
||||
ed_mut_logo_P = p0 +
|
||||
scale_x_discrete(breaks = msa_all_pos
|
||||
scale_x_discrete(breaks = msa_pos
|
||||
, expand = c(x_axis_offset, 0)
|
||||
, labels = msa_all_pos
|
||||
, limits = factor(msa_all_pos))+
|
||||
, labels = msa_pos
|
||||
, limits = factor(msa_pos))
|
||||
|
||||
}else{
|
||||
ed_mut_logo_P = p0 +
|
||||
scale_x_discrete(breaks = i_extract
|
||||
, expand = c(x_axis_offset_filtered, 0)
|
||||
, labels = i_extract
|
||||
, limits = factor(i_extract))
|
||||
|
||||
}
|
||||
|
||||
if (logo_type == "EDLogo"){
|
||||
ed_mut_logo_P = ed_mut_logo_P +
|
||||
scale_y_continuous(limits = c(ylim_low, ylim_up)
|
||||
, breaks = ylim_scale
|
||||
, expand = c(0, y_axis_offset)) +
|
||||
|
@ -297,21 +332,12 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
|
|||
, linetype = "solid"
|
||||
, color = "grey"
|
||||
, size = 1)
|
||||
}
|
||||
|
||||
}else{
|
||||
ed_mut_logo_P = p0 +
|
||||
scale_x_discrete(breaks = i_extract
|
||||
, expand = c(x_axis_offset_filtered,0)
|
||||
, labels = i_extract
|
||||
, limits = factor(i_extract)) +
|
||||
#scale_y_continuous(expand = c(0,0.09)) +
|
||||
scale_y_continuous(limits = c(ylim_low, ylim_up)
|
||||
, breaks = ylim_scale
|
||||
, expand = c(0, y_axis_offset))+
|
||||
geom_hline(yintercept = 0
|
||||
, linetype = "solid"
|
||||
, color = "grey"
|
||||
, size = 1)
|
||||
if (missing(y_lab_mut)){
|
||||
ed_mut_logo_P = ed_mut_logo_P + ylab(y_label)
|
||||
} else{
|
||||
ed_mut_logo_P = ed_mut_logo_P + ylab(y_lab_mut)
|
||||
}
|
||||
|
||||
cat('\nDone: MSA plot for mutations')
|
||||
|
@ -354,10 +380,10 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
|
|||
|
||||
# No y-axis needed
|
||||
ed_wt_logo_P = p1 +
|
||||
scale_x_discrete(breaks = wt_all_pos
|
||||
scale_x_discrete(breaks = wt_pos
|
||||
, expand = c(x_axis_offset, 0)
|
||||
, labels = wt_all_pos
|
||||
, limits = factor(wt_all_pos))
|
||||
, labels = wt_pos
|
||||
, limits = factor(wt_pos))
|
||||
}else{
|
||||
|
||||
ed_wt_logo_P = p1 +
|
Loading…
Add table
Add a link
Reference in a new issue