143 lines
4.7 KiB
R
143 lines
4.7 KiB
R
library(Logolas)
|
|
source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
|
|
#####################################################################################
|
|
# DataED_PFM():
|
|
# Input:
|
|
# Data:
|
|
# msaSeq_mut: MSA chr vector for muts
|
|
# msaSeq_wt [Optional]: MSA chr vector for wt
|
|
|
|
# Others params:
|
|
# ED_score = c("log", log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
|
|
# bg_prob: background probability, default is equal i.e NULL
|
|
|
|
# Returns data for ED plot from MSA
|
|
# Mut matrix:
|
|
# PFM matrix
|
|
# PFM matrix scaled
|
|
# ED matrix
|
|
# Wt matrix [optional]
|
|
# For my case, I always use it as it helps see what is at the wild-type already!
|
|
|
|
# TODO: SHINY
|
|
# drop down: ED score type (in the actual plot function!)
|
|
# drop down/enter field : bg probability (in the actual plot function!)
|
|
# Make it hover over position and then get the corresponding data table!
|
|
########################a###########################################################
|
|
|
|
DataED_PFM <- function(msaSeq_mut
|
|
, msaSeq_wt
|
|
, ED_score = c("log")
|
|
, bg_prob = NULL)
|
|
|
|
{
|
|
|
|
dash_control = list()
|
|
dash_control_default <- list(concentration = NULL, mode = NULL,
|
|
optmethod = "mixEM", sample_weights = NULL, verbose = FALSE,
|
|
bf = TRUE, pi_init = NULL, squarem_control = list(),
|
|
dash_control = list(), reportcov = FALSE)
|
|
|
|
dash_control <- modifyList(dash_control_default, dash_control)
|
|
|
|
############################################
|
|
# Data processing for logo plot for nsSNPS
|
|
###########################################
|
|
|
|
cat("\nLength of MSA", length(msaSeq_mut))
|
|
|
|
pfm_mutM = matrix()
|
|
pfm_mut_scaledM = matrix()
|
|
combED_mutM = matrix()
|
|
|
|
#--------------------------
|
|
# Getting PFM: mutant MSA
|
|
#--------------------------
|
|
pfm_mutM <- Biostrings::consensusMatrix(msaSeq_mut)
|
|
colnames(pfm_mutM) <- 1:dim(pfm_mutM)[2]
|
|
pfm_mut_scaledM <- do.call(dash, append(list(comp_data = pfm_mutM),
|
|
dash_control))$posmean
|
|
|
|
logo_mut_h = get_logo_heights(pfm_mut_scaledM
|
|
, bg = bg_prob
|
|
, score = ED_score)
|
|
|
|
cat("\nGetting logo_heights from Logolas package...")
|
|
|
|
pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM
|
|
pos_mutS = logo_mut_h[['pos_ic']]; pos_mutS
|
|
pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED
|
|
|
|
neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1)
|
|
neg_mutS = logo_mut_h[['neg_ic']]; neg_mutS
|
|
neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED
|
|
|
|
if (length(pos_mutS) && length(neg_mutS) == dim(pfm_mutM)[2]){
|
|
cat("\nPASS: pfm calculated successfully including scaled matrix"
|
|
, "\nDim of pfm matrix:", dim(pfm_mutM)[1], dim(pfm_mutM)[2])
|
|
}
|
|
|
|
combED_mutM = pos_mutED + neg_mutED
|
|
|
|
# initialise the mut list
|
|
names_mutL = c("pfm_mutM", "pfm_mut_scaledM", "combED_mutM")
|
|
EDmutDataL = vector("list", length(names_mutL))
|
|
EDmutDataL = list(pfm_mutM, pfm_mut_scaledM, combED_mutM)
|
|
names(EDmutDataL) = names_mutL
|
|
|
|
#---------------------
|
|
# Getting PFM: WT
|
|
#---------------------
|
|
if(!missing(msaSeq_wt)){
|
|
|
|
cat("\nLength of WT seq", length(msaSeq_wt))
|
|
|
|
pfm_wtM = matrix()
|
|
pfm_wt_scaledM = matrix()
|
|
combED_wtM = matrix()
|
|
|
|
pfm_wtM <- Biostrings::consensusMatrix(msaSeq_wt)
|
|
colnames(pfm_wtM) <- 1:dim(pfm_wtM)[2]
|
|
pfm_wt_scaledM <- do.call(dash, append(list(comp_data = pfm_wtM),
|
|
dash_control))$posmean
|
|
|
|
logo_wt_h = get_logo_heights(pfm_wt_scaledM
|
|
, bg = bg_prob
|
|
, score = ED_score)
|
|
|
|
pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM
|
|
pos_wtS = logo_wt_h[['pos_ic']]; pos_wtS
|
|
pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED
|
|
|
|
neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1)
|
|
neg_wtS = logo_wt_h[['neg_ic']]; neg_wtS
|
|
neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED
|
|
|
|
if (length(pos_wtS) && length(neg_wtS) == dim(pfm_wtM)[2]){
|
|
cat("\nPASS: pfm calculated successfully including scaled matrix"
|
|
, "\nDim of pfm matrix:", dim(pfm_wtM)[1], dim(pfm_wtM)[2])
|
|
}
|
|
|
|
combED_wtM = pos_wtED + neg_wtED
|
|
|
|
# initialise the wt list
|
|
names_wtL = c("pfm_wtM", "pfm_wt_scaledM", "combED_wtM")
|
|
EDwtDataL = vector("list", length(names_wtL))
|
|
EDwtDataL = list(pfm_wtM, pfm_wt_scaledM, combED_wtM)
|
|
names(EDwtDataL) = names_wtL
|
|
|
|
# Combine two lists
|
|
EDallDataL = append(EDmutDataL, EDwtDataL)
|
|
|
|
cat("\nReturning output for Mut + WT"
|
|
, "\nLength of all data:", length(EDallDataL))
|
|
return(EDallDataL)
|
|
|
|
}else{
|
|
cat("\nReturning output for Mut data only"
|
|
, "\nLength of Mut data:", length(EDmutDataL))
|
|
|
|
return(EDmutDataL)
|
|
}
|
|
}
|
|
|