LSHTM_analysis/scripts/functions/ed_pfm_data.R

142 lines
4.7 KiB
R

source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
#####################################################################################
# DataED_PFM():
# Input:
# Data:
# msaSeq_mut: MSA chr vector for muts
# msaSeq_wt [Optional]: MSA chr vector for wt
# Others params:
# ED_score = c("log", log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
# bg_prob: background probability, default is equal i.e NULL
# Returns data for ED plot from MSA
# Mut matrix:
# PFM matrix
# PFM matrix scaled
# ED matrix
# Wt matrix [optional]
# For my case, I always use it as it helps see what is at the wild-type already!
# TODO: SHINY
# drop down: ED score type (in the actual plot function!)
# drop down/enter field : bg probability (in the actual plot function!)
# Make it hover over position and then get the corresponding data table!
########################a###########################################################
DataED_PFM <- function(msaSeq_mut
, msaSeq_wt
, ED_score = c("log")
, bg_prob = NULL)
{
dash_control = list()
dash_control_default <- list(concentration = NULL, mode = NULL,
optmethod = "mixEM", sample_weights = NULL, verbose = FALSE,
bf = TRUE, pi_init = NULL, squarem_control = list(),
dash_control = list(), reportcov = FALSE)
dash_control <- modifyList(dash_control_default, dash_control)
############################################
# Data processing for logo plot for nsSNPS
###########################################
cat("\nLength of MSA", length(msaSeq_mut))
pfm_mutM = matrix()
pfm_mut_scaledM = matrix()
combED_mutM = matrix()
#--------------------------
# Getting PFM: mutant MSA
#--------------------------
pfm_mutM <- Biostrings::consensusMatrix(msaSeq_mut)
colnames(pfm_mutM) <- 1:dim(pfm_mutM)[2]
pfm_mut_scaledM <- do.call(dash, append(list(comp_data = pfm_mutM),
dash_control))$posmean
logo_mut_h = get_logo_heights(pfm_mut_scaledM
, bg = bg_prob
, score = ED_score)
cat("\nGetting logo_heights from Logolas package...")
pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM
pos_mutS = logo_mut_h[['pos_ic']]; pos_mutS
pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED
neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1)
neg_mutS = logo_mut_h[['neg_ic']]; neg_mutS
neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED
if (length(pos_mutS) && length(neg_mutS) == dim(pfm_mutM)[2]){
cat("\nPASS: pfm calculated successfully including scaled matrix"
, "\nDim of pfm matrix:", dim(pfm_mutM)[1], dim(pfm_mutM)[2])
}
combED_mutM = pos_mutED + neg_mutED
# initialise the mut list
names_mutL = c("pfm_mutM", "pfm_mut_scaledM", "combED_mutM")
EDmutDataL = vector("list", length(names_mutL))
EDmutDataL = list(pfm_mutM, pfm_mut_scaledM, combED_mutM)
names(EDmutDataL) = names_mutL
#---------------------
# Getting PFM: WT
#---------------------
if(!missing(msaSeq_wt)){
cat("\nLength of WT seq", length(msaSeq_wt))
pfm_wtM = matrix()
pfm_wt_scaledM = matrix()
combED_wtM = matrix()
pfm_wtM <- Biostrings::consensusMatrix(msaSeq_wt)
colnames(pfm_wtM) <- 1:dim(pfm_wtM)[2]
pfm_wt_scaledM <- do.call(dash, append(list(comp_data = pfm_wtM),
dash_control))$posmean
logo_wt_h = get_logo_heights(pfm_wt_scaledM
, bg = bg_prob
, score = ED_score)
pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM
pos_wtS = logo_wt_h[['pos_ic']]; pos_wtS
pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED
neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1)
neg_wtS = logo_wt_h[['neg_ic']]; neg_wtS
neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED
if (length(pos_wtS) && length(neg_wtS) == dim(pfm_wtM)[2]){
cat("\nPASS: pfm calculated successfully including scaled matrix"
, "\nDim of pfm matrix:", dim(pfm_wtM)[1], dim(pfm_wtM)[2])
}
combED_wtM = pos_wtED + neg_wtED
# initialise the wt list
names_wtL = c("pfm_wtM", "pfm_wt_scaledM", "combED_wtM")
EDwtDataL = vector("list", length(names_wtL))
EDwtDataL = list(pfm_wtM, pfm_wt_scaledM, combED_wtM)
names(EDwtDataL) = names_wtL
# Combine two lists
EDallDataL = append(EDmutDataL, EDwtDataL)
cat("\nReturning output for Mut + WT"
, "\nLength of all data:", length(EDallDataL))
return(EDallDataL)
}else{
cat("\nReturning output for Mut data only"
, "\nLength of Mut data:", length(EDmutDataL))
return(EDmutDataL)
}
}