added ed_pfm_data.R function and its corresponding test
This commit is contained in:
parent
5f9a95ccb1
commit
a2da95ef7c
3 changed files with 497 additions and 0 deletions
142
scripts/functions/ed_pfm_data.R
Normal file
142
scripts/functions/ed_pfm_data.R
Normal file
|
@ -0,0 +1,142 @@
|
|||
source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
|
||||
#####################################################################################
|
||||
# DataED_PFM():
|
||||
# Input:
|
||||
# Data:
|
||||
# msaSeq_mut: MSA chr vector for muts
|
||||
# msaSeq_wt [Optional]: MSA chr vector for wt
|
||||
|
||||
# Others params:
|
||||
# ED_score = c("log", log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
|
||||
# bg_prob: background probability, default is equal i.e NULL
|
||||
|
||||
# Returns data for ED plot from MSA
|
||||
# Mut matrix:
|
||||
# PFM matrix
|
||||
# PFM matrix scaled
|
||||
# ED matrix
|
||||
# Wt matrix [optional]
|
||||
# For my case, I always use it as it helps see what is at the wild-type already!
|
||||
|
||||
# TODO: SHINY
|
||||
# drop down: ED score type (in the actual plot function!)
|
||||
# drop down/enter field : bg probability (in the actual plot function!)
|
||||
# Make it hover over position and then get the corresponding data table!
|
||||
########################a###########################################################
|
||||
|
||||
DataED_PFM <- function(msaSeq_mut
|
||||
, msaSeq_wt
|
||||
, ED_score = c("log")
|
||||
, bg_prob = NULL)
|
||||
|
||||
{
|
||||
|
||||
dash_control = list()
|
||||
dash_control_default <- list(concentration = NULL, mode = NULL,
|
||||
optmethod = "mixEM", sample_weights = NULL, verbose = FALSE,
|
||||
bf = TRUE, pi_init = NULL, squarem_control = list(),
|
||||
dash_control = list(), reportcov = FALSE)
|
||||
|
||||
dash_control <- modifyList(dash_control_default, dash_control)
|
||||
|
||||
############################################
|
||||
# Data processing for logo plot for nsSNPS
|
||||
###########################################
|
||||
|
||||
cat("\nLength of MSA", length(msaSeq_mut))
|
||||
|
||||
pfm_mutM = matrix()
|
||||
pfm_mut_scaledM = matrix()
|
||||
combED_mutM = matrix()
|
||||
|
||||
#--------------------------
|
||||
# Getting PFM: mutant MSA
|
||||
#--------------------------
|
||||
pfm_mutM <- Biostrings::consensusMatrix(msaSeq_mut)
|
||||
colnames(pfm_mutM) <- 1:dim(pfm_mutM)[2]
|
||||
pfm_mut_scaledM <- do.call(dash, append(list(comp_data = pfm_mutM),
|
||||
dash_control))$posmean
|
||||
|
||||
logo_mut_h = get_logo_heights(pfm_mut_scaledM
|
||||
, bg = bg_prob
|
||||
, score = ED_score)
|
||||
|
||||
cat("\nGetting logo_heights from Logolas package...")
|
||||
|
||||
pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM
|
||||
pos_mutS = logo_mut_h[['pos_ic']]; pos_mutS
|
||||
pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED
|
||||
|
||||
neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1)
|
||||
neg_mutS = logo_mut_h[['neg_ic']]; neg_mutS
|
||||
neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED
|
||||
|
||||
if (length(pos_mutS) && length(neg_mutS) == dim(pfm_mutM)[2]){
|
||||
cat("\nPASS: pfm calculated successfully including scaled matrix"
|
||||
, "\nDim of pfm matrix:", dim(pfm_mutM)[1], dim(pfm_mutM)[2])
|
||||
}
|
||||
|
||||
combED_mutM = pos_mutED + neg_mutED
|
||||
|
||||
# initialise the mut list
|
||||
names_mutL = c("pfm_mutM", "pfm_mut_scaledM", "combED_mutM")
|
||||
EDmutDataL = vector("list", length(names_mutL))
|
||||
EDmutDataL = list(pfm_mutM, pfm_mut_scaledM, combED_mutM)
|
||||
names(EDmutDataL) = names_mutL
|
||||
|
||||
#---------------------
|
||||
# Getting PFM: WT
|
||||
#---------------------
|
||||
if(!missing(msaSeq_wt)){
|
||||
|
||||
cat("\nLength of WT seq", length(msaSeq_wt))
|
||||
|
||||
pfm_wtM = matrix()
|
||||
pfm_wt_scaledM = matrix()
|
||||
combED_wtM = matrix()
|
||||
|
||||
pfm_wtM <- Biostrings::consensusMatrix(msaSeq_wt)
|
||||
colnames(pfm_wtM) <- 1:dim(pfm_wtM)[2]
|
||||
pfm_wt_scaledM <- do.call(dash, append(list(comp_data = pfm_wtM),
|
||||
dash_control))$posmean
|
||||
|
||||
logo_wt_h = get_logo_heights(pfm_wt_scaledM
|
||||
, bg = bg_prob
|
||||
, score = ED_score)
|
||||
|
||||
pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM
|
||||
pos_wtS = logo_wt_h[['pos_ic']]; pos_wtS
|
||||
pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED
|
||||
|
||||
neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1)
|
||||
neg_wtS = logo_wt_h[['neg_ic']]; neg_wtS
|
||||
neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED
|
||||
|
||||
if (length(pos_wtS) && length(neg_wtS) == dim(pfm_wtM)[2]){
|
||||
cat("\nPASS: pfm calculated successfully including scaled matrix"
|
||||
, "\nDim of pfm matrix:", dim(pfm_wtM)[1], dim(pfm_wtM)[2])
|
||||
}
|
||||
|
||||
combED_wtM = pos_wtED + neg_wtED
|
||||
|
||||
# initialise the wt list
|
||||
names_wtL = c("pfm_wtM", "pfm_wt_scaledM", "combED_wtM")
|
||||
EDwtDataL = vector("list", length(names_wtL))
|
||||
EDwtDataL = list(pfm_wtM, pfm_wt_scaledM, combED_wtM)
|
||||
names(EDwtDataL) = names_wtL
|
||||
|
||||
# Combine two lists
|
||||
EDallDataL = append(EDmutDataL, EDwtDataL)
|
||||
|
||||
cat("\nReturning output for Mut + WT"
|
||||
, "\nLength of all data:", length(EDallDataL))
|
||||
return(EDallDataL)
|
||||
|
||||
}else{
|
||||
cat("\nReturning output for Mut data only"
|
||||
, "\nLength of Mut data:", length(EDmutDataL))
|
||||
|
||||
return(EDmutDataL)
|
||||
}
|
||||
}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue