1884 lines
75 KiB
R
1884 lines
75 KiB
R
########################################################################
|
|
# From package: logolas
|
|
# need to source locally (along with dependant functions)
|
|
# for generating the pfm and pfm_scaled
|
|
# needed for gettting the ED values and EDPlot
|
|
########################################################################
|
|
|
|
my_logolas <- function (data, type = c("Logo", "EDLogo"), use_dash = TRUE,
|
|
bg = NULL, color_type = NULL, colors = NULL, color_seed = 2030,
|
|
return_heights = FALSE, logo_control = list(), dash_control = list())
|
|
{
|
|
dash_control_default <- list(concentration = NULL, mode = NULL,
|
|
optmethod = "mixEM", sample_weights = NULL, verbose = FALSE,
|
|
bf = TRUE, pi_init = NULL, squarem_control = list(),
|
|
dash_control = list(), reportcov = FALSE)
|
|
|
|
if (type == "Logo") {
|
|
logo_control_default <- list(ic = NULL, total_chars = c("A",
|
|
"B", "C", "D", "E", "F", "G", "H", "I", "J", "K",
|
|
"L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
|
|
"V", "W", "X", "Y", "Z", "zero", "one", "two", "three",
|
|
"four", "five", "six", "seven", "eight", "nine",
|
|
"dot", "comma", "dash", "colon", "semicolon", "leftarrow",
|
|
"rightarrow"), frame_width = NULL, ic.scale = TRUE,
|
|
xaxis = TRUE, yaxis = TRUE, xaxis_fontsize = 10,
|
|
xlab_fontsize = 15, y_fontsize = 15, main_fontsize = 16,
|
|
start = 0.001, yscale_change = TRUE, pop_name = "",
|
|
xlab = "position", ylab = "Information content",
|
|
col_line_split = "white", addlogos = NULL, addlogos_text = NULL,
|
|
newpage = TRUE, control = list())
|
|
}
|
|
else if (type == "EDLogo") {
|
|
logo_control_default <- list(ic = FALSE, score = "log",
|
|
total_chars = c("A", "B", "C", "D", "E", "F", "G",
|
|
"H", "I", "J", "K", "L", "M", "N", "O", "P",
|
|
"Q", "R", "S", "T", "U", "V", "W", "X", "Y",
|
|
"Z", "zero", "one", "two", "three", "four", "five",
|
|
"six", "seven", "eight", "nine", "dot", "comma",
|
|
"dash", "colon", "semicolon", "leftarrow", "rightarrow"),
|
|
frame_width = NULL, yscale_change = TRUE, pop_name = "",
|
|
addlogos = NULL, addlogos_text = NULL, newpage = TRUE,
|
|
yrange = NULL, xaxis = TRUE, yaxis = TRUE, xaxis_fontsize = 10,
|
|
xlab_fontsize = 15, y_fontsize = 15, main_fontsize = 16,
|
|
start = 0.001, xlab = "position", ylab = "Enrichment Score",
|
|
col_line_split = "white", control = list())
|
|
}
|
|
if (is.character(data)) {
|
|
if (length(data) == 1) {
|
|
stop("Just one character sequence provided, user needs to enter \n multiple such aligned sequences")
|
|
}
|
|
numchars <- sapply(data, function(x) return(nchar(x)))
|
|
if (!(isTRUE(all.equal(max(numchars), min(numchars))))) {
|
|
stop("character sequences entered are not all of same length : so \n cannot be aligned")
|
|
}
|
|
if (type == "PSSM") {
|
|
message("for character sequence data, switch to EDLogo type")
|
|
type = "EDLogo"
|
|
}
|
|
L <- length(data)
|
|
pfm <- Biostrings::consensusMatrix(data)
|
|
colnames(pfm) <- 1:dim(pfm)[2]
|
|
numchars <- sapply(rownames(pfm), function(x) return(nchar(x)))
|
|
if (max(numchars) == 1) {
|
|
logo_control_default$col_line_split <- "white"
|
|
}
|
|
else {
|
|
logo_control_default$col_line_split <- "grey80"
|
|
}
|
|
logo_control_default$pop_name = ""
|
|
logo_control_default$xlab = "position"
|
|
dash_control <- modifyList(dash_control_default, dash_control)
|
|
logo_control <- modifyList(logo_control_default, logo_control)
|
|
if (use_dash) {
|
|
pfm_scaled <- do.call(dash, append(list(comp_data = pfm),
|
|
dash_control))$posmean
|
|
}
|
|
else {
|
|
pfm_scaled <- pfm
|
|
}
|
|
if (is.null(color_type)) {
|
|
message("color_type not provided, so switching to per_row option for\n color_type")
|
|
color_type = "per_row"
|
|
}
|
|
if (color_type == "per_row") {
|
|
if (is.null(colors)) {
|
|
cols = RColorBrewer::brewer.pal.info[RColorBrewer::brewer.pal.info$category ==
|
|
"qual", ]
|
|
col_vector = unlist(mapply(RColorBrewer::brewer.pal,
|
|
cols$maxcolors, rownames(cols)))
|
|
set.seed(color_seed)
|
|
color_profile <- list(type = color_type, col = sample(col_vector,
|
|
dim(pfm_scaled)[1], replace = FALSE))
|
|
}
|
|
else {
|
|
if (length(colors) < dim(pfm_scaled)[1]) {
|
|
stop("For per_row color type, the colors vector must be as large\n as number of rows in the matrix for PFM/PWM input, or \n number of distinct characters in\n each aligned sequence for sequence data")
|
|
}
|
|
set.seed(color_seed)
|
|
color_profile <- list(type = color_type, col = sample(colors,
|
|
dim(pfm_scaled)[1], replace = FALSE))
|
|
}
|
|
}
|
|
if (color_type == "per_symbol") {
|
|
if (is.null(colors)) {
|
|
cols = RColorBrewer::brewer.pal.info[RColorBrewer::brewer.pal.info$category ==
|
|
"qual", ]
|
|
col_vector = unlist(mapply(RColorBrewer::brewer.pal,
|
|
cols$maxcolors, rownames(cols)))
|
|
set.seed(color_seed)
|
|
color_profile <- list(type = color_type, col = sample(col_vector,
|
|
length(logo_control$total_chars), replace = FALSE))
|
|
}
|
|
else {
|
|
if (length(colors) < length(logo_control$total_chars)) {
|
|
stop("For per_symbol color type, the colors vector must be \n as large as number of\n symbols in total_chars argument in logo_control() :\n which is 50 by default ")
|
|
}
|
|
set.seed(color_seed)
|
|
color_profile <- list(type = color_type, col = sample(colors,
|
|
length(logo_control$total_chars), replace = FALSE))
|
|
}
|
|
}
|
|
if (color_type == "per_column") {
|
|
if (is.null(colors)) {
|
|
cols = RColorBrewer::brewer.pal.info[RColorBrewer::brewer.pal.info$category ==
|
|
"qual", ]
|
|
col_vector = unlist(mapply(RColorBrewer::brewer.pal,
|
|
cols$maxcolors, rownames(cols)))
|
|
set.seed(color_seed)
|
|
color_profile <- list(type = color_type, col = sample(col_vector,
|
|
dim(pfm_scaled)[2], replace = FALSE))
|
|
}
|
|
else {
|
|
if (length(colors) < dim(pfm_scaled)[2]) {
|
|
stop("For per_column color type, the colors vector must be as\n large as number of columns in the matrix for PFM/PWM input, \n or number of characters in each aligned\n sequence for sequence data")
|
|
}
|
|
set.seed(color_seed)
|
|
color_profile <- list(type = color_type, col = sample(colors,
|
|
dim(pfm_scaled)[2], replace = FALSE))
|
|
}
|
|
}
|
|
if (type == "Logo") {
|
|
out <- do.call(plogomaker, append(list(table = pfm_scaled,
|
|
color_profile = color_profile, bg = bg), logo_control))
|
|
}
|
|
else if (type == "EDLogo") {
|
|
out <- do.call(nlogomaker, append(list(table = pfm_scaled,
|
|
color_profile = color_profile, bg = bg), logo_control))
|
|
}
|
|
}
|
|
if (!is.character(data)) {
|
|
if (!is.matrix(data) & !is.data.frame(data)) {
|
|
stop("if not a character sequence, data must be either a matrix \n or a data frame")
|
|
}
|
|
if (is.null(rownames(data))) {
|
|
stop("row names of the data matrix should be symbols to be plotted \n in the logo plot ")
|
|
}
|
|
if (is.null(colnames(data))) {
|
|
colnames(data) <- 1:dim(data)[2]
|
|
}
|
|
if (min(data[!is.na(data)]) < 0) {
|
|
stop("negative values in data matrix not permitted : try logo_pssm ()\n function for plotting position specific scores")
|
|
}
|
|
numchars <- sapply(rownames(data), function(x) return(nchar(x)))
|
|
if (max(numchars) == 1) {
|
|
logo_control_default$col_line_split <- "white"
|
|
}
|
|
else {
|
|
logo_control_default$col_line_split <- "grey80"
|
|
}
|
|
logo_control_default$pop_name = ""
|
|
logo_control_default$xlab = "position"
|
|
if (all(data == floor(data))) {
|
|
datatype = "PFM"
|
|
}
|
|
else {
|
|
datatype = "PWM"
|
|
}
|
|
if (datatype == "PWM") {
|
|
use_dash = FALSE
|
|
}
|
|
dash_control <- modifyList(dash_control_default, dash_control)
|
|
logo_control <- modifyList(logo_control_default, logo_control)
|
|
if (use_dash) {
|
|
data_scaled <- do.call(dash, append(list(comp_data = data),
|
|
dash_control))$posmean
|
|
}
|
|
else {
|
|
data_scaled <- data
|
|
}
|
|
if (is.null(color_type)) {
|
|
message("color_type not provided, so switching to per_row option for\n color_type")
|
|
color_type = "per_row"
|
|
}
|
|
if (color_type == "per_row") {
|
|
if (is.null(colors)) {
|
|
cols = RColorBrewer::brewer.pal.info[RColorBrewer::brewer.pal.info$category ==
|
|
"qual", ]
|
|
col_vector = unlist(mapply(RColorBrewer::brewer.pal,
|
|
cols$maxcolors, rownames(cols)))
|
|
set.seed(color_seed)
|
|
color_profile <- list(type = color_type, col = sample(col_vector,
|
|
dim(data_scaled)[1], replace = FALSE))
|
|
}
|
|
else {
|
|
if (length(colors) < dim(data_scaled)[1]) {
|
|
stop("For per_row color type, the colors vector must be as large\n as number of rows in the matrix for PFM/PWM input, or number\n of distinct characters in each aligned sequence for sequence \n data")
|
|
}
|
|
set.seed(color_seed)
|
|
color_profile <- list(type = color_type, col = sample(colors,
|
|
dim(data_scaled)[1], replace = FALSE))
|
|
}
|
|
}
|
|
if (color_type == "per_symbol") {
|
|
if (is.null(colors)) {
|
|
cols = RColorBrewer::brewer.pal.info[RColorBrewer::brewer.pal.info$category ==
|
|
"qual", ]
|
|
col_vector = unlist(mapply(RColorBrewer::brewer.pal,
|
|
cols$maxcolors, rownames(cols)))
|
|
set.seed(color_seed)
|
|
color_profile <- list(type = color_type, col = sample(col_vector,
|
|
length(logo_control$total_chars), replace = FALSE))
|
|
}
|
|
else {
|
|
if (length(colors) < length(logo_control$total_chars)) {
|
|
stop("For per_symbol color type, the colors vector must be as\n large as number of symbols in total_chars argument in \n logo_control() : which is 50 by default ")
|
|
}
|
|
set.seed(color_seed)
|
|
color_profile <- list(type = color_type, col = sample(colors,
|
|
length(logo_control$total_chars), replace = FALSE))
|
|
}
|
|
}
|
|
if (color_type == "per_column") {
|
|
if (is.null(colors)) {
|
|
cols = RColorBrewer::brewer.pal.info[RColorBrewer::brewer.pal.info$category ==
|
|
"qual", ]
|
|
col_vector = unlist(mapply(RColorBrewer::brewer.pal,
|
|
cols$maxcolors, rownames(cols)))
|
|
set.seed(color_seed)
|
|
color_profile <- list(type = color_type, col = sample(col_vector,
|
|
dim(data_scaled)[2], replace = FALSE))
|
|
}
|
|
else {
|
|
if (length(colors) < dim(data_scaled)[2]) {
|
|
stop("For per_column color type, the colors vector must be as large \n as number of columns in the matrix for PFM/PWM input, or\n number of characters in each aligned\n sequence for sequence data")
|
|
}
|
|
set.seed(color_seed)
|
|
color_profile <- list(type = color_type, col = sample(colors,
|
|
dim(data_scaled)[2], replace = FALSE))
|
|
}
|
|
}
|
|
if (type == "Logo") {
|
|
out <- do.call(plogomaker, append(list(table = data_scaled,
|
|
color_profile = color_profile, bg = bg), logo_control))
|
|
}
|
|
else if (type == "EDLogo") {
|
|
out <- do.call(nlogomaker, append(list(table = data_scaled,
|
|
color_profile = color_profile, bg = bg), logo_control))
|
|
}
|
|
}
|
|
if (return_heights) {
|
|
return(pfm_scaled)
|
|
#return(out)
|
|
}
|
|
}
|
|
########################################################################
|
|
#=================
|
|
# dash()
|
|
#=================
|
|
# @title Dirichlet adaptive shrinkage of compositional data using dash
|
|
#
|
|
# @description Given a matrix of compositional counts data, with samples along
|
|
# the rows and the categories of composition along columns, performs Bayesian
|
|
# adaptive shrinkage of the compositions to produce refined composition probs.
|
|
#
|
|
# @details The dash function provides a number of ways to perform
|
|
# Empirical Bayes shrinkage estimation on compositional data (counts).
|
|
#
|
|
# The inputs to dash is a matrix of compositional counts with samples along
|
|
# rows and categories along columns. The method assumes that the compositional
|
|
# counts data is generated from an underlying composition probability vector,
|
|
# which follows a mixture of Dirichlet distributions centered at the
|
|
# user defined mode (which defaults to means for all categories being equal).
|
|
#
|
|
# We assume that the component Dirichlet distributions in the mixture have
|
|
# varying degrees of concentration, varying from Inf (which is same as saying
|
|
# a point mass at the mode), and then from high to low values of concentration
|
|
# and even concentration values less than 1, which would represent spikes at
|
|
# the corners of the simplex.
|
|
#
|
|
# The grades of memberships/ mixture proportions in different Dirichlet
|
|
# components are estimated and post-hoc measures - posterior mean, posterior
|
|
# weights, posterior center and corner probabilities etc are computed.
|
|
# The posterior mean is considered as the shrunk compositional probability.
|
|
#
|
|
#
|
|
# @param comp_data, a n by m matrix where n represents the sample and m
|
|
# represents the category of composition.
|
|
# @param concentration a vector of concentration scales for different Dirichlet
|
|
# compositions. Defaults to NULL, in which case, we append
|
|
# concentration values of Inf, 100, 50, 20, 10, 5, 2, 1, 0.5
|
|
# and 0.1.
|
|
# @param mode An user defined mode/mean for the Dirichlet components. Defaults
|
|
# to equal means for all components.
|
|
# @param optmethod The method for performing optimization of the mixture
|
|
# proportions or grades of memberships for the different
|
|
# Dirichlet compositions. Can be either of "mixEM", "w_mixEM"
|
|
# or weighted mixEM and "mixIP" for interior point convex
|
|
# optimization.
|
|
# @param sample_weights The weights of the samples for performing the
|
|
# optimization. Defaults to NULL, in which case the weight
|
|
# is same for each sample.
|
|
# @param verbose if TRUE, outputs messages tracking progress of the method.
|
|
# @param bf A boolean (TRUE/FALSE) variable denoting whether log bayes factor
|
|
# (with respect to category with smallest representation) is used in
|
|
# optimization or the loglikelihood. Defaults to FALSE.
|
|
# @param pi_init An initial starting value for the mixture proportions. Defaults
|
|
# to same proportion for all categories.
|
|
# @param squarem_control A list of control parameters for the
|
|
# SQUAREM/IP algorithm,
|
|
# default value is set to be control.default=list(K = 1, method=3,
|
|
# square=TRUE, step.min0=1, step.max0=1, mstep=4, kr=1,
|
|
# objfn.inc=1,tol=1.e-07, maxiter=5000, trace=FALSE).
|
|
# @param dash_control A list of control parameters for determining the
|
|
# concentrations
|
|
# and prior weights and fdr control parameters for
|
|
# dash fucntion.
|
|
# @param reportcov A boolean indicating whether the user wants to return
|
|
# the covariance and correlation structure of the posterior.
|
|
# Defaults to FALSE.
|
|
#
|
|
# @return A list, including the following,
|
|
# \code{fitted_pi}: The fitted values of mixture proportions for
|
|
# Dirichlet components
|
|
# \code{concentration}: The concentration scales of the Dirichlet
|
|
# compositions
|
|
# \code{prior}: Prior strengths of Dirichlet components
|
|
# \code{posterior_weights}: Posterior weights of each sample on
|
|
# each category posterior component
|
|
# \code{posmean}: Posterior means of compositional probability from dash
|
|
# fit of each sample
|
|
# \code{datamean}: Original compositional probability of each sample
|
|
# \code{poscov}: Posterior covariance structure for each sample
|
|
# (if \code{reportcov} TRUE)
|
|
# \code{poscor}: Posterior correlation structure for each sample
|
|
# (if \code{reportcov} TRUE)
|
|
# \code{center_prob_local}: Posterior probability on Inf concentration
|
|
# Dirichlet component
|
|
# \code{center_prob}: Posterior probability on Dirichlet components with
|
|
# concentration less than \code{fdr_bound}
|
|
# \code{corner_prob}: Posterior probability on Dirichlet components with
|
|
# concentration less than 1
|
|
#
|
|
# @examples
|
|
# mat <- cbind(c(5, 0, 2, 0),
|
|
# c(1, 1, 0, 1),
|
|
# c(100, 100, 50, 100),
|
|
# c(20, 50, 100, 10),
|
|
# c(10, 10, 200, 20),
|
|
# c(50, 54, 58, 53),
|
|
# c(1,1,1,3),
|
|
# c(2, 4, 1, 1))
|
|
# out <- dash(mat, optmethod = "mixEM", verbose=TRUE)
|
|
# out <- dash(mat, optmethod = "w_mixEM", verbose=TRUE)
|
|
#
|
|
#' @importFrom utils modifyList
|
|
#' @importFrom stats cov2cor
|
|
dash <- function(comp_data,
|
|
concentration = NULL,
|
|
mode = NULL,
|
|
optmethod = "mixEM",
|
|
sample_weights = NULL,
|
|
verbose = FALSE,
|
|
bf = TRUE,
|
|
pi_init = NULL,
|
|
squarem_control = list(),
|
|
dash_control = list(),
|
|
reportcov = FALSE){
|
|
|
|
comp_data <- t(comp_data)
|
|
|
|
dash_control.default <- list(add_NULL = TRUE, add_Inf = TRUE,
|
|
add_corner = FALSE,
|
|
corner_val = 0.005, null_weight = 1,
|
|
Inf_weight = 1,
|
|
corner_weight = 1, fdr_bound = 50)
|
|
|
|
dash_control <- modifyList(dash_control.default, dash_control)
|
|
|
|
squarem_control.default=list(K = 1, method=3,
|
|
square=TRUE, step.min0=1, step.max0=1,
|
|
mstep=4, kr=1,
|
|
objfn.inc=1,tol=1.e-07, maxiter=5000,
|
|
trace=FALSE)
|
|
|
|
squarem_control <- modifyList(squarem_control.default, squarem_control)
|
|
|
|
## check if the mode has same length as columns of composition data
|
|
|
|
if(verbose){
|
|
cat("Checking inputs and processing the data \n")
|
|
}
|
|
|
|
|
|
if(is.null(mode)){
|
|
mode <- rep(1, dim(comp_data)[2])
|
|
}else{
|
|
mode <- mode/min(mode[mode!=0])
|
|
}
|
|
|
|
## weights for the samples - check if this vector has the same
|
|
## length as the number of samples (number of rows of the compositional data),
|
|
## unless it is NULL, in which case, all samples have equal weights.
|
|
|
|
if(!is.null(sample_weights)){
|
|
if(length(sample_weights) != dim(comp_data)[1]){
|
|
stop("The length of the user-defined sample weights must
|
|
match with number of rows in the comp_data")
|
|
}
|
|
}
|
|
|
|
## check if an initial mixture proportion pi has been provided by the user.
|
|
|
|
if(!is.null(pi_init)){
|
|
if(length(pi_init) != dim(comp_data)[2]){
|
|
stop("The length of the user-defined pi_init
|
|
must match with number of columns
|
|
in the comp_data")
|
|
}
|
|
}
|
|
|
|
|
|
if(!is.null(mode)){
|
|
if(length(mode) != dim(comp_data)[2]){
|
|
stop("The length of the user-defined mode must match with
|
|
number of columns
|
|
in the comp_data")
|
|
}
|
|
}
|
|
|
|
## add prior concentrations - adding an Inf and 1 concentration to the mix
|
|
## if not provided by the user
|
|
|
|
concentration <- unique(concentration)
|
|
|
|
if(is.null(concentration)){
|
|
concentration <- c(Inf, 100, 50, 20, 10, 5, 2, 1)
|
|
}else{
|
|
if (dash_control$add_NULL){
|
|
concentration <- c(concentration, 1)
|
|
}
|
|
if (dash_control$add_Inf){
|
|
concentration <- c(concentration, Inf)
|
|
}
|
|
if(dash_control$add_corner){
|
|
if (min(concentration) > dash_control$corner_val){
|
|
concentration <- c(concentration, dash_control$corner_val)
|
|
}
|
|
}
|
|
concentration <- sort(concentration, decreasing = TRUE)
|
|
}
|
|
|
|
conc <- concentration
|
|
conc[conc == Inf] <- 10^5
|
|
conc_mat <- t(sapply(conc,function(x) return(x*(mode+1e-04))))
|
|
|
|
prior <- array(1, length(concentration))
|
|
if(length(which(concentration == Inf)) > 0){
|
|
prior[which(concentration == Inf)] <- dash_control$Inf_weight
|
|
}
|
|
if(length(which(concentration == 1)) > 0){
|
|
prior[which(concentration == 1)] <- dash_control$null_weight
|
|
}
|
|
if(min(concentration) < 1){
|
|
prior[which(concentration == min(concentration))] <- dash_control$corner_weight
|
|
}
|
|
|
|
|
|
if(verbose){
|
|
cat("Fitting the dash shrinkage \n")
|
|
}
|
|
|
|
matrix_log_lik <- matrix(0, dim(comp_data)[1], dim(conc_mat)[1])
|
|
|
|
for(n in 1:dim(comp_data)[1]){
|
|
x <- comp_data[n,]
|
|
for(k in 2:dim(conc_mat)[1]){
|
|
# numero <- sum(x)*beta(sum(conc_mat[k,]), sum(x))
|
|
lognumero <- log(sum(x)) - LaplacesDemon::ddirichlet(rep(1,2),
|
|
alpha = c(sum(conc_mat[k,]), sum(x)), log=TRUE)
|
|
if(lognumero == -Inf | lognumero == Inf ){
|
|
matrix_log_lik[n, k] <- lognumero
|
|
}else{
|
|
index1 <- which(x > 0)
|
|
logdeno <- sum(log(x[index1]) - sapply(1:length(index1),
|
|
function(mm) return(LaplacesDemon::ddirichlet(rep(1,2),
|
|
alpha = c(conc_mat[k, index1[mm]], x[index1[mm]]), log=TRUE))))
|
|
matrix_log_lik[n,k] <- lognumero - logdeno
|
|
}
|
|
}
|
|
matrix_log_lik[n,1] <- logfac(sum(x)) - sum(sapply(x, function(y)
|
|
return(logfac(y)))) +
|
|
sum(x*log((conc_mat[1,]+1e-04)/sum(conc_mat[1,]+1e-04)))
|
|
}
|
|
|
|
if(!bf){
|
|
matrix_lik <- exp(matrix_log_lik - max(matrix_log_lik[matrix_log_lik != Inf
|
|
& matrix_log_lik != -Inf ]))
|
|
}else{
|
|
matrix_lik <- exp(matrix_log_lik - apply(matrix_log_lik, 1,
|
|
function(x) return(max(x))) %*% t(rep(1, dim(matrix_log_lik)[2])))
|
|
}
|
|
|
|
######################## mixEM optimization #########################
|
|
|
|
if(optmethod == "mixEM"){
|
|
fit=do.call("mixEM",args = list(matrix_lik= matrix_lik, prior=prior,
|
|
pi_init=pi_init, control=squarem_control))
|
|
}else if (optmethod == "w_mixEM"){
|
|
fit=do.call("w_mixEM",args = list(matrix_lik= matrix_lik, prior=prior,
|
|
pi_init=pi_init, control=squarem_control,
|
|
weights=sample_weights))
|
|
}else{
|
|
message("optmethod npt provided correctly: switching to mixEM")
|
|
fit=do.call("mixEM",args = list(matrix_lik= matrix_lik, prior=prior,
|
|
pi_init=pi_init, control=squarem_control))
|
|
}
|
|
|
|
|
|
if(verbose){
|
|
cat("Preparing output from fitted model \n")
|
|
}
|
|
|
|
## generate output list ll and assigning different attributes to it
|
|
## we first add the estimated pi, the concentration parameters and the prior
|
|
## parameters
|
|
|
|
ll <- list()
|
|
ll$fitted_pi <- fit$pihat
|
|
ll$concentration <- concentration
|
|
ll$prior <- prior
|
|
|
|
###################### posterior weights ###########################
|
|
|
|
pi_complete <- rep(1, dim(matrix_lik)[1]) %*% t(fit$pihat)
|
|
matrix_lik_adj <- matrix_lik*pi_complete
|
|
posterior_weights <- t(apply(matrix_lik_adj, 1, function(x) return(x/sum(x))))
|
|
colnames(posterior_weights) <- concentration
|
|
ll$posterior_weights <- posterior_weights
|
|
|
|
if(!is.null(rownames(comp_data))){
|
|
rownames(ll$posterior_weights) <- rownames(comp_data)
|
|
}
|
|
|
|
|
|
######################## posterior means ###########################
|
|
|
|
conc_mat[conc_mat == Inf] <- 10^5
|
|
posmean_comp <- array(0, c(dim(comp_data)[1], dim(comp_data)[2],
|
|
dim(conc_mat)[1]))
|
|
for(n in 1:dim(comp_data)[1]){
|
|
for(k in 1:dim(conc_mat)[1]){
|
|
temp <- comp_data[n,]+ conc_mat[k,]
|
|
posmean_comp[n,,k] <- (temp+1e-08)/sum(temp+1e-08)
|
|
}
|
|
}
|
|
|
|
posmean <- matrix(0, dim(comp_data)[1], dim(comp_data)[2])
|
|
|
|
for(n in 1:dim(comp_data)[1]){
|
|
posmean[n,] <- posmean_comp[n,,]%*%posterior_weights[n,]
|
|
}
|
|
ll$posmean <- t(posmean)
|
|
ll$datamean <- apply(comp_data, 1, function(x) return(x/sum(x)))
|
|
|
|
if(!is.null(colnames(comp_data))){
|
|
rownames(ll$posmean) <- colnames(comp_data)
|
|
rownames(ll$datamean) <- colnames(comp_data)
|
|
}
|
|
|
|
if(!is.null(rownames(comp_data))){
|
|
colnames(ll$posmean) <- rownames(comp_data)
|
|
colnames(ll$datamean) <- rownames(comp_data)
|
|
}
|
|
|
|
|
|
if(reportcov){
|
|
poscov_comp <- array(0, c(dim(comp_data)[1], dim(comp_data)[2],
|
|
dim(comp_data)[2], dim(conc_mat)[1]))
|
|
for(n in 1:dim(comp_data)[1]){
|
|
for(k in 1:dim(conc_mat)[1]){
|
|
temp <- comp_data[n,]+ conc_mat[k,]
|
|
alpha_0 <- sum(temp)
|
|
posvar <- (alpha_0* temp)/((alpha_0)^2*(alpha_0 + 1))
|
|
poscov_comp[n,,,k] <- diag(posvar) -
|
|
(temp %*% t(temp))/((alpha_0)^2*(alpha_0 + 1))
|
|
}
|
|
}
|
|
|
|
poscov <- array(0, c(dim(comp_data)[2], dim(comp_data)[2],
|
|
dim(comp_data)[1]))
|
|
poscor <- array(0, c(dim(comp_data)[2], dim(comp_data)[2],
|
|
dim(comp_data)[1]))
|
|
|
|
for(n in 1:dim(comp_data)[1]){
|
|
dsum <- matrix(0, dim(comp_data)[2], dim(comp_data)[2])
|
|
for(k in 1:dim(conc_mat)[1]){
|
|
dsum <- dsum + poscov_comp[n,,,k]*posterior_weights[n,k]
|
|
}
|
|
poscov[,,n] <- dsum
|
|
poscor[,,n] <- cov2cor(dsum)
|
|
}
|
|
|
|
ll$poscov <- poscov
|
|
ll$poscor <- poscor
|
|
}
|
|
|
|
###################### FDR + corner enrichment #######################
|
|
|
|
ll$center_prob_local <- posterior_weights[,which(concentration == Inf)]
|
|
ll$center_prob <- rowSums(posterior_weights[, which(concentration > dash_control$fdr_bound)])
|
|
ll$corner_prob <- rowSums(posterior_weights[, which(concentration < 1)])
|
|
|
|
return(ll)
|
|
}
|
|
|
|
|
|
logfac = function(x){
|
|
if(x < 1 && x > 0 || x < 0){
|
|
stop("x cannot be less than 0 or a fraction")
|
|
}else if(x < 10){
|
|
out <- log(factorial(x))
|
|
}else if (x == 0){
|
|
out <- 0
|
|
}else{
|
|
out <- sum(log(1:x))
|
|
}
|
|
return(out)
|
|
}
|
|
########################################################################
|
|
#=================
|
|
# mixEM()
|
|
#=================
|
|
|
|
# @title Estimate mixture proportions of a mixture model by EM algorithm
|
|
#
|
|
# @description Given the individual component likelihoods for a mixture model,
|
|
# estimates the mixture proportions by an EM algorithm.
|
|
#
|
|
# @details Fits a k component mixture model \deqn{f(x|\pi)= \sum_k \pi_k f_k(x)}
|
|
# to independent
|
|
# and identically distributed data \eqn{x_1,\dots,x_n}.
|
|
# Estimates mixture proportions \eqn{\pi} by maximum likelihood, or by maximum a
|
|
# posteriori (MAP) estimation for a Dirichlet prior on \eqn{\pi}
|
|
# (if a prior is specified). Uses the SQUAREM package to accelerate convergence
|
|
# of EM. Used by the ash main function; there is no need for a user to call this
|
|
# function separately, but it is exported for convenience.
|
|
#
|
|
#
|
|
# @param matrix_lik, a n by k matrix with (j,k)th element equal to \eqn{f_k(x_j)}.
|
|
# @param prior, a k vector of the parameters of the Dirichlet prior on
|
|
# \eqn{\pi}. Recommended to be rep(1,k)
|
|
# @param pi_init, the initial value of \eqn{\pi} to use. If not specified
|
|
# defaults to (1/k,...,1/k).
|
|
# @param control A list of control parameters for the SQUAREM algorithm,
|
|
# default value is set to be control.default=list(K = 1, method=3, square=TRUE,
|
|
# step.min0=1, step.max0=1, mstep=4, kr=1, objfn.inc=1,tol=1.e-07, maxiter=5000,
|
|
# trace=FALSE).
|
|
#
|
|
# @return A list, including the estimates (pihat), the log likelihood for
|
|
# each interation (B)
|
|
# and a flag to indicate convergence
|
|
#
|
|
#' @import SQUAREM
|
|
mixEM = function(matrix_lik,prior,pi_init=NULL,control=list()){
|
|
control = set_control_squarem(control,nrow(matrix_lik))
|
|
k=dim(matrix_lik)[2]
|
|
if(is.null(pi_init)){
|
|
pi_init = rep(1/k,k)# Use as starting point for pi
|
|
}
|
|
res = SQUAREM::squarem(par=pi_init,fixptfn=fixpoint,
|
|
objfn=negpenloglik,matrix_lik=matrix_lik,
|
|
prior=prior, control=control)
|
|
return(list(pihat = normalize(pmax(0,res$par)), B=res$value.objfn,
|
|
niter = res$iter, converged=res$convergence, control=control))
|
|
}
|
|
|
|
|
|
normalize = function(x){return(x/sum(x))}
|
|
|
|
fixpoint = function(pi, matrix_lik, prior){
|
|
pi = normalize(pmax(0,pi)) #avoid occasional problems with negative pis
|
|
# due to rounding
|
|
m = t(pi * t(matrix_lik)) # matrix_lik is n by k; so this is also n by k
|
|
m.rowsum = rowSums(m)
|
|
classprob = m/m.rowsum #an n by k matrix
|
|
pinew = normalize(colSums(classprob) + prior - 1)
|
|
return(pinew)
|
|
}
|
|
|
|
negpenloglik = function(pi,matrix_lik,prior){return(-penloglik(pi,
|
|
matrix_lik,prior))}
|
|
|
|
penloglik = function(pi, matrix_lik, prior){
|
|
pi = normalize(pmax(0,pi))
|
|
m = t(pi * t(matrix_lik)) # matrix_lik is n by k; so this is also n by k
|
|
m.rowsum = rowSums(m)
|
|
loglik = sum(log(m.rowsum))
|
|
subset = (prior != 1.0)
|
|
priordens = sum((prior-1)[subset]*log(pi[subset]))
|
|
return(loglik+priordens)
|
|
}
|
|
|
|
# @title Estimate mixture proportions of a mixture model by EM algorithm
|
|
# (weighted version)
|
|
#
|
|
# @description Given the individual component likelihoods for a mixture model,
|
|
# and a set of weights, estimates the mixture proportions by an EM algorithm.
|
|
#
|
|
# @details Fits a k component mixture model \deqn{f(x|\pi)= \sum_k \pi_k f_k(x)}
|
|
# to independent and identically distributed data \eqn{x_1,\dots,x_n}
|
|
# with weights \eqn{w_1,\dots,w_n}.
|
|
# Estimates mixture proportions \eqn{\pi} by maximum likelihood, or by
|
|
# maximum a posteriori (MAP) estimation for a Dirichlet prior on \eqn{\pi}
|
|
# (if a prior is specified). Here the log-likelihood for the weighted data
|
|
# is defined as \eqn{l(\pi) = \sum_j w_j log f(x_j | \pi)}. Uses the SQUAREM
|
|
# package to accelerate convergence of EM. Used by the ash main function;
|
|
# there is no need for a user to call this
|
|
# function separately, but it is exported for convenience.
|
|
#
|
|
#
|
|
# @param matrix_lik, a n by k matrix with (j,k)th element equal to
|
|
# \eqn{f_k(x_j)}.
|
|
# @param prior, a k vector of the parameters of the Dirichlet prior on
|
|
# \eqn{\pi}. Recommended to be rep(1,k)
|
|
# @param pi_init, the initial value of \eqn{\pi} to use. If not
|
|
# specified defaults to (1/k,...,1/k).
|
|
# @param weights, an n vector of weights
|
|
# @param control A list of control parameters for the SQUAREM algorithm,
|
|
# default value is set to be control.default=list(K = 1, method=3,
|
|
# squarem=TRUE, step.min0=1, step.max0=1, mstep=4, kr=1, objfn.inc=1,
|
|
# tol=1.e-07, maxiter=5000, trace=FALSE).
|
|
#
|
|
# @return A list, including the estimates (pihat), the log likelihood for each
|
|
# interation (B)
|
|
# and a flag to indicate convergence
|
|
#
|
|
#' @import SQUAREM
|
|
w_mixEM = function(matrix_lik,prior, pi_init=NULL, weights=NULL,control=list()){
|
|
control = set_control_squarem(control,nrow(matrix_lik))
|
|
k=dim(matrix_lik)[2]
|
|
if(is.null(pi_init)){
|
|
pi_init = rep(1/k,k)# Use as starting point for pi
|
|
}
|
|
if(is.null(weights)){
|
|
weights <- rep(1/dim(matrix_lik)[1], dim(matrix_lik)[1])
|
|
}
|
|
res = SQUAREM::squarem(par=pi_init,fixptfn=w_fixpoint, objfn=w_negpenloglik,
|
|
matrix_lik=matrix_lik, prior=prior, w=weights,
|
|
control=control)
|
|
return(list(pihat = normalize(pmax(0,res$par)), B=res$value.objfn,
|
|
niter = res$iter, converged=res$convergence, control=control))
|
|
}
|
|
|
|
w_fixpoint = function(pi, matrix_lik, prior, w){
|
|
pi = normalize(pmax(0,pi)) #avoid occasional problems with negative pis due
|
|
#to rounding
|
|
m = t(pi * t(matrix_lik)) # matrix_lik is n by k; so this is also n by k
|
|
m.rowsum = rowSums(m)
|
|
classprob = m/m.rowsum #an n by k matrix
|
|
pinew = normalize(colSums(w*classprob) + prior - 1)
|
|
return(pinew)
|
|
}
|
|
|
|
w_negpenloglik = function(pi,matrix_lik,prior, w)
|
|
{return(-w_penloglik(pi,matrix_lik,prior,w))}
|
|
|
|
w_penloglik = function(pi, matrix_lik, prior, w){
|
|
pi = normalize(pmax(0,pi))
|
|
m = t(pi * t(matrix_lik)) # matrix_lik is n by k; so this is also n by k
|
|
m.rowsum = rowSums(m)
|
|
loglik = sum(w*log(m.rowsum))
|
|
subset = (prior != 1.0)
|
|
priordens = sum((prior-1)[subset]*log(pi[subset]))
|
|
return(loglik+priordens)
|
|
}
|
|
|
|
# sets up a default for squarem, and modifies it with other provided values
|
|
set_control_squarem=function(control,nobs){
|
|
control.default=list(K = 1, method=3, square=TRUE, step.min0=1,
|
|
step.max0=1, mstep=4, kr=1, objfn.inc=1,tol=1.e-07,
|
|
maxiter=5000, trace=FALSE)
|
|
if (nobs > 50000) control.default$trace = TRUE
|
|
control.default$tol = min(0.1/nobs,1.e-7) # set default convergence criteria
|
|
##to be more stringent for larger samples
|
|
namc=names(control)
|
|
if (!all(namc %in% names(control.default)))
|
|
stop("unknown names in control: ",
|
|
namc[!(namc %in% names(control.default))])
|
|
control=utils::modifyList(control.default, control)
|
|
return(control)
|
|
}
|
|
########################################################################
|
|
#=================
|
|
# nlogomaker()
|
|
#=================
|
|
function (table, ic = FALSE, score = c("diff", "log", "log-odds",
|
|
"probKL", "ratio", "unscaled_log", "wKL"), color_profile,
|
|
total_chars = c("A", "B", "C", "D", "E", "F", "G", "H", "I",
|
|
"J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T",
|
|
"U", "V", "W", "X", "Y", "Z", "zero", "one", "two", "three",
|
|
"four", "five", "six", "seven", "eight", "nine", "dot",
|
|
"comma", "dash", "colon", "semicolon", "leftarrow", "rightarrow"),
|
|
bg = NULL, frame_width = NULL, yscale_change = TRUE, pop_name = NULL,
|
|
addlogos = NULL, addlogos_text = NULL, newpage = TRUE, yrange = NULL,
|
|
xaxis = TRUE, yaxis = TRUE, xaxis_fontsize = 10, xlab_fontsize = 15,
|
|
y_fontsize = 15, main_fontsize = 16, start = 0.001, xlab = "X",
|
|
ylab = "Enrichment Score", col_line_split = "grey80", control = list())
|
|
{
|
|
control.default <- list(hist = FALSE, alpha = 1, opt = 1,
|
|
scale0 = 0.01, scale1 = 0.99, tofill_pos = TRUE, tofill_neg = TRUE,
|
|
lwd = 2, epsilon = 0.01, quant = 0.5, symm = TRUE, gap_xlab = 3,
|
|
gap_ylab = 3.5, minbins = 2, round_off = 1, lowrange = 0,
|
|
uprange = 0, negbins = 3, posbins = 3, size_port = 0,
|
|
viewport.margin.bottom = NULL, viewport.margin.left = NULL,
|
|
viewport.margin.top = NULL, viewport.margin.right = NULL,
|
|
use_seqLogo_heights = FALSE)
|
|
control <- modifyList(control.default, control)
|
|
scale0 <- control$scale0
|
|
scale1 <- control$scale1
|
|
if (length(score) != 1) {
|
|
score <- "log"
|
|
}
|
|
if (ic & score == "unscaled_log") {
|
|
warning("ic = TRUE not compatible with score = `unscaled-log`: \n switching to ic = FALSE")
|
|
ic = FALSE
|
|
}
|
|
if (ic & score == "wKL") {
|
|
warning("ic = TRUE not compatible with score = `wKL`:\n switching to ic = FALSE")
|
|
ic = FALSE
|
|
}
|
|
if (class(table) == "data.frame") {
|
|
table <- as.matrix(table)
|
|
}
|
|
else if (class(table) != "matrix") {
|
|
stop("the table must be of class matrix or data.frame")
|
|
}
|
|
chars <- as.character(rownames(table))
|
|
npos <- ncol(table)
|
|
table <- apply(table + 1e-04, 2, normalize3)
|
|
control_heights <- list(alpha = control$alpha, epsilon = control$epsilon,
|
|
opt = control$opt, hist = control$hist, quant = control$quant,
|
|
symm = control$symm)
|
|
ll <- do.call(get_logo_heights, append(list(table = table,
|
|
ic = ic, bg = bg, score = score), control_heights))
|
|
pos_ic <- ll$pos_ic
|
|
neg_ic <- ll$neg_ic
|
|
table_mat_pos_norm <- ll$table_mat_pos_norm
|
|
table_mat_neg_norm <- ll$table_mat_neg_norm
|
|
if (color_profile$type == "per_column") {
|
|
if (length(color_profile$col) != npos) {
|
|
stop("number of colors must equal the number of columns of the table")
|
|
}
|
|
}
|
|
if (color_profile$type == "per_row") {
|
|
if (length(color_profile$col) != nrow(table)) {
|
|
stop("the number of colors must match the number of rows of the table")
|
|
}
|
|
}
|
|
if (is.null(frame_width)) {
|
|
message("frame width not provided, taken to be 1")
|
|
wt <- rep(1, dim(table)[2])
|
|
}
|
|
if (!is.null(frame_width)) {
|
|
if (length(frame_width) == 1) {
|
|
wt <- rep(frame_width, dim(table)[2])
|
|
}
|
|
else {
|
|
wt <- frame_width
|
|
}
|
|
}
|
|
letters <- list(x = NULL, y = NULL, id = NULL, fill = NULL)
|
|
facs <- pos_ic
|
|
ylim <- ceiling(max(pos_ic))
|
|
x.pos <- 0
|
|
slash_inds <- grep("/", chars)
|
|
if (color_profile$type == "per_row") {
|
|
for (j in seq_len(npos)) {
|
|
column <- table_mat_pos_norm[, j]
|
|
hts <- as.numeric(0.99 * column * facs[j])
|
|
letterOrder <- order(hts)
|
|
y.pos <- 0
|
|
for (i in seq_along(chars)) {
|
|
letter <- chars[letterOrder[i]]
|
|
col <- color_profile$col[letterOrder[i]]
|
|
ht <- hts[letterOrder[i]]
|
|
if (length(intersect(letterOrder[i], slash_inds)) !=
|
|
0) {
|
|
if (ht > 0)
|
|
letters <- addLetter_n(letters, letter, tofill = control$tofill_pos,
|
|
lwd = control$lwd, col, total_chars, x.pos,
|
|
y.pos, ht, wt[j], scale0 = scale0, scale1 = scale1,
|
|
addlogos = addlogos, addlogos_text = addlogos_text)
|
|
}
|
|
else {
|
|
if (ht > 0)
|
|
letters <- addLetter_n(letters, letter, tofill = control$tofill_pos,
|
|
lwd = control$lwd, col, total_chars, x.pos,
|
|
y.pos, ht, wt[j], scale0 = scale0, scale1 = scale1,
|
|
addlogos = NULL, addlogos_text = NULL)
|
|
}
|
|
y.pos <- y.pos + ht + start
|
|
}
|
|
x.pos <- x.pos + wt[j]
|
|
}
|
|
}
|
|
if (color_profile$type == "per_symbol") {
|
|
for (j in seq_len(npos)) {
|
|
column <- table_mat_pos_norm[, j]
|
|
hts <- as.numeric(0.99 * column * facs[j])
|
|
letterOrder <- order(hts)
|
|
y.pos <- 0
|
|
for (i in seq_along(chars)) {
|
|
letter <- chars[letterOrder[i]]
|
|
ht <- hts[letterOrder[i]]
|
|
if (length(intersect(letterOrder[i], slash_inds)) !=
|
|
0) {
|
|
if (ht > 0)
|
|
letters <- addLetter_n(letters, letter, tofill = control$tofill_pos,
|
|
lwd = control$lwd, color_profile$col, total_chars,
|
|
x.pos, y.pos, ht, wt[j], scale0 = scale0,
|
|
scale1 = scale1, addlogos = addlogos, addlogos_text = addlogos_text)
|
|
}
|
|
else {
|
|
if (ht > 0)
|
|
letters <- addLetter_n(letters, letter, tofill = control$tofill_pos,
|
|
lwd = control$lwd, color_profile$col, total_chars,
|
|
x.pos, y.pos, ht, wt[j], scale0 = scale0,
|
|
scale1 = scale1, addlogos = NULL, addlogos_text = NULL)
|
|
}
|
|
y.pos <- y.pos + ht + start
|
|
}
|
|
x.pos <- x.pos + wt[j]
|
|
}
|
|
}
|
|
if (color_profile$type == "per_column") {
|
|
for (j in seq_len(npos)) {
|
|
column <- table_mat_pos_norm[, j]
|
|
hts <- as.numeric(0.99 * column * facs[j])
|
|
letterOrder <- order(hts)
|
|
y.pos <- 0
|
|
for (i in seq_along(chars)) {
|
|
letter <- chars[letterOrder[i]]
|
|
ht <- hts[letterOrder[i]]
|
|
if (length(intersect(letterOrder[i], slash_inds)) !=
|
|
0) {
|
|
if (ht > 0)
|
|
letters <- addLetter_n(letters, letter, tofill = control$tofill_pos,
|
|
lwd = control$lwd, color_profile$col[j],
|
|
total_chars, x.pos, y.pos, ht, wt[j], scale0 = scale0,
|
|
scale1 = scale1, addlogos = addlogos, addlogos_text = addlogos_text)
|
|
}
|
|
else {
|
|
if (ht > 0)
|
|
letters <- addLetter_n(letters, letter, tofill = control$tofill_pos,
|
|
lwd = control$lwd, color_profile$col[j],
|
|
total_chars, x.pos, y.pos, ht, wt[j], scale0 = scale0,
|
|
scale1 = scale1, addlogos = NULL, addlogos_text = NULL)
|
|
}
|
|
y.pos <- y.pos + ht + start
|
|
}
|
|
x.pos <- x.pos + wt[j]
|
|
}
|
|
}
|
|
xlim <- cumsum(wt) - wt/2
|
|
low_xlim <- c(xlim - 0.5 * wt, xlim[length(xlim)] + 0.5 *
|
|
wt[length(xlim)])
|
|
letters$y <- letters$y + max(max(abs(neg_ic)), control$lowrange)
|
|
y1 <- min(letters$y)
|
|
max1 <- max(letters$y)
|
|
yrange <- max(max(pos_ic), control$uprange) + max(max(neg_ic),
|
|
control$lowrange)
|
|
ylim <- yrange
|
|
ylim_scale <- seq(0, ylim, length.out = 6)
|
|
negbins <- control$negbins
|
|
posbins <- control$posbins
|
|
ic_lim_scale <- c(seq(0, y1, length.out = negbins), seq(y1,
|
|
ylim, length.out = posbins))
|
|
letters$y <- letters$y/ylim
|
|
markers <- round(ic_lim_scale, control$round_off) - round(y1,
|
|
control$round_off)
|
|
if (newpage) {
|
|
grid::grid.newpage()
|
|
}
|
|
if (control$use_seqLogo_heights) {
|
|
if (is.null(control$viewport.margin.bottom)) {
|
|
bottomMargin <- ifelse(xaxis, 1 + xaxis_fontsize/3.5,
|
|
3)
|
|
}
|
|
else {
|
|
bottomMargin <- control$viewport.margin.bottom
|
|
}
|
|
if (is.null(control$viewport.margin.left)) {
|
|
leftMargin <- ifelse(xaxis, 2 + xaxis_fontsize/3.5,
|
|
3)
|
|
}
|
|
else {
|
|
leftMargin <- control$viewport.margin.left
|
|
}
|
|
if (is.null(control$viewport.margin.top)) {
|
|
topMargin <- max(ylim) + 0.5
|
|
}
|
|
else {
|
|
topMargin <- control$viewport.margin.top
|
|
}
|
|
if (is.null(control$viewport.margin.right)) {
|
|
rightMargin <- max(ylim)
|
|
}
|
|
else {
|
|
rightMargin <- control$viewport.margin.right
|
|
}
|
|
}
|
|
else {
|
|
if (is.null(control$viewport.margin.bottom)) {
|
|
control$viewport.margin.bottom = 3
|
|
}
|
|
if (is.null(control$viewport.margin.left)) {
|
|
control$viewport.margin.left = 5
|
|
}
|
|
if (is.null(control$viewport.margin.top)) {
|
|
control$viewport.margin.top = 2.5
|
|
}
|
|
if (is.null(control$viewport.margin.right)) {
|
|
control$viewport.margin.right = 2.5
|
|
}
|
|
topMargin <- control$viewport.margin.top
|
|
rightMargin <- control$viewport.margin.right
|
|
leftMargin <- control$viewport.margin.left
|
|
bottomMargin <- control$viewport.margin.bottom
|
|
}
|
|
start1 <- 0.5
|
|
wt <- wt/min(wt)
|
|
wtdiff <- abs(wt[2:length(wt)] - wt[1:(length(wt) - 1)])
|
|
xticks <- array(0, dim(table)[2])
|
|
for (w in 1:length(wt)) {
|
|
xticks[w] <- start1
|
|
start1 <- xticks[w] + 1 + (wtdiff[w])/2
|
|
}
|
|
grid::pushViewport(grid::plotViewport(c(bottomMargin, leftMargin,
|
|
topMargin, rightMargin)))
|
|
grid::pushViewport(grid::dataViewport(0:(ncol(table) + control$size_port),
|
|
0:1, name = "vp1"))
|
|
if (control$tofill_pos) {
|
|
grid::grid.polygon(x = grid::unit(letters$x, "native"),
|
|
y = grid::unit(letters$y, "native"), id = letters$id,
|
|
gp = grid::gpar(fill = letters$fill, col = "transparent"))
|
|
}
|
|
else {
|
|
grid::grid.polygon(x = grid::unit(letters$x, "native"),
|
|
y = grid::unit(letters$y, "native"), id = letters$id,
|
|
gp = grid::gpar(col = letters$colfill, lwd = control$lwd))
|
|
}
|
|
for (n in 2:length(xlim)) {
|
|
grid::grid.lines(x = grid::unit(low_xlim[n], "native"),
|
|
y = grid::unit(c(0, 1), "native"), gp = grid::gpar(col = col_line_split))
|
|
}
|
|
if (is.null(pop_name)) {
|
|
if (ic) {
|
|
grid::grid.text(paste0("EDLogo plot: (", score, "-ic",
|
|
")"), y = grid::unit(1, "npc") + grid::unit(0.8,
|
|
"lines"), gp = grid::gpar(fontsize = main_fontsize))
|
|
}
|
|
else {
|
|
grid::grid.text(paste0("EDLogo plot: (", score, ")"),
|
|
y = grid::unit(1, "npc") + grid::unit(0.8, "lines"),
|
|
gp = grid::gpar(fontsize = main_fontsize))
|
|
}
|
|
}
|
|
else {
|
|
grid::grid.text(paste0(pop_name), y = grid::unit(1, "npc") +
|
|
grid::unit(0.8, "lines"), gp = grid::gpar(fontsize = main_fontsize))
|
|
}
|
|
if (xaxis) {
|
|
grid::grid.xaxis(at = xticks, draw = TRUE, label = colnames(table),
|
|
gp = grid::gpar(fontsize = xaxis_fontsize))
|
|
grid::grid.text(xlab, y = grid::unit(-control$gap_xlab,
|
|
"lines"), gp = grid::gpar(fontsize = xaxis_fontsize))
|
|
}
|
|
if (yaxis) {
|
|
grid::grid.yaxis(at = ic_lim_scale/ylim, draw = TRUE,
|
|
label = round(ic_lim_scale, control$round_off) -
|
|
round(y1, control$round_off), gp = grid::gpar(fontsize = y_fontsize))
|
|
grid::grid.text(ylab, x = grid::unit(-control$gap_ylab,
|
|
"lines"), rot = 90, gp = grid::gpar(fontsize = y_fontsize))
|
|
}
|
|
x.pos <- 0
|
|
letters <- list(x = NULL, y = NULL, id = NULL, fill = NULL)
|
|
npos <- ncol(table)
|
|
if (is.null(frame_width)) {
|
|
message("frame width not provided, taken to be 1")
|
|
wt <- rep(1, dim(table)[2])
|
|
}
|
|
if (!is.null(frame_width)) {
|
|
if (length(frame_width) == 1) {
|
|
wt <- rep(frame_width, dim(table)[2])
|
|
}
|
|
else {
|
|
wt <- frame_width
|
|
}
|
|
}
|
|
letters <- list(x = NULL, y = NULL, id = NULL, fill = NULL)
|
|
facs <- neg_ic
|
|
ylim <- yrange
|
|
slash_inds <- grep("/", chars)
|
|
if (color_profile$type == "per_row") {
|
|
for (j in seq_len(npos)) {
|
|
column <- table_mat_neg_norm[, j]
|
|
hts <- as.numeric(0.99 * column * facs[j])
|
|
letterOrder <- rev(order(hts))
|
|
y.pos <- -neg_ic[j]
|
|
for (i in seq_along(chars)) {
|
|
letter <- chars[letterOrder[i]]
|
|
col <- color_profile$col[letterOrder[i]]
|
|
ht <- hts[letterOrder[i]]
|
|
if (length(intersect(letterOrder[i], slash_inds)) !=
|
|
0) {
|
|
if (ht > 0)
|
|
letters <- addLetter_n(letters, letter, tofill = control$tofill_neg,
|
|
lwd = control$lwd, col, total_chars, x.pos,
|
|
y.pos, ht, wt[j], scale0 = scale0, scale1 = scale1,
|
|
addlogos = addlogos, addlogos_text = addlogos_text)
|
|
}
|
|
else {
|
|
if (ht > 0)
|
|
letters <- addLetter_n(letters, letter, tofill = control$tofill_neg,
|
|
lwd = control$lwd, col, total_chars, x.pos,
|
|
y.pos, ht, wt[j], scale0 = scale0, scale1 = scale1,
|
|
addlogos = NULL, addlogos_text = NULL)
|
|
}
|
|
y.pos <- y.pos + ht + start
|
|
}
|
|
x.pos <- x.pos + wt[j]
|
|
}
|
|
}
|
|
if (color_profile$type == "per_symbol") {
|
|
for (j in seq_len(npos)) {
|
|
column <- table_mat_neg_norm[, j]
|
|
hts <- as.numeric(0.99 * column * facs[j])
|
|
letterOrder <- rev(order(hts))
|
|
y.pos <- -neg_ic[j]
|
|
for (i in seq_along(chars)) {
|
|
letter <- chars[letterOrder[i]]
|
|
ht <- hts[letterOrder[i]]
|
|
if (length(intersect(letterOrder[i], slash_inds)) !=
|
|
0) {
|
|
if (ht > 0)
|
|
letters <- addLetter_n(letters, letter, tofill = control$tofill_neg,
|
|
lwd = control$lwd, color_profile$col, total_chars,
|
|
x.pos, y.pos, ht, wt[j], scale0 = scale0,
|
|
scale1 = scale1, addlogos = addlogos, addlogos_text = addlogos_text)
|
|
}
|
|
else {
|
|
if (ht > 0)
|
|
letters <- addLetter_n(letters, letter, tofill = control$tofill_neg,
|
|
lwd = control$lwd, color_profile$col, total_chars,
|
|
x.pos, y.pos, ht, wt[j], scale0 = scale0,
|
|
scale1 = scale1, addlogos = NULL, addlogos_text = NULL)
|
|
}
|
|
y.pos <- y.pos + ht + start
|
|
}
|
|
x.pos <- x.pos + wt[j]
|
|
}
|
|
}
|
|
if (color_profile$type == "per_column") {
|
|
for (j in seq_len(npos)) {
|
|
column <- table_mat_neg_norm[, j]
|
|
hts <- as.numeric(0.99 * column * facs[j])
|
|
letterOrder <- rev(order(hts))
|
|
y.pos <- -neg_ic[j]
|
|
for (i in seq_along(chars)) {
|
|
letter <- chars[letterOrder[i]]
|
|
ht <- hts[letterOrder[i]]
|
|
if (length(intersect(letterOrder[i], slash_inds)) !=
|
|
0) {
|
|
if (ht > 0)
|
|
letters <- addLetter_n(letters, letter, tofill = control$tofill_neg,
|
|
lwd = control$lwd, color_profile$col[j],
|
|
total_chars, x.pos, y.pos, ht, wt[j], scale0 = scale0,
|
|
scale1 = scale1, addlogos = addlogos, addlogos_text = addlogos_text)
|
|
}
|
|
else {
|
|
if (ht > 0)
|
|
letters <- addLetter_n(letters, letter, tofill = control$tofill_neg,
|
|
lwd = control$lwd, color_profile$col[j],
|
|
total_chars, x.pos, y.pos, ht, wt[j], scale0 = scale0,
|
|
scale1 = scale1, addlogos = NULL, addlogos_text = NULL)
|
|
}
|
|
y.pos <- y.pos + ht + start
|
|
}
|
|
x.pos <- x.pos + wt[j]
|
|
}
|
|
}
|
|
letters$y <- letters$y + max(max(abs(neg_ic)), control$lowrange)
|
|
letters$y <- letters$y/ylim
|
|
xlim <- cumsum(wt) - wt/2
|
|
low_xlim <- c(xlim - 0.5 * wt, xlim[length(xlim)] + 0.5 *
|
|
wt[length(xlim)])
|
|
ylim_scale <- seq(0, ylim, length.out = 6)
|
|
ic_lim_scale <- seq(-max(max(neg_ic), control$lowrange),
|
|
0, length.out = 6)
|
|
if (control$tofill_neg) {
|
|
grid::grid.polygon(x = grid::unit(letters$x, "native"),
|
|
y = grid::unit(letters$y, "native"), id = letters$id,
|
|
gp = grid::gpar(fill = letters$fill, col = "transparent"))
|
|
}
|
|
else {
|
|
grid::grid.polygon(x = grid::unit(letters$x, "native"),
|
|
y = grid::unit(letters$y, "native"), id = letters$id,
|
|
gp = grid::gpar(col = letters$colfill, lwd = control$lwd))
|
|
}
|
|
grid::grid.lines(x = grid::unit(c(0, (xlim + 0.5 * wt)),
|
|
"native"), y = grid::unit(y1/ylim, "native"), gp = grid::gpar(col = "black"))
|
|
grid::popViewport()
|
|
grid::popViewport()
|
|
return(ll)
|
|
}
|
|
########################################################################
|
|
#===========================
|
|
# get_logo_heights()
|
|
#===========================
|
|
|
|
get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-odds",
|
|
"probKL", "ratio", "unscaled_log", "wKL"), bg = NULL, epsilon = 0.01,
|
|
opt = 1, symm = TRUE, alpha = 1, hist = FALSE, quant = 0.5)
|
|
{
|
|
if (ic & score == "unscaled_log") {
|
|
warning("ic = TRUE not compatible with score = `unscaled-log`: switching to\n ic = FALSE")
|
|
ic = FALSE
|
|
}
|
|
if (ic & score == "wKL") {
|
|
warning("ic = TRUE not compatible with score = `wKL`: switching to \n ic = FALSE")
|
|
ic = FALSE
|
|
}
|
|
if (length(score) != 1) {
|
|
stop("score can be wither diff, log, log-odds, probKL, ratio, \n unscaled_log or wKL")
|
|
}
|
|
if (is.vector(bg) == TRUE) {
|
|
if (length(bg) != dim(table)[1]) {
|
|
stop("If background prob (bg) is a vector, the length of bg must\n equal the number of symbols for the logo plot")
|
|
}
|
|
else if (length(which(is.na(table))) > 0) {
|
|
stop("For NA in table, a vector bg is not allowed")
|
|
}
|
|
else {
|
|
bgmat <- bg %*% t(rep(1, dim(table)[2]))
|
|
bgmat[which(is.na(table))] <- NA
|
|
bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)])))
|
|
}
|
|
}
|
|
else if (is.matrix(bg) == TRUE) {
|
|
if (dim(bg)[1] != dim(table)[1] | dim(bg)[2] != dim(table)[2]) {
|
|
stop("If background prob (bg) is a matrix, its dimensions must\n match that of the table")
|
|
}
|
|
else {
|
|
bgmat <- bg
|
|
bgmat[which(is.na(table))] <- NA
|
|
bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)])))
|
|
}
|
|
}
|
|
else {
|
|
message("using a background with equal probability for all symbols")
|
|
bgmat <- matrix(1/dim(table)[1], dim(table)[1], dim(table)[2])
|
|
bgmat[which(is.na(table))] <- NA
|
|
bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)])))
|
|
}
|
|
table <- apply(table + 1e-04, 2, normalize4)
|
|
bgmat <- apply(bgmat + 1e-04, 2, normalize4)
|
|
if (class(table) == "data.frame") {
|
|
table <- as.matrix(table)
|
|
}
|
|
else if (class(table) != "matrix") {
|
|
stop("the table must be of class matrix or data.frame")
|
|
}
|
|
table_mat_norm <- apply(table, 2, function(x) return(x/sum(x[!is.na(x)])))
|
|
bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)])))
|
|
npos <- ncol(table_mat_norm)
|
|
chars <- as.character(rownames(table_mat_norm))
|
|
if (!ic) {
|
|
if (score == "diff") {
|
|
table_mat_adj <- apply((table_mat_norm + epsilon) -
|
|
(bgmat + epsilon), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
else if (score == "log") {
|
|
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
|
|
epsilon), base = 2), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
else if (score == "log-odds") {
|
|
if (opt == 1) {
|
|
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
|
|
epsilon), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = log(x/(sum(x) - x), base = 2)
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
return(z)
|
|
}
|
|
else {
|
|
w <- x[!is.na(x)]
|
|
y <- log(w/(sum(w) - w), base = 2)
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
else {
|
|
table_mat_adj <- apply((table_mat_norm + epsilon),
|
|
2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = log(x/(sum(x) - x), base = 2)
|
|
z <- y - quantile(y, quant)
|
|
return(z)
|
|
}
|
|
else {
|
|
w <- x[!is.na(x)]
|
|
y <- log(w/(sum(w) - w), base = 2)
|
|
z <- y - quantile(y, quant)
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
else if (score == "probKL") {
|
|
table_mat_adj <- apply((table_mat_norm + epsilon) *
|
|
log((table_mat_norm + epsilon)/(bgmat + epsilon),
|
|
base = 2), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
else if (score == "ratio") {
|
|
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
|
|
epsilon), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
else if (score == "unscaled_log") {
|
|
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
|
|
epsilon), base = 2), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
else if (score == "wKL") {
|
|
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
|
|
epsilon), base = 2), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
else {
|
|
stop("The value of score chosen is not compatible")
|
|
}
|
|
}
|
|
else {
|
|
if (score == "diff") {
|
|
if (opt == 1) {
|
|
table_mat_adj <- apply((table_mat_norm + epsilon) -
|
|
(bgmat + epsilon), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
else {
|
|
table_mat_adj <- apply(table_mat_norm + epsilon,
|
|
2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
z <- y - quantile(y, quant)
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
z <- y - quantile(y, quant)
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
else if (score == "log") {
|
|
if (opt == 1) {
|
|
table_mat_adj <- apply(log((table_mat_norm +
|
|
epsilon)/(bgmat + epsilon), base = 2), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
else {
|
|
table_mat_adj <- apply(log(table_mat_norm + epsilon,
|
|
base = 2), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
z <- y - quantile(y, quant)
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
z <- y - quantile(y, quant)
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
else if (score == "log-odds") {
|
|
if (opt == 1) {
|
|
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
|
|
epsilon), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = log(x/(sum(x) - x), base = 2)
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
return(z)
|
|
}
|
|
else {
|
|
w <- x[!is.na(x)]
|
|
y <- log(w/(sum(w) - w), base = 2)
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
else {
|
|
table_mat_adj <- apply((table_mat_norm + epsilon),
|
|
2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = log(x/(sum(x) - x), base = 2)
|
|
z <- y - quantile(y, quant)
|
|
return(z)
|
|
}
|
|
else {
|
|
w <- x[!is.na(x)]
|
|
y <- log(w/(sum(w) - w), base = 2)
|
|
z <- y - quantile(y, quant)
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
else if (score == "probKL") {
|
|
if (opt == 1) {
|
|
table_mat_adj <- apply((table_mat_norm + epsilon) *
|
|
log((table_mat_norm + epsilon)/(bgmat + epsilon),
|
|
base = 2), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
else {
|
|
table_mat_adj <- apply((table_mat_norm + epsilon) *
|
|
log(table_mat_norm + epsilon, base = 2), 2,
|
|
function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
z <- y - quantile(y, quant)
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
z <- y - quantile(y, quant)
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
else if (score == "ratio") {
|
|
if (opt == 1) {
|
|
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
|
|
epsilon), 2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
if (quant != 0) {
|
|
qq <- quantile(y, quant)
|
|
}
|
|
else {
|
|
qq <- 0
|
|
}
|
|
z <- y - qq
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
else {
|
|
table_mat_adj <- apply(table_mat_norm + scale,
|
|
2, function(x) {
|
|
indices <- which(is.na(x))
|
|
if (length(indices) == 0) {
|
|
y = x
|
|
z <- y - quantile(y, quant)
|
|
return(z)
|
|
}
|
|
else {
|
|
y <- x[!is.na(x)]
|
|
z <- y - quantile(y, quant)
|
|
zext <- array(0, length(x))
|
|
zext[indices] <- 0
|
|
zext[-indices] <- z
|
|
return(zext)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
else {
|
|
stop("The value of score chosen is not compatible")
|
|
}
|
|
}
|
|
if (!ic) {
|
|
table_mat_pos <- table_mat_adj
|
|
table_mat_pos[table_mat_pos <= 0] = 0
|
|
table_mat_pos_norm <- apply(table_mat_pos, 2, function(x) return(x/sum(x)))
|
|
table_mat_pos_norm[table_mat_pos_norm == "NaN"] = 0
|
|
table_mat_neg <- table_mat_adj
|
|
table_mat_neg[table_mat_neg >= 0] = 0
|
|
table_mat_neg_norm <- apply(abs(table_mat_neg), 2, function(x) return(x/sum(x)))
|
|
table_mat_neg_norm[table_mat_neg_norm == "NaN"] = 0
|
|
pos_ic <- colSums(table_mat_pos)
|
|
neg_ic <- colSums(abs(table_mat_neg))
|
|
ll <- list()
|
|
ll$pos_ic <- pos_ic
|
|
ll$neg_ic <- neg_ic
|
|
ll$table_mat_pos_norm <- table_mat_pos_norm
|
|
ll$table_mat_neg_norm <- table_mat_neg_norm
|
|
}
|
|
else {
|
|
table_mat_pos <- table_mat_adj
|
|
table_mat_pos[table_mat_pos <= 0] = 0
|
|
table_mat_pos_norm <- apply(table_mat_pos, 2, function(x) return(x/sum(x)))
|
|
table_mat_pos_norm[table_mat_pos_norm == "NaN"] = 0
|
|
table_mat_neg <- table_mat_adj
|
|
table_mat_neg[table_mat_neg >= 0] = 0
|
|
table_mat_neg_norm <- apply(table_mat_neg, 2, function(x) return(x/sum(x)))
|
|
table_mat_neg_norm[table_mat_neg_norm == "NaN"] = 0
|
|
table_mat_norm <- replace(table_mat_norm, is.na(table_mat_norm),
|
|
0)
|
|
for (j in 1:dim(table_mat_neg_norm)[2]) {
|
|
if (sum(table_mat_neg_norm[, j]) == 0) {
|
|
table_mat_neg_norm[, j] <- normalize4(table_mat_neg_norm[,
|
|
j] + 0.001)
|
|
}
|
|
}
|
|
for (j in 1:dim(table_mat_pos_norm)[2]) {
|
|
if (sum(table_mat_pos_norm[, j]) == 0) {
|
|
table_mat_pos_norm[, j] <- normalize4(table_mat_pos_norm[,
|
|
j] + 0.001)
|
|
}
|
|
}
|
|
if (symm == TRUE) {
|
|
table_mat_norm[which(is.na(table))] <- NA
|
|
ic <- 0.5 * (ic_computer(table_mat_norm, alpha, hist = hist,
|
|
bg = bgmat) + ic_computer(bgmat, alpha, hist = hist,
|
|
bg = table_mat_norm))
|
|
}
|
|
else {
|
|
table_mat_norm[which(is.na(table))] <- NA
|
|
ic <- ic_computer(table_mat_norm, alpha, hist = hist,
|
|
bg = bgmat)
|
|
}
|
|
tab_neg <- apply(table_mat_adj, 2, function(x) {
|
|
y = x[x < 0]
|
|
if (length(y) == 0) {
|
|
return(0)
|
|
}
|
|
else {
|
|
return(abs(sum(y)))
|
|
}
|
|
})
|
|
tab_pos <- apply(table_mat_adj, 2, function(x) {
|
|
y = x[x > 0]
|
|
if (length(y) == 0) {
|
|
return(0)
|
|
}
|
|
else {
|
|
return(abs(sum(y)))
|
|
}
|
|
})
|
|
tab_pos[tab_pos == 0] <- 0.001
|
|
tab_neg[tab_neg == 0] <- 0.001
|
|
pos_neg_scaling <- apply(rbind(tab_pos, tab_neg), 2,
|
|
function(x) return(x/sum(x)))
|
|
pos_ic <- pos_neg_scaling[1, ] * ic
|
|
neg_ic <- pos_neg_scaling[2, ] * ic
|
|
ll <- list()
|
|
ll$pos_ic <- pos_ic
|
|
ll$neg_ic <- neg_ic
|
|
ll$table_mat_pos_norm <- table_mat_pos_norm
|
|
ll$table_mat_neg_norm <- table_mat_neg_norm
|
|
}
|
|
return(ll)
|
|
}
|