added get_logo_heights() in my_logolas.R

This commit is contained in:
Tanushree Tunstall 2022-01-26 11:16:02 +00:00
parent 92af9fd565
commit 1c1e98ad4f

View file

@ -1,9 +1,9 @@
######################## ########################################################################
# From package: logolas # From package: logolas
# need to source locally # need to source locally (along with dependant functions)
# for generating the pfm and pfm_scaled # for generating the pfm and pfm_scaled
# needed for gettting the ED values and EDPlot # needed for gettting the ED values and EDPlot
######################## ########################################################################
my_logolas <- function (data, type = c("Logo", "EDLogo"), use_dash = TRUE, my_logolas <- function (data, type = c("Logo", "EDLogo"), use_dash = TRUE,
bg = NULL, color_type = NULL, colors = NULL, color_seed = 2030, bg = NULL, color_type = NULL, colors = NULL, color_seed = 2030,
@ -262,10 +262,10 @@ my_logolas <- function (data, type = c("Logo", "EDLogo"), use_dash = TRUE,
#return(out) #return(out)
} }
} }
########################################################################
################ #=================
# dash.r # dash()
################ #=================
# @title Dirichlet adaptive shrinkage of compositional data using dash # @title Dirichlet adaptive shrinkage of compositional data using dash
# #
# @description Given a matrix of compositional counts data, with samples along # @description Given a matrix of compositional counts data, with samples along
@ -636,10 +636,10 @@ logfac = function(x){
} }
return(out) return(out)
} }
########################################################################
################ #=================
# mixEM.R # mixEM()
################ #=================
# @title Estimate mixture proportions of a mixture model by EM algorithm # @title Estimate mixture proportions of a mixture model by EM algorithm
# #
@ -799,10 +799,10 @@ set_control_squarem=function(control,nobs){
control=utils::modifyList(control.default, control) control=utils::modifyList(control.default, control)
return(control) return(control)
} }
#======================================================================= ########################################################################
################ #=================
# nlogomaker.R() # nlogomaker()
################ #=================
function (table, ic = FALSE, score = c("diff", "log", "log-odds", function (table, ic = FALSE, score = c("diff", "log", "log-odds",
"probKL", "ratio", "unscaled_log", "wKL"), color_profile, "probKL", "ratio", "unscaled_log", "wKL"), color_profile,
total_chars = c("A", "B", "C", "D", "E", "F", "G", "H", "I", total_chars = c("A", "B", "C", "D", "E", "F", "G", "H", "I",
@ -1223,3 +1223,662 @@ function (table, ic = FALSE, score = c("diff", "log", "log-odds",
grid::popViewport() grid::popViewport()
return(ll) return(ll)
} }
########################################################################
#===========================
# get_logo_heights()
#===========================
get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-odds",
"probKL", "ratio", "unscaled_log", "wKL"), bg = NULL, epsilon = 0.01,
opt = 1, symm = TRUE, alpha = 1, hist = FALSE, quant = 0.5)
{
if (ic & score == "unscaled_log") {
warning("ic = TRUE not compatible with score = `unscaled-log`: switching to\n ic = FALSE")
ic = FALSE
}
if (ic & score == "wKL") {
warning("ic = TRUE not compatible with score = `wKL`: switching to \n ic = FALSE")
ic = FALSE
}
if (length(score) != 1) {
stop("score can be wither diff, log, log-odds, probKL, ratio, \n unscaled_log or wKL")
}
if (is.vector(bg) == TRUE) {
if (length(bg) != dim(table)[1]) {
stop("If background prob (bg) is a vector, the length of bg must\n equal the number of symbols for the logo plot")
}
else if (length(which(is.na(table))) > 0) {
stop("For NA in table, a vector bg is not allowed")
}
else {
bgmat <- bg %*% t(rep(1, dim(table)[2]))
bgmat[which(is.na(table))] <- NA
bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)])))
}
}
else if (is.matrix(bg) == TRUE) {
if (dim(bg)[1] != dim(table)[1] | dim(bg)[2] != dim(table)[2]) {
stop("If background prob (bg) is a matrix, its dimensions must\n match that of the table")
}
else {
bgmat <- bg
bgmat[which(is.na(table))] <- NA
bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)])))
}
}
else {
message("using a background with equal probability for all symbols")
bgmat <- matrix(1/dim(table)[1], dim(table)[1], dim(table)[2])
bgmat[which(is.na(table))] <- NA
bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)])))
}
table <- apply(table + 1e-04, 2, normalize4)
bgmat <- apply(bgmat + 1e-04, 2, normalize4)
if (class(table) == "data.frame") {
table <- as.matrix(table)
}
else if (class(table) != "matrix") {
stop("the table must be of class matrix or data.frame")
}
table_mat_norm <- apply(table, 2, function(x) return(x/sum(x[!is.na(x)])))
bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)])))
npos <- ncol(table_mat_norm)
chars <- as.character(rownames(table_mat_norm))
if (!ic) {
if (score == "diff") {
table_mat_adj <- apply((table_mat_norm + epsilon) -
(bgmat + epsilon), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
return(z)
}
else {
y <- x[!is.na(x)]
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
else if (score == "log") {
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
epsilon), base = 2), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
return(z)
}
else {
y <- x[!is.na(x)]
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
else if (score == "log-odds") {
if (opt == 1) {
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
epsilon), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = log(x/(sum(x) - x), base = 2)
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
return(z)
}
else {
w <- x[!is.na(x)]
y <- log(w/(sum(w) - w), base = 2)
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
else {
table_mat_adj <- apply((table_mat_norm + epsilon),
2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = log(x/(sum(x) - x), base = 2)
z <- y - quantile(y, quant)
return(z)
}
else {
w <- x[!is.na(x)]
y <- log(w/(sum(w) - w), base = 2)
z <- y - quantile(y, quant)
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
}
else if (score == "probKL") {
table_mat_adj <- apply((table_mat_norm + epsilon) *
log((table_mat_norm + epsilon)/(bgmat + epsilon),
base = 2), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
return(z)
}
else {
y <- x[!is.na(x)]
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
else if (score == "ratio") {
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
epsilon), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
return(z)
}
else {
y <- x[!is.na(x)]
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
else if (score == "unscaled_log") {
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
epsilon), base = 2), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
return(z)
}
else {
y <- x[!is.na(x)]
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
else if (score == "wKL") {
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
epsilon), base = 2), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
return(z)
}
else {
y <- x[!is.na(x)]
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
else {
stop("The value of score chosen is not compatible")
}
}
else {
if (score == "diff") {
if (opt == 1) {
table_mat_adj <- apply((table_mat_norm + epsilon) -
(bgmat + epsilon), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
return(z)
}
else {
y <- x[!is.na(x)]
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
else {
table_mat_adj <- apply(table_mat_norm + epsilon,
2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
z <- y - quantile(y, quant)
return(z)
}
else {
y <- x[!is.na(x)]
z <- y - quantile(y, quant)
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
}
else if (score == "log") {
if (opt == 1) {
table_mat_adj <- apply(log((table_mat_norm +
epsilon)/(bgmat + epsilon), base = 2), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
return(z)
}
else {
y <- x[!is.na(x)]
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
else {
table_mat_adj <- apply(log(table_mat_norm + epsilon,
base = 2), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
z <- y - quantile(y, quant)
return(z)
}
else {
y <- x[!is.na(x)]
z <- y - quantile(y, quant)
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
}
else if (score == "log-odds") {
if (opt == 1) {
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
epsilon), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = log(x/(sum(x) - x), base = 2)
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
return(z)
}
else {
w <- x[!is.na(x)]
y <- log(w/(sum(w) - w), base = 2)
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
else {
table_mat_adj <- apply((table_mat_norm + epsilon),
2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = log(x/(sum(x) - x), base = 2)
z <- y - quantile(y, quant)
return(z)
}
else {
w <- x[!is.na(x)]
y <- log(w/(sum(w) - w), base = 2)
z <- y - quantile(y, quant)
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
}
else if (score == "probKL") {
if (opt == 1) {
table_mat_adj <- apply((table_mat_norm + epsilon) *
log((table_mat_norm + epsilon)/(bgmat + epsilon),
base = 2), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
return(z)
}
else {
y <- x[!is.na(x)]
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
else {
table_mat_adj <- apply((table_mat_norm + epsilon) *
log(table_mat_norm + epsilon, base = 2), 2,
function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
z <- y - quantile(y, quant)
return(z)
}
else {
y <- x[!is.na(x)]
z <- y - quantile(y, quant)
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
}
else if (score == "ratio") {
if (opt == 1) {
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
epsilon), 2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
return(z)
}
else {
y <- x[!is.na(x)]
if (quant != 0) {
qq <- quantile(y, quant)
}
else {
qq <- 0
}
z <- y - qq
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
else {
table_mat_adj <- apply(table_mat_norm + scale,
2, function(x) {
indices <- which(is.na(x))
if (length(indices) == 0) {
y = x
z <- y - quantile(y, quant)
return(z)
}
else {
y <- x[!is.na(x)]
z <- y - quantile(y, quant)
zext <- array(0, length(x))
zext[indices] <- 0
zext[-indices] <- z
return(zext)
}
})
}
}
else {
stop("The value of score chosen is not compatible")
}
}
if (!ic) {
table_mat_pos <- table_mat_adj
table_mat_pos[table_mat_pos <= 0] = 0
table_mat_pos_norm <- apply(table_mat_pos, 2, function(x) return(x/sum(x)))
table_mat_pos_norm[table_mat_pos_norm == "NaN"] = 0
table_mat_neg <- table_mat_adj
table_mat_neg[table_mat_neg >= 0] = 0
table_mat_neg_norm <- apply(abs(table_mat_neg), 2, function(x) return(x/sum(x)))
table_mat_neg_norm[table_mat_neg_norm == "NaN"] = 0
pos_ic <- colSums(table_mat_pos)
neg_ic <- colSums(abs(table_mat_neg))
ll <- list()
ll$pos_ic <- pos_ic
ll$neg_ic <- neg_ic
ll$table_mat_pos_norm <- table_mat_pos_norm
ll$table_mat_neg_norm <- table_mat_neg_norm
}
else {
table_mat_pos <- table_mat_adj
table_mat_pos[table_mat_pos <= 0] = 0
table_mat_pos_norm <- apply(table_mat_pos, 2, function(x) return(x/sum(x)))
table_mat_pos_norm[table_mat_pos_norm == "NaN"] = 0
table_mat_neg <- table_mat_adj
table_mat_neg[table_mat_neg >= 0] = 0
table_mat_neg_norm <- apply(table_mat_neg, 2, function(x) return(x/sum(x)))
table_mat_neg_norm[table_mat_neg_norm == "NaN"] = 0
table_mat_norm <- replace(table_mat_norm, is.na(table_mat_norm),
0)
for (j in 1:dim(table_mat_neg_norm)[2]) {
if (sum(table_mat_neg_norm[, j]) == 0) {
table_mat_neg_norm[, j] <- normalize4(table_mat_neg_norm[,
j] + 0.001)
}
}
for (j in 1:dim(table_mat_pos_norm)[2]) {
if (sum(table_mat_pos_norm[, j]) == 0) {
table_mat_pos_norm[, j] <- normalize4(table_mat_pos_norm[,
j] + 0.001)
}
}
if (symm == TRUE) {
table_mat_norm[which(is.na(table))] <- NA
ic <- 0.5 * (ic_computer(table_mat_norm, alpha, hist = hist,
bg = bgmat) + ic_computer(bgmat, alpha, hist = hist,
bg = table_mat_norm))
}
else {
table_mat_norm[which(is.na(table))] <- NA
ic <- ic_computer(table_mat_norm, alpha, hist = hist,
bg = bgmat)
}
tab_neg <- apply(table_mat_adj, 2, function(x) {
y = x[x < 0]
if (length(y) == 0) {
return(0)
}
else {
return(abs(sum(y)))
}
})
tab_pos <- apply(table_mat_adj, 2, function(x) {
y = x[x > 0]
if (length(y) == 0) {
return(0)
}
else {
return(abs(sum(y)))
}
})
tab_pos[tab_pos == 0] <- 0.001
tab_neg[tab_neg == 0] <- 0.001
pos_neg_scaling <- apply(rbind(tab_pos, tab_neg), 2,
function(x) return(x/sum(x)))
pos_ic <- pos_neg_scaling[1, ] * ic
neg_ic <- pos_neg_scaling[2, ] * ic
ll <- list()
ll$pos_ic <- pos_ic
ll$neg_ic <- neg_ic
ll$table_mat_pos_norm <- table_mat_pos_norm
ll$table_mat_neg_norm <- table_mat_neg_norm
}
return(ll)
}