diff --git a/scripts/functions/my_logolas.R b/scripts/functions/my_logolas.R index 9303e1d..d99fecf 100644 --- a/scripts/functions/my_logolas.R +++ b/scripts/functions/my_logolas.R @@ -1,9 +1,9 @@ -######################## +######################################################################## # From package: logolas -# need to source locally +# need to source locally (along with dependant functions) # for generating the pfm and pfm_scaled # needed for gettting the ED values and EDPlot -######################## +######################################################################## my_logolas <- function (data, type = c("Logo", "EDLogo"), use_dash = TRUE, bg = NULL, color_type = NULL, colors = NULL, color_seed = 2030, @@ -262,10 +262,10 @@ my_logolas <- function (data, type = c("Logo", "EDLogo"), use_dash = TRUE, #return(out) } } - -################ -# dash.r -################ +######################################################################## +#================= +# dash() +#================= # @title Dirichlet adaptive shrinkage of compositional data using dash # # @description Given a matrix of compositional counts data, with samples along @@ -636,10 +636,10 @@ logfac = function(x){ } return(out) } - -################ -# mixEM.R -################ +######################################################################## +#================= +# mixEM() +#================= # @title Estimate mixture proportions of a mixture model by EM algorithm # @@ -799,10 +799,10 @@ set_control_squarem=function(control,nobs){ control=utils::modifyList(control.default, control) return(control) } -#======================================================================= -################ -# nlogomaker.R() -################ +######################################################################## +#================= +# nlogomaker() +#================= function (table, ic = FALSE, score = c("diff", "log", "log-odds", "probKL", "ratio", "unscaled_log", "wKL"), color_profile, total_chars = c("A", "B", "C", "D", "E", "F", "G", "H", "I", @@ -1223,3 +1223,662 @@ function (table, ic = FALSE, score = c("diff", "log", "log-odds", grid::popViewport() return(ll) } +######################################################################## +#=========================== +# get_logo_heights() +#=========================== + +get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-odds", + "probKL", "ratio", "unscaled_log", "wKL"), bg = NULL, epsilon = 0.01, + opt = 1, symm = TRUE, alpha = 1, hist = FALSE, quant = 0.5) +{ + if (ic & score == "unscaled_log") { + warning("ic = TRUE not compatible with score = `unscaled-log`: switching to\n ic = FALSE") + ic = FALSE + } + if (ic & score == "wKL") { + warning("ic = TRUE not compatible with score = `wKL`: switching to \n ic = FALSE") + ic = FALSE + } + if (length(score) != 1) { + stop("score can be wither diff, log, log-odds, probKL, ratio, \n unscaled_log or wKL") + } + if (is.vector(bg) == TRUE) { + if (length(bg) != dim(table)[1]) { + stop("If background prob (bg) is a vector, the length of bg must\n equal the number of symbols for the logo plot") + } + else if (length(which(is.na(table))) > 0) { + stop("For NA in table, a vector bg is not allowed") + } + else { + bgmat <- bg %*% t(rep(1, dim(table)[2])) + bgmat[which(is.na(table))] <- NA + bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)]))) + } + } + else if (is.matrix(bg) == TRUE) { + if (dim(bg)[1] != dim(table)[1] | dim(bg)[2] != dim(table)[2]) { + stop("If background prob (bg) is a matrix, its dimensions must\n match that of the table") + } + else { + bgmat <- bg + bgmat[which(is.na(table))] <- NA + bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)]))) + } + } + else { + message("using a background with equal probability for all symbols") + bgmat <- matrix(1/dim(table)[1], dim(table)[1], dim(table)[2]) + bgmat[which(is.na(table))] <- NA + bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)]))) + } + table <- apply(table + 1e-04, 2, normalize4) + bgmat <- apply(bgmat + 1e-04, 2, normalize4) + if (class(table) == "data.frame") { + table <- as.matrix(table) + } + else if (class(table) != "matrix") { + stop("the table must be of class matrix or data.frame") + } + table_mat_norm <- apply(table, 2, function(x) return(x/sum(x[!is.na(x)]))) + bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)]))) + npos <- ncol(table_mat_norm) + chars <- as.character(rownames(table_mat_norm)) + if (!ic) { + if (score == "diff") { + table_mat_adj <- apply((table_mat_norm + epsilon) - + (bgmat + epsilon), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + return(z) + } + else { + y <- x[!is.na(x)] + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + else if (score == "log") { + table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat + + epsilon), base = 2), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + return(z) + } + else { + y <- x[!is.na(x)] + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + else if (score == "log-odds") { + if (opt == 1) { + table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat + + epsilon), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = log(x/(sum(x) - x), base = 2) + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + return(z) + } + else { + w <- x[!is.na(x)] + y <- log(w/(sum(w) - w), base = 2) + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + else { + table_mat_adj <- apply((table_mat_norm + epsilon), + 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = log(x/(sum(x) - x), base = 2) + z <- y - quantile(y, quant) + return(z) + } + else { + w <- x[!is.na(x)] + y <- log(w/(sum(w) - w), base = 2) + z <- y - quantile(y, quant) + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + } + else if (score == "probKL") { + table_mat_adj <- apply((table_mat_norm + epsilon) * + log((table_mat_norm + epsilon)/(bgmat + epsilon), + base = 2), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + return(z) + } + else { + y <- x[!is.na(x)] + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + else if (score == "ratio") { + table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat + + epsilon), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + return(z) + } + else { + y <- x[!is.na(x)] + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + else if (score == "unscaled_log") { + table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat + + epsilon), base = 2), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + return(z) + } + else { + y <- x[!is.na(x)] + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + else if (score == "wKL") { + table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat + + epsilon), base = 2), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + return(z) + } + else { + y <- x[!is.na(x)] + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + else { + stop("The value of score chosen is not compatible") + } + } + else { + if (score == "diff") { + if (opt == 1) { + table_mat_adj <- apply((table_mat_norm + epsilon) - + (bgmat + epsilon), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + return(z) + } + else { + y <- x[!is.na(x)] + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + else { + table_mat_adj <- apply(table_mat_norm + epsilon, + 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + z <- y - quantile(y, quant) + return(z) + } + else { + y <- x[!is.na(x)] + z <- y - quantile(y, quant) + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + } + else if (score == "log") { + if (opt == 1) { + table_mat_adj <- apply(log((table_mat_norm + + epsilon)/(bgmat + epsilon), base = 2), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + return(z) + } + else { + y <- x[!is.na(x)] + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + else { + table_mat_adj <- apply(log(table_mat_norm + epsilon, + base = 2), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + z <- y - quantile(y, quant) + return(z) + } + else { + y <- x[!is.na(x)] + z <- y - quantile(y, quant) + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + } + else if (score == "log-odds") { + if (opt == 1) { + table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat + + epsilon), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = log(x/(sum(x) - x), base = 2) + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + return(z) + } + else { + w <- x[!is.na(x)] + y <- log(w/(sum(w) - w), base = 2) + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + else { + table_mat_adj <- apply((table_mat_norm + epsilon), + 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = log(x/(sum(x) - x), base = 2) + z <- y - quantile(y, quant) + return(z) + } + else { + w <- x[!is.na(x)] + y <- log(w/(sum(w) - w), base = 2) + z <- y - quantile(y, quant) + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + } + else if (score == "probKL") { + if (opt == 1) { + table_mat_adj <- apply((table_mat_norm + epsilon) * + log((table_mat_norm + epsilon)/(bgmat + epsilon), + base = 2), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + return(z) + } + else { + y <- x[!is.na(x)] + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + else { + table_mat_adj <- apply((table_mat_norm + epsilon) * + log(table_mat_norm + epsilon, base = 2), 2, + function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + z <- y - quantile(y, quant) + return(z) + } + else { + y <- x[!is.na(x)] + z <- y - quantile(y, quant) + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + } + else if (score == "ratio") { + if (opt == 1) { + table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat + + epsilon), 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + return(z) + } + else { + y <- x[!is.na(x)] + if (quant != 0) { + qq <- quantile(y, quant) + } + else { + qq <- 0 + } + z <- y - qq + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + else { + table_mat_adj <- apply(table_mat_norm + scale, + 2, function(x) { + indices <- which(is.na(x)) + if (length(indices) == 0) { + y = x + z <- y - quantile(y, quant) + return(z) + } + else { + y <- x[!is.na(x)] + z <- y - quantile(y, quant) + zext <- array(0, length(x)) + zext[indices] <- 0 + zext[-indices] <- z + return(zext) + } + }) + } + } + else { + stop("The value of score chosen is not compatible") + } + } + if (!ic) { + table_mat_pos <- table_mat_adj + table_mat_pos[table_mat_pos <= 0] = 0 + table_mat_pos_norm <- apply(table_mat_pos, 2, function(x) return(x/sum(x))) + table_mat_pos_norm[table_mat_pos_norm == "NaN"] = 0 + table_mat_neg <- table_mat_adj + table_mat_neg[table_mat_neg >= 0] = 0 + table_mat_neg_norm <- apply(abs(table_mat_neg), 2, function(x) return(x/sum(x))) + table_mat_neg_norm[table_mat_neg_norm == "NaN"] = 0 + pos_ic <- colSums(table_mat_pos) + neg_ic <- colSums(abs(table_mat_neg)) + ll <- list() + ll$pos_ic <- pos_ic + ll$neg_ic <- neg_ic + ll$table_mat_pos_norm <- table_mat_pos_norm + ll$table_mat_neg_norm <- table_mat_neg_norm + } + else { + table_mat_pos <- table_mat_adj + table_mat_pos[table_mat_pos <= 0] = 0 + table_mat_pos_norm <- apply(table_mat_pos, 2, function(x) return(x/sum(x))) + table_mat_pos_norm[table_mat_pos_norm == "NaN"] = 0 + table_mat_neg <- table_mat_adj + table_mat_neg[table_mat_neg >= 0] = 0 + table_mat_neg_norm <- apply(table_mat_neg, 2, function(x) return(x/sum(x))) + table_mat_neg_norm[table_mat_neg_norm == "NaN"] = 0 + table_mat_norm <- replace(table_mat_norm, is.na(table_mat_norm), + 0) + for (j in 1:dim(table_mat_neg_norm)[2]) { + if (sum(table_mat_neg_norm[, j]) == 0) { + table_mat_neg_norm[, j] <- normalize4(table_mat_neg_norm[, + j] + 0.001) + } + } + for (j in 1:dim(table_mat_pos_norm)[2]) { + if (sum(table_mat_pos_norm[, j]) == 0) { + table_mat_pos_norm[, j] <- normalize4(table_mat_pos_norm[, + j] + 0.001) + } + } + if (symm == TRUE) { + table_mat_norm[which(is.na(table))] <- NA + ic <- 0.5 * (ic_computer(table_mat_norm, alpha, hist = hist, + bg = bgmat) + ic_computer(bgmat, alpha, hist = hist, + bg = table_mat_norm)) + } + else { + table_mat_norm[which(is.na(table))] <- NA + ic <- ic_computer(table_mat_norm, alpha, hist = hist, + bg = bgmat) + } + tab_neg <- apply(table_mat_adj, 2, function(x) { + y = x[x < 0] + if (length(y) == 0) { + return(0) + } + else { + return(abs(sum(y))) + } + }) + tab_pos <- apply(table_mat_adj, 2, function(x) { + y = x[x > 0] + if (length(y) == 0) { + return(0) + } + else { + return(abs(sum(y))) + } + }) + tab_pos[tab_pos == 0] <- 0.001 + tab_neg[tab_neg == 0] <- 0.001 + pos_neg_scaling <- apply(rbind(tab_pos, tab_neg), 2, + function(x) return(x/sum(x))) + pos_ic <- pos_neg_scaling[1, ] * ic + neg_ic <- pos_neg_scaling[2, ] * ic + ll <- list() + ll$pos_ic <- pos_ic + ll$neg_ic <- neg_ic + ll$table_mat_pos_norm <- table_mat_pos_norm + ll$table_mat_neg_norm <- table_mat_neg_norm + } + return(ll) +}