added get_logo_heights() in my_logolas.R

2022-01-26 11:16:02 +00:00 · 2022-01-26 11:16:02 +00:00 · 1c1e98ad4f
commit 1c1e98ad4f
parent 92af9fd565
1 changed files with 674 additions and 15 deletions
--- a/scripts/functions/my_logolas.R
+++ b/scripts/functions/my_logolas.R
@ -1,9 +1,9 @@
-########################
+########################################################################
 # From package: logolas
-# need to source locally
+# need to source locally (along with dependant functions)
 # for generating the pfm and pfm_scaled 
 # needed for gettting the ED values and EDPlot
-########################
+########################################################################
 my_logolas <- function (data, type = c("Logo", "EDLogo"), use_dash = TRUE, 
          bg = NULL, color_type = NULL, colors = NULL, color_seed = 2030, 
@ -262,10 +262,10 @@ my_logolas <- function (data, type = c("Logo", "EDLogo"), use_dash = TRUE,
    #return(out)
  }
 }
-
+########################################################################
-################
+#=================
-# dash.r
+# dash()
-################
+#=================
 # @title Dirichlet adaptive shrinkage of compositional data using dash
 #
 # @description Given a matrix of compositional counts data, with samples along
@ -636,10 +636,10 @@ logfac = function(x){
  }
  return(out)
 }
-
+########################################################################
-################
+#=================
-# mixEM.R
+# mixEM()
-################
+#=================
 # @title Estimate mixture proportions of a mixture model by EM algorithm
 #
@ -799,10 +799,10 @@ set_control_squarem=function(control,nobs){
  control=utils::modifyList(control.default, control)
  return(control)
 }
-#=======================================================================
+########################################################################
-################
+#=================
-# nlogomaker.R()
+# nlogomaker()
-################
+#=================
 function (table, ic = FALSE, score = c("diff", "log", "log-odds", 
                                       "probKL", "ratio", "unscaled_log", "wKL"), color_profile, 
          total_chars = c("A", "B", "C", "D", "E", "F", "G", "H", "I", 
@ -1223,3 +1223,662 @@ function (table, ic = FALSE, score = c("diff", "log", "log-odds",
  grid::popViewport()
  return(ll)
 }
 ########################################################################
 #===========================
 # get_logo_heights()
 #===========================
 get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-odds", 
    "probKL", "ratio", "unscaled_log", "wKL"), bg = NULL, epsilon = 0.01, 
    opt = 1, symm = TRUE, alpha = 1, hist = FALSE, quant = 0.5) 
 {
    if (ic & score == "unscaled_log") {
        warning("ic = TRUE not compatible with score = `unscaled-log`: switching to\n            ic = FALSE")
        ic = FALSE
    }
    if (ic & score == "wKL") {
        warning("ic = TRUE not compatible with score = `wKL`: switching to \n            ic = FALSE")
        ic = FALSE
    }
    if (length(score) != 1) {
        stop("score can be wither diff, log, log-odds, probKL, ratio, \n         unscaled_log or wKL")
    }
    if (is.vector(bg) == TRUE) {
        if (length(bg) != dim(table)[1]) {
            stop("If background prob (bg) is a vector, the length of bg must\n           equal the number of symbols for the logo plot")
        }
        else if (length(which(is.na(table))) > 0) {
            stop("For NA in table, a vector bg is not allowed")
        }
        else {
            bgmat <- bg %*% t(rep(1, dim(table)[2]))
            bgmat[which(is.na(table))] <- NA
            bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)])))
        }
    }
    else if (is.matrix(bg) == TRUE) {
        if (dim(bg)[1] != dim(table)[1] | dim(bg)[2] != dim(table)[2]) {
            stop("If background prob (bg) is a matrix, its dimensions must\n           match that of the table")
        }
        else {
            bgmat <- bg
            bgmat[which(is.na(table))] <- NA
            bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)])))
        }
    }
    else {
        message("using a background with equal probability for all symbols")
        bgmat <- matrix(1/dim(table)[1], dim(table)[1], dim(table)[2])
        bgmat[which(is.na(table))] <- NA
        bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)])))
    }
    table <- apply(table + 1e-04, 2, normalize4)
    bgmat <- apply(bgmat + 1e-04, 2, normalize4)
    if (class(table) == "data.frame") {
        table <- as.matrix(table)
    }
    else if (class(table) != "matrix") {
        stop("the table must be of class matrix or data.frame")
    }
    table_mat_norm <- apply(table, 2, function(x) return(x/sum(x[!is.na(x)])))
    bgmat <- apply(bgmat, 2, function(x) return(x/sum(x[!is.na(x)])))
    npos <- ncol(table_mat_norm)
    chars <- as.character(rownames(table_mat_norm))
    if (!ic) {
        if (score == "diff") {
            table_mat_adj <- apply((table_mat_norm + epsilon) - 
                (bgmat + epsilon), 2, function(x) {
                indices <- which(is.na(x))
                if (length(indices) == 0) {
                  y = x
                  if (quant != 0) {
                    qq <- quantile(y, quant)
                  }
                  else {
                    qq <- 0
                  }
                  z <- y - qq
                  return(z)
                }
                else {
                  y <- x[!is.na(x)]
                  if (quant != 0) {
                    qq <- quantile(y, quant)
                  }
                  else {
                    qq <- 0
                  }
                  z <- y - qq
                  zext <- array(0, length(x))
                  zext[indices] <- 0
                  zext[-indices] <- z
                  return(zext)
                }
            })
        }
        else if (score == "log") {
            table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat + 
                epsilon), base = 2), 2, function(x) {
                indices <- which(is.na(x))
                if (length(indices) == 0) {
                  y = x
                  if (quant != 0) {
                    qq <- quantile(y, quant)
                  }
                  else {
                    qq <- 0
                  }
                  z <- y - qq
                  return(z)
                }
                else {
                  y <- x[!is.na(x)]
                  if (quant != 0) {
                    qq <- quantile(y, quant)
                  }
                  else {
                    qq <- 0
                  }
                  z <- y - qq
                  zext <- array(0, length(x))
                  zext[indices] <- 0
                  zext[-indices] <- z
                  return(zext)
                }
            })
        }
        else if (score == "log-odds") {
            if (opt == 1) {
                table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat + 
                  epsilon), 2, function(x) {
                  indices <- which(is.na(x))
                  if (length(indices) == 0) {
                    y = log(x/(sum(x) - x), base = 2)
                    if (quant != 0) {
                      qq <- quantile(y, quant)
                    }
                    else {
                      qq <- 0
                    }
                    z <- y - qq
                    return(z)
                  }
                  else {
                    w <- x[!is.na(x)]
                    y <- log(w/(sum(w) - w), base = 2)
                    if (quant != 0) {
                      qq <- quantile(y, quant)
                    }
                    else {
                      qq <- 0
                    }
                    z <- y - qq
                    zext <- array(0, length(x))
                    zext[indices] <- 0
                    zext[-indices] <- z
                    return(zext)
                  }
                })
            }
            else {
                table_mat_adj <- apply((table_mat_norm + epsilon), 
                  2, function(x) {
                    indices <- which(is.na(x))
                    if (length(indices) == 0) {
                      y = log(x/(sum(x) - x), base = 2)
                      z <- y - quantile(y, quant)
                      return(z)
                    }
                    else {
                      w <- x[!is.na(x)]
                      y <- log(w/(sum(w) - w), base = 2)
                      z <- y - quantile(y, quant)
                      zext <- array(0, length(x))
                      zext[indices] <- 0
                      zext[-indices] <- z
                      return(zext)
                    }
                  })
            }
        }
        else if (score == "probKL") {
            table_mat_adj <- apply((table_mat_norm + epsilon) * 
                log((table_mat_norm + epsilon)/(bgmat + epsilon), 
                  base = 2), 2, function(x) {
                indices <- which(is.na(x))
                if (length(indices) == 0) {
                  y = x
                  if (quant != 0) {
                    qq <- quantile(y, quant)
                  }
                  else {
                    qq <- 0
                  }
                  z <- y - qq
                  return(z)
                }
                else {
                  y <- x[!is.na(x)]
                  if (quant != 0) {
                    qq <- quantile(y, quant)
                  }
                  else {
                    qq <- 0
                  }
                  z <- y - qq
                  zext <- array(0, length(x))
                  zext[indices] <- 0
                  zext[-indices] <- z
                  return(zext)
                }
            })
        }
        else if (score == "ratio") {
            table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat + 
                epsilon), 2, function(x) {
                indices <- which(is.na(x))
                if (length(indices) == 0) {
                  y = x
                  if (quant != 0) {
                    qq <- quantile(y, quant)
                  }
                  else {
                    qq <- 0
                  }
                  z <- y - qq
                  return(z)
                }
                else {
                  y <- x[!is.na(x)]
                  if (quant != 0) {
                    qq <- quantile(y, quant)
                  }
                  else {
                    qq <- 0
                  }
                  z <- y - qq
                  zext <- array(0, length(x))
                  zext[indices] <- 0
                  zext[-indices] <- z
                  return(zext)
                }
            })
        }
        else if (score == "unscaled_log") {
            table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat + 
                epsilon), base = 2), 2, function(x) {
                indices <- which(is.na(x))
                if (length(indices) == 0) {
                  y = x
                  if (quant != 0) {
                    qq <- quantile(y, quant)
                  }
                  else {
                    qq <- 0
                  }
                  z <- y - qq
                  return(z)
                }
                else {
                  y <- x[!is.na(x)]
                  if (quant != 0) {
                    qq <- quantile(y, quant)
                  }
                  else {
                    qq <- 0
                  }
                  z <- y - qq
                  zext <- array(0, length(x))
                  zext[indices] <- 0
                  zext[-indices] <- z
                  return(zext)
                }
            })
        }
        else if (score == "wKL") {
            table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat + 
                epsilon), base = 2), 2, function(x) {
                indices <- which(is.na(x))
                if (length(indices) == 0) {
                  y = x
                  if (quant != 0) {
                    qq <- quantile(y, quant)
                  }
                  else {
                    qq <- 0
                  }
                  z <- y - qq
                  return(z)
                }
                else {
                  y <- x[!is.na(x)]
                  if (quant != 0) {
                    qq <- quantile(y, quant)
                  }
                  else {
                    qq <- 0
                  }
                  z <- y - qq
                  zext <- array(0, length(x))
                  zext[indices] <- 0
                  zext[-indices] <- z
                  return(zext)
                }
            })
        }
        else {
            stop("The value of score chosen is not compatible")
        }
    }
    else {
        if (score == "diff") {
            if (opt == 1) {
                table_mat_adj <- apply((table_mat_norm + epsilon) - 
                  (bgmat + epsilon), 2, function(x) {
                  indices <- which(is.na(x))
                  if (length(indices) == 0) {
                    y = x
                    if (quant != 0) {
                      qq <- quantile(y, quant)
                    }
                    else {
                      qq <- 0
                    }
                    z <- y - qq
                    return(z)
                  }
                  else {
                    y <- x[!is.na(x)]
                    if (quant != 0) {
                      qq <- quantile(y, quant)
                    }
                    else {
                      qq <- 0
                    }
                    z <- y - qq
                    zext <- array(0, length(x))
                    zext[indices] <- 0
                    zext[-indices] <- z
                    return(zext)
                  }
                })
            }
            else {
                table_mat_adj <- apply(table_mat_norm + epsilon, 
                  2, function(x) {
                    indices <- which(is.na(x))
                    if (length(indices) == 0) {
                      y = x
                      z <- y - quantile(y, quant)
                      return(z)
                    }
                    else {
                      y <- x[!is.na(x)]
                      z <- y - quantile(y, quant)
                      zext <- array(0, length(x))
                      zext[indices] <- 0
                      zext[-indices] <- z
                      return(zext)
                    }
                  })
            }
        }
        else if (score == "log") {
            if (opt == 1) {
                table_mat_adj <- apply(log((table_mat_norm + 
                  epsilon)/(bgmat + epsilon), base = 2), 2, function(x) {
                  indices <- which(is.na(x))
                  if (length(indices) == 0) {
                    y = x
                    if (quant != 0) {
                      qq <- quantile(y, quant)
                    }
                    else {
                      qq <- 0
                    }
                    z <- y - qq
                    return(z)
                  }
                  else {
                    y <- x[!is.na(x)]
                    if (quant != 0) {
                      qq <- quantile(y, quant)
                    }
                    else {
                      qq <- 0
                    }
                    z <- y - qq
                    zext <- array(0, length(x))
                    zext[indices] <- 0
                    zext[-indices] <- z
                    return(zext)
                  }
                })
            }
            else {
                table_mat_adj <- apply(log(table_mat_norm + epsilon, 
                  base = 2), 2, function(x) {
                  indices <- which(is.na(x))
                  if (length(indices) == 0) {
                    y = x
                    z <- y - quantile(y, quant)
                    return(z)
                  }
                  else {
                    y <- x[!is.na(x)]
                    z <- y - quantile(y, quant)
                    zext <- array(0, length(x))
                    zext[indices] <- 0
                    zext[-indices] <- z
                    return(zext)
                  }
                })
            }
        }
        else if (score == "log-odds") {
            if (opt == 1) {
                table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat + 
                  epsilon), 2, function(x) {
                  indices <- which(is.na(x))
                  if (length(indices) == 0) {
                    y = log(x/(sum(x) - x), base = 2)
                    if (quant != 0) {
                      qq <- quantile(y, quant)
                    }
                    else {
                      qq <- 0
                    }
                    z <- y - qq
                    return(z)
                  }
                  else {
                    w <- x[!is.na(x)]
                    y <- log(w/(sum(w) - w), base = 2)
                    if (quant != 0) {
                      qq <- quantile(y, quant)
                    }
                    else {
                      qq <- 0
                    }
                    z <- y - qq
                    zext <- array(0, length(x))
                    zext[indices] <- 0
                    zext[-indices] <- z
                    return(zext)
                  }
                })
            }
            else {
                table_mat_adj <- apply((table_mat_norm + epsilon), 
                  2, function(x) {
                    indices <- which(is.na(x))
                    if (length(indices) == 0) {
                      y = log(x/(sum(x) - x), base = 2)
                      z <- y - quantile(y, quant)
                      return(z)
                    }
                    else {
                      w <- x[!is.na(x)]
                      y <- log(w/(sum(w) - w), base = 2)
                      z <- y - quantile(y, quant)
                      zext <- array(0, length(x))
                      zext[indices] <- 0
                      zext[-indices] <- z
                      return(zext)
                    }
                  })
            }
        }
        else if (score == "probKL") {
            if (opt == 1) {
                table_mat_adj <- apply((table_mat_norm + epsilon) * 
                  log((table_mat_norm + epsilon)/(bgmat + epsilon), 
                    base = 2), 2, function(x) {
                  indices <- which(is.na(x))
                  if (length(indices) == 0) {
                    y = x
                    if (quant != 0) {
                      qq <- quantile(y, quant)
                    }
                    else {
                      qq <- 0
                    }
                    z <- y - qq
                    return(z)
                  }
                  else {
                    y <- x[!is.na(x)]
                    if (quant != 0) {
                      qq <- quantile(y, quant)
                    }
                    else {
                      qq <- 0
                    }
                    z <- y - qq
                    zext <- array(0, length(x))
                    zext[indices] <- 0
                    zext[-indices] <- z
                    return(zext)
                  }
                })
            }
            else {
                table_mat_adj <- apply((table_mat_norm + epsilon) * 
                  log(table_mat_norm + epsilon, base = 2), 2, 
                  function(x) {
                    indices <- which(is.na(x))
                    if (length(indices) == 0) {
                      y = x
                      z <- y - quantile(y, quant)
                      return(z)
                    }
                    else {
                      y <- x[!is.na(x)]
                      z <- y - quantile(y, quant)
                      zext <- array(0, length(x))
                      zext[indices] <- 0
                      zext[-indices] <- z
                      return(zext)
                    }
                  })
            }
        }
        else if (score == "ratio") {
            if (opt == 1) {
                table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat + 
                  epsilon), 2, function(x) {
                  indices <- which(is.na(x))
                  if (length(indices) == 0) {
                    y = x
                    if (quant != 0) {
                      qq <- quantile(y, quant)
                    }
                    else {
                      qq <- 0
                    }
                    z <- y - qq
                    return(z)
                  }
                  else {
                    y <- x[!is.na(x)]
                    if (quant != 0) {
                      qq <- quantile(y, quant)
                    }
                    else {
                      qq <- 0
                    }
                    z <- y - qq
                    zext <- array(0, length(x))
                    zext[indices] <- 0
                    zext[-indices] <- z
                    return(zext)
                  }
                })
            }
            else {
                table_mat_adj <- apply(table_mat_norm + scale, 
                  2, function(x) {
                    indices <- which(is.na(x))
                    if (length(indices) == 0) {
                      y = x
                      z <- y - quantile(y, quant)
                      return(z)
                    }
                    else {
                      y <- x[!is.na(x)]
                      z <- y - quantile(y, quant)
                      zext <- array(0, length(x))
                      zext[indices] <- 0
                      zext[-indices] <- z
                      return(zext)
                    }
                  })
            }
        }
        else {
            stop("The value of score chosen is not compatible")
        }
    }
    if (!ic) {
        table_mat_pos <- table_mat_adj
        table_mat_pos[table_mat_pos <= 0] = 0
        table_mat_pos_norm <- apply(table_mat_pos, 2, function(x) return(x/sum(x)))
        table_mat_pos_norm[table_mat_pos_norm == "NaN"] = 0
        table_mat_neg <- table_mat_adj
        table_mat_neg[table_mat_neg >= 0] = 0
        table_mat_neg_norm <- apply(abs(table_mat_neg), 2, function(x) return(x/sum(x)))
        table_mat_neg_norm[table_mat_neg_norm == "NaN"] = 0
        pos_ic <- colSums(table_mat_pos)
        neg_ic <- colSums(abs(table_mat_neg))
        ll <- list()
        ll$pos_ic <- pos_ic
        ll$neg_ic <- neg_ic
        ll$table_mat_pos_norm <- table_mat_pos_norm
        ll$table_mat_neg_norm <- table_mat_neg_norm
    }
    else {
        table_mat_pos <- table_mat_adj
        table_mat_pos[table_mat_pos <= 0] = 0
        table_mat_pos_norm <- apply(table_mat_pos, 2, function(x) return(x/sum(x)))
        table_mat_pos_norm[table_mat_pos_norm == "NaN"] = 0
        table_mat_neg <- table_mat_adj
        table_mat_neg[table_mat_neg >= 0] = 0
        table_mat_neg_norm <- apply(table_mat_neg, 2, function(x) return(x/sum(x)))
        table_mat_neg_norm[table_mat_neg_norm == "NaN"] = 0
        table_mat_norm <- replace(table_mat_norm, is.na(table_mat_norm), 
            0)
        for (j in 1:dim(table_mat_neg_norm)[2]) {
            if (sum(table_mat_neg_norm[, j]) == 0) {
                table_mat_neg_norm[, j] <- normalize4(table_mat_neg_norm[, 
                  j] + 0.001)
            }
        }
        for (j in 1:dim(table_mat_pos_norm)[2]) {
            if (sum(table_mat_pos_norm[, j]) == 0) {
                table_mat_pos_norm[, j] <- normalize4(table_mat_pos_norm[, 
                  j] + 0.001)
            }
        }
        if (symm == TRUE) {
            table_mat_norm[which(is.na(table))] <- NA
            ic <- 0.5 * (ic_computer(table_mat_norm, alpha, hist = hist, 
                bg = bgmat) + ic_computer(bgmat, alpha, hist = hist, 
                bg = table_mat_norm))
        }
        else {
            table_mat_norm[which(is.na(table))] <- NA
            ic <- ic_computer(table_mat_norm, alpha, hist = hist, 
                bg = bgmat)
        }
        tab_neg <- apply(table_mat_adj, 2, function(x) {
            y = x[x < 0]
            if (length(y) == 0) {
                return(0)
            }
            else {
                return(abs(sum(y)))
            }
        })
        tab_pos <- apply(table_mat_adj, 2, function(x) {
            y = x[x > 0]
            if (length(y) == 0) {
                return(0)
            }
            else {
                return(abs(sum(y)))
            }
        })
        tab_pos[tab_pos == 0] <- 0.001
        tab_neg[tab_neg == 0] <- 0.001
        pos_neg_scaling <- apply(rbind(tab_pos, tab_neg), 2, 
            function(x) return(x/sum(x)))
        pos_ic <- pos_neg_scaling[1, ] * ic
        neg_ic <- pos_neg_scaling[2, ] * ic
        ll <- list()
        ll$pos_ic <- pos_ic
        ll$neg_ic <- neg_ic
        ll$table_mat_pos_norm <- table_mat_pos_norm
        ll$table_mat_neg_norm <- table_mat_neg_norm
    }
    return(ll)
 }