tested edplot with alr gene
This commit is contained in:
parent
8750e3126a
commit
1b20f09075
6 changed files with 62 additions and 108 deletions
|
@ -169,7 +169,7 @@ if(!require(protr)){
|
|||
library(protr)
|
||||
}
|
||||
|
||||
#if (!requireNamespace("BiocManager", quietly = TRUE))
|
||||
# if (!requireNamespace("BiocManager", quietly = TRUE))
|
||||
# install.packages("BiocManager")
|
||||
|
||||
#BiocManager::install("Logolas")
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
library(Logolas)
|
||||
source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
|
||||
#####################################################################################
|
||||
# DataED_PFM():
|
||||
|
|
|
@ -397,7 +397,7 @@ LogoPlotMSA <- function(msaSeq_mut # chr vector
|
|||
|
||||
#=========================================
|
||||
# Output
|
||||
# Combined plot: logo ED plot
|
||||
# Combined plot: logo ED/other logo plot
|
||||
# customised for ggseqlogo
|
||||
#=========================================
|
||||
|
||||
|
|
|
@ -686,6 +686,7 @@ mixEM = function(matrix_lik,prior,pi_init=NULL,control=list()){
|
|||
|
||||
|
||||
normalize = function(x){return(x/sum(x))}
|
||||
normalize4 = function(x){return(x/sum(x[!is.na(x)]))}
|
||||
|
||||
fixpoint = function(pi, matrix_lik, prior){
|
||||
pi = normalize(pmax(0,pi)) #avoid occasional problems with negative pis
|
||||
|
@ -1228,9 +1229,9 @@ function (table, ic = FALSE, score = c("diff", "log", "log-odds",
|
|||
# get_logo_heights()
|
||||
#===========================
|
||||
|
||||
get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-odds",
|
||||
"probKL", "ratio", "unscaled_log", "wKL"), bg = NULL, epsilon = 0.01,
|
||||
opt = 1, symm = TRUE, alpha = 1, hist = FALSE, quant = 0.5)
|
||||
get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-odds",
|
||||
"probKL", "ratio", "unscaled_log", "wKL"), bg = NULL, epsilon = 0.01,
|
||||
opt = 1, symm = TRUE, alpha = 1, hist = FALSE, quant = 0.5)
|
||||
{
|
||||
if (ic & score == "unscaled_log") {
|
||||
warning("ic = TRUE not compatible with score = `unscaled-log`: switching to\n ic = FALSE")
|
||||
|
@ -1286,7 +1287,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
chars <- as.character(rownames(table_mat_norm))
|
||||
if (!ic) {
|
||||
if (score == "diff") {
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon) -
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon) -
|
||||
(bgmat + epsilon), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1317,7 +1318,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
})
|
||||
}
|
||||
else if (score == "log") {
|
||||
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
|
||||
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
|
||||
epsilon), base = 2), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1349,7 +1350,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
}
|
||||
else if (score == "log-odds") {
|
||||
if (opt == 1) {
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
|
||||
epsilon), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1381,7 +1382,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
})
|
||||
}
|
||||
else {
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon),
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon),
|
||||
2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1402,8 +1403,8 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
}
|
||||
}
|
||||
else if (score == "probKL") {
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon) *
|
||||
log((table_mat_norm + epsilon)/(bgmat + epsilon),
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon) *
|
||||
log((table_mat_norm + epsilon)/(bgmat + epsilon),
|
||||
base = 2), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1434,7 +1435,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
})
|
||||
}
|
||||
else if (score == "ratio") {
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
|
||||
epsilon), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1465,7 +1466,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
})
|
||||
}
|
||||
else if (score == "unscaled_log") {
|
||||
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
|
||||
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
|
||||
epsilon), base = 2), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1496,7 +1497,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
})
|
||||
}
|
||||
else if (score == "wKL") {
|
||||
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
|
||||
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
|
||||
epsilon), base = 2), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1533,7 +1534,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
else {
|
||||
if (score == "diff") {
|
||||
if (opt == 1) {
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon) -
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon) -
|
||||
(bgmat + epsilon), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1564,7 +1565,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
})
|
||||
}
|
||||
else {
|
||||
table_mat_adj <- apply(table_mat_norm + epsilon,
|
||||
table_mat_adj <- apply(table_mat_norm + epsilon,
|
||||
2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1585,7 +1586,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
}
|
||||
else if (score == "log") {
|
||||
if (opt == 1) {
|
||||
table_mat_adj <- apply(log((table_mat_norm +
|
||||
table_mat_adj <- apply(log((table_mat_norm +
|
||||
epsilon)/(bgmat + epsilon), base = 2), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1616,7 +1617,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
})
|
||||
}
|
||||
else {
|
||||
table_mat_adj <- apply(log(table_mat_norm + epsilon,
|
||||
table_mat_adj <- apply(log(table_mat_norm + epsilon,
|
||||
base = 2), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1637,7 +1638,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
}
|
||||
else if (score == "log-odds") {
|
||||
if (opt == 1) {
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
|
||||
epsilon), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1669,7 +1670,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
})
|
||||
}
|
||||
else {
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon),
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon),
|
||||
2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1691,8 +1692,8 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
}
|
||||
else if (score == "probKL") {
|
||||
if (opt == 1) {
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon) *
|
||||
log((table_mat_norm + epsilon)/(bgmat + epsilon),
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon) *
|
||||
log((table_mat_norm + epsilon)/(bgmat + epsilon),
|
||||
base = 2), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1723,8 +1724,8 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
})
|
||||
}
|
||||
else {
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon) *
|
||||
log(table_mat_norm + epsilon, base = 2), 2,
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon) *
|
||||
log(table_mat_norm + epsilon, base = 2), 2,
|
||||
function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1745,7 +1746,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
}
|
||||
else if (score == "ratio") {
|
||||
if (opt == 1) {
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
|
||||
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
|
||||
epsilon), 2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1776,7 +1777,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
})
|
||||
}
|
||||
else {
|
||||
table_mat_adj <- apply(table_mat_norm + scale,
|
||||
table_mat_adj <- apply(table_mat_norm + scale,
|
||||
2, function(x) {
|
||||
indices <- which(is.na(x))
|
||||
if (length(indices) == 0) {
|
||||
|
@ -1825,29 +1826,29 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
table_mat_neg[table_mat_neg >= 0] = 0
|
||||
table_mat_neg_norm <- apply(table_mat_neg, 2, function(x) return(x/sum(x)))
|
||||
table_mat_neg_norm[table_mat_neg_norm == "NaN"] = 0
|
||||
table_mat_norm <- replace(table_mat_norm, is.na(table_mat_norm),
|
||||
table_mat_norm <- replace(table_mat_norm, is.na(table_mat_norm),
|
||||
0)
|
||||
for (j in 1:dim(table_mat_neg_norm)[2]) {
|
||||
if (sum(table_mat_neg_norm[, j]) == 0) {
|
||||
table_mat_neg_norm[, j] <- normalize4(table_mat_neg_norm[,
|
||||
table_mat_neg_norm[, j] <- normalize4(table_mat_neg_norm[,
|
||||
j] + 0.001)
|
||||
}
|
||||
}
|
||||
for (j in 1:dim(table_mat_pos_norm)[2]) {
|
||||
if (sum(table_mat_pos_norm[, j]) == 0) {
|
||||
table_mat_pos_norm[, j] <- normalize4(table_mat_pos_norm[,
|
||||
table_mat_pos_norm[, j] <- normalize4(table_mat_pos_norm[,
|
||||
j] + 0.001)
|
||||
}
|
||||
}
|
||||
if (symm == TRUE) {
|
||||
table_mat_norm[which(is.na(table))] <- NA
|
||||
ic <- 0.5 * (ic_computer(table_mat_norm, alpha, hist = hist,
|
||||
bg = bgmat) + ic_computer(bgmat, alpha, hist = hist,
|
||||
ic <- 0.5 * (ic_computer(table_mat_norm, alpha, hist = hist,
|
||||
bg = bgmat) + ic_computer(bgmat, alpha, hist = hist,
|
||||
bg = table_mat_norm))
|
||||
}
|
||||
else {
|
||||
table_mat_norm[which(is.na(table))] <- NA
|
||||
ic <- ic_computer(table_mat_norm, alpha, hist = hist,
|
||||
ic <- ic_computer(table_mat_norm, alpha, hist = hist,
|
||||
bg = bgmat)
|
||||
}
|
||||
tab_neg <- apply(table_mat_adj, 2, function(x) {
|
||||
|
@ -1870,7 +1871,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
|
|||
})
|
||||
tab_pos[tab_pos == 0] <- 0.001
|
||||
tab_neg[tab_neg == 0] <- 0.001
|
||||
pos_neg_scaling <- apply(rbind(tab_pos, tab_neg), 2,
|
||||
pos_neg_scaling <- apply(rbind(tab_pos, tab_neg), 2,
|
||||
function(x) return(x/sum(x)))
|
||||
pos_ic <- pos_neg_scaling[1, ] * ic
|
||||
neg_ic <- pos_neg_scaling[2, ] * ic
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
source("~/git/LSHTM_analysis/scripts/Header_TT.R")
|
||||
source("~/git/LSHTM_analysis/scripts/functions/ed_pfm_data.R")
|
||||
|
||||
# data msa: mut
|
||||
my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only
|
||||
msaSeq_mut = my_data$V1
|
||||
|
@ -23,14 +26,12 @@ wt_seq = msaSeq_wt
|
|||
################################
|
||||
# DataED_PFM():
|
||||
# script: ed_pfm_data.R
|
||||
source("~/git/LSHTM_analysis/scripts/functions/ed_pfm_data.R")
|
||||
################################
|
||||
|
||||
data_ed = DataED_PFM(msa_seq, wt_seq)
|
||||
names(data_ed)
|
||||
|
||||
#par(mfrow = c(2,1))
|
||||
logomaker(msa_seq, type = "EDLogo")
|
||||
#logomaker(msa_seq, type = "EDLogo")
|
||||
ggseqlogo(data_ed[['combED_mutM']]
|
||||
, method = "custom")
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||
source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||
#source("~/git/LSHTM_analysis/config/alr.R")
|
||||
source("~/git/LSHTM_analysis/config/alr.R")
|
||||
#source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
#---------------------------------------------------
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
|
@ -62,91 +62,42 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
|||
# , leg_tts = 16 # leg title size
|
||||
# )
|
||||
|
||||
########################################
|
||||
####################################################
|
||||
# Logo plot MSA
|
||||
# Mutant and wild-type
|
||||
# wild-type and mutant aa
|
||||
# Logo type:
|
||||
# EDLogo
|
||||
# Bits/probability (PFM matrix)
|
||||
# Bits/probability (Raw MSA data)
|
||||
# Can select active site residues
|
||||
# specify {plot_positions}
|
||||
# To plot entire MSA, simply don't specify {plot_positions}
|
||||
# script: logoP_msa.R
|
||||
########################################
|
||||
# LogoPlotMSA(msaSeq_mut = msa_seq
|
||||
# , msaSeq_wt = wt_seq
|
||||
# # , use_pfm
|
||||
# # , use_pfm_scaled
|
||||
# # , use_ed
|
||||
# , msa_method = 'bits' # or probability
|
||||
# , my_logo_col = "taylor"
|
||||
# , plot_positions = 1:15
|
||||
# , x_lab = "nsSNP position"
|
||||
# , y_lab = ""
|
||||
# , x_ats = 10 # text size
|
||||
# , x_tangle = 90 # text angle
|
||||
# , x_axis_offset = 0.05
|
||||
# , y_ats = 15
|
||||
# , y_tangle = 0
|
||||
# , x_tts = 13 # title size
|
||||
# , y_tts = 15
|
||||
# , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
|
||||
# , leg_dir = "horizontal" #can be vertical or horizontal
|
||||
# , leg_ts = 16 # leg text size
|
||||
# , leg_tts = 16 # leg title size
|
||||
# )
|
||||
|
||||
########################################
|
||||
# ED Logo plot MSA
|
||||
# Mutant and wild-type
|
||||
########################################
|
||||
# library(Logolas)
|
||||
# library(ggseqlogo)
|
||||
# source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
|
||||
# source("~/git/LSHTM_analysis/scripts/functions/logoP_logolas.R")
|
||||
#
|
||||
# # data msa: mut
|
||||
# my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only
|
||||
# msaSeq_mut = my_data$V1
|
||||
# msa_seq = msaSeq_mut
|
||||
#
|
||||
# # data msa: wt
|
||||
# gene = "pncA"
|
||||
# drug = "pyrazinamide"
|
||||
# indir = paste0("~/git/Data/", drug , "/input/")
|
||||
#
|
||||
# in_filename_fasta = paste0(tolower(gene), "2_f2.fasta")
|
||||
# infile_fasta = paste0(indir, in_filename_fasta)
|
||||
# cat("\nInput fasta file for WT: ", infile_fasta, "\n")
|
||||
#
|
||||
# msa2 = read.csv(infile_fasta, header = F)
|
||||
# head(msa2)
|
||||
# cat("\nLength of WT fasta:", nrow(msa2))
|
||||
# wt_seq = msa2$V1
|
||||
# head(wt_seq)
|
||||
# msaSeq_wt = msa2$V1
|
||||
# wt_seq = msaSeq_wt
|
||||
# to select a small dataset: see test_ed_pfm_data.R
|
||||
#####################################################
|
||||
|
||||
#PlotLogolasMSA()
|
||||
PlotLogolasMSA(msaSeq_mut = msa_seq
|
||||
LogoPlotMSA(msaSeq_mut = msa_seq
|
||||
, msaSeq_wt = wt_seq
|
||||
, logo_type = c("bits_pfm") # "EDLogo", bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # can be "bits", "probability" or "custom"
|
||||
, EDScore_type = c("log") # see if this relevant, or source function should have it!
|
||||
, logo_type = c("bits_pfm") # "EDLogo", bits_pfm", "probability_pfm", "bits_raw", "probability_raw")
|
||||
, EDScore_type = c("log")
|
||||
, bg_prob = NULL
|
||||
, my_logo_col = "taylor"
|
||||
, plot_positions = c(1:15)
|
||||
#, plot_positions = active_aa_pos
|
||||
, x_axis_offset = 0.02
|
||||
, x_axis_offset_filtered = 0.05
|
||||
, y_axis_offset = 0.05
|
||||
#, y_breaks
|
||||
, x_lab_mut = "nsSNP-position"
|
||||
#, y_lab_mut
|
||||
, x_ats = 13 # text size
|
||||
, x_tangle = 90 # text angle
|
||||
, x_axis_offset = 0.05
|
||||
, x_axis_offset_filtered = 0.05
|
||||
, y_axis_offset = 0.05
|
||||
, y_ats = 13
|
||||
, x_ats = 10
|
||||
, x_tangle = 90
|
||||
, y_ats = 15
|
||||
, y_tangle = 0
|
||||
, x_tts = 13
|
||||
, y_tts = 13
|
||||
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
|
||||
, leg_dir = "horizontal" #can be vertical or horizontal
|
||||
, leg_ts = 16 # leg text size
|
||||
, leg_tts = 16 # leg title size
|
||||
, leg_pos = "top"
|
||||
, leg_dir = "horizontal"
|
||||
, leg_ts = 16
|
||||
, leg_tts = 16
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue