tested edplot with alr gene

This commit is contained in:
Tanushree Tunstall 2022-01-26 13:35:57 +00:00
parent 8750e3126a
commit 1b20f09075
6 changed files with 62 additions and 108 deletions

View file

@ -169,7 +169,7 @@ if(!require(protr)){
library(protr) library(protr)
} }
#if (!requireNamespace("BiocManager", quietly = TRUE)) # if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager") # install.packages("BiocManager")
#BiocManager::install("Logolas") #BiocManager::install("Logolas")

View file

@ -1,3 +1,4 @@
library(Logolas)
source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R") source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
##################################################################################### #####################################################################################
# DataED_PFM(): # DataED_PFM():

View file

@ -397,7 +397,7 @@ LogoPlotMSA <- function(msaSeq_mut # chr vector
#========================================= #=========================================
# Output # Output
# Combined plot: logo ED plot # Combined plot: logo ED/other logo plot
# customised for ggseqlogo # customised for ggseqlogo
#========================================= #=========================================

View file

@ -686,6 +686,7 @@ mixEM = function(matrix_lik,prior,pi_init=NULL,control=list()){
normalize = function(x){return(x/sum(x))} normalize = function(x){return(x/sum(x))}
normalize4 = function(x){return(x/sum(x[!is.na(x)]))}
fixpoint = function(pi, matrix_lik, prior){ fixpoint = function(pi, matrix_lik, prior){
pi = normalize(pmax(0,pi)) #avoid occasional problems with negative pis pi = normalize(pmax(0,pi)) #avoid occasional problems with negative pis
@ -1228,9 +1229,9 @@ function (table, ic = FALSE, score = c("diff", "log", "log-odds",
# get_logo_heights() # get_logo_heights()
#=========================== #===========================
get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-odds", get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-odds",
"probKL", "ratio", "unscaled_log", "wKL"), bg = NULL, epsilon = 0.01, "probKL", "ratio", "unscaled_log", "wKL"), bg = NULL, epsilon = 0.01,
opt = 1, symm = TRUE, alpha = 1, hist = FALSE, quant = 0.5) opt = 1, symm = TRUE, alpha = 1, hist = FALSE, quant = 0.5)
{ {
if (ic & score == "unscaled_log") { if (ic & score == "unscaled_log") {
warning("ic = TRUE not compatible with score = `unscaled-log`: switching to\n ic = FALSE") warning("ic = TRUE not compatible with score = `unscaled-log`: switching to\n ic = FALSE")
@ -1286,7 +1287,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
chars <- as.character(rownames(table_mat_norm)) chars <- as.character(rownames(table_mat_norm))
if (!ic) { if (!ic) {
if (score == "diff") { if (score == "diff") {
table_mat_adj <- apply((table_mat_norm + epsilon) - table_mat_adj <- apply((table_mat_norm + epsilon) -
(bgmat + epsilon), 2, function(x) { (bgmat + epsilon), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1317,7 +1318,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
}) })
} }
else if (score == "log") { else if (score == "log") {
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat + table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
epsilon), base = 2), 2, function(x) { epsilon), base = 2), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1349,7 +1350,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
} }
else if (score == "log-odds") { else if (score == "log-odds") {
if (opt == 1) { if (opt == 1) {
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat + table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
epsilon), 2, function(x) { epsilon), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1381,7 +1382,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
}) })
} }
else { else {
table_mat_adj <- apply((table_mat_norm + epsilon), table_mat_adj <- apply((table_mat_norm + epsilon),
2, function(x) { 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1402,8 +1403,8 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
} }
} }
else if (score == "probKL") { else if (score == "probKL") {
table_mat_adj <- apply((table_mat_norm + epsilon) * table_mat_adj <- apply((table_mat_norm + epsilon) *
log((table_mat_norm + epsilon)/(bgmat + epsilon), log((table_mat_norm + epsilon)/(bgmat + epsilon),
base = 2), 2, function(x) { base = 2), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1434,7 +1435,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
}) })
} }
else if (score == "ratio") { else if (score == "ratio") {
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat + table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
epsilon), 2, function(x) { epsilon), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1465,7 +1466,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
}) })
} }
else if (score == "unscaled_log") { else if (score == "unscaled_log") {
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat + table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
epsilon), base = 2), 2, function(x) { epsilon), base = 2), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1496,7 +1497,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
}) })
} }
else if (score == "wKL") { else if (score == "wKL") {
table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat + table_mat_adj <- apply(log((table_mat_norm + epsilon)/(bgmat +
epsilon), base = 2), 2, function(x) { epsilon), base = 2), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1533,7 +1534,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
else { else {
if (score == "diff") { if (score == "diff") {
if (opt == 1) { if (opt == 1) {
table_mat_adj <- apply((table_mat_norm + epsilon) - table_mat_adj <- apply((table_mat_norm + epsilon) -
(bgmat + epsilon), 2, function(x) { (bgmat + epsilon), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1564,7 +1565,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
}) })
} }
else { else {
table_mat_adj <- apply(table_mat_norm + epsilon, table_mat_adj <- apply(table_mat_norm + epsilon,
2, function(x) { 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1585,7 +1586,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
} }
else if (score == "log") { else if (score == "log") {
if (opt == 1) { if (opt == 1) {
table_mat_adj <- apply(log((table_mat_norm + table_mat_adj <- apply(log((table_mat_norm +
epsilon)/(bgmat + epsilon), base = 2), 2, function(x) { epsilon)/(bgmat + epsilon), base = 2), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1616,7 +1617,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
}) })
} }
else { else {
table_mat_adj <- apply(log(table_mat_norm + epsilon, table_mat_adj <- apply(log(table_mat_norm + epsilon,
base = 2), 2, function(x) { base = 2), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1637,7 +1638,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
} }
else if (score == "log-odds") { else if (score == "log-odds") {
if (opt == 1) { if (opt == 1) {
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat + table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
epsilon), 2, function(x) { epsilon), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1669,7 +1670,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
}) })
} }
else { else {
table_mat_adj <- apply((table_mat_norm + epsilon), table_mat_adj <- apply((table_mat_norm + epsilon),
2, function(x) { 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1691,8 +1692,8 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
} }
else if (score == "probKL") { else if (score == "probKL") {
if (opt == 1) { if (opt == 1) {
table_mat_adj <- apply((table_mat_norm + epsilon) * table_mat_adj <- apply((table_mat_norm + epsilon) *
log((table_mat_norm + epsilon)/(bgmat + epsilon), log((table_mat_norm + epsilon)/(bgmat + epsilon),
base = 2), 2, function(x) { base = 2), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1723,8 +1724,8 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
}) })
} }
else { else {
table_mat_adj <- apply((table_mat_norm + epsilon) * table_mat_adj <- apply((table_mat_norm + epsilon) *
log(table_mat_norm + epsilon, base = 2), 2, log(table_mat_norm + epsilon, base = 2), 2,
function(x) { function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1745,7 +1746,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
} }
else if (score == "ratio") { else if (score == "ratio") {
if (opt == 1) { if (opt == 1) {
table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat + table_mat_adj <- apply((table_mat_norm + epsilon)/(bgmat +
epsilon), 2, function(x) { epsilon), 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1776,7 +1777,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
}) })
} }
else { else {
table_mat_adj <- apply(table_mat_norm + scale, table_mat_adj <- apply(table_mat_norm + scale,
2, function(x) { 2, function(x) {
indices <- which(is.na(x)) indices <- which(is.na(x))
if (length(indices) == 0) { if (length(indices) == 0) {
@ -1825,29 +1826,29 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
table_mat_neg[table_mat_neg >= 0] = 0 table_mat_neg[table_mat_neg >= 0] = 0
table_mat_neg_norm <- apply(table_mat_neg, 2, function(x) return(x/sum(x))) table_mat_neg_norm <- apply(table_mat_neg, 2, function(x) return(x/sum(x)))
table_mat_neg_norm[table_mat_neg_norm == "NaN"] = 0 table_mat_neg_norm[table_mat_neg_norm == "NaN"] = 0
table_mat_norm <- replace(table_mat_norm, is.na(table_mat_norm), table_mat_norm <- replace(table_mat_norm, is.na(table_mat_norm),
0) 0)
for (j in 1:dim(table_mat_neg_norm)[2]) { for (j in 1:dim(table_mat_neg_norm)[2]) {
if (sum(table_mat_neg_norm[, j]) == 0) { if (sum(table_mat_neg_norm[, j]) == 0) {
table_mat_neg_norm[, j] <- normalize4(table_mat_neg_norm[, table_mat_neg_norm[, j] <- normalize4(table_mat_neg_norm[,
j] + 0.001) j] + 0.001)
} }
} }
for (j in 1:dim(table_mat_pos_norm)[2]) { for (j in 1:dim(table_mat_pos_norm)[2]) {
if (sum(table_mat_pos_norm[, j]) == 0) { if (sum(table_mat_pos_norm[, j]) == 0) {
table_mat_pos_norm[, j] <- normalize4(table_mat_pos_norm[, table_mat_pos_norm[, j] <- normalize4(table_mat_pos_norm[,
j] + 0.001) j] + 0.001)
} }
} }
if (symm == TRUE) { if (symm == TRUE) {
table_mat_norm[which(is.na(table))] <- NA table_mat_norm[which(is.na(table))] <- NA
ic <- 0.5 * (ic_computer(table_mat_norm, alpha, hist = hist, ic <- 0.5 * (ic_computer(table_mat_norm, alpha, hist = hist,
bg = bgmat) + ic_computer(bgmat, alpha, hist = hist, bg = bgmat) + ic_computer(bgmat, alpha, hist = hist,
bg = table_mat_norm)) bg = table_mat_norm))
} }
else { else {
table_mat_norm[which(is.na(table))] <- NA table_mat_norm[which(is.na(table))] <- NA
ic <- ic_computer(table_mat_norm, alpha, hist = hist, ic <- ic_computer(table_mat_norm, alpha, hist = hist,
bg = bgmat) bg = bgmat)
} }
tab_neg <- apply(table_mat_adj, 2, function(x) { tab_neg <- apply(table_mat_adj, 2, function(x) {
@ -1870,7 +1871,7 @@ get_logo_heights <- function (table, ic = FALSE, score = c("diff", "log", "log-o
}) })
tab_pos[tab_pos == 0] <- 0.001 tab_pos[tab_pos == 0] <- 0.001
tab_neg[tab_neg == 0] <- 0.001 tab_neg[tab_neg == 0] <- 0.001
pos_neg_scaling <- apply(rbind(tab_pos, tab_neg), 2, pos_neg_scaling <- apply(rbind(tab_pos, tab_neg), 2,
function(x) return(x/sum(x))) function(x) return(x/sum(x)))
pos_ic <- pos_neg_scaling[1, ] * ic pos_ic <- pos_neg_scaling[1, ] * ic
neg_ic <- pos_neg_scaling[2, ] * ic neg_ic <- pos_neg_scaling[2, ] * ic

View file

@ -1,3 +1,6 @@
source("~/git/LSHTM_analysis/scripts/Header_TT.R")
source("~/git/LSHTM_analysis/scripts/functions/ed_pfm_data.R")
# data msa: mut # data msa: mut
my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only
msaSeq_mut = my_data$V1 msaSeq_mut = my_data$V1
@ -23,14 +26,12 @@ wt_seq = msaSeq_wt
################################ ################################
# DataED_PFM(): # DataED_PFM():
# script: ed_pfm_data.R # script: ed_pfm_data.R
source("~/git/LSHTM_analysis/scripts/functions/ed_pfm_data.R")
################################ ################################
data_ed = DataED_PFM(msa_seq, wt_seq) data_ed = DataED_PFM(msa_seq, wt_seq)
names(data_ed) names(data_ed)
#par(mfrow = c(2,1)) #par(mfrow = c(2,1))
logomaker(msa_seq, type = "EDLogo") #logomaker(msa_seq, type = "EDLogo")
ggseqlogo(data_ed[['combED_mutM']] ggseqlogo(data_ed[['combED_mutM']]
, method = "custom") , method = "custom")

View file

@ -1,8 +1,8 @@
#source("~/git/LSHTM_analysis/config/gid.R") #source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/config/pnca.R") #source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/embb.R") #source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R") #source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/alr.R") source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/rpob.R") #source("~/git/LSHTM_analysis/config/rpob.R")
#--------------------------------------------------- #---------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
@ -62,91 +62,42 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# , leg_tts = 16 # leg title size # , leg_tts = 16 # leg title size
# ) # )
######################################## ####################################################
# Logo plot MSA # Logo plot MSA
# Mutant and wild-type # Mutant and wild-type
# wild-type and mutant aa # Logo type:
# EDLogo
# Bits/probability (PFM matrix)
# Bits/probability (Raw MSA data)
# Can select active site residues # Can select active site residues
# specify {plot_positions} # specify {plot_positions}
# To plot entire MSA, simply don't specify {plot_positions} # To plot entire MSA, simply don't specify {plot_positions}
# script: logoP_msa.R # script: logoP_msa.R
########################################
# LogoPlotMSA(msaSeq_mut = msa_seq
# , msaSeq_wt = wt_seq
# # , use_pfm
# # , use_pfm_scaled
# # , use_ed
# , msa_method = 'bits' # or probability
# , my_logo_col = "taylor"
# , plot_positions = 1:15
# , x_lab = "nsSNP position"
# , y_lab = ""
# , x_ats = 10 # text size
# , x_tangle = 90 # text angle
# , x_axis_offset = 0.05
# , y_ats = 15
# , y_tangle = 0
# , x_tts = 13 # title size
# , y_tts = 15
# , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
# , leg_dir = "horizontal" #can be vertical or horizontal
# , leg_ts = 16 # leg text size
# , leg_tts = 16 # leg title size
# )
######################################## # to select a small dataset: see test_ed_pfm_data.R
# ED Logo plot MSA #####################################################
# Mutant and wild-type
########################################
# library(Logolas)
# library(ggseqlogo)
# source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
# source("~/git/LSHTM_analysis/scripts/functions/logoP_logolas.R")
#
# # data msa: mut
# my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only
# msaSeq_mut = my_data$V1
# msa_seq = msaSeq_mut
#
# # data msa: wt
# gene = "pncA"
# drug = "pyrazinamide"
# indir = paste0("~/git/Data/", drug , "/input/")
#
# in_filename_fasta = paste0(tolower(gene), "2_f2.fasta")
# infile_fasta = paste0(indir, in_filename_fasta)
# cat("\nInput fasta file for WT: ", infile_fasta, "\n")
#
# msa2 = read.csv(infile_fasta, header = F)
# head(msa2)
# cat("\nLength of WT fasta:", nrow(msa2))
# wt_seq = msa2$V1
# head(wt_seq)
# msaSeq_wt = msa2$V1
# wt_seq = msaSeq_wt
#PlotLogolasMSA() LogoPlotMSA(msaSeq_mut = msa_seq
PlotLogolasMSA(msaSeq_mut = msa_seq
, msaSeq_wt = wt_seq , msaSeq_wt = wt_seq
, logo_type = c("bits_pfm") # "EDLogo", bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # can be "bits", "probability" or "custom" , logo_type = c("bits_pfm") # "EDLogo", bits_pfm", "probability_pfm", "bits_raw", "probability_raw")
, EDScore_type = c("log") # see if this relevant, or source function should have it! , EDScore_type = c("log")
, bg_prob = NULL , bg_prob = NULL
, my_logo_col = "taylor" , my_logo_col = "taylor"
, plot_positions = c(1:15) #, plot_positions = active_aa_pos
, x_axis_offset = 0.02
, x_axis_offset_filtered = 0.05
, y_axis_offset = 0.05
#, y_breaks #, y_breaks
, x_lab_mut = "nsSNP-position" , x_lab_mut = "nsSNP-position"
#, y_lab_mut #, y_lab_mut
, x_ats = 13 # text size , x_ats = 10
, x_tangle = 90 # text angle , x_tangle = 90
, x_axis_offset = 0.05 , y_ats = 15
, x_axis_offset_filtered = 0.05
, y_axis_offset = 0.05
, y_ats = 13
, y_tangle = 0 , y_tangle = 0
, x_tts = 13 , x_tts = 13
, y_tts = 13 , y_tts = 13
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) , leg_pos = "top"
, leg_dir = "horizontal" #can be vertical or horizontal , leg_dir = "horizontal"
, leg_ts = 16 # leg text size , leg_ts = 16
, leg_tts = 16 # leg title size , leg_tts = 16
) )