tested edplot with alr gene

This commit is contained in:
Tanushree Tunstall 2022-01-26 13:35:57 +00:00
parent 8750e3126a
commit 1b20f09075
6 changed files with 62 additions and 108 deletions

View file

@ -169,7 +169,7 @@ if(!require(protr)){
library(protr)
}
#if (!requireNamespace("BiocManager", quietly = TRUE))
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("Logolas")

View file

@ -1,3 +1,4 @@
library(Logolas)
source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
#####################################################################################
# DataED_PFM():

View file

@ -397,7 +397,7 @@ LogoPlotMSA <- function(msaSeq_mut # chr vector
#=========================================
# Output
# Combined plot: logo ED plot
# Combined plot: logo ED/other logo plot
# customised for ggseqlogo
#=========================================

View file

@ -686,6 +686,7 @@ mixEM = function(matrix_lik,prior,pi_init=NULL,control=list()){
# Divide every entry of x by the sum of all its entries.
normalize = function(x){
  total = sum(x)
  x / total
}
# Divide every entry of x by the sum of its non-NA entries;
# entries that are NA in x remain NA in the result.
normalize4 = function(x){
  observed = x[!is.na(x)]
  x / sum(observed)
}
fixpoint = function(pi, matrix_lik, prior){
pi = normalize(pmax(0,pi)) #avoid occasional problems with negative pis

View file

@ -1,3 +1,6 @@
source("~/git/LSHTM_analysis/scripts/Header_TT.R")
source("~/git/LSHTM_analysis/scripts/functions/ed_pfm_data.R")
# data msa: mut
my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only
msaSeq_mut = my_data$V1
@ -23,14 +26,12 @@ wt_seq = msaSeq_wt
################################
# DataED_PFM():
# script: ed_pfm_data.R
source("~/git/LSHTM_analysis/scripts/functions/ed_pfm_data.R")
################################
data_ed = DataED_PFM(msa_seq, wt_seq)
names(data_ed)
#par(mfrow = c(2,1))
logomaker(msa_seq, type = "EDLogo")
#logomaker(msa_seq, type = "EDLogo")
ggseqlogo(data_ed[['combED_mutM']]
, method = "custom")

View file

@ -1,8 +1,8 @@
#source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
#---------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
@ -62,91 +62,42 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# , leg_tts = 16 # leg title size
# )
########################################
####################################################
# Logo plot MSA
# Mutant and wild-type
# wild-type and mutant aa
# Logo type:
# EDLogo
# Bits/probability (PFM matrix)
# Bits/probability (Raw MSA data)
# Can select active site residues
# specify {plot_positions}
# To plot entire MSA, simply don't specify {plot_positions}
# script: logoP_msa.R
########################################
# LogoPlotMSA(msaSeq_mut = msa_seq
# , msaSeq_wt = wt_seq
# # , use_pfm
# # , use_pfm_scaled
# # , use_ed
# , msa_method = 'bits' # or probability
# , my_logo_col = "taylor"
# , plot_positions = 1:15
# , x_lab = "nsSNP position"
# , y_lab = ""
# , x_ats = 10 # text size
# , x_tangle = 90 # text angle
# , x_axis_offset = 0.05
# , y_ats = 15
# , y_tangle = 0
# , x_tts = 13 # title size
# , y_tts = 15
# , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
# , leg_dir = "horizontal" #can be vertical or horizontal
# , leg_ts = 16 # leg text size
# , leg_tts = 16 # leg title size
# )
########################################
# ED Logo plot MSA
# Mutant and wild-type
########################################
# library(Logolas)
# library(ggseqlogo)
# source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
# source("~/git/LSHTM_analysis/scripts/functions/logoP_logolas.R")
#
# # data msa: mut
# my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only
# msaSeq_mut = my_data$V1
# msa_seq = msaSeq_mut
#
# # data msa: wt
# gene = "pncA"
# drug = "pyrazinamide"
# indir = paste0("~/git/Data/", drug , "/input/")
#
# in_filename_fasta = paste0(tolower(gene), "2_f2.fasta")
# infile_fasta = paste0(indir, in_filename_fasta)
# cat("\nInput fasta file for WT: ", infile_fasta, "\n")
#
# msa2 = read.csv(infile_fasta, header = F)
# head(msa2)
# cat("\nLength of WT fasta:", nrow(msa2))
# wt_seq = msa2$V1
# head(wt_seq)
# msaSeq_wt = msa2$V1
# wt_seq = msaSeq_wt
# to select a small dataset: see test_ed_pfm_data.R
#####################################################
#PlotLogolasMSA()
# Logo plot of the MSA: mutant sequences (msa_seq) against the wild-type (wt_seq).
# NOTE(review): this view appears to interleave pre- and post-commit lines
# (two opening calls, repeated arguments) -- confirm against the file itself.
PlotLogolasMSA(msaSeq_mut = msa_seq
LogoPlotMSA(msaSeq_mut = msa_seq
, msaSeq_wt = wt_seq
, logo_type = c("bits_pfm") # "EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw" # can be "bits", "probability" or "custom"
, EDScore_type = c("log") # TODO: check whether this is relevant here, or whether the sourced function should set it
, logo_type = c("bits_pfm") # "EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw"
, EDScore_type = c("log")
, bg_prob = NULL
, my_logo_col = "taylor"
, plot_positions = c(1:15)
#, plot_positions = active_aa_pos
, x_axis_offset = 0.02
, x_axis_offset_filtered = 0.05
, y_axis_offset = 0.05
#, y_breaks
, x_lab_mut = "nsSNP-position"
#, y_lab_mut
, x_ats = 13 # text size
, x_tangle = 90 # text angle
, x_axis_offset = 0.05
, x_axis_offset_filtered = 0.05
, y_axis_offset = 0.05
, y_ats = 13
, x_ats = 10
, x_tangle = 90
, y_ats = 15
, y_tangle = 0
, x_tts = 13
, y_tts = 13
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
, leg_dir = "horizontal" #can be vertical or horizontal
, leg_ts = 16 # leg text size
, leg_tts = 16 # leg title size
, leg_pos = "top"
, leg_dir = "horizontal"
, leg_ts = 16
, leg_tts = 16
)