tested edplot with alr gene

2022-01-26 13:35:57 +00:00 · 2022-01-26 13:35:57 +00:00 · 1b20f09075
commit 1b20f09075
parent 8750e3126a
6 changed files with 62 additions and 108 deletions
--- a/scripts/Header_TT.R
+++ b/scripts/Header_TT.R
@@ -169,7 +169,7 @@ if(!require(protr)){
  library(protr)
 }

-#if (!requireNamespace("BiocManager", quietly = TRUE))
+# if (!requireNamespace("BiocManager", quietly = TRUE))
 #  install.packages("BiocManager")

 #BiocManager::install("Logolas")
--- a/scripts/functions/ed_pfm_data.R
+++ b/scripts/functions/ed_pfm_data.R
@@ -1,3 +1,4 @@
+library(Logolas)
 source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
 #####################################################################################
 # DataED_PFM(): 
--- a/scripts/functions/logoP_msa.R
+++ b/scripts/functions/logoP_msa.R
@@ -397,7 +397,7 @@ LogoPlotMSA <- function(msaSeq_mut # chr vector
  
  #=========================================
  # Output
-  # Combined plot: logo ED plot
+  # Combined plot: logo ED/other logo plot
  # customised for ggseqlogo
  #=========================================

--- a/scripts/functions/my_logolas.R
+++ b/scripts/functions/my_logolas.R
@@ -686,6 +686,7 @@ mixEM = function(matrix_lik,prior,pi_init=NULL,control=list()){


 normalize = function(x){return(x/sum(x))}
+normalize4 = function(x){return(x/sum(x[!is.na(x)]))} 

 fixpoint = function(pi, matrix_lik, prior){
  pi = normalize(pmax(0,pi)) #avoid occasional problems with negative pis 
--- a/scripts/functions/tests/test_ed_pfm_data.R
+++ b/scripts/functions/tests/test_ed_pfm_data.R
@@ -1,3 +1,6 @@
+source("~/git/LSHTM_analysis/scripts/Header_TT.R")
+source("~/git/LSHTM_analysis/scripts/functions/ed_pfm_data.R")
+
 # data msa: mut
 my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only
 msaSeq_mut = my_data$V1
@@ -23,14 +26,12 @@ wt_seq = msaSeq_wt
 ################################
 # DataED_PFM():
 # script: ed_pfm_data.R
-source("~/git/LSHTM_analysis/scripts/functions/ed_pfm_data.R")
 ################################
-
 data_ed = DataED_PFM(msa_seq, wt_seq)
 names(data_ed)

 #par(mfrow = c(2,1))
-logomaker(msa_seq, type = "EDLogo")
+#logomaker(msa_seq, type = "EDLogo")
 ggseqlogo(data_ed[['combED_mutM']]
          , method = "custom")

--- a/scripts/functions/tests/test_logo_plots.R
+++ b/scripts/functions/tests/test_logo_plots.R
@@ -1,8 +1,8 @@
 #source("~/git/LSHTM_analysis/config/gid.R")
-source("~/git/LSHTM_analysis/config/pnca.R")
+#source("~/git/LSHTM_analysis/config/pnca.R")
 #source("~/git/LSHTM_analysis/config/embb.R")
 #source("~/git/LSHTM_analysis/config/katg.R")
-#source("~/git/LSHTM_analysis/config/alr.R")
+source("~/git/LSHTM_analysis/config/alr.R")
 #source("~/git/LSHTM_analysis/config/rpob.R")
 #---------------------------------------------------
 source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
@@ -62,91 +62,42 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
 #              , leg_tts = 16 # leg title size
 # )

-########################################
+####################################################
 # Logo plot MSA
 # Mutant and wild-type
-# wild-type and mutant aa
+# Logo type: 
+  # EDLogo
+  # Bits/probability (PFM matrix)
+  # Bits/probability (Raw MSA data)
 # Can select active site residues
  # specify {plot_positions}
  # To plot entire MSA, simply don't specify {plot_positions}
 # script: logoP_msa.R
-########################################
-# LogoPlotMSA(msaSeq_mut = msa_seq
-#             , msaSeq_wt = wt_seq
-#             # , use_pfm
-#             # , use_pfm_scaled
-#             # , use_ed 
-#             , msa_method = 'bits' # or probability
-#             , my_logo_col = "taylor"
-#             , plot_positions = 1:15
-#             , x_lab = "nsSNP position"
-#             , y_lab = ""
-#             , x_ats = 10 # text size
-#             , x_tangle = 90 # text angle
-#             , x_axis_offset = 0.05
-#             , y_ats = 15
-#             , y_tangle = 0
-#             , x_tts = 13 # title size
-#             , y_tts = 15
-#             , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
-#             , leg_dir = "horizontal" #can be vertical or horizontal
-#             , leg_ts = 16 # leg text size
-#             , leg_tts = 16 # leg title size
-# )

-########################################
-# ED Logo plot MSA
-# Mutant and wild-type
-########################################
-# library(Logolas)
-# library(ggseqlogo)
-# source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
-# source("~/git/LSHTM_analysis/scripts/functions/logoP_logolas.R")
-# 
-# # data msa: mut
-# my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only
-# msaSeq_mut = my_data$V1
-# msa_seq = msaSeq_mut
-# 
-# # data msa: wt
-# gene = "pncA"
-# drug = "pyrazinamide"
-# indir = paste0("~/git/Data/", drug , "/input/")
-# 
-# in_filename_fasta = paste0(tolower(gene), "2_f2.fasta")
-# infile_fasta = paste0(indir, in_filename_fasta)
-# cat("\nInput fasta file for WT: ", infile_fasta, "\n")  
-# 
-# msa2 = read.csv(infile_fasta, header = F)
-# head(msa2)
-# cat("\nLength of WT fasta:", nrow(msa2))
-# wt_seq = msa2$V1
-# head(wt_seq)  
-# msaSeq_wt = msa2$V1
-# wt_seq = msaSeq_wt
+# to select a small dataset: see test_ed_pfm_data.R
+#####################################################

-#PlotLogolasMSA()
-PlotLogolasMSA(msaSeq_mut = msa_seq
+LogoPlotMSA(msaSeq_mut = msa_seq
               , msaSeq_wt = wt_seq
-               , logo_type = c("bits_pfm") # "EDLogo", bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # can be "bits", "probability" or "custom"
-               , EDScore_type =  c("log") # see if this relevant, or source function should have it!
+               , logo_type = c("bits_pfm") # "EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw"
+               , EDScore_type =  c("log")
               , bg_prob = NULL
               , my_logo_col = "taylor" 
-               , plot_positions = c(1:15)
+               #, plot_positions = active_aa_pos
+               , x_axis_offset = 0.02
+               , x_axis_offset_filtered = 0.05
+               , y_axis_offset = 0.05
               #, y_breaks
               , x_lab_mut = "nsSNP-position"
               #, y_lab_mut
-               , x_ats = 13 # text size
-               , x_tangle = 90 # text angle
-               , x_axis_offset = 0.05
-               , x_axis_offset_filtered = 0.05
-               , y_axis_offset = 0.05
-               , y_ats = 13
+               , x_ats = 10
+               , x_tangle = 90
+               , y_ats = 15
               , y_tangle = 0
               , x_tts = 13
               , y_tts = 13
-               , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
-               , leg_dir = "horizontal" #can be vertical or horizontal
-               , leg_ts = 16 # leg text size
-               , leg_tts = 16 # leg title size
+               , leg_pos = "top"
+               , leg_dir = "horizontal" 
+               , leg_ts = 16 
+               , leg_tts = 16 
 )