renamed logoP_msa.R --> logoP_msa_raw.R

This commit is contained in:
Tanushree Tunstall 2022-01-26 11:03:45 +00:00
parent 6a9f4a0cab
commit 3bc5dcbad3
4 changed files with 316 additions and 267 deletions

View file

@ -1,39 +1,14 @@
library(Logolas)
library(ggseqlogo)
source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
# Example inputs for the MSA logo plot: one mutant MSA and one wild-type MSA.
# data msa: mut
# Read without a header row; the first column (V1) is used as the mutant MSA vector.
my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) # author note: 15 cols only
msaSeq_mut = my_data$V1
# data msa: wt
# The wild-type input path is assembled from the drug and gene names set here.
gene = "pncA"
drug = "pyrazinamide"
indir = paste0("~/git/Data/", drug , "/input/")
# File name is the lower-cased gene name followed by "2_f2.fasta".
in_filename_fasta = paste0(tolower(gene), "2_f2.fasta")
infile_fasta = paste0(indir, in_filename_fasta)
cat("\nInput fasta file for WT: ", infile_fasta, "\n")
# The fasta file is read with read.csv() and no header row.
# NOTE(review): presumably the file holds one sequence per line with no commas,
# so every line lands in column V1 -- TODO confirm the file layout.
msa2 = read.csv(infile_fasta, header = F)
head(msa2)
# The value printed here is the number of rows read from the file.
cat("\nLength of WT fasta:", nrow(msa2))
#wt_seq = msa2$V1
#head(wt_seq)
# First column (V1) is used as the wild-type MSA vector.
msaSeq_wt = msa2$V1
########################################### ###########################################
PlotLogolasMSA <- function(msaSeq_mut # chr vector PlotLogolasMSA <- function(msaSeq_mut # chr vector
, msaSeq_wt # chr vector , msaSeq_wt # chr vector
, msa_method = c("custom") # will be c("EDLogo", "Logo)# , msa_method = c("custom") # will be c("EDLogo", "Logo)#
, EDLogo_score = c("log")# can be: "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL" , ED_score = c("log")# can be: "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL"
, bg_prob = NULL , bg_prob = NULL
, my_logo_col = "chemistry" , my_logo_col = "chemistry"
, plot_positions = c(1, 10, 14, 8) , plot_positions
, y_breaks , y_breaks
, x_lab = "Wild-type position" , x_lab_mut = "nsSNP-position"
, y_lab = "" , y_lab_mut = ""
, x_ats = 13 # text size , x_ats = 13 # text size
, x_tangle = 90 # text angle , x_tangle = 90 # text angle
, x_axis_offset = 0.05 # dist b/w y-axis and plot start , x_axis_offset = 0.05 # dist b/w y-axis and plot start
@ -50,6 +25,7 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
) )
{ {
#''' Can be put into a separate EDData plot function'''
dash_control = list() dash_control = list()
dash_control_default <- list(concentration = NULL, mode = NULL, dash_control_default <- list(concentration = NULL, mode = NULL,
optmethod = "mixEM", sample_weights = NULL, verbose = FALSE, optmethod = "mixEM", sample_weights = NULL, verbose = FALSE,
@ -79,22 +55,23 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
logo_mut_h = get_logo_heights(pfm_mut_scaled logo_mut_h = get_logo_heights(pfm_mut_scaled
, bg = bg_prob , bg = bg_prob
, score = EDLogo_score) , score = ED_score)
logo_mut_h$pos_ic
logo_mut_h$neg_ic
# TODO: Add sanity check! cat("\nGetting logo_heights from Logolas package...")
#<...
#...>
pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM
pos_mutS = logo_mut_h$pos_ic; pos_mutS pos_mutS = logo_mut_h[['pos_ic']]; pos_mutS
pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED
neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1) neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1)
neg_mutS = logo_mut_h$neg_ic; neg_mutS neg_mutS = logo_mut_h[['neg_ic']]; neg_mutS
neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED
if (length(pos_mutS) && length(neg_mutS) == dim(pfm_mut)[2]){
cat("\nPASS: pfm calculated successfully including scaled matrix"
, "\nDim of pfm matrix:", dim(pfm_mut)[1], dim(pfm_mut)[2])
}
combED_mutM = pos_mutED + neg_mutED combED_mutM = pos_mutED + neg_mutED
# Construct the x-axis: mutant MSA # Construct the x-axis: mutant MSA
@ -110,18 +87,21 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
logo_wt_h = get_logo_heights(pfm_wt_scaled logo_wt_h = get_logo_heights(pfm_wt_scaled
, bg = bg_prob , bg = bg_prob
, score = EDLogo_score) , score = ED_score)
logo_wt_h$pos_ic
logo_wt_h$neg_ic
pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM
pos_wtS = logo_wt_h$pos_ic; pos_wtS pos_wtS = logo_wt_h[['pos_ic']]; pos_wtS
pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED
neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1) neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1)
neg_wtS = logo_wt_h$neg_ic; neg_wtS neg_wtS = logo_wt_h[['neg_ic']]; neg_wtS
neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED
if (length(pos_wtS) && length(neg_wtS) == dim(pfm_wt)[2]){
cat("\nPASS: pfm calculated successfully including scaled matrix"
, "\nDim of pfm matrix:", dim(pfm_wt)[1], dim(pfm_wt)[2])
}
combED_wtM = pos_wtED + neg_wtED combED_wtM = pos_wtED + neg_wtED
# Construct the x-axis: mutant MSA # Construct the x-axis: mutant MSA
@ -136,6 +116,7 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
cat("\n===========================================" cat("\n==========================================="
, "\nGenerated PFM mut: No filtering" , "\nGenerated PFM mut: No filtering"
, "\n===========================================") , "\n===========================================")
plot_mut_edM = combED_mutM plot_mut_edM = combED_mutM
#------------------------------ #------------------------------
@ -144,6 +125,7 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
cat("\n===========================================" cat("\n==========================================="
, "\nGenerated PFM WT: No filtering" , "\nGenerated PFM WT: No filtering"
, "\n===========================================") , "\n===========================================")
plot_wt_edM = combED_wtM plot_wt_edM = combED_wtM
}else{ }else{
@ -151,6 +133,7 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
#------------------------------ #------------------------------
# PFM mut: Filtered positions # PFM mut: Filtered positions
#------------------------------- #-------------------------------
cat("\n===========================================" cat("\n==========================================="
, "\nGenerating PFM MSA: filtered positions" , "\nGenerating PFM MSA: filtered positions"
, "\n===========================================" , "\n==========================================="
@ -181,17 +164,20 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
} }
} }
# Construct Y-axis for MSA mut plot:
cat("\nCalculating y-axis for MSA mut plot")
# Construct the y-axis: Calculating
cat("\n-------------------------"
, "\nConstructing y-axis:"
, "\nUser did not provide"
,"\n--------------------------")
if (missing(y_breaks)){ if (missing(y_breaks)){
# Y-axis: Calculating
cat("\n----------------------------------------"
, "\nY-axis being generated from data"
, "\n-----------------------------------------")
ylim_low <- floor(min(combED_mutM)); ylim_low ylim_low <- floor(min(combED_mutM)); ylim_low
if( ylim_low == 0){ if( ylim_low == 0){
ylim_low = ylim_low ylim_low = ylim_low
cat("\nY-axis lower limit:", ylim_low) cat("\nY-axis lower limit:", ylim_low)
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
@ -201,6 +187,7 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
y_rup = seq(0, ylim_up, by = 2); y_rup y_rup = seq(0, ylim_up, by = 2); y_rup
}else{ }else{
ylim_low = ylim_low + (-0.5) ylim_low = ylim_low + (-0.5)
cat("\nY-axis lower limit is <0:", ylim_low) cat("\nY-axis lower limit is <0:", ylim_low)
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
@ -213,25 +200,20 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
#ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale #ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale
ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale
cat("\nY-axis generated: see below" cat("\nY-axis generated: see below\n"
, "\n"
, ylim_scale) , ylim_scale)
}else{ }else{
# Construct the y-axis: User provided
cat("\n-------------------------" # Y-axis: User provided
, "\nConstructing y-axis:" cat("\n--------------------------------"
, "\nUser provided" , "\nUsing y-axis:: User provided"
,"\n--------------------------") ,"\n---------------------------------")
ylim_scale = sort(y_breaks) ylim_scale = sort(y_breaks)
ylim_low = min(ylim_scale); ylim_low ylim_low = min(ylim_scale); ylim_low
ylim_up = max(ylim_scale); ylim_up ylim_up = max(ylim_scale); ylim_up
} }
else {
}
###################################### ######################################
# Generating plots for muts and wt # Generating plots for muts and wt
##################################### #####################################
@ -245,6 +227,7 @@ else {
yfont_bgc = "white" yfont_bgc = "white"
xtt_col = "white" xtt_col = "white"
ytt_col = "white" ytt_col = "white"
} }
if (my_logo_col %in% c('chemistry', 'hydrophobicity')) { if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
@ -257,6 +240,7 @@ else {
yfont_bgc = "black" yfont_bgc = "black"
xtt_col = "black" xtt_col = "black"
ytt_col = "black" ytt_col = "black"
} }
##################################### #####################################
@ -272,6 +256,7 @@ else {
, method = msa_method , method = msa_method
, col_scheme = my_logo_col , col_scheme = my_logo_col
, seq_type = 'auto') + , seq_type = 'auto') +
theme(legend.position = leg_pos theme(legend.position = leg_pos
, legend.direction = leg_dir , legend.direction = leg_dir
#, legend.title = element_blank() #, legend.title = element_blank()
@ -295,9 +280,11 @@ else {
, axis.title.y = element_text(size = y_tts , axis.title.y = element_text(size = y_tts
, colour = ytt_col) , colour = ytt_col)
, plot.background = element_rect(fill = theme_bgc))+ , plot.background = element_rect(fill = theme_bgc))+
xlab(x_lab)
xlab(x_lab_mut) + ylab(y_lab_mut)
if (missing(plot_positions)){ if (missing(plot_positions)){
ed_mut_logo_P = p0 + ed_mut_logo_P = p0 +
scale_x_discrete(breaks = msa_all_pos scale_x_discrete(breaks = msa_all_pos
, expand = c(x_axis_offset, 0) , expand = c(x_axis_offset, 0)
@ -313,19 +300,18 @@ else {
}else{ }else{
ed_mut_logo_P = p0 + ed_mut_logo_P = p0 +
#scale_y_continuous(expand = c(0,0.09)) +
scale_y_continuous(limits = c(ylim_low, ylim_up)
, breaks = ylim_scale
, expand = c(0,y_axis_offset))+
scale_x_discrete(breaks = i_extract scale_x_discrete(breaks = i_extract
, expand = c(x_axis_offset_filtered,0) , expand = c(x_axis_offset_filtered,0)
, labels = i_extract , labels = i_extract
, limits = factor(i_extract)) + , limits = factor(i_extract)) +
#scale_y_continuous(expand = c(0,0.09)) +
scale_y_continuous(limits = c(ylim_low, ylim_up)
, breaks = ylim_scale
, expand = c(0, y_axis_offset))+
geom_hline(yintercept = 0 geom_hline(yintercept = 0
, linetype = "solid" , linetype = "solid"
, color = "grey" , color = "grey"
, size = 1) , size = 1)
} }
cat('\nDone: MSA plot for mutations') cat('\nDone: MSA plot for mutations')
@ -361,17 +347,20 @@ else {
, colour = ytt_col) , colour = ytt_col)
, plot.background = element_rect(fill = theme_bgc)) + , plot.background = element_rect(fill = theme_bgc)) +
ylab("") + xlab("Wild-type position") ylab("") + xlab("Wild-type position")
if (missing(plot_positions)){ if (missing(plot_positions)){
# No y-axis needed
ed_wt_logo_P = p1 + ed_wt_logo_P = p1 +
scale_x_discrete(breaks = wt_all_pos scale_x_discrete(breaks = wt_all_pos
, expand = c(x_axis_offset, 0) , expand = c(x_axis_offset, 0)
, labels = wt_all_pos , labels = wt_all_pos
, limits = factor(wt_all_pos)) , limits = factor(wt_all_pos))
}else{ }else{
ed_wt_logo_P = p1 + ed_wt_logo_P = p1 +
scale_y_continuous(expand = c(0,0.09)) +
scale_x_discrete(breaks = i_extract scale_x_discrete(breaks = i_extract
, expand = c(x_axis_offset_filtered, 0) , expand = c(x_axis_offset_filtered, 0)
, labels = i_extract , labels = i_extract

View file

@ -1,23 +1,30 @@
#logo plots #####################################################################################
# LogoPlotMSA():
# Input:
# Data:
# msaSeq_mut: MSA chr vector for muts
# msaSeq_wt [Optional]: MSA chr vector for wt
# one for multiple muts # Others params:
# --> select/drop down option to filter count of nsSNPs # plot_positions: can choose what positions to plot
# --> select/drop down option for colour # msa_method : can be "bits" or "probability"
# --> should include WT # my_logo_col : can be "chemistry", "hydrophobicity", "taylor" or "clustalx"
# Data used # Returns data LogoPlot from MSA
#tab_mt # mutant logo plot #...
#tab_wt # wt logo plot
# TODO: SHINY
# drop down: my_logo_col, i.e. the 4 colour choices
# drop down: for DataED_PFM(), ED score options:
# c("log", "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
# drop down/enter field: for DataED_PFM(), background probability
# Make it hover over position and then get the corresponding data table! # Make it hover over position and then get the corresponding data table!
#%%====================================================================== ###################################################################################
#================== #==================
# logo data: OR # logo data: OR
#================== #==================
# NOTE: my_logo_col
LogoPlotMSA <- function(msaSeq_mut LogoPlotMSA <- function(msaSeq_mut
, msaSeq_wt , msaSeq_wt
, plot_positions , plot_positions

View file

@ -1,12 +1,12 @@
#source("~/git/LSHTM_analysis/config/gid.R") #source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/config/pnca.R") #source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/embb.R") #source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R") #source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/alr.R") #source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/rpob.R") #source("~/git/LSHTM_analysis/config/rpob.R")
#--------------------------------------------------- #---------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") #source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#
################################ ################################
# Logo plot with custom Y axis # Logo plot with custom Y axis
# mainly OR # mainly OR
@ -61,7 +61,6 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# , leg_ts = 14 # leg text size # , leg_ts = 14 # leg text size
# , leg_tts = 16 # leg title size # , leg_tts = 16 # leg title size
# ) # )
#
######################################## ########################################
# Logo plot MSA # Logo plot MSA
@ -71,31 +70,84 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# specify {plot_positions} # specify {plot_positions}
# To plot entire MSA, simply don't specify {plot_positions} # To plot entire MSA, simply don't specify {plot_positions}
# script: logoP_msa.R # script: logoP_msa.R
# TODO perhaps: ED logo from Logolas
# TODO: Add scaled data option # TODO: Add scaled data option
######################################## ########################################
LogoPlotMSA(msaSeq_mut = msa_seq # LogoPlotMSA(msaSeq_mut = msa_seq
, msaSeq_wt = wt_seq # , msaSeq_wt = wt_seq
, msa_method = 'bits' # or probability # # , use_pfm
, my_logo_col = "taylor" # # , use_pfm_scaled
, plot_positions = active_aa_pos # # , use_ed
, x_lab = "nsSNP position" # , msa_method = 'bits' # or probability
, y_lab = "" # , my_logo_col = "taylor"
, x_ats = 10 # text size # , plot_positions = 1:15
, x_tangle = 90 # text angle # , x_lab = "nsSNP position"
, x_axis_offset = 0.05 # , y_lab = ""
, y_ats = 15 # , x_ats = 10 # text size
, y_tangle = 0 # , x_tangle = 90 # text angle
, x_tts = 13 # title size # , x_axis_offset = 0.05
, y_tts = 15 # , y_ats = 15
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) # , y_tangle = 0
, leg_dir = "horizontal" #can be vertical or horizontal # , x_tts = 13 # title size
, leg_ts = 16 # leg text size # , y_tts = 15
, leg_tts = 16 # leg title size # , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
) # , leg_dir = "horizontal" #can be vertical or horizontal
# , leg_ts = 16 # leg text size
# , leg_tts = 16 # leg title size
# )
######################################## ########################################
# ED Logo plot MSA # ED Logo plot MSA
# Mutant and wild-type # Mutant and wild-type
######################################## ########################################
# library(Logolas)
# library(ggseqlogo)
# source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
# source("~/git/LSHTM_analysis/scripts/functions/logoP_logolas.R")
#
# # data msa: mut
# my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only
# msaSeq_mut = my_data$V1
# msa_seq = msaSeq_mut
#
# # data msa: wt
# gene = "pncA"
# drug = "pyrazinamide"
# indir = paste0("~/git/Data/", drug , "/input/")
#
# in_filename_fasta = paste0(tolower(gene), "2_f2.fasta")
# infile_fasta = paste0(indir, in_filename_fasta)
# cat("\nInput fasta file for WT: ", infile_fasta, "\n")
#
# msa2 = read.csv(infile_fasta, header = F)
# head(msa2)
# cat("\nLength of WT fasta:", nrow(msa2))
# wt_seq = msa2$V1
# head(wt_seq)
# msaSeq_wt = msa2$V1
# wt_seq = msaSeq_wt
#PlotLogolasMSA()
# Draw the MSA logo plot for the mutant and wild-type sequences, restricted to
# positions 1 to 15, using the "taylor" colour scheme.
# NOTE(review): msa_seq and wt_seq are only assigned in commented-out lines and
# in a commented-out source() call in the visible part of this script -- confirm
# they exist in the session before this call runs.
# NOTE(review): logo_type and EDScore_type differ from the msa_method / ED_score
# argument names seen in another PlotLogolasMSA signature in this commit --
# TODO confirm they match the function definition that is actually sourced.
PlotLogolasMSA(msaSeq_mut = msa_seq
, msaSeq_wt = wt_seq
, logo_type = c("bits_pfm") # options listed by author: "EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw"; an older note said "bits", "probability" or "custom" -- TODO confirm
, EDScore_type = c("log") # TODO: check whether this is relevant here, or whether the sourced function should own it
, bg_prob = NULL
, my_logo_col = "taylor"
, plot_positions = c(1:15)
#, y_breaks
, x_lab_mut = "nsSNP-position"
#, y_lab_mut
, x_ats = 13 # text size
, x_tangle = 90 # text angle
, x_axis_offset = 0.05
, x_axis_offset_filtered = 0.05
, y_axis_offset = 0.05
, y_ats = 13
, y_tangle = 0
, x_tts = 13
, y_tts = 13
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
, leg_dir = "horizontal" # can be vertical or horizontal
, leg_ts = 16 # leg text size
, leg_tts = 16 # leg title size
)

View file

@ -174,6 +174,7 @@ if(!require(protr)){
#BiocManager::install("Logolas") #BiocManager::install("Logolas")
library("Logolas") library("Logolas")
library("Biostrings")
#################################### ####################################