renamed logoP_msa.R --> logoP_msa_raw.R
This commit is contained in:
parent
6a9f4a0cab
commit
3bc5dcbad3
4 changed files with 316 additions and 267 deletions
|
@ -1,39 +1,14 @@
|
|||
library(Logolas)
|
||||
library(ggseqlogo)
|
||||
source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
|
||||
|
||||
# data msa: mut
|
||||
my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only
|
||||
msaSeq_mut = my_data$V1
|
||||
|
||||
# data msa: wt
|
||||
gene = "pncA"
|
||||
drug = "pyrazinamide"
|
||||
indir = paste0("~/git/Data/", drug , "/input/")
|
||||
|
||||
in_filename_fasta = paste0(tolower(gene), "2_f2.fasta")
|
||||
infile_fasta = paste0(indir, in_filename_fasta)
|
||||
cat("\nInput fasta file for WT: ", infile_fasta, "\n")
|
||||
|
||||
msa2 = read.csv(infile_fasta, header = F)
|
||||
head(msa2)
|
||||
cat("\nLength of WT fasta:", nrow(msa2))
|
||||
#wt_seq = msa2$V1
|
||||
#head(wt_seq)
|
||||
msaSeq_wt = msa2$V1
|
||||
|
||||
###########################################
|
||||
|
||||
PlotLogolasMSA <- function(msaSeq_mut # chr vector
|
||||
, msaSeq_wt # chr vector
|
||||
, msa_method = c("custom") # will be c("EDLogo", "Logo)#
|
||||
, EDLogo_score = c("log")# can be: "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL"
|
||||
, ED_score = c("log")# can be: "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL"
|
||||
, bg_prob = NULL
|
||||
, my_logo_col = "chemistry"
|
||||
, plot_positions = c(1, 10, 14, 8)
|
||||
, plot_positions
|
||||
, y_breaks
|
||||
, x_lab = "Wild-type position"
|
||||
, y_lab = ""
|
||||
, x_lab_mut = "nsSNP-position"
|
||||
, y_lab_mut = ""
|
||||
, x_ats = 13 # text size
|
||||
, x_tangle = 90 # text angle
|
||||
, x_axis_offset = 0.05 # dist b/w y-axis and plot start
|
||||
|
@ -50,6 +25,7 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
|
|||
)
|
||||
|
||||
{
|
||||
#''' Can be put into a separate EDData plot function'''
|
||||
dash_control = list()
|
||||
dash_control_default <- list(concentration = NULL, mode = NULL,
|
||||
optmethod = "mixEM", sample_weights = NULL, verbose = FALSE,
|
||||
|
@ -65,205 +41,213 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector
|
|||
cat("\nLength of MSA", length(msaSeq_mut)
|
||||
, "\nlength of WT seq:", length(msaSeq_wt))
|
||||
|
||||
cat("\n======================="
|
||||
cat("\n======================="
|
||||
, "\nPlotting entire MSA"
|
||||
, "\n========================")
|
||||
|
||||
#--------------------------
|
||||
# Getting PFM: mutant MSA
|
||||
#--------------------------
|
||||
pfm_mut <- Biostrings::consensusMatrix(msaSeq_mut)
|
||||
colnames(pfm_mut) <- 1:dim(pfm_mut)[2]
|
||||
pfm_mut_scaled <- do.call(dash, append(list(comp_data = pfm_mut),
|
||||
#--------------------------
|
||||
# Getting PFM: mutant MSA
|
||||
#--------------------------
|
||||
pfm_mut <- Biostrings::consensusMatrix(msaSeq_mut)
|
||||
colnames(pfm_mut) <- 1:dim(pfm_mut)[2]
|
||||
pfm_mut_scaled <- do.call(dash, append(list(comp_data = pfm_mut),
|
||||
dash_control))$posmean
|
||||
|
||||
logo_mut_h = get_logo_heights(pfm_mut_scaled
|
||||
, bg = bg_prob
|
||||
, score = EDLogo_score)
|
||||
logo_mut_h$pos_ic
|
||||
logo_mut_h$neg_ic
|
||||
logo_mut_h = get_logo_heights(pfm_mut_scaled
|
||||
, bg = bg_prob
|
||||
, score = ED_score)
|
||||
|
||||
cat("\nGetting logo_heights from Logolas package...")
|
||||
|
||||
pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM
|
||||
pos_mutS = logo_mut_h[['pos_ic']]; pos_mutS
|
||||
pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED
|
||||
|
||||
# TODO: Add sanity check!
|
||||
#<...
|
||||
#...>
|
||||
neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1)
|
||||
neg_mutS = logo_mut_h[['neg_ic']]; neg_mutS
|
||||
neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED
|
||||
|
||||
if (length(pos_mutS) && length(neg_mutS) == dim(pfm_mut)[2]){
|
||||
cat("\nPASS: pfm calculated successfully including scaled matrix"
|
||||
, "\nDim of pfm matrix:", dim(pfm_mut)[1], dim(pfm_mut)[2])
|
||||
}
|
||||
|
||||
combED_mutM = pos_mutED + neg_mutED
|
||||
|
||||
pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM
|
||||
pos_mutS = logo_mut_h$pos_ic; pos_mutS
|
||||
pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED
|
||||
# Construct the x-axis: mutant MSA
|
||||
msa_all_pos = as.numeric(colnames(combED_mutM))
|
||||
|
||||
neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1)
|
||||
neg_mutS = logo_mut_h$neg_ic; neg_mutS
|
||||
neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED
|
||||
|
||||
combED_mutM = pos_mutED + neg_mutED
|
||||
|
||||
# Construct the x-axis: mutant MSA
|
||||
msa_all_pos = as.numeric(colnames(combED_mutM))
|
||||
|
||||
#---------------------
|
||||
# Getting PFM: WT
|
||||
#---------------------
|
||||
pfm_wt <- Biostrings::consensusMatrix(msaSeq_wt)
|
||||
colnames(pfm_wt) <- 1:dim(pfm_wt)[2]
|
||||
pfm_wt_scaled <- do.call(dash, append(list(comp_data = pfm_wt),
|
||||
#---------------------
|
||||
# Getting PFM: WT
|
||||
#---------------------
|
||||
pfm_wt <- Biostrings::consensusMatrix(msaSeq_wt)
|
||||
colnames(pfm_wt) <- 1:dim(pfm_wt)[2]
|
||||
pfm_wt_scaled <- do.call(dash, append(list(comp_data = pfm_wt),
|
||||
dash_control))$posmean
|
||||
|
||||
logo_wt_h = get_logo_heights(pfm_wt_scaled
|
||||
, bg = bg_prob
|
||||
, score = EDLogo_score)
|
||||
logo_wt_h$pos_ic
|
||||
logo_wt_h$neg_ic
|
||||
|
||||
pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM
|
||||
pos_wtS = logo_wt_h$pos_ic; pos_wtS
|
||||
pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED
|
||||
|
||||
neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1)
|
||||
neg_wtS = logo_wt_h$neg_ic; neg_wtS
|
||||
neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED
|
||||
|
||||
combED_wtM = pos_wtED + neg_wtED
|
||||
|
||||
# Construct the x-axis: mutant MSA
|
||||
wt_all_pos = as.numeric(colnames(combED_wtM))
|
||||
|
||||
|
||||
if(missing(plot_positions)){
|
||||
|
||||
#------------------------------
|
||||
# MSA mut: All, no filtering
|
||||
#-------------------------------
|
||||
cat("\n==========================================="
|
||||
, "\nGenerated PFM mut: No filtering"
|
||||
, "\n===========================================")
|
||||
plot_mut_edM = combED_mutM
|
||||
|
||||
#------------------------------
|
||||
# MSA WT: All, no filtering
|
||||
#-------------------------------
|
||||
cat("\n==========================================="
|
||||
, "\nGenerated PFM WT: No filtering"
|
||||
, "\n===========================================")
|
||||
plot_wt_edM = combED_wtM
|
||||
|
||||
}else{
|
||||
|
||||
#------------------------------
|
||||
# PFM mut: Filtered positions
|
||||
#-------------------------------
|
||||
cat("\n==========================================="
|
||||
, "\nGenerating PFM MSA: filtered positions"
|
||||
, "\n==========================================="
|
||||
, "\nUser specified plotting positions for MSA:"
|
||||
, "\nThese are:\n", plot_positions
|
||||
, "\nSorting plot positions...")
|
||||
|
||||
plot_positions = sort(plot_positions)
|
||||
|
||||
cat("\nPlotting positions sorted:\n"
|
||||
, plot_positions)
|
||||
|
||||
if ( all(plot_positions%in%msa_all_pos) && all(plot_positions%in%wt_all_pos) ){
|
||||
cat("\nAll positions within range"
|
||||
, "\nFiltering positions as specified..."
|
||||
, "\nNo. of positions in plot:", length(plot_positions))
|
||||
i_extract = plot_positions
|
||||
plot_mut_edM = combED_mutM[, i_extract]
|
||||
plot_wt_edM = combED_wtM[, i_extract]
|
||||
|
||||
}else{
|
||||
cat("\nNo. of positions selected:", length(plot_positions))
|
||||
i_ofr = plot_positions[!plot_positions%in%msa_all_pos]
|
||||
cat("\n1 or more plot_positions out of range..."
|
||||
, "\nThese are:\n", i_ofr
|
||||
, "\nQuitting! Resubmit with correct plot_positions")
|
||||
quit()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# Construct the y-axis: Calculating
|
||||
cat("\n-------------------------"
|
||||
, "\nConstructing y-axis:"
|
||||
, "\nUser did not provide"
|
||||
,"\n--------------------------")
|
||||
if (missing(y_breaks)){
|
||||
|
||||
ylim_low <- floor(min(combED_mutM)); ylim_low
|
||||
|
||||
if( ylim_low == 0){
|
||||
ylim_low = ylim_low
|
||||
cat("\nY-axis lower limit:", ylim_low)
|
||||
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
|
||||
|
||||
ylim_up <- ceiling(max(combED_mutM)) + 4; ylim_up
|
||||
cat("\nY-axis upper limit:", ylim_up)
|
||||
y_rup = seq(0, ylim_up, by = 2); y_rup
|
||||
|
||||
}else{
|
||||
ylim_low = ylim_low + (-0.5)
|
||||
cat("\nY-axis lower limit is <0:", ylim_low)
|
||||
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
|
||||
|
||||
ylim_up <- ceiling(max(combED_mutM)) + 3; ylim_up
|
||||
cat("\nY-axis upper limit:", ylim_up)
|
||||
y_rup = seq(0, ylim_up, by = 3); y_rup
|
||||
}
|
||||
|
||||
#ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale
|
||||
ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale
|
||||
|
||||
cat("\nY-axis generated: see below"
|
||||
, "\n"
|
||||
, ylim_scale)
|
||||
|
||||
}else{
|
||||
# Construct the y-axis: User provided
|
||||
cat("\n-------------------------"
|
||||
, "\nConstructing y-axis:"
|
||||
, "\nUser provided"
|
||||
,"\n--------------------------")
|
||||
ylim_scale = sort(y_breaks)
|
||||
ylim_low = min(ylim_scale); ylim_low
|
||||
ylim_up = max(ylim_scale); ylim_up
|
||||
}
|
||||
|
||||
else {
|
||||
logo_wt_h = get_logo_heights(pfm_wt_scaled
|
||||
, bg = bg_prob
|
||||
, score = ED_score)
|
||||
|
||||
pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM
|
||||
pos_wtS = logo_wt_h[['pos_ic']]; pos_wtS
|
||||
pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED
|
||||
|
||||
}
|
||||
######################################
|
||||
# Generating plots for muts and wt
|
||||
#####################################
|
||||
neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1)
|
||||
neg_wtS = logo_wt_h[['neg_ic']]; neg_wtS
|
||||
neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED
|
||||
|
||||
if (my_logo_col %in% c('clustalx','taylor')) {
|
||||
cat("\nSelected colour scheme:", my_logo_col
|
||||
, "\nUsing black theme\n")
|
||||
|
||||
theme_bgc = "black"
|
||||
xfont_bgc = "white"
|
||||
yfont_bgc = "white"
|
||||
xtt_col = "white"
|
||||
ytt_col = "white"
|
||||
if (length(pos_wtS) && length(neg_wtS) == dim(pfm_wt)[2]){
|
||||
cat("\nPASS: pfm calculated successfully including scaled matrix"
|
||||
, "\nDim of pfm matrix:", dim(pfm_wt)[1], dim(pfm_wt)[2])
|
||||
}
|
||||
|
||||
if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
|
||||
cat("\nstart of MSA"
|
||||
, '\nSelected colour scheme:', my_logo_col
|
||||
, "\nUsing grey theme")
|
||||
combED_wtM = pos_wtED + neg_wtED
|
||||
|
||||
theme_bgc = "grey"
|
||||
xfont_bgc = "black"
|
||||
yfont_bgc = "black"
|
||||
xtt_col = "black"
|
||||
ytt_col = "black"
|
||||
# Construct the x-axis: mutant MSA
|
||||
wt_all_pos = as.numeric(colnames(combED_wtM))
|
||||
|
||||
|
||||
if(missing(plot_positions)){
|
||||
|
||||
#------------------------------
|
||||
# MSA mut: All, no filtering
|
||||
#-------------------------------
|
||||
cat("\n==========================================="
|
||||
, "\nGenerated PFM mut: No filtering"
|
||||
, "\n===========================================")
|
||||
|
||||
plot_mut_edM = combED_mutM
|
||||
|
||||
#------------------------------
|
||||
# MSA WT: All, no filtering
|
||||
#-------------------------------
|
||||
cat("\n==========================================="
|
||||
, "\nGenerated PFM WT: No filtering"
|
||||
, "\n===========================================")
|
||||
|
||||
plot_wt_edM = combED_wtM
|
||||
|
||||
}else{
|
||||
|
||||
#------------------------------
|
||||
# PFM mut: Filtered positions
|
||||
#-------------------------------
|
||||
|
||||
cat("\n==========================================="
|
||||
, "\nGenerating PFM MSA: filtered positions"
|
||||
, "\n==========================================="
|
||||
, "\nUser specified plotting positions for MSA:"
|
||||
, "\nThese are:\n", plot_positions
|
||||
, "\nSorting plot positions...")
|
||||
|
||||
plot_positions = sort(plot_positions)
|
||||
|
||||
cat("\nPlotting positions sorted:\n"
|
||||
, plot_positions)
|
||||
|
||||
if ( all(plot_positions%in%msa_all_pos) && all(plot_positions%in%wt_all_pos) ){
|
||||
cat("\nAll positions within range"
|
||||
, "\nFiltering positions as specified..."
|
||||
, "\nNo. of positions in plot:", length(plot_positions))
|
||||
i_extract = plot_positions
|
||||
plot_mut_edM = combED_mutM[, i_extract]
|
||||
plot_wt_edM = combED_wtM[, i_extract]
|
||||
|
||||
}else{
|
||||
cat("\nNo. of positions selected:", length(plot_positions))
|
||||
i_ofr = plot_positions[!plot_positions%in%msa_all_pos]
|
||||
cat("\n1 or more plot_positions out of range..."
|
||||
, "\nThese are:\n", i_ofr
|
||||
, "\nQuitting! Resubmit with correct plot_positions")
|
||||
quit()
|
||||
}
|
||||
}
|
||||
|
||||
# Construct Y-axis for MSA mut plot:
|
||||
cat("\nCalculating y-axis for MSA mut plot")
|
||||
|
||||
if (missing(y_breaks)){
|
||||
|
||||
# Y-axis: Calculating
|
||||
cat("\n----------------------------------------"
|
||||
, "\nY-axis being generated from data"
|
||||
, "\n-----------------------------------------")
|
||||
|
||||
ylim_low <- floor(min(combED_mutM)); ylim_low
|
||||
|
||||
if( ylim_low == 0){
|
||||
|
||||
ylim_low = ylim_low
|
||||
cat("\nY-axis lower limit:", ylim_low)
|
||||
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
|
||||
|
||||
ylim_up <- ceiling(max(combED_mutM)) + 4; ylim_up
|
||||
cat("\nY-axis upper limit:", ylim_up)
|
||||
y_rup = seq(0, ylim_up, by = 2); y_rup
|
||||
|
||||
}else{
|
||||
|
||||
ylim_low = ylim_low + (-0.5)
|
||||
cat("\nY-axis lower limit is <0:", ylim_low)
|
||||
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
|
||||
|
||||
ylim_up <- ceiling(max(combED_mutM)) + 3; ylim_up
|
||||
cat("\nY-axis upper limit:", ylim_up)
|
||||
y_rup = seq(0, ylim_up, by = 3); y_rup
|
||||
}
|
||||
|
||||
#ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale
|
||||
ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale
|
||||
|
||||
cat("\nY-axis generated: see below\n"
|
||||
, ylim_scale)
|
||||
|
||||
}else{
|
||||
|
||||
# Y-axis: User provided
|
||||
cat("\n--------------------------------"
|
||||
, "\nUsing y-axis:: User provided"
|
||||
,"\n---------------------------------")
|
||||
ylim_scale = sort(y_breaks)
|
||||
ylim_low = min(ylim_scale); ylim_low
|
||||
ylim_up = max(ylim_scale); ylim_up
|
||||
}
|
||||
|
||||
######################################
|
||||
# Generating plots for muts and wt
|
||||
#####################################
|
||||
|
||||
if (my_logo_col %in% c('clustalx','taylor')) {
|
||||
cat("\nSelected colour scheme:", my_logo_col
|
||||
, "\nUsing black theme\n")
|
||||
|
||||
theme_bgc = "black"
|
||||
xfont_bgc = "white"
|
||||
yfont_bgc = "white"
|
||||
xtt_col = "white"
|
||||
ytt_col = "white"
|
||||
|
||||
}
|
||||
|
||||
if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
|
||||
cat("\nstart of MSA"
|
||||
, '\nSelected colour scheme:', my_logo_col
|
||||
, "\nUsing grey theme")
|
||||
|
||||
theme_bgc = "grey"
|
||||
xfont_bgc = "black"
|
||||
yfont_bgc = "black"
|
||||
xtt_col = "black"
|
||||
ytt_col = "black"
|
||||
|
||||
}
|
||||
|
||||
#####################################
|
||||
# Generating logo plots for nsSNPs
|
||||
#####################################
|
||||
PlotlogolasL <- list()
|
||||
|
||||
|
||||
#-------------------
|
||||
# Mutant logo plot
|
||||
#-------------------
|
||||
|
@ -272,6 +256,7 @@ else {
|
|||
, method = msa_method
|
||||
, col_scheme = my_logo_col
|
||||
, seq_type = 'auto') +
|
||||
|
||||
theme(legend.position = leg_pos
|
||||
, legend.direction = leg_dir
|
||||
#, legend.title = element_blank()
|
||||
|
@ -295,17 +280,19 @@ else {
|
|||
, axis.title.y = element_text(size = y_tts
|
||||
, colour = ytt_col)
|
||||
, plot.background = element_rect(fill = theme_bgc))+
|
||||
xlab(x_lab)
|
||||
|
||||
xlab(x_lab_mut) + ylab(y_lab_mut)
|
||||
|
||||
if (missing(plot_positions)){
|
||||
ed_mut_logo_P = p0 +
|
||||
|
||||
ed_mut_logo_P = p0 +
|
||||
scale_x_discrete(breaks = msa_all_pos
|
||||
, expand = c(x_axis_offset,0)
|
||||
, expand = c(x_axis_offset, 0)
|
||||
, labels = msa_all_pos
|
||||
, limits = factor(msa_all_pos))+
|
||||
scale_y_continuous(limits = c(ylim_low, ylim_up)
|
||||
, breaks = ylim_scale
|
||||
, expand = c(0,y_axis_offset)) +
|
||||
, expand = c(0, y_axis_offset)) +
|
||||
geom_hline(yintercept = 0
|
||||
, linetype = "solid"
|
||||
, color = "grey"
|
||||
|
@ -313,20 +300,19 @@ else {
|
|||
|
||||
}else{
|
||||
ed_mut_logo_P = p0 +
|
||||
#scale_y_continuous(expand = c(0,0.09)) +
|
||||
scale_y_continuous(limits = c(ylim_low, ylim_up)
|
||||
, breaks = ylim_scale
|
||||
, expand = c(0,y_axis_offset))+
|
||||
scale_x_discrete(breaks = i_extract
|
||||
scale_x_discrete(breaks = i_extract
|
||||
, expand = c(x_axis_offset_filtered,0)
|
||||
, labels = i_extract
|
||||
, limits = factor(i_extract)) +
|
||||
#scale_y_continuous(expand = c(0,0.09)) +
|
||||
scale_y_continuous(limits = c(ylim_low, ylim_up)
|
||||
, breaks = ylim_scale
|
||||
, expand = c(0, y_axis_offset))+
|
||||
geom_hline(yintercept = 0
|
||||
, linetype = "solid"
|
||||
, color = "grey"
|
||||
, size = 1)
|
||||
|
||||
}
|
||||
, color = "grey"
|
||||
, size = 1)
|
||||
}
|
||||
|
||||
cat('\nDone: MSA plot for mutations')
|
||||
#return(msa_mut_logoP)
|
||||
|
@ -361,22 +347,25 @@ else {
|
|||
, colour = ytt_col)
|
||||
|
||||
, plot.background = element_rect(fill = theme_bgc)) +
|
||||
|
||||
ylab("") + xlab("Wild-type position")
|
||||
|
||||
if (missing(plot_positions)){
|
||||
ed_wt_logo_P = p1 +
|
||||
|
||||
# No y-axis needed
|
||||
ed_wt_logo_P = p1 +
|
||||
scale_x_discrete(breaks = wt_all_pos
|
||||
, expand = c(x_axis_offset,0)
|
||||
, expand = c(x_axis_offset, 0)
|
||||
, labels = wt_all_pos
|
||||
, limits = factor(wt_all_pos))
|
||||
}else{
|
||||
|
||||
ed_wt_logo_P = p1 +
|
||||
scale_y_continuous(expand = c(0,0.09)) +
|
||||
scale_x_discrete(breaks = i_extract
|
||||
, expand = c(x_axis_offset_filtered, 0)
|
||||
, labels = i_extract
|
||||
, limits = factor(i_extract))
|
||||
}
|
||||
}
|
||||
|
||||
cat('\nDone: MSA plot for WT')
|
||||
#return(msa_wt_logoP)
|
||||
|
|
|
@ -1,23 +1,30 @@
|
|||
#logo plots
|
||||
#####################################################################################
|
||||
# LogoPlotMSA():
|
||||
# Input:
|
||||
# Data:
|
||||
# msaSeq_mut: MSA chr vector for muts
|
||||
# msaSeq_wt [Optional]: MSA chr vector for wt
|
||||
|
||||
# one for multiple muts
|
||||
# --> select/drop down option to filter count of nsSNPs
|
||||
# --> select/drop down option for colour
|
||||
# --> should include WT
|
||||
# Others params:
|
||||
# plot_positions: can choose what positions to plot
|
||||
# msa_method : can be "bits" or "probability"
|
||||
# my_logo_col : can be "chemistry", "hydrophobicity", "taylor" or "clustalx"
|
||||
|
||||
# Data used
|
||||
# Returns data LogoPlot from MSA
|
||||
|
||||
#tab_mt # mutant logo plot
|
||||
#tab_wt # wt logo plot
|
||||
#...
|
||||
|
||||
# TODO: SHINY
|
||||
# drop down: my_logo_col i.e the 4 colour choices
|
||||
# drop down: for DataED_PFM(), ED score options:
|
||||
# c("log", log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
|
||||
# drop down/enter field: for DataED_PFM(), background probability
|
||||
# Make it hover over position and then get the corresponding data table!
|
||||
#%%======================================================================
|
||||
###################################################################################
|
||||
|
||||
#==================
|
||||
# logo data: OR
|
||||
#==================
|
||||
# NOTE: my_logo_col
|
||||
|
||||
LogoPlotMSA <- function(msaSeq_mut
|
||||
, msaSeq_wt
|
||||
, plot_positions
|
||||
|
@ -45,21 +52,21 @@ LogoPlotMSA <- function(msaSeq_mut
|
|||
###########################################
|
||||
cat("\nLength of MSA", length(msaSeq_mut)
|
||||
, "\nlength of WT seq:", length(msaSeq_wt))
|
||||
|
||||
|
||||
if(missing(plot_positions)){
|
||||
#if(is.null(plot_positions)){
|
||||
cat("\n======================="
|
||||
, "\nPlotting entire MSA"
|
||||
, "\n========================")
|
||||
msa_seq_plot = msaSeq_mut
|
||||
msa_all_interim = sapply(msa_seq_plot, function(x) unlist(strsplit(x,"")))
|
||||
msa_seq_plot = msaSeq_mut
|
||||
msa_all_interim = sapply(msa_seq_plot, function(x) unlist(strsplit(x,"")))
|
||||
msa_all_interimDF = data.frame(msa_all_interim)
|
||||
msa_all_pos = as.numeric(rownames(msa_all_interimDF))
|
||||
msa_all_pos = as.numeric(rownames(msa_all_interimDF))
|
||||
|
||||
wt_seq_plot = msaSeq_wt
|
||||
wt_all_interim = sapply(wt_seq_plot, function(x) unlist(strsplit(x,"")))
|
||||
wt_all_interimDF = data.frame(wt_all_interim)
|
||||
wt_all_pos = as.numeric(rownames(wt_all_interimDF))
|
||||
wt_seq_plot = msaSeq_wt
|
||||
wt_all_interim = sapply(wt_seq_plot, function(x) unlist(strsplit(x,"")))
|
||||
wt_all_interimDF = data.frame(wt_all_interim)
|
||||
wt_all_pos = as.numeric(rownames(wt_all_interimDF))
|
||||
|
||||
|
||||
} else {
|
||||
|
@ -309,4 +316,4 @@ LogoPlotMSA <- function(msaSeq_mut
|
|||
|
||||
return(LogoMSA_comb)
|
||||
|
||||
}
|
||||
}
|
|
@ -1,12 +1,12 @@
|
|||
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||
source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||
#source("~/git/LSHTM_analysis/config/alr.R")
|
||||
#source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
#---------------------------------------------------
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
#
|
||||
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
|
||||
################################
|
||||
# Logo plot with custom Y axis
|
||||
# mainly OR
|
||||
|
@ -61,7 +61,6 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
|||
# , leg_ts = 14 # leg text size
|
||||
# , leg_tts = 16 # leg title size
|
||||
# )
|
||||
#
|
||||
|
||||
########################################
|
||||
# Logo plot MSA
|
||||
|
@ -71,31 +70,84 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
|||
# specify {plot_positions}
|
||||
# To plot entire MSA, simply don't specify {plot_positions}
|
||||
# script: logoP_msa.R
|
||||
# TODO perhaps: ED logo from Logolas
|
||||
# TODO: Add scaled data option
|
||||
########################################
|
||||
LogoPlotMSA(msaSeq_mut = msa_seq
|
||||
, msaSeq_wt = wt_seq
|
||||
, msa_method = 'bits' # or probability
|
||||
, my_logo_col = "taylor"
|
||||
, plot_positions = active_aa_pos
|
||||
, x_lab = "nsSNP position"
|
||||
, y_lab = ""
|
||||
, x_ats = 10 # text size
|
||||
, x_tangle = 90 # text angle
|
||||
, x_axis_offset = 0.05
|
||||
, y_ats = 15
|
||||
, y_tangle = 0
|
||||
, x_tts = 13 # title size
|
||||
, y_tts = 15
|
||||
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
|
||||
, leg_dir = "horizontal" #can be vertical or horizontal
|
||||
, leg_ts = 16 # leg text size
|
||||
, leg_tts = 16 # leg title size
|
||||
)
|
||||
# LogoPlotMSA(msaSeq_mut = msa_seq
|
||||
# , msaSeq_wt = wt_seq
|
||||
# # , use_pfm
|
||||
# # , use_pfm_scaled
|
||||
# # , use_ed
|
||||
# , msa_method = 'bits' # or probability
|
||||
# , my_logo_col = "taylor"
|
||||
# , plot_positions = 1:15
|
||||
# , x_lab = "nsSNP position"
|
||||
# , y_lab = ""
|
||||
# , x_ats = 10 # text size
|
||||
# , x_tangle = 90 # text angle
|
||||
# , x_axis_offset = 0.05
|
||||
# , y_ats = 15
|
||||
# , y_tangle = 0
|
||||
# , x_tts = 13 # title size
|
||||
# , y_tts = 15
|
||||
# , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
|
||||
# , leg_dir = "horizontal" #can be vertical or horizontal
|
||||
# , leg_ts = 16 # leg text size
|
||||
# , leg_tts = 16 # leg title size
|
||||
# )
|
||||
|
||||
########################################
|
||||
# ED Logo plot MSA
|
||||
# Mutant and wild-type
|
||||
########################################
|
||||
# library(Logolas)
|
||||
# library(ggseqlogo)
|
||||
# source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
|
||||
# source("~/git/LSHTM_analysis/scripts/functions/logoP_logolas.R")
|
||||
#
|
||||
# # data msa: mut
|
||||
# my_data = read.csv("/home/tanu/git/Misc/practice_plots/pnca_msa_eg2.csv", header = F) #15 cols only
|
||||
# msaSeq_mut = my_data$V1
|
||||
# msa_seq = msaSeq_mut
|
||||
#
|
||||
# # data msa: wt
|
||||
# gene = "pncA"
|
||||
# drug = "pyrazinamide"
|
||||
# indir = paste0("~/git/Data/", drug , "/input/")
|
||||
#
|
||||
# in_filename_fasta = paste0(tolower(gene), "2_f2.fasta")
|
||||
# infile_fasta = paste0(indir, in_filename_fasta)
|
||||
# cat("\nInput fasta file for WT: ", infile_fasta, "\n")
|
||||
#
|
||||
# msa2 = read.csv(infile_fasta, header = F)
|
||||
# head(msa2)
|
||||
# cat("\nLength of WT fasta:", nrow(msa2))
|
||||
# wt_seq = msa2$V1
|
||||
# head(wt_seq)
|
||||
# msaSeq_wt = msa2$V1
|
||||
# wt_seq = msaSeq_wt
|
||||
|
||||
#PlotLogolasMSA()
|
||||
PlotLogolasMSA(msaSeq_mut = msa_seq
|
||||
, msaSeq_wt = wt_seq
|
||||
, logo_type = c("bits_pfm") # "EDLogo", bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # can be "bits", "probability" or "custom"
|
||||
, EDScore_type = c("log") # see if this relevant, or source function should have it!
|
||||
, bg_prob = NULL
|
||||
, my_logo_col = "taylor"
|
||||
, plot_positions = c(1:15)
|
||||
#, y_breaks
|
||||
, x_lab_mut = "nsSNP-position"
|
||||
#, y_lab_mut
|
||||
, x_ats = 13 # text size
|
||||
, x_tangle = 90 # text angle
|
||||
, x_axis_offset = 0.05
|
||||
, x_axis_offset_filtered = 0.05
|
||||
, y_axis_offset = 0.05
|
||||
, y_ats = 13
|
||||
, y_tangle = 0
|
||||
, x_tts = 13
|
||||
, y_tts = 13
|
||||
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
|
||||
, leg_dir = "horizontal" #can be vertical or horizontal
|
||||
, leg_ts = 16 # leg text size
|
||||
, leg_tts = 16 # leg title size
|
||||
)
|
||||
|
|
|
@ -174,6 +174,7 @@ if(!require(protr)){
|
|||
|
||||
#BiocManager::install("Logolas")
|
||||
library("Logolas")
|
||||
library("Biostrings")
|
||||
|
||||
|
||||
####################################
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue