playing with MSA plots to allow filtering of positions, arghhh
This commit is contained in:
parent
08bd8a2ee5
commit
00094f036a
6 changed files with 209 additions and 46 deletions
|
@ -1,2 +1,8 @@
|
||||||
gene = "gid"
|
gene = "gid"
|
||||||
drug = "streptomycin"
|
drug = "streptomycin"
|
||||||
|
|
||||||
|
rna_bind_aa_pos = c(96, 97, 118, 163)
|
||||||
|
bin_aa_pos = c(48, 51, 137, 200)
|
||||||
|
active_aa_pos = c(rna_bind_aa_pos, bin_aa_pos)
|
||||||
|
|
||||||
|
#rna_site = G518
|
||||||
|
|
|
@ -20,10 +20,11 @@
|
||||||
|
|
||||||
LogoPlotMSA <- function(msaSeq_mut
|
LogoPlotMSA <- function(msaSeq_mut
|
||||||
, msaSeq_wt
|
, msaSeq_wt
|
||||||
|
, plot_positions
|
||||||
, msa_method = 'bits' # or probability
|
, msa_method = 'bits' # or probability
|
||||||
, my_logo_col = "chemistry"
|
, my_logo_col = "chemistry"
|
||||||
, x_lab = "Wild-type position"
|
, x_lab = "Wild-type position"
|
||||||
, y_lab = "Count"
|
, y_lab = ""
|
||||||
, x_ats = 13 # text size
|
, x_ats = 13 # text size
|
||||||
, x_tangle = 90 # text angle
|
, x_tangle = 90 # text angle
|
||||||
, y_ats = 13
|
, y_ats = 13
|
||||||
|
@ -37,12 +38,94 @@ LogoPlotMSA <- function(msaSeq_mut
|
||||||
)
|
)
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|
||||||
############################################
|
############################################
|
||||||
# Data processing for logo plot for nsSNPS
|
# Data processing for logo plot for nsSNPS
|
||||||
############################################
|
###########################################
|
||||||
|
cat("\nLength of MSA", length(msaSeq_mut)
|
||||||
|
, "\nlength of WT seq:", length(msaSeq_wt))
|
||||||
|
|
||||||
|
if(missing(plot_positions)){
|
||||||
|
#if(is.null(plot_positions)){
|
||||||
|
cat("\nPlotting entire MSA")
|
||||||
|
msa_seq_plot = msaSeq_mut
|
||||||
|
wt_seq_plot = msaSeq_wt
|
||||||
|
|
||||||
|
} else {
|
||||||
|
cat("\nUser specified plotting positions for MSA:"
|
||||||
|
, "These are:", plot_positions)
|
||||||
|
|
||||||
|
#-----------
|
||||||
|
# MSA: mut
|
||||||
|
#-----------
|
||||||
|
cat("\nGenerating MSA: filtered positions")
|
||||||
|
msa_interim = sapply(msaSeq_mut, function(x) unlist(strsplit(x,"")))
|
||||||
|
|
||||||
|
if (any(is.na(msa_interim[plot_positions]))){
|
||||||
|
cat("Plot_positions selected:", length(plot_positions))
|
||||||
|
i_ofr = plot_positions[is.na(msa_interim[plot_positions])]
|
||||||
|
cat("\nIndex out of range: 1 or more"
|
||||||
|
, "\nThese are:", i_ofr
|
||||||
|
, "\nOmitting these and proceeding...")
|
||||||
|
i_extract = na.omit(msa_interim[plot_positions])
|
||||||
|
cat("\nFinal positions being plottted:", length(i_extract)
|
||||||
|
, "\nNo. of positions dropped from request:", length(i_ofr))
|
||||||
|
|
||||||
|
}else{
|
||||||
|
cat("\nAll positions within range"
|
||||||
|
, "\nProceeing with generating requested position MSA seqs...")
|
||||||
|
i_extract = plot_positions
|
||||||
|
}
|
||||||
|
|
||||||
|
matP1 = msa_interim[i_extract, 1:ncol(msa_interim)]
|
||||||
|
|
||||||
|
dfP1 = data.frame(t(matP1))
|
||||||
|
names(dfP1) = i_extract
|
||||||
|
cols_to_paste = names(dfP1)
|
||||||
|
dfP1['chosen_seq'] = apply( dfP1[ , cols_to_paste]
|
||||||
|
, 1
|
||||||
|
, paste, sep = ''
|
||||||
|
, collapse = "")
|
||||||
|
|
||||||
|
msa_seq_plot = dfP1$chosen_seq
|
||||||
|
|
||||||
|
#-----------
|
||||||
|
# WT: fasta
|
||||||
|
#-----------
|
||||||
|
cat("\nGenerating WT fasta: filtered positions")
|
||||||
|
|
||||||
|
wt_interim = sapply(msaSeq_wt, function(x) unlist(strsplit(x,"")))
|
||||||
|
|
||||||
|
if (any(is.na(wt_interim[plot_positions]))){
|
||||||
|
cat("Plot_positions selected:", length(plot_positions))
|
||||||
|
i2_ofr = plot_positions[is.na(wt_interim[plot_positions])]
|
||||||
|
cat("\nIndex out of range: 1 or more"
|
||||||
|
, "\nThese are:", i2_ofr
|
||||||
|
, "\nOmitting these and proceeding...")
|
||||||
|
i2_extract = na.omit(wt_interim[plot_positions])
|
||||||
|
cat("\nFinal positions being plottted:", length(i2_extract)
|
||||||
|
, "\nNo. of positions dropped from request:", length(i2_ofr))
|
||||||
|
|
||||||
|
}else{
|
||||||
|
cat("\nAll positions within range"
|
||||||
|
, "\nProceeing with generating requested position MSA seqs...")
|
||||||
|
i2_extract = plot_positions
|
||||||
|
}
|
||||||
|
|
||||||
|
matP2 = wt_interim[i_extract, 1:ncol(wt_interim)]
|
||||||
|
|
||||||
|
dfP2 = data.frame(t(matP2))
|
||||||
|
names(dfP2) = i2_extract
|
||||||
|
cols_to_paste_P2 = names(dfP2)
|
||||||
|
|
||||||
|
dfP2['chosen_seq'] = apply( dfP2[ , cols_to_paste_P2]
|
||||||
|
, 1
|
||||||
|
, paste, sep = ''
|
||||||
|
, collapse = "")
|
||||||
|
|
||||||
|
wt_seq_plot = dfP2$chosen_seq
|
||||||
|
}
|
||||||
|
|
||||||
cat("\nLength of MSA", nrow(msaSeq_mut)
|
|
||||||
, "\nlength of WT seq:", nrow(msaSeq_wt))
|
|
||||||
|
|
||||||
######################################
|
######################################
|
||||||
# Generating plots for muts and wt
|
# Generating plots for muts and wt
|
||||||
|
@ -78,12 +161,16 @@ LogoPlotMSA <- function(msaSeq_mut
|
||||||
#-------------------
|
#-------------------
|
||||||
# Mutant logo plot
|
# Mutant logo plot
|
||||||
#-------------------
|
#-------------------
|
||||||
p0 = ggseqlogo(msaSeq_mut
|
p0 = ggseqlogo(msa_seq_plot
|
||||||
#msaSeq_mut$V1
|
|
||||||
, facet = "grid"
|
, facet = "grid"
|
||||||
, method = msa_method
|
, method = msa_method
|
||||||
, col_scheme = my_logo_col
|
, col_scheme = my_logo_col
|
||||||
, seq_type = 'aa')
|
, seq_type = 'aa') +
|
||||||
|
scale_x_discrete(x_lab
|
||||||
|
, breaks = i_extract
|
||||||
|
, labels = i_extract
|
||||||
|
#, limits = min(i_extract): max(i_extract))
|
||||||
|
, limits = factor(i_extract))
|
||||||
|
|
||||||
# further customisation
|
# further customisation
|
||||||
msa_mut_logo_P = p0 + theme(legend.position = leg_pos
|
msa_mut_logo_P = p0 + theme(legend.position = leg_pos
|
||||||
|
@ -111,6 +198,7 @@ LogoPlotMSA <- function(msaSeq_mut
|
||||||
|
|
||||||
, plot.background = element_rect(fill = theme_bgc))
|
, plot.background = element_rect(fill = theme_bgc))
|
||||||
|
|
||||||
|
|
||||||
cat('\nDone: msa_mut_logo_P')
|
cat('\nDone: msa_mut_logo_P')
|
||||||
#return(msa_mut_logoP)
|
#return(msa_mut_logoP)
|
||||||
LogoPlotMSAL[['msa_mut_logoP']] <- msa_mut_logo_P
|
LogoPlotMSAL[['msa_mut_logoP']] <- msa_mut_logo_P
|
||||||
|
@ -118,12 +206,16 @@ LogoPlotMSA <- function(msaSeq_mut
|
||||||
#---------------------------------
|
#---------------------------------
|
||||||
# Wild-type MSA: gene_fasta file
|
# Wild-type MSA: gene_fasta file
|
||||||
#---------------------------------
|
#---------------------------------
|
||||||
p1 = ggseqlogo(msaSeq_wt
|
p1 = ggseqlogo(wt_seq_plot
|
||||||
#msaSeq_wt$V1
|
|
||||||
, facet = "grid"
|
, facet = "grid"
|
||||||
, method = msa_method
|
, method = msa_method
|
||||||
, col_scheme = my_logo_col
|
, col_scheme = my_logo_col
|
||||||
, seq_type = 'aa')
|
, seq_type = 'aa')+
|
||||||
|
scale_x_discrete(x_lab
|
||||||
|
, breaks = i_extract
|
||||||
|
, labels = i_extract
|
||||||
|
#, limits = min(i_extract): max(i_extract))
|
||||||
|
, limits = factor(i_extract))
|
||||||
|
|
||||||
# further customisation
|
# further customisation
|
||||||
msa_wt_logo_P = p1 +
|
msa_wt_logo_P = p1 +
|
||||||
|
|
|
@ -2,10 +2,10 @@ source("~/git/LSHTM_analysis/config/gid.R")
|
||||||
#source("~/git/LSHTM_analysis/config/pnca.R") # YES
|
#source("~/git/LSHTM_analysis/config/pnca.R") # YES
|
||||||
#---------------------------------------------------
|
#---------------------------------------------------
|
||||||
# FIXME
|
# FIXME
|
||||||
source("~/git/LSHTM_analysis/config/alr.R")
|
# source("~/git/LSHTM_analysis/config/alr.R")
|
||||||
source("~/git/LSHTM_analysis/config/embb.R")
|
# source("~/git/LSHTM_analysis/config/embb.R")
|
||||||
source("~/git/LSHTM_analysis/config/katg.R")
|
# source("~/git/LSHTM_analysis/config/katg.R")
|
||||||
source("~/git/LSHTM_analysis/config/rpob.R")
|
# source("~/git/LSHTM_analysis/config/rpob.R")
|
||||||
#---------------------------------------------------
|
#---------------------------------------------------
|
||||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||||
|
|
||||||
|
@ -72,16 +72,25 @@ LogoPlotSnps(plot_df = merged_df3
|
||||||
# wild-type and mutant aa
|
# wild-type and mutant aa
|
||||||
# script: logoP_msa.R
|
# script: logoP_msa.R
|
||||||
########################################
|
########################################
|
||||||
# msa1 = read.csv("/home/tanu/git/Data/cycloserine/output/alr_msa.csv", header = F)
|
# BOTH WORK
|
||||||
# head(msa1)
|
#LogoPlotMSA(msa_seq, wt_seq)
|
||||||
# msa_seq= msa1$V1
|
|
||||||
# head(msa_seq)
|
LogoPlotMSA(msaSeq_mut = msa_seq
|
||||||
#
|
, msaSeq_wt = wt_seq
|
||||||
# msa2 = read.csv("/home/tanu/git/Data/cycloserine/input/alr.1fasta", header = F)
|
, msa_method = 'bits' # or probability
|
||||||
# head(msa2)
|
, my_logo_col = "chemistry"
|
||||||
# wt_seq = msa2$V1
|
#, plot_positions = active_aa_pos
|
||||||
# head(wt_seq)
|
, plot_positions
|
||||||
#
|
, x_lab = "Wild-type position"
|
||||||
# # BOTH WORK
|
, y_lab = ""
|
||||||
# LogoPlotMSA(msa_seq, wt_seq)
|
, x_ats = 13 # text size
|
||||||
# LogoPlotMSA(msa1, msa2)
|
, x_tangle = 90 # text angle
|
||||||
|
, y_ats = 13
|
||||||
|
, y_tangle = 0
|
||||||
|
, x_tts = 13 # title size
|
||||||
|
, y_tts = 13
|
||||||
|
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
|
||||||
|
, leg_dir = "horizontal" #can be vertical or horizontal
|
||||||
|
, leg_ts = 16 # leg text size
|
||||||
|
, leg_tts = 16 # leg title size
|
||||||
|
)
|
|
@ -110,9 +110,16 @@ merged_df3_comp = all_plot_dfs[[4]]
|
||||||
####################################################################
|
####################################################################
|
||||||
|
|
||||||
#source(paste0(plot_script_path, "logo_data.R"))
|
#source(paste0(plot_script_path, "logo_data.R"))
|
||||||
|
|
||||||
#s1 = c("\nSuccessfully sourced logo_data.R")
|
#s1 = c("\nSuccessfully sourced logo_data.R")
|
||||||
#cat(s1)
|
#cat(s1)
|
||||||
|
# input data is merged_df3
|
||||||
|
# so repurposed it into a function so params can be passed instead to generate
|
||||||
|
# data required for plotting.
|
||||||
|
# Moved "logo_data.R" to redundant/
|
||||||
|
|
||||||
|
source(paste0(plot_script_path, "logo_data_msa.R"))
|
||||||
|
s1 = c("\nSuccessfully sourced logo_data_msa.R")
|
||||||
|
cat(s1)
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# Data for DM OM Plots: Long format dfs
|
# Data for DM OM Plots: Long format dfs
|
||||||
|
|
49
scripts/plotting/logo_data_msa.R
Normal file
49
scripts/plotting/logo_data_msa.R
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
#=================================================
|
||||||
|
# Data for Logo MSA plots
|
||||||
|
#=================================================
|
||||||
|
cat("\n=========================================="
|
||||||
|
, "\nLogo MSA Plots Data: ALL params"
|
||||||
|
, "\n=========================================")
|
||||||
|
|
||||||
|
#msa1 = read.csv("/home/tanu/git/Data/cycloserine/output/gid_msa.csv", header = F)
|
||||||
|
#head(msa1)
|
||||||
|
#msa_seq= msa1$V1
|
||||||
|
#head(msa_seq)
|
||||||
|
|
||||||
|
#msa2 = read.csv("/home/tanu/git/Data/cycloserine/input/gid.1fasta", header = F)
|
||||||
|
#head(msa2)
|
||||||
|
#wt_seq = msa2$V1
|
||||||
|
#head(wt_seq)
|
||||||
|
|
||||||
|
# BOTH WORK
|
||||||
|
#LogoPlotMSA(msa_seq, wt_seq)
|
||||||
|
#LogoPlotMSA(msa1, msa2)
|
||||||
|
#####################################
|
||||||
|
|
||||||
|
#================
|
||||||
|
# MSA file: muts
|
||||||
|
#================
|
||||||
|
|
||||||
|
in_filename_msa = paste0(tolower(gene), "_msa.csv")
|
||||||
|
infile_msa = paste0(outdir, "/", in_filename_msa)
|
||||||
|
cat("\nInput file for MSA plots: ", infile_msa, "\n")
|
||||||
|
|
||||||
|
msa1 = read.csv(infile_msa, header = F)
|
||||||
|
head(msa1)
|
||||||
|
cat("\nLength of MSA:", nrow(msa1))
|
||||||
|
msa_seq = msa1$V1
|
||||||
|
head(msa_seq)
|
||||||
|
|
||||||
|
#================
|
||||||
|
# fasta file: wt
|
||||||
|
#================
|
||||||
|
|
||||||
|
in_filename_fasta = paste0(tolower(gene), ".1fasta")
|
||||||
|
infile_fasta = paste0(indir, "/", in_filename_fasta)
|
||||||
|
cat("\nInput fasta file for WT: ", infile_fasta, "\n")
|
||||||
|
|
||||||
|
msa2 = read.csv(infile_fasta, header = F)
|
||||||
|
head(msa2)
|
||||||
|
cat("\nLength of WT fasta:", nrow(msa2))
|
||||||
|
wt_seq = msa2$V1
|
||||||
|
head(wt_seq)
|
Loading…
Add table
Add a link
Reference in a new issue