added my_logolas.R

This commit is contained in:
Tanushree Tunstall 2022-01-24 13:46:50 +00:00
parent febb8f0f7f
commit 9aa62b33b1
5 changed files with 1423 additions and 93 deletions

View file

@ -2,7 +2,20 @@ gene = "gid"
drug = "streptomycin"
rna_bind_aa_pos = c(96, 97, 118, 163)
bin_aa_pos = c(48, 51, 137, 200)
active_aa_pos = c(rna_bind_aa_pos, bin_aa_pos)
binding_aa_pos = c(48, 51, 137, 200)
active_aa_pos = sort(unique(c(rna_bind_aa_pos, binding_aa_pos)))
#rna_site = G518
cat("\nNo. of active site residues for gene"
, gene, ":"
, length(active_aa_pos)
, "\nThese are:\n"
, active_aa_pos)
cat("\n==================================================="
, "\nActive site residues for", gene, "comprise of..."
, "\n==================================================="
, "\nRNA binding residues:"
, rna_bind_aa_pos
, "\nBinding site residues:"
, binding_aa_pos)

View file

@ -1,2 +1,36 @@
gene = "pncA"
drug = "pyrazinamide"
#===================================
#Iron centre --> purple
#Catalytic triad --> yellow
#Substrate binding --> teal and blue
#H-bond --> green
#====================================
metal_aa_pos = c(49, 51, 57, 71)
catalytic_aa_pos = c(8, 96, 138)
substrate_aa_pos = c(13, 68, 103, 137)
hbond_aa_pos = c(133, 134, 8, 138)
active_aa_pos = sort(unique(c(metal_aa_pos
, catalytic_aa_pos
, substrate_aa_pos
, hbond_aa_pos)))
cat("\nNo. of active site residues for gene"
, gene, ":"
, length(active_aa_pos)
, "\nThese are:\n"
, active_aa_pos)
cat("\n==================================================="
, "\nActive site residues for", gene, "comprise of..."
, "\n==================================================="
, "\nMetal coordination centre residues:"
, metal_aa_pos
, "\nCatalytic triad residues:"
, catalytic_aa_pos
, "\nSubstrate binding residues:"
, substrate_aa_pos
, "\nH-bonding residues:"
, hbond_aa_pos)

View file

@ -27,6 +27,7 @@ LogoPlotMSA <- function(msaSeq_mut
, y_lab = ""
, x_ats = 13 # text size
, x_tangle = 90 # text angle
, x_axis_offset = 0.07 # dist b/w y-axis and plot start
, y_ats = 13
, y_tangle = 0
, x_tts = 13 # title size
@ -47,42 +48,66 @@ LogoPlotMSA <- function(msaSeq_mut
if(missing(plot_positions)){
#if(is.null(plot_positions)){
cat("\nPlotting entire MSA")
cat("\n======================="
, "\nPlotting entire MSA"
, "\n========================")
msa_seq_plot = msaSeq_mut
msa_all_interim = sapply(msa_seq_plot, function(x) unlist(strsplit(x,"")))
msa_all_interimDF = data.frame(msa_all_interim)
msa_all_pos = as.numeric(rownames(msa_all_interimDF))
wt_seq_plot = msaSeq_wt
wt_all_interim = sapply(wt_seq_plot, function(x) unlist(strsplit(x,"")))
wt_all_interimDF = data.frame(wt_all_interim)
wt_all_pos = as.numeric(rownames(wt_all_interimDF))
} else {
cat("\nUser specified plotting positions for MSA:"
, "These are:", plot_positions)
, "\nThese are:\n", plot_positions
, "\nSorting plot positions...")
plot_positions = sort(plot_positions)
cat("\nPlotting positions sorted:\n"
, plot_positions)
#-----------
# MSA: mut
#-----------
cat("\nGenerating MSA: filtered positions")
msa_interim = sapply(msaSeq_mut, function(x) unlist(strsplit(x,"")))
cat("\n==========================================="
, "\nGenerating MSA: filtered positions"
, "\n===========================================")
if (any(is.na(msa_interim[plot_positions]))){
cat("Plot_positions selected:", length(plot_positions))
i_ofr = plot_positions[is.na(msa_interim[plot_positions])]
cat("\nIndex out of range: 1 or more"
, "\nThese are:", i_ofr
, "\nOmitting these and proceeding...")
i_extract = na.omit(msa_interim[plot_positions])
cat("\nFinal positions being plottted:", length(i_extract)
, "\nNo. of positions dropped from request:", length(i_ofr))
msa_interim = sapply(msaSeq_mut, function(x) unlist(strsplit(x,"")))
msa_interimDF = data.frame(msa_interim)
msa_pos = as.numeric(rownames(msa_interimDF))
if (all(plot_positions%in%msa_pos)){
cat("\nAll positions within range"
, "\nProceeding with generating requested position MSA seqs..."
, "\nNo. of positions in plot:", length(plot_positions))
i_extract = plot_positions
dfP1 = msa_interimDF[i_extract,]
}else{
cat("\nAll positions within range"
, "\nProceeing with generating requested position MSA seqs...")
i_extract = plot_positions
cat("\nNo. of positions selected:", length(plot_positions))
i_ofr = plot_positions[!plot_positions%in%msa_pos]
cat("\n1 or more plot_positions out of range..."
, "\nThese are:\n", i_ofr
, "\nQuitting! Resubmit with correct plot_positions")
#i_extract = plot_positions[plot_positions%in%msa_pos]
#cat("\nFinal no. of positions being plottted:", length(i_extract)
# , "\nNo. of positions dropped from request:", length(i_ofr))
quit()
}
matP1 = msa_interim[i_extract, 1:ncol(msa_interim)]
dfP1 = data.frame(t(matP1))
#matP1 = msa_interim[i_extract, 1:ncol(msa_interim)]
#dfP1 = msa_interimDF[i_extract,]
dfP1 = data.frame(t(dfP1))
names(dfP1) = i_extract
cols_to_paste = names(dfP1)
dfP1['chosen_seq'] = apply( dfP1[ , cols_to_paste]
dfP1['chosen_seq'] = apply(dfP1[ , cols_to_paste]
, 1
, paste, sep = ''
, collapse = "")
@ -92,40 +117,42 @@ LogoPlotMSA <- function(msaSeq_mut
#-----------
# WT: fasta
#-----------
cat("\nGenerating WT fasta: filtered positions")
cat("\n========================================="
, "\nGenerating WT fasta: filtered positions"
,"\n===========================================")
wt_interim = sapply(msaSeq_wt, function(x) unlist(strsplit(x,"")))
wt_interimDF = data.frame(wt_interim)
wt_pos = as.numeric(rownames(wt_interimDF))
if (any(is.na(wt_interim[plot_positions]))){
cat("Plot_positions selected:", length(plot_positions))
i2_ofr = plot_positions[is.na(wt_interim[plot_positions])]
cat("\nIndex out of range: 1 or more"
, "\nThese are:", i2_ofr
, "\nOmitting these and proceeding...")
i2_extract = na.omit(wt_interim[plot_positions])
cat("\nFinal positions being plottted:", length(i2_extract)
, "\nNo. of positions dropped from request:", length(i2_ofr))
}else{
if (all(plot_positions%in%wt_pos)){
cat("\nAll positions within range"
, "\nProceeing with generating requested position MSA seqs...")
, "\nProceeding with generating requested position MSA seqs..."
, "\nplot positions:", length(plot_positions))
i2_extract = plot_positions
}else{
cat("\nNo. of positions selected:", length(plot_positions))
i2_ofr = plot_positions[!plot_positions%in%wt_pos]
cat("\n1 or more plot_positions out of range..."
, "\nThese are:\n", i_ofr
, "\nQuitting! Resubmit with correct plot_positions")
#i2_extract = plot_positions[plot_positions%in%wt_pos]
#cat("\nFinal no. of positions being plottted:", length(i2_extract)
# , "\nNo. of positions dropped from request:", length(i2_ofr))
quit()
}
matP2 = wt_interim[i_extract, 1:ncol(wt_interim)]
dfP2 = data.frame(t(matP2))
#matP1 = msa_interim[i_extract, 1:ncol(msa_interim)]
dfP2 = wt_interimDF[i2_extract,]
dfP2 = data.frame(t(dfP2))
names(dfP2) = i2_extract
cols_to_paste_P2 = names(dfP2)
dfP2['chosen_seq'] = apply( dfP2[ , cols_to_paste_P2]
cols_to_paste2 = names(dfP2)
dfP2['chosen_seq'] = apply( dfP2[ , cols_to_paste2]
, 1
, paste, sep = ''
, collapse = "")
wt_seq_plot = dfP2$chosen_seq
}
}
######################################
# Generating plots for muts and wt
@ -144,7 +171,8 @@ LogoPlotMSA <- function(msaSeq_mut
}
if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
cat('\nSelected colour scheme:', my_logo_col
cat("\nstart of MSA"
, '\nSelected colour scheme:', my_logo_col
, "\nUsing grey theme")
theme_bgc = "grey"
@ -192,18 +220,22 @@ LogoPlotMSA <- function(msaSeq_mut
xlab(x_lab)
if (missing(plot_positions)){
msa_mut_logo_P = p0
msa_mut_logo_P = p0 +
scale_x_discrete(breaks = msa_all_pos
, expand = c(0.02,0)
, labels = msa_all_pos
, limits = factor(msa_all_pos))
}else{
msa_mut_logo_P = p0 +
scale_y_continuous(expand = c(0,0.09)) +
scale_x_discrete(breaks = i_extract
, expand = c(0.09,0)
, expand = c(x_axis_offset,0)
, labels = i_extract
, limits = factor(i_extract))
}
cat('\nDone: msa_mut_logo_P')
cat('\nDone: MSA plot for mutations')
#return(msa_mut_logoP)
LogoPlotMSAL[['msa_mut_logoP']] <- msa_mut_logo_P
@ -238,19 +270,23 @@ LogoPlotMSA <- function(msaSeq_mut
, plot.background = element_rect(fill = theme_bgc)) +
ylab("") + xlab("Wild-type position")
if (missing(plot_positions)){
msa_wt_logo_P = p1
msa_wt_logo_P = p1 +
scale_x_discrete(breaks = wt_all_pos
, expand = c(0.02,0)
, labels = wt_all_pos
, limits = factor(wt_all_pos) )
}else{
msa_wt_logo_P = p1 +
scale_y_continuous(expand = c(0,0.09)) +
scale_x_discrete(breaks = i2_extract
, expand = c(0.09,0)
, expand = c(x_axis_offset, 0)
, labels = i2_extract
, limits = factor(i2_extract))
}
cat('\nDone: msa_wt_logo_P')
cat('\nDone: MSA plot for WT')
#return(msa_wt_logoP)
LogoPlotMSAL[['msa_wt_logoP']] <- msa_wt_logo_P

File diff suppressed because it is too large Load diff

View file

@ -1,32 +1,29 @@
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/pnca.R") # YES
source("~/git/LSHTM_analysis/config/embb.R")
#---------------------------------------------------
# FIXME
# source("~/git/LSHTM_analysis/config/alr.R")
# source("~/git/LSHTM_analysis/config/embb.R")
# source("~/git/LSHTM_analysis/config/katg.R")
# source("~/git/LSHTM_analysis/config/rpob.R")
source("~/git/LSHTM_analysis/config/pnca.R") # YES
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
#---------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#
# ################################
# # Logo plot with custom Y axis
# # mainly OR
# # script: logoP.R
# ################################
################################
# Logo plot with custom Y axis
# mainly OR
# script: logoP.R
################################
# LogoPlotCustomH (plot_df = merged_df3
# , x_axis_colname = "position"
# , y_axis_colname = "or_mychisq"
# , symbol_colname = "mutant_type"
# , y_axis_log = F
# , y_axis_log = T
# , log_value = log10
# , y_axis_increment = 5
# , rm_empty_y = F
# , y_axis_increment = 100
# , rm_empty_y = T
# , my_logo_col = 'chemistry'
# , x_lab = "Wild-type position"
# , y_lab = "Odds Ratio"
# , x_ats = 12 # text size
# , x_ats = 10 # text size
# , x_tangle = 90 # text angle
# , y_ats = 22
# , y_tangle = 0
@ -38,24 +35,24 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# , leg_ts = 15 # leg text size
# , leg_tts = 16 # leg title size
# )
#
#
# # ########################################
# # # Logo plot showing nsSNPs by positions
# # # wild-type and mutant aa
# # # script: logoP_snp.R
# # ########################################
########################################
# Logo plot showing nsSNPs by positions
# wild-type and mutant aa
# script: logoP_snp.R
########################################
# LogoPlotSnps(plot_df = merged_df3
# , x_axis_colname = "position"
# , symbol_mut_colname = "mutant_type"
# , symbol_wt_colname = "wild_type"
# , omit_snp_count = c(0)# can be 1, 2, etc.
# , omit_snp_count = c(1)# can be 0,1, 2, etc.
# , my_logo_col = "chemistry"
# , x_lab = "Wild-type position"
# , y_lab = ""
# , x_ats = 13 # text size
# , y_lab = "nsSNP count"
# , x_ats = 10 # text size
# , x_tangle = 90 # text angle
# , y_ats = 20
# , y_ats = 18
# , y_tangle = 0
# , x_tts = 18 # title size
# , y_tts = 18
@ -65,7 +62,6 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# , leg_tts = 16 # leg title size
# )
#
#
########################################
# Logo plot MSA
@ -76,21 +72,47 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# To plot entire MSA, simply don't specify {plot_positions}
# script: logoP_msa.R
# TODO perhaps: ED logo from Logolas
########################################
# LogoPlotMSA(msaSeq_mut = msa_seq
# , msaSeq_wt = wt_seq
# , msa_method = 'bits' # or probability
# , my_logo_col = "taylor"
# , plot_positions = active_aa_pos
# , x_lab = "nsSNP position"
# , y_lab = ""
# , x_ats = 10 # text size
# , x_tangle = 90 # text angle
# , x_axis_offset = 0.05
# , y_ats = 15
# , y_tangle = 0
# , x_tts = 13 # title size
# , y_tts = 15
# , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
# , leg_dir = "horizontal" #can be vertical or horizontal
# , leg_ts = 16 # leg text size
# , leg_tts = 16 # leg title size
# )
########################################
LogoPlotMSA(msaSeq_mut = msa_seq
# ED Logo plot MSA
# Mutant and wild-type
########################################
LogoPlotED(msaSeq_mut = msa_seq
, msaSeq_wt = wt_seq
, msa_method = 'bits' # or probability
, my_logo_col = "taylor"
#, plot_positions = c(24, 54, 48, 58, 44, 80, 87, 1000:1008)
, plot_positions = active_aa_pos
, x_lab = "nsSNP position"
, y_lab = ""
, x_ats = 10 # text size
, x_tangle = 90 # text angle
, y_ats = 13
, x_axis_offset = 0.05
, y_ats = 15
, y_tangle = 0
, x_tts = 13 # title size
, y_tts = 13
, y_tts = 15
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
, leg_dir = "horizontal" #can be vertical or horizontal
, leg_ts = 16 # leg text size