added my_logolas.R

This commit is contained in:
Tanushree Tunstall 2022-01-24 13:46:50 +00:00
parent febb8f0f7f
commit 9aa62b33b1
5 changed files with 1423 additions and 93 deletions

View file

@ -2,7 +2,20 @@ gene = "gid"
drug = "streptomycin" drug = "streptomycin"
rna_bind_aa_pos = c(96, 97, 118, 163) rna_bind_aa_pos = c(96, 97, 118, 163)
bin_aa_pos = c(48, 51, 137, 200) binding_aa_pos = c(48, 51, 137, 200)
active_aa_pos = c(rna_bind_aa_pos, bin_aa_pos) active_aa_pos = sort(unique(c(rna_bind_aa_pos, binding_aa_pos)))
#rna_site = G518 #rna_site = G518
cat("\nNo. of active site residues for gene"
, gene, ":"
, length(active_aa_pos)
, "\nThese are:\n"
, active_aa_pos)
cat("\n==================================================="
, "\nActive site residues for", gene, "comprise of..."
, "\n==================================================="
, "\nRNA binding residues:"
, rna_bind_aa_pos
, "\nBinding site residues:"
, binding_aa_pos)

View file

@ -1,2 +1,36 @@
gene = "pncA" gene = "pncA"
drug = "pyrazinamide" drug = "pyrazinamide"
#===================================
#Iron centre --> purple
#Catalytic triad --> yellow
#Substrate binding --> teal and blue
#H-bond --> green
#====================================
metal_aa_pos = c(49, 51, 57, 71)
catalytic_aa_pos = c(8, 96, 138)
substrate_aa_pos = c(13, 68, 103, 137)
hbond_aa_pos = c(133, 134, 8, 138)
active_aa_pos = sort(unique(c(metal_aa_pos
, catalytic_aa_pos
, substrate_aa_pos
, hbond_aa_pos)))
cat("\nNo. of active site residues for gene"
, gene, ":"
, length(active_aa_pos)
, "\nThese are:\n"
, active_aa_pos)
cat("\n==================================================="
, "\nActive site residues for", gene, "comprise of..."
, "\n==================================================="
, "\nMetal coordination centre residues:"
, metal_aa_pos
, "\nCatalytic triad residues:"
, catalytic_aa_pos
, "\nSubstrate binding residues:"
, substrate_aa_pos
, "\nH-bonding residues:"
, hbond_aa_pos)

View file

@ -27,6 +27,7 @@ LogoPlotMSA <- function(msaSeq_mut
, y_lab = "" , y_lab = ""
, x_ats = 13 # text size , x_ats = 13 # text size
, x_tangle = 90 # text angle , x_tangle = 90 # text angle
, x_axis_offset = 0.07 # dist b/w y-axis and plot start
, y_ats = 13 , y_ats = 13
, y_tangle = 0 , y_tangle = 0
, x_tts = 13 # title size , x_tts = 13 # title size
@ -47,42 +48,66 @@ LogoPlotMSA <- function(msaSeq_mut
if(missing(plot_positions)){ if(missing(plot_positions)){
#if(is.null(plot_positions)){ #if(is.null(plot_positions)){
cat("\nPlotting entire MSA") cat("\n======================="
, "\nPlotting entire MSA"
, "\n========================")
msa_seq_plot = msaSeq_mut msa_seq_plot = msaSeq_mut
msa_all_interim = sapply(msa_seq_plot, function(x) unlist(strsplit(x,"")))
msa_all_interimDF = data.frame(msa_all_interim)
msa_all_pos = as.numeric(rownames(msa_all_interimDF))
wt_seq_plot = msaSeq_wt wt_seq_plot = msaSeq_wt
wt_all_interim = sapply(wt_seq_plot, function(x) unlist(strsplit(x,"")))
wt_all_interimDF = data.frame(wt_all_interim)
wt_all_pos = as.numeric(rownames(wt_all_interimDF))
} else { } else {
cat("\nUser specified plotting positions for MSA:" cat("\nUser specified plotting positions for MSA:"
, "These are:", plot_positions) , "\nThese are:\n", plot_positions
, "\nSorting plot positions...")
plot_positions = sort(plot_positions)
cat("\nPlotting positions sorted:\n"
, plot_positions)
#----------- #-----------
# MSA: mut # MSA: mut
#----------- #-----------
cat("\nGenerating MSA: filtered positions") cat("\n==========================================="
msa_interim = sapply(msaSeq_mut, function(x) unlist(strsplit(x,""))) , "\nGenerating MSA: filtered positions"
, "\n===========================================")
if (any(is.na(msa_interim[plot_positions]))){ msa_interim = sapply(msaSeq_mut, function(x) unlist(strsplit(x,"")))
cat("Plot_positions selected:", length(plot_positions)) msa_interimDF = data.frame(msa_interim)
i_ofr = plot_positions[is.na(msa_interim[plot_positions])] msa_pos = as.numeric(rownames(msa_interimDF))
cat("\nIndex out of range: 1 or more"
, "\nThese are:", i_ofr if (all(plot_positions%in%msa_pos)){
, "\nOmitting these and proceeding...")
i_extract = na.omit(msa_interim[plot_positions])
cat("\nFinal positions being plottted:", length(i_extract)
, "\nNo. of positions dropped from request:", length(i_ofr))
}else{
cat("\nAll positions within range" cat("\nAll positions within range"
, "\nProceeing with generating requested position MSA seqs...") , "\nProceeding with generating requested position MSA seqs..."
, "\nNo. of positions in plot:", length(plot_positions))
i_extract = plot_positions i_extract = plot_positions
dfP1 = msa_interimDF[i_extract,]
}else{
cat("\nNo. of positions selected:", length(plot_positions))
i_ofr = plot_positions[!plot_positions%in%msa_pos]
cat("\n1 or more plot_positions out of range..."
, "\nThese are:\n", i_ofr
, "\nQuitting! Resubmit with correct plot_positions")
#i_extract = plot_positions[plot_positions%in%msa_pos]
#cat("\nFinal no. of positions being plottted:", length(i_extract)
# , "\nNo. of positions dropped from request:", length(i_ofr))
quit()
} }
matP1 = msa_interim[i_extract, 1:ncol(msa_interim)] #matP1 = msa_interim[i_extract, 1:ncol(msa_interim)]
#dfP1 = msa_interimDF[i_extract,]
dfP1 = data.frame(t(matP1)) dfP1 = data.frame(t(dfP1))
names(dfP1) = i_extract names(dfP1) = i_extract
cols_to_paste = names(dfP1) cols_to_paste = names(dfP1)
dfP1['chosen_seq'] = apply( dfP1[ , cols_to_paste] dfP1['chosen_seq'] = apply(dfP1[ , cols_to_paste]
, 1 , 1
, paste, sep = '' , paste, sep = ''
, collapse = "") , collapse = "")
@ -92,44 +117,46 @@ LogoPlotMSA <- function(msaSeq_mut
#----------- #-----------
# WT: fasta # WT: fasta
#----------- #-----------
cat("\nGenerating WT fasta: filtered positions") cat("\n========================================="
, "\nGenerating WT fasta: filtered positions"
,"\n===========================================")
wt_interim = sapply(msaSeq_wt, function(x) unlist(strsplit(x,""))) wt_interim = sapply(msaSeq_wt, function(x) unlist(strsplit(x,"")))
wt_interimDF = data.frame(wt_interim)
if (any(is.na(wt_interim[plot_positions]))){ wt_pos = as.numeric(rownames(wt_interimDF))
cat("Plot_positions selected:", length(plot_positions))
i2_ofr = plot_positions[is.na(wt_interim[plot_positions])] if (all(plot_positions%in%wt_pos)){
cat("\nIndex out of range: 1 or more"
, "\nThese are:", i2_ofr
, "\nOmitting these and proceeding...")
i2_extract = na.omit(wt_interim[plot_positions])
cat("\nFinal positions being plottted:", length(i2_extract)
, "\nNo. of positions dropped from request:", length(i2_ofr))
}else{
cat("\nAll positions within range" cat("\nAll positions within range"
, "\nProceeing with generating requested position MSA seqs...") , "\nProceeding with generating requested position MSA seqs..."
, "\nplot positions:", length(plot_positions))
i2_extract = plot_positions i2_extract = plot_positions
}else{
cat("\nNo. of positions selected:", length(plot_positions))
i2_ofr = plot_positions[!plot_positions%in%wt_pos]
cat("\n1 or more plot_positions out of range..."
, "\nThese are:\n", i_ofr
, "\nQuitting! Resubmit with correct plot_positions")
#i2_extract = plot_positions[plot_positions%in%wt_pos]
#cat("\nFinal no. of positions being plottted:", length(i2_extract)
# , "\nNo. of positions dropped from request:", length(i2_ofr))
quit()
} }
matP2 = wt_interim[i_extract, 1:ncol(wt_interim)] #matP1 = msa_interim[i_extract, 1:ncol(msa_interim)]
dfP2 = wt_interimDF[i2_extract,]
dfP2 = data.frame(t(matP2)) dfP2 = data.frame(t(dfP2))
names(dfP2) = i2_extract names(dfP2) = i2_extract
cols_to_paste_P2 = names(dfP2) cols_to_paste2 = names(dfP2)
dfP2['chosen_seq'] = apply( dfP2[ , cols_to_paste2]
dfP2['chosen_seq'] = apply( dfP2[ , cols_to_paste_P2]
, 1 , 1
, paste, sep = '' , paste, sep = ''
, collapse = "") , collapse = "")
wt_seq_plot = dfP2$chosen_seq wt_seq_plot = dfP2$chosen_seq
} }
###################################### ######################################
# Generating plots for muts and wt # Generating plots for muts and wt
##################################### #####################################
LogoPlotMSAL <- list() LogoPlotMSAL <- list()
if (my_logo_col %in% c('clustalx','taylor')) { if (my_logo_col %in% c('clustalx','taylor')) {
@ -144,8 +171,9 @@ LogoPlotMSA <- function(msaSeq_mut
} }
if (my_logo_col %in% c('chemistry', 'hydrophobicity')) { if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
cat('\nSelected colour scheme:', my_logo_col cat("\nstart of MSA"
, "\nUsing grey theme") , '\nSelected colour scheme:', my_logo_col
, "\nUsing grey theme")
theme_bgc = "grey" theme_bgc = "grey"
xfont_bgc = "black" xfont_bgc = "black"
@ -192,18 +220,22 @@ LogoPlotMSA <- function(msaSeq_mut
xlab(x_lab) xlab(x_lab)
if (missing(plot_positions)){ if (missing(plot_positions)){
msa_mut_logo_P = p0 msa_mut_logo_P = p0 +
scale_x_discrete(breaks = msa_all_pos
, expand = c(0.02,0)
, labels = msa_all_pos
, limits = factor(msa_all_pos))
}else{ }else{
msa_mut_logo_P = p0 + msa_mut_logo_P = p0 +
scale_y_continuous(expand = c(0,0.09)) + scale_y_continuous(expand = c(0,0.09)) +
scale_x_discrete(breaks = i_extract scale_x_discrete(breaks = i_extract
, expand = c(0.09,0) , expand = c(x_axis_offset,0)
, labels = i_extract , labels = i_extract
, limits = factor(i_extract)) , limits = factor(i_extract))
} }
cat('\nDone: msa_mut_logo_P') cat('\nDone: MSA plot for mutations')
#return(msa_mut_logoP) #return(msa_mut_logoP)
LogoPlotMSAL[['msa_mut_logoP']] <- msa_mut_logo_P LogoPlotMSAL[['msa_mut_logoP']] <- msa_mut_logo_P
@ -237,20 +269,24 @@ LogoPlotMSA <- function(msaSeq_mut
, plot.background = element_rect(fill = theme_bgc)) + , plot.background = element_rect(fill = theme_bgc)) +
ylab("") + xlab("Wild-type position") ylab("") + xlab("Wild-type position")
if (missing(plot_positions)){ if (missing(plot_positions)){
msa_wt_logo_P = p1 msa_wt_logo_P = p1 +
scale_x_discrete(breaks = wt_all_pos
, expand = c(0.02,0)
, labels = wt_all_pos
, limits = factor(wt_all_pos) )
}else{ }else{
msa_wt_logo_P = p1 + msa_wt_logo_P = p1 +
scale_y_continuous(expand = c(0,0.09)) + scale_y_continuous(expand = c(0,0.09)) +
scale_x_discrete(breaks = i2_extract scale_x_discrete(breaks = i2_extract
, expand = c(0.09,0) , expand = c(x_axis_offset, 0)
, labels = i2_extract , labels = i2_extract
, limits = factor(i2_extract)) , limits = factor(i2_extract))
} }
cat('\nDone: msa_wt_logo_P') cat('\nDone: MSA plot for WT')
#return(msa_wt_logoP) #return(msa_wt_logoP)
LogoPlotMSAL[['msa_wt_logoP']] <- msa_wt_logo_P LogoPlotMSAL[['msa_wt_logoP']] <- msa_wt_logo_P

File diff suppressed because it is too large Load diff

View file

@ -1,32 +1,29 @@
#source("~/git/LSHTM_analysis/config/gid.R") #source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/pnca.R") # YES source("~/git/LSHTM_analysis/config/pnca.R") # YES
source("~/git/LSHTM_analysis/config/embb.R") #source("~/git/LSHTM_analysis/config/embb.R")
#--------------------------------------------------- #source("~/git/LSHTM_analysis/config/katg.R")
# FIXME #source("~/git/LSHTM_analysis/config/alr.R")
# source("~/git/LSHTM_analysis/config/alr.R") #source("~/git/LSHTM_analysis/config/rpob.R")
# source("~/git/LSHTM_analysis/config/embb.R")
# source("~/git/LSHTM_analysis/config/katg.R")
# source("~/git/LSHTM_analysis/config/rpob.R")
#--------------------------------------------------- #---------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# #
# ################################ ################################
# # Logo plot with custom Y axis # Logo plot with custom Y axis
# # mainly OR # mainly OR
# # script: logoP.R # script: logoP.R
# ################################ ################################
# LogoPlotCustomH (plot_df = merged_df3 # LogoPlotCustomH (plot_df = merged_df3
# , x_axis_colname = "position" # , x_axis_colname = "position"
# , y_axis_colname = "or_mychisq" # , y_axis_colname = "or_mychisq"
# , symbol_colname = "mutant_type" # , symbol_colname = "mutant_type"
# , y_axis_log = F # , y_axis_log = T
# , log_value = log10 # , log_value = log10
# , y_axis_increment = 5 # , y_axis_increment = 100
# , rm_empty_y = F # , rm_empty_y = T
# , my_logo_col = 'chemistry' # , my_logo_col = 'chemistry'
# , x_lab = "Wild-type position" # , x_lab = "Wild-type position"
# , y_lab = "Odds Ratio" # , y_lab = "Odds Ratio"
# , x_ats = 12 # text size # , x_ats = 10 # text size
# , x_tangle = 90 # text angle # , x_tangle = 90 # text angle
# , y_ats = 22 # , y_ats = 22
# , y_tangle = 0 # , y_tangle = 0
@ -38,24 +35,24 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# , leg_ts = 15 # leg text size # , leg_ts = 15 # leg text size
# , leg_tts = 16 # leg title size # , leg_tts = 16 # leg title size
# ) # )
#
#
# # ######################################## ########################################
# # # Logo plot showing nsSNPs by positions # Logo plot showing nsSNPs by positions
# # # wild-type and mutant aa # wild-type and mutant aa
# # # script: logoP_snp.R # script: logoP_snp.R
# # ######################################## ########################################
# LogoPlotSnps(plot_df = merged_df3 # LogoPlotSnps(plot_df = merged_df3
# , x_axis_colname = "position" # , x_axis_colname = "position"
# , symbol_mut_colname = "mutant_type" # , symbol_mut_colname = "mutant_type"
# , symbol_wt_colname = "wild_type" # , symbol_wt_colname = "wild_type"
# , omit_snp_count = c(0)# can be 1, 2, etc. # , omit_snp_count = c(1)# can be 0,1, 2, etc.
# , my_logo_col = "chemistry" # , my_logo_col = "chemistry"
# , x_lab = "Wild-type position" # , x_lab = "Wild-type position"
# , y_lab = "" # , y_lab = "nsSNP count"
# , x_ats = 13 # text size # , x_ats = 10 # text size
# , x_tangle = 90 # text angle # , x_tangle = 90 # text angle
# , y_ats = 20 # , y_ats = 18
# , y_tangle = 0 # , y_tangle = 0
# , x_tts = 18 # title size # , x_tts = 18 # title size
# , y_tts = 18 # , y_tts = 18
@ -65,7 +62,6 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# , leg_tts = 16 # leg title size # , leg_tts = 16 # leg title size
# ) # )
# #
#
######################################## ########################################
# Logo plot MSA # Logo plot MSA
@ -76,23 +72,49 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# To plot entire MSA, simply don't specify {plot_positions} # To plot entire MSA, simply don't specify {plot_positions}
# script: logoP_msa.R # script: logoP_msa.R
# TODO perhaps: ED logo from Logolas # TODO perhaps: ED logo from Logolas
########################################
# LogoPlotMSA(msaSeq_mut = msa_seq
# , msaSeq_wt = wt_seq
# , msa_method = 'bits' # or probability
# , my_logo_col = "taylor"
# , plot_positions = active_aa_pos
# , x_lab = "nsSNP position"
# , y_lab = ""
# , x_ats = 10 # text size
# , x_tangle = 90 # text angle
# , x_axis_offset = 0.05
# , y_ats = 15
# , y_tangle = 0
# , x_tts = 13 # title size
# , y_tts = 15
# , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
# , leg_dir = "horizontal" #can be vertical or horizontal
# , leg_ts = 16 # leg text size
# , leg_tts = 16 # leg title size
# )
######################################## ########################################
LogoPlotMSA(msaSeq_mut = msa_seq # ED Logo plot MSA
# Mutant and wild-type
########################################
LogoPlotED(msaSeq_mut = msa_seq
, msaSeq_wt = wt_seq , msaSeq_wt = wt_seq
, msa_method = 'bits' # or probability , msa_method = 'bits' # or probability
, my_logo_col = "taylor" , my_logo_col = "taylor"
#, plot_positions = c(24, 54, 48, 58, 44, 80, 87, 1000:1008) , plot_positions = active_aa_pos
, x_lab = "nsSNP position" , x_lab = "nsSNP position"
, y_lab = "" , y_lab = ""
, x_ats = 10 # text size , x_ats = 10 # text size
, x_tangle = 90 # text angle , x_tangle = 90 # text angle
, y_ats = 13 , x_axis_offset = 0.05
, y_ats = 15
, y_tangle = 0 , y_tangle = 0
, x_tts = 13 # title size , x_tts = 13 # title size
, y_tts = 13 , y_tts = 15
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
, leg_dir = "horizontal" #can be vertical or horizontal , leg_dir = "horizontal" #can be vertical or horizontal
, leg_ts = 16 # leg text size , leg_ts = 16 # leg text size
, leg_tts = 16 # leg title size , leg_tts = 16 # leg title size
) )