added pdb_fasta_plot.R for generating some useful plots for shiny
This commit is contained in:
parent
067fc85163
commit
9cb33ed67b
1 changed files with 110 additions and 0 deletions
110
scripts/pdb_fasta_plot.R
Normal file
110
scripts/pdb_fasta_plot.R
Normal file
|
@ -0,0 +1,110 @@
|
|||
library(bio3d)
|
||||
infile_pdb = "~/git/Data/ethambutol/input/embb_complex.pdb"
|
||||
infile_pdb = "~/git/Data/streptomycin/input/gid_complex.pdb"
|
||||
|
||||
# working dir and loading libraries
|
||||
getwd()
|
||||
setwd("~/git/LSHTM_analysis/scripts/")
|
||||
getwd()
|
||||
|
||||
drug = ""
|
||||
gene = ""
|
||||
|
||||
source("functions/plotting_globals.R")
|
||||
import_dirs(drug_name = drug, gene_name = gene)
|
||||
########################################################################
|
||||
# get atom coordinates from pdb:
|
||||
~/git/LSHTM_analysis/scripts/my_pdbtools/pdbtools/scripts/pdb_seq -c B -a <input> > <output>
|
||||
|
||||
########################################################################
|
||||
my_pdb = read.pdb(infile_pdb
|
||||
, maxlines = -1
|
||||
, multi = FALSE
|
||||
, rm.insert = FALSE
|
||||
, rm.alt = TRUE
|
||||
, ATOM.only = FALSE
|
||||
, hex = FALSE
|
||||
, verbose = TRUE)
|
||||
|
||||
my_pdb = my_pdb[['atom']]
|
||||
table(my_pdb$type)
|
||||
table(my_pdb$type == "HETATM")
|
||||
|
||||
my_pdb_c1 = my_pdb[my_pdb$type != "HETATM",]
|
||||
foo = table(grepl("[A-Z]{3}", my_pdb_c1$resid)); foo
|
||||
|
||||
my_pdb_c = my_pdb_c[grepl("[A-Z]{3}", my_pdb_c1$resid),]
|
||||
|
||||
table(my_pdb_c$type == "HETATM")
|
||||
table(my_pdb_c$chain)
|
||||
table(my_pdb_c$chain, my_pdb_c$resno)
|
||||
|
||||
chain_count = table(my_pdb_c$chain)
|
||||
|
||||
barplot(chain_count)
|
||||
|
||||
hetatm_df = my_pdb[my_pdb$type == "HETATM",]
|
||||
barplot(table(hetatm_df$resid, hetatm_df$chain), legend =T)
|
||||
|
||||
|
||||
# TO DO:
|
||||
# DECIPHER CHAIN
|
||||
sel_chain = "B" # embb
|
||||
sel_chain = "A"
|
||||
|
||||
#my_fasta_file = paste0(indir, "/", gene, "_complex.fasta")
|
||||
my_fasta_file = "~/git/Data/ethambutol/input/7bvf_B_seq.txt"
|
||||
my_fasta_file = "~/git/Data/streptomycin/input/gid_complex.fasta"
|
||||
|
||||
aa_seq_s = read.fasta(my_fasta_file
|
||||
, seqtype = "AA"
|
||||
, as.string = F)
|
||||
|
||||
aa_seq = as.character(aa_seq_s[[1]]); aa_seq
|
||||
g_len = length(aa_seq)
|
||||
barplot(g_len)
|
||||
|
||||
table(my_pdb_c$chain[my_pdb_c$chain == sel_chain])
|
||||
c_len = unique(length(table(my_pdb_c$resno[my_pdb_c$chain == sel_chain]))); c_len
|
||||
barplot(c_len)
|
||||
|
||||
gc_lengths = c(g_len, c_len)
|
||||
str_coverage = (c_len/g_len)*100
|
||||
|
||||
# make this interactive
|
||||
barplot(gc_lengths
|
||||
, horiz = T
|
||||
, main = "gene length and structural coverage"
|
||||
, xlab = "length"
|
||||
, ylab = ""
|
||||
, names.arg = c("gene_length", "chain_length")
|
||||
, col = c("beige", "darkgrey"))
|
||||
|
||||
# add text underneath graph in Rshiny
|
||||
cat("\nGene length:", g_len
|
||||
, "\nChain length:", c_len
|
||||
, "\n% structural coverage:", str_coverage)
|
||||
|
||||
#%%
|
||||
foo = data.frame(my_pdb_c$resno, my_pdb_c$resid)
|
||||
foo_u = foo[!duplicated(foo$my_pdb_c.resno),]
|
||||
|
||||
foo_u$my_pdb_c.resid
|
||||
aa3_upper = foo_u$my_pdb_c.resid
|
||||
aa3_upper
|
||||
|
||||
|
||||
aa3_1up = str_to_title(aa3_upper)
|
||||
aa3_1up
|
||||
|
||||
foo_aa1 = a(aa3_1up); foo_aa1
|
||||
aa_seq
|
||||
|
||||
identical(foo_aa1, aa_seq)
|
||||
|
||||
foo_aa1_s = paste(foo_aa1, collapse = ""); foo_aa1_s
|
||||
aa_seq_s = paste(aa_seq, collapse = ""); aa_seq_s
|
||||
|
||||
seqaln(foo_aa1_s)
|
||||
# magic answer
|
||||
# position 129 idendtified from global alignment from EMBOSS
|
Loading…
Add table
Add a link
Reference in a new issue