110 lines
2.9 KiB
R
110 lines
2.9 KiB
R
library(bio3d)
|
|
infile_pdb = "~/git/Data/ethambutol/input/embb_complex.pdb"
|
|
infile_pdb = "~/git/Data/streptomycin/input/gid_complex.pdb"
|
|
|
|
# working dir and loading libraries
|
|
getwd()
|
|
setwd("~/git/LSHTM_analysis/scripts/")
|
|
getwd()
|
|
|
|
drug = ""
|
|
gene = ""
|
|
|
|
source("functions/plotting_globals.R")
|
|
import_dirs(drug_name = drug, gene_name = gene)
|
|
########################################################################
|
|
# get atom coordinates from pdb:
|
|
~/git/LSHTM_analysis/scripts/my_pdbtools/pdbtools/scripts/pdb_seq -c B -a <input> > <output>
|
|
|
|
########################################################################
|
|
my_pdb = read.pdb(infile_pdb
|
|
, maxlines = -1
|
|
, multi = FALSE
|
|
, rm.insert = FALSE
|
|
, rm.alt = TRUE
|
|
, ATOM.only = FALSE
|
|
, hex = FALSE
|
|
, verbose = TRUE)
|
|
|
|
my_pdb = my_pdb[['atom']]
|
|
table(my_pdb$type)
|
|
table(my_pdb$type == "HETATM")
|
|
|
|
my_pdb_c1 = my_pdb[my_pdb$type != "HETATM",]
|
|
foo = table(grepl("[A-Z]{3}", my_pdb_c1$resid)); foo
|
|
|
|
my_pdb_c = my_pdb_c[grepl("[A-Z]{3}", my_pdb_c1$resid),]
|
|
|
|
table(my_pdb_c$type == "HETATM")
|
|
table(my_pdb_c$chain)
|
|
table(my_pdb_c$chain, my_pdb_c$resno)
|
|
|
|
chain_count = table(my_pdb_c$chain)
|
|
|
|
barplot(chain_count)
|
|
|
|
hetatm_df = my_pdb[my_pdb$type == "HETATM",]
|
|
barplot(table(hetatm_df$resid, hetatm_df$chain), legend =T)
|
|
|
|
|
|
# TO DO:
|
|
# DECIPHER CHAIN
|
|
sel_chain = "B" # embb
|
|
sel_chain = "A"
|
|
|
|
#my_fasta_file = paste0(indir, "/", gene, "_complex.fasta")
|
|
my_fasta_file = "~/git/Data/ethambutol/input/7bvf_B_seq.txt"
|
|
my_fasta_file = "~/git/Data/streptomycin/input/gid_complex.fasta"
|
|
|
|
aa_seq_s = read.fasta(my_fasta_file
|
|
, seqtype = "AA"
|
|
, as.string = F)
|
|
|
|
aa_seq = as.character(aa_seq_s[[1]]); aa_seq
|
|
g_len = length(aa_seq)
|
|
barplot(g_len)
|
|
|
|
table(my_pdb_c$chain[my_pdb_c$chain == sel_chain])
|
|
c_len = unique(length(table(my_pdb_c$resno[my_pdb_c$chain == sel_chain]))); c_len
|
|
barplot(c_len)
|
|
|
|
gc_lengths = c(g_len, c_len)
|
|
str_coverage = (c_len/g_len)*100
|
|
|
|
# make this interactive
|
|
barplot(gc_lengths
|
|
, horiz = T
|
|
, main = "gene length and structural coverage"
|
|
, xlab = "length"
|
|
, ylab = ""
|
|
, names.arg = c("gene_length", "chain_length")
|
|
, col = c("beige", "darkgrey"))
|
|
|
|
# add text underneath graph in Rshiny
|
|
cat("\nGene length:", g_len
|
|
, "\nChain length:", c_len
|
|
, "\n% structural coverage:", str_coverage)
|
|
|
|
#%%
|
|
foo = data.frame(my_pdb_c$resno, my_pdb_c$resid)
|
|
foo_u = foo[!duplicated(foo$my_pdb_c.resno),]
|
|
|
|
foo_u$my_pdb_c.resid
|
|
aa3_upper = foo_u$my_pdb_c.resid
|
|
aa3_upper
|
|
|
|
|
|
aa3_1up = str_to_title(aa3_upper)
|
|
aa3_1up
|
|
|
|
foo_aa1 = a(aa3_1up); foo_aa1
|
|
aa_seq
|
|
|
|
identical(foo_aa1, aa_seq)
|
|
|
|
foo_aa1_s = paste(foo_aa1, collapse = ""); foo_aa1_s
|
|
aa_seq_s = paste(aa_seq, collapse = ""); aa_seq_s
|
|
|
|
seqaln(foo_aa1_s)
|
|
# magic answer
|
|
# position 129 idendtified from global alignment from EMBOSS
|