library(bio3d) infile_pdb = "~/git/Data/ethambutol/input/embb_complex.pdb" infile_pdb = "~/git/Data/streptomycin/input/gid_complex.pdb" # working dir and loading libraries getwd() setwd("~/git/LSHTM_analysis/scripts/") getwd() drug = "" gene = "" source("functions/plotting_globals.R") import_dirs(drug_name = drug, gene_name = gene) ######################################################################## # get atom coordinates from pdb: ~/git/LSHTM_analysis/scripts/my_pdbtools/pdbtools/scripts/pdb_seq -c B -a > ######################################################################## my_pdb = read.pdb(infile_pdb , maxlines = -1 , multi = FALSE , rm.insert = FALSE , rm.alt = TRUE , ATOM.only = FALSE , hex = FALSE , verbose = TRUE) my_pdb = my_pdb[['atom']] table(my_pdb$type) table(my_pdb$type == "HETATM") my_pdb_c1 = my_pdb[my_pdb$type != "HETATM",] foo = table(grepl("[A-Z]{3}", my_pdb_c1$resid)); foo my_pdb_c = my_pdb_c[grepl("[A-Z]{3}", my_pdb_c1$resid),] table(my_pdb_c$type == "HETATM") table(my_pdb_c$chain) table(my_pdb_c$chain, my_pdb_c$resno) chain_count = table(my_pdb_c$chain) barplot(chain_count) hetatm_df = my_pdb[my_pdb$type == "HETATM",] barplot(table(hetatm_df$resid, hetatm_df$chain), legend =T) # TO DO: # DECIPHER CHAIN sel_chain = "B" # embb sel_chain = "A" #my_fasta_file = paste0(indir, "/", gene, "_complex.fasta") my_fasta_file = "~/git/Data/ethambutol/input/7bvf_B_seq.txt" my_fasta_file = "~/git/Data/streptomycin/input/gid_complex.fasta" aa_seq_s = read.fasta(my_fasta_file , seqtype = "AA" , as.string = F) aa_seq = as.character(aa_seq_s[[1]]); aa_seq g_len = length(aa_seq) barplot(g_len) table(my_pdb_c$chain[my_pdb_c$chain == sel_chain]) c_len = unique(length(table(my_pdb_c$resno[my_pdb_c$chain == sel_chain]))); c_len barplot(c_len) gc_lengths = c(g_len, c_len) str_coverage = (c_len/g_len)*100 # make this interactive barplot(gc_lengths , horiz = T , main = "gene length and structural coverage" , xlab = "length" , ylab = "" , names.arg = c("gene_length", "chain_length") , col = c("beige", "darkgrey")) # add text underneath graph in Rshiny cat("\nGene length:", g_len , "\nChain length:", c_len , "\n% structural coverage:", str_coverage) #%% foo = data.frame(my_pdb_c$resno, my_pdb_c$resid) foo_u = foo[!duplicated(foo$my_pdb_c.resno),] foo_u$my_pdb_c.resid aa3_upper = foo_u$my_pdb_c.resid aa3_upper aa3_1up = str_to_title(aa3_upper) aa3_1up foo_aa1 = a(aa3_1up); foo_aa1 aa_seq identical(foo_aa1, aa_seq) foo_aa1_s = paste(foo_aa1, collapse = ""); foo_aa1_s aa_seq_s = paste(aa_seq, collapse = ""); aa_seq_s seqaln(foo_aa1_s) # magic answer # position 129 idendtified from global alignment from EMBOSS