added scratch/

This commit is contained in:
Tanushree Tunstall 2020-05-22 12:03:11 +01:00
parent 73762568e8
commit 3a0ff9b35e
10 changed files with 42018 additions and 0 deletions

11289
scripts/scratch/3byw.pdb Normal file

File diff suppressed because it is too large Load diff

17357
scripts/scratch/7bvf_b.pdb Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,15 @@
>Mycobacterium tuberculosis H37Rv|Rv3423c|alr
VKRFWENVGKPNDTTDGRGTTSLAMTPISQTPGLLAEAMVDLGAIEHNVRVLREHAGHAQLMAVVKADGYGH
GATRVAQTALGAGAAELGVATVDEALALRADGITAPVLAWLHPPGIDFGPALLADVQVAVSSLRQLDELLHA
VRRTGRTATVTVKVDTGLNRNGVGPAQFPAMLTALRQAMAEDAVRLRGLMSHMVYADKPDDSINDVQAQRFT
AFLAQAREQGVRFEVAHLSNSSATMARPDLTFDLVRPGIAVYGLSPVPALGDMGLVPAMTVKCAVALVKSIR
AGEGVSYGHTWIAPRDTNLALLPIGYADGVFRSLGGRLEVLINGRRCPGVGRICMDQFMVDLGPGPLDVAEG
DEAILFGPGIRGEPTAQDWADLVGTIHYEVVTSPRGRITRTYREAENR
>alr_complex | chain A | 371 aa
LAEAMVDLGAIEHNVRVLREHAGHAQLMAVVKADGYGHGATRVAQTALGAGAAELGVATVDEALALRADGIT
APVLAWLHPPGIDFGPALLADVQVAVSSLRQLDELLHAVRRTGRTATVTVKVDTGLNRNGVGPAQFPAMLTA
LRQAMAEDAVRLRGLMSHMVYADKPDDSINDVQAQRFTAFLAQAREQGVRFEVAHLSNSSATMARPDLTFDL
VRPGIAVYGLSPVPALGDMGLVPAMTVKCAVALVKSIRAGEGVSYGHTWIAPRDTNLALLPIGYADGVFRSL
GGRLEVLINGRRCPGVGRICMDQFMVDLGPGPLDVAEGDEAILFGPGIRGEPTAQDWADLVGTIHYEVVTSP
RGRITRTYREA

125
scripts/scratch/align_template.py Executable file
View file

@ -0,0 +1,125 @@
#!/usr/bin/env python3
# useful links
#https://towardsdatascience.com/pairwise-sequence-alignment-using-biopython-d1a9d0ba861f
#https://www.biostars.org/p/265338/
from Bio import SeqIO
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
import re
import os
#%%
os.chdir('/home/tanu/git/LSHTM_analysis/scripts/examples')
#%%
def myalign(ref_seq, pdb_seq):
myalign_dict = {}
alignments = pairwise2.align.globalxx(ref_seq, pdb_seq)
#alignments = pairwise2.align.localxx(ref, struct)
match = []
for a, b in zip(alignments[0][0], alignments[0][1]):
if a == b:
match.append('|')
else:
match.append(' ')
#print(match)
print(alignments[0][0])
print("".join(match))
print(alignments[0][1])
result_align = alignments[0][1]
#print(result_align)
print('===============================================================\n')
# update dict
myalign_dict.update({'aligned_fasta': result_align})
aa_regex = '\w'
m = re.search(aa_regex, result_align)
#m = my_match.span()
offset = m.start()
offset_end = m.end()
print('start of match:', offset
, '\nend of match:', offset_end)
print('===============================================================\n')
# update dict
myalign_dict.update({'start_match' : offset})
myalign_dict.update({'end_match' : offset_end})
return myalign_dict
def main():
"""
read file containing reference and pdb_sequence to align
"""
my_dict = {}
align_fastas_to_align = open('align_fastas.txt', 'r')
for record in SeqIO.parse(align_fastas_to_align,"fasta"):
myid = record.id
seq = str(record.seq)
my_dict.update({myid : seq})
my_keys = list(my_dict.keys())
my_ref_seq = my_dict[my_keys[0]] # ref_seq
my_pdb_seq = my_dict[my_keys[1]] # pdb_seq
fasta_alignment = myalign(my_ref_seq, my_pdb_seq)
print('this is my result:', fasta_alignment)
if __name__ == '__main__':
main()
#%% debug: individually
my_dict = {}
align_fastas_to_align = open('align_fastas.txt', 'r')
for record in SeqIO.parse(align_fastas_to_align,"fasta"):
myid =record.id
seq=str(record.seq)
#print(myid, seq)
my_dict.update({myid: seq})
print(my_dict)
print(my_dict.keys())
my_keys = list(my_dict.keys())
alignments = pairwise2.align.globalxx(my_dict[my_keys[0]], my_dict[my_keys[1]])
match = []
for a, b in zip(alignments[0][0], alignments[0][1]):
if a == b:
match.append('|')
else:
match.append(' ')
#print(match)
print(alignments[0][0])
print("".join(match))
print(alignments[0][1])
result_align = alignments[0][1]
#print(result_align)
print('===============================================================\n')
#offset = ''
aa_regex = '\w'
m = re.search(aa_regex, result_align)
#m = my_match.span()
offset = m.start()
offset_end = m.end()
print('start of match:', offset, '\nend of match:', offset_end)
print('===============================================================\n')

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,52 @@
#!/usr/bin/python3
#=======================================================================
# TASK: select specified chains from the structure & save a cropped structure with
# the selected chains. Useful for dimer, etc modelling.
# link for saving each chain as a separate file
# https://stackoverflow.com/questions/11685716/how-to-extract-chains-from-a-structure-file
#=======================================================================
#%%
import os, sys
from Bio.PDB import PDBParser, PDBIO, Select
#%% homdir and curr dir and local imports
homedir = os.path.expanduser('~')
# set working dir
os.getcwd()
os.chdir(homedir + '/git/Data/ethambutol/input/')
os.getcwd()
#%%
# Select() Method to return True for every chain in 'chains'
class ChainExtract(Select):
def __init__(self, chain):
self.chain = chain
def accept_chain(self, chain):
#print dir(chain)
if chain.id in self.chain:
return 1
else:
return 0
def main():
p = PDBParser(PERMISSIVE=1)
structure = p.get_structure("3byw", "3byw.pdb")
my_chains = ['G', 'H'] # specify selected chains
c_names = ''.join(my_chains)
pdb_chains_file = 'pdb_crop_' + c_names + '.pdb'
io = PDBIO()
io.set_structure(structure)
#io.save(structure.get_id() + "_crop.structure", ChainExtract(my_chains))
io.save(pdb_chains_file, ChainExtract(my_chains))
if __name__ == '__main__':
main()
#%%
# test
#my_chains = ['G', 'H'] # specify selected chains
#foo = ''.join(my_chains) # to append to file name
#pdb_chains_file = '_{}.pdb'.format(my_chains)

View file

@ -0,0 +1,49 @@
#!/usr/bin/python3
#=======================================================================
# TASK: extract chain from pdb and save each chain as a separate file
# link for saving each chain as a separate file
# https://stackoverflow.com/questions/11685716/how-to-extract-chains-from-a-pdb-file
# command line args
# https://stackoverflow.com/questions/15753701/how-can-i-pass-a-list-as-a-command-line-argument-with-argparse
#=======================================================================
#%%
import os, sys
from Bio.PDB import Select, PDBIO
from Bio.PDB.PDBParser import PDBParser
#%% homdir and curr dir and local imports
homedir = os.path.expanduser('~')
# set working dir
os.getcwd()
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
os.getcwd()
#%%
class ChainSelect(Select):
def __init__(self, chain):
self.chain = chain
def accept_chain(self, chain):
if chain.get_id() == self.chain:
return 1
else:
return 0
def main():
chains = ['A','B','C','F']
p = PDBParser(PERMISSIVE=1)
#structure = p.get_structure(pdb_file, pdb_file)
structure = p.get_structure('/home/tanu/git/Data/ethambutol/input/3byw', '/home/tanu/git/Data/ethambutol/input/3byw.pdb')
#print('STRUCTURE:', structure.get_id())
# pdb_filename = print()
for chain in chains:
pdb_chain_file = 'pdb_file_chain_{}.pdb'.format(chain)
io = PDBIO()
io.set_structure(structure)
io.save('{}'.format(pdb_chain_file), ChainSelect(chain))
# If run from command line...
if __name__ == "__main__":
main()

45
scripts/scratch/check_ligand.py Executable file
View file

@ -0,0 +1,45 @@
#!/usr/bin/env python
import os
from biopandas.pdb import PandasPdb
#%%
homedir = os.path.expanduser('~')
os.chdir(homedir + '/git/LSHTM_analysis/scripts/examples')
#%%
file_list = ['7bvf_b.pdb', 'pnca_complex.pdb', 'alr_complex.pdb']
file_list = ['7bvf_b.pdb']
#file_list = ['pnca_complex.pdb']
file_list = ['alr_complex.pdb']
BORING_LIGANDS = ["HOH","CA","SO4","IOD","NA","CL","GOL","PO4"]
#%% df with list
ligands_dict = {}
for pdb_id in file_list:
ppdb = PandasPdb()
pdb_file = ppdb.read_pdb(pdb_id)
het = pdb_file.df['HETATM']
het_list = list(set(het['residue_name']))
ligands = [ l for l in het_list if l not in BORING_LIGANDS]
lig_dict = {pdb_id:ligands}
#lig_dict = {pdb_id:het_list} # include BORING_LIGANDS
ligands_dict.update(lig_dict)
print(ligands_dict)
print('pdb_code:', pdb_file.code) # works only in case of valid pdb
print('pdb_code:', pdb_file.pdb_path) #works for bespoke pdb but prints the full path
print('pdb_code:', os.path.basename(pdb_file.pdb_path)) # prints only the last part i.e filename
#%% test with one
ppdb = PandasPdb()
pdb_file = ppdb.read_pdb('7bvf_b.pdb')
het = pdb_file.df['HETATM']
het_list = list(set(het['residue_name']))
print(het_list)
ligands = [ l for l in het_list if l not in BORING_LIGANDS]
print(ligands)
#%% extract last part from file path
print(os.path.basename(homedir + '/git/LSHTM_analysis/scripts/examples'))
print(os.path.basename('alr_complex.pdb'))
foo = os.path.basename(pdb_file.pdb_path)
print(foo)

File diff suppressed because it is too large Load diff