added scratch/
This commit is contained in:
parent
73762568e8
commit
3a0ff9b35e
10 changed files with 42018 additions and 0 deletions
11289
scripts/scratch/3byw.pdb
Normal file
11289
scripts/scratch/3byw.pdb
Normal file
File diff suppressed because it is too large
Load diff
17357
scripts/scratch/7bvf_b.pdb
Normal file
17357
scripts/scratch/7bvf_b.pdb
Normal file
File diff suppressed because it is too large
Load diff
15
scripts/scratch/align_fastas.txt
Normal file
15
scripts/scratch/align_fastas.txt
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
>Mycobacterium tuberculosis H37Rv|Rv3423c|alr
|
||||||
|
VKRFWENVGKPNDTTDGRGTTSLAMTPISQTPGLLAEAMVDLGAIEHNVRVLREHAGHAQLMAVVKADGYGH
|
||||||
|
GATRVAQTALGAGAAELGVATVDEALALRADGITAPVLAWLHPPGIDFGPALLADVQVAVSSLRQLDELLHA
|
||||||
|
VRRTGRTATVTVKVDTGLNRNGVGPAQFPAMLTALRQAMAEDAVRLRGLMSHMVYADKPDDSINDVQAQRFT
|
||||||
|
AFLAQAREQGVRFEVAHLSNSSATMARPDLTFDLVRPGIAVYGLSPVPALGDMGLVPAMTVKCAVALVKSIR
|
||||||
|
AGEGVSYGHTWIAPRDTNLALLPIGYADGVFRSLGGRLEVLINGRRCPGVGRICMDQFMVDLGPGPLDVAEG
|
||||||
|
DEAILFGPGIRGEPTAQDWADLVGTIHYEVVTSPRGRITRTYREAENR
|
||||||
|
|
||||||
|
>alr_complex | chain A | 371 aa
|
||||||
|
LAEAMVDLGAIEHNVRVLREHAGHAQLMAVVKADGYGHGATRVAQTALGAGAAELGVATVDEALALRADGIT
|
||||||
|
APVLAWLHPPGIDFGPALLADVQVAVSSLRQLDELLHAVRRTGRTATVTVKVDTGLNRNGVGPAQFPAMLTA
|
||||||
|
LRQAMAEDAVRLRGLMSHMVYADKPDDSINDVQAQRFTAFLAQAREQGVRFEVAHLSNSSATMARPDLTFDL
|
||||||
|
VRPGIAVYGLSPVPALGDMGLVPAMTVKCAVALVKSIRAGEGVSYGHTWIAPRDTNLALLPIGYADGVFRSL
|
||||||
|
GGRLEVLINGRRCPGVGRICMDQFMVDLGPGPLDVAEGDEAILFGPGIRGEPTAQDWADLVGTIHYEVVTSP
|
||||||
|
RGRITRTYREA
|
125
scripts/scratch/align_template.py
Executable file
125
scripts/scratch/align_template.py
Executable file
|
@ -0,0 +1,125 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# useful links
|
||||||
|
#https://towardsdatascience.com/pairwise-sequence-alignment-using-biopython-d1a9d0ba861f
|
||||||
|
#https://www.biostars.org/p/265338/
|
||||||
|
|
||||||
|
from Bio import SeqIO
|
||||||
|
from Bio import pairwise2
|
||||||
|
from Bio.pairwise2 import format_alignment
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
|
||||||
|
#%%
|
||||||
|
os.chdir('/home/tanu/git/LSHTM_analysis/scripts/examples')
|
||||||
|
#%%
|
||||||
|
def myalign(ref_seq, pdb_seq):
|
||||||
|
myalign_dict = {}
|
||||||
|
|
||||||
|
alignments = pairwise2.align.globalxx(ref_seq, pdb_seq)
|
||||||
|
#alignments = pairwise2.align.localxx(ref, struct)
|
||||||
|
|
||||||
|
match = []
|
||||||
|
|
||||||
|
for a, b in zip(alignments[0][0], alignments[0][1]):
|
||||||
|
if a == b:
|
||||||
|
match.append('|')
|
||||||
|
else:
|
||||||
|
match.append(' ')
|
||||||
|
|
||||||
|
|
||||||
|
#print(match)
|
||||||
|
print(alignments[0][0])
|
||||||
|
print("".join(match))
|
||||||
|
print(alignments[0][1])
|
||||||
|
|
||||||
|
result_align = alignments[0][1]
|
||||||
|
#print(result_align)
|
||||||
|
print('===============================================================\n')
|
||||||
|
|
||||||
|
# update dict
|
||||||
|
myalign_dict.update({'aligned_fasta': result_align})
|
||||||
|
|
||||||
|
|
||||||
|
aa_regex = '\w'
|
||||||
|
m = re.search(aa_regex, result_align)
|
||||||
|
#m = my_match.span()
|
||||||
|
offset = m.start()
|
||||||
|
offset_end = m.end()
|
||||||
|
|
||||||
|
print('start of match:', offset
|
||||||
|
, '\nend of match:', offset_end)
|
||||||
|
print('===============================================================\n')
|
||||||
|
|
||||||
|
# update dict
|
||||||
|
myalign_dict.update({'start_match' : offset})
|
||||||
|
myalign_dict.update({'end_match' : offset_end})
|
||||||
|
|
||||||
|
return myalign_dict
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
"""
|
||||||
|
read file containing reference and pdb_sequence to align
|
||||||
|
"""
|
||||||
|
my_dict = {}
|
||||||
|
align_fastas_to_align = open('align_fastas.txt', 'r')
|
||||||
|
for record in SeqIO.parse(align_fastas_to_align,"fasta"):
|
||||||
|
myid = record.id
|
||||||
|
seq = str(record.seq)
|
||||||
|
my_dict.update({myid : seq})
|
||||||
|
|
||||||
|
my_keys = list(my_dict.keys())
|
||||||
|
my_ref_seq = my_dict[my_keys[0]] # ref_seq
|
||||||
|
my_pdb_seq = my_dict[my_keys[1]] # pdb_seq
|
||||||
|
|
||||||
|
fasta_alignment = myalign(my_ref_seq, my_pdb_seq)
|
||||||
|
print('this is my result:', fasta_alignment)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
|
|
||||||
|
#%% debug: individually
|
||||||
|
my_dict = {}
|
||||||
|
align_fastas_to_align = open('align_fastas.txt', 'r')
|
||||||
|
for record in SeqIO.parse(align_fastas_to_align,"fasta"):
|
||||||
|
myid =record.id
|
||||||
|
seq=str(record.seq)
|
||||||
|
#print(myid, seq)
|
||||||
|
my_dict.update({myid: seq})
|
||||||
|
|
||||||
|
print(my_dict)
|
||||||
|
print(my_dict.keys())
|
||||||
|
|
||||||
|
my_keys = list(my_dict.keys())
|
||||||
|
alignments = pairwise2.align.globalxx(my_dict[my_keys[0]], my_dict[my_keys[1]])
|
||||||
|
match = []
|
||||||
|
|
||||||
|
for a, b in zip(alignments[0][0], alignments[0][1]):
|
||||||
|
if a == b:
|
||||||
|
match.append('|')
|
||||||
|
else:
|
||||||
|
match.append(' ')
|
||||||
|
|
||||||
|
#print(match)
|
||||||
|
print(alignments[0][0])
|
||||||
|
print("".join(match))
|
||||||
|
print(alignments[0][1])
|
||||||
|
|
||||||
|
result_align = alignments[0][1]
|
||||||
|
#print(result_align)
|
||||||
|
print('===============================================================\n')
|
||||||
|
|
||||||
|
#offset = ''
|
||||||
|
aa_regex = '\w'
|
||||||
|
m = re.search(aa_regex, result_align)
|
||||||
|
#m = my_match.span()
|
||||||
|
offset = m.start()
|
||||||
|
offset_end = m.end()
|
||||||
|
|
||||||
|
print('start of match:', offset, '\nend of match:', offset_end)
|
||||||
|
print('===============================================================\n')
|
||||||
|
|
||||||
|
|
||||||
|
|
11179
scripts/scratch/alr_complex.pdb
Normal file
11179
scripts/scratch/alr_complex.pdb
Normal file
File diff suppressed because it is too large
Load diff
52
scripts/scratch/chain_extract_template.py
Normal file
52
scripts/scratch/chain_extract_template.py
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
# TASK: select specified chains from the structure & save a cropped structure with
|
||||||
|
# the selected chains. Useful for dimer, etc modelling.
|
||||||
|
|
||||||
|
# link for saving each chain as a separate file
|
||||||
|
# https://stackoverflow.com/questions/11685716/how-to-extract-chains-from-a-structure-file
|
||||||
|
#=======================================================================
|
||||||
|
#%%
|
||||||
|
import os, sys
|
||||||
|
from Bio.PDB import PDBParser, PDBIO, Select
|
||||||
|
#%% homdir and curr dir and local imports
|
||||||
|
homedir = os.path.expanduser('~')
|
||||||
|
# set working dir
|
||||||
|
os.getcwd()
|
||||||
|
os.chdir(homedir + '/git/Data/ethambutol/input/')
|
||||||
|
os.getcwd()
|
||||||
|
#%%
|
||||||
|
# Select() Method to return True for every chain in 'chains'
|
||||||
|
class ChainExtract(Select):
|
||||||
|
def __init__(self, chain):
|
||||||
|
self.chain = chain
|
||||||
|
|
||||||
|
def accept_chain(self, chain):
|
||||||
|
#print dir(chain)
|
||||||
|
if chain.id in self.chain:
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def main():
|
||||||
|
p = PDBParser(PERMISSIVE=1)
|
||||||
|
structure = p.get_structure("3byw", "3byw.pdb")
|
||||||
|
|
||||||
|
my_chains = ['G', 'H'] # specify selected chains
|
||||||
|
c_names = ''.join(my_chains)
|
||||||
|
|
||||||
|
pdb_chains_file = 'pdb_crop_' + c_names + '.pdb'
|
||||||
|
|
||||||
|
io = PDBIO()
|
||||||
|
io.set_structure(structure)
|
||||||
|
#io.save(structure.get_id() + "_crop.structure", ChainExtract(my_chains))
|
||||||
|
io.save(pdb_chains_file, ChainExtract(my_chains))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
|
#%%
|
||||||
|
# test
|
||||||
|
#my_chains = ['G', 'H'] # specify selected chains
|
||||||
|
#foo = ''.join(my_chains) # to append to file name
|
||||||
|
#pdb_chains_file = '_{}.pdb'.format(my_chains)
|
49
scripts/scratch/chain_splitter_template.py
Executable file
49
scripts/scratch/chain_splitter_template.py
Executable file
|
@ -0,0 +1,49 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
# TASK: extract chain from pdb and save each chain as a separate file
|
||||||
|
|
||||||
|
# link for saving each chain as a separate file
|
||||||
|
# https://stackoverflow.com/questions/11685716/how-to-extract-chains-from-a-pdb-file
|
||||||
|
# command line args
|
||||||
|
# https://stackoverflow.com/questions/15753701/how-can-i-pass-a-list-as-a-command-line-argument-with-argparse
|
||||||
|
#=======================================================================
|
||||||
|
|
||||||
|
#%%
|
||||||
|
import os, sys
|
||||||
|
from Bio.PDB import Select, PDBIO
|
||||||
|
from Bio.PDB.PDBParser import PDBParser
|
||||||
|
#%% homdir and curr dir and local imports
|
||||||
|
homedir = os.path.expanduser('~')
|
||||||
|
# set working dir
|
||||||
|
os.getcwd()
|
||||||
|
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
|
||||||
|
os.getcwd()
|
||||||
|
#%%
|
||||||
|
class ChainSelect(Select):
|
||||||
|
def __init__(self, chain):
|
||||||
|
self.chain = chain
|
||||||
|
|
||||||
|
def accept_chain(self, chain):
|
||||||
|
if chain.get_id() == self.chain:
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def main():
|
||||||
|
chains = ['A','B','C','F']
|
||||||
|
p = PDBParser(PERMISSIVE=1)
|
||||||
|
#structure = p.get_structure(pdb_file, pdb_file)
|
||||||
|
structure = p.get_structure('/home/tanu/git/Data/ethambutol/input/3byw', '/home/tanu/git/Data/ethambutol/input/3byw.pdb')
|
||||||
|
#print('STRUCTURE:', structure.get_id())
|
||||||
|
# pdb_filename = print()
|
||||||
|
for chain in chains:
|
||||||
|
pdb_chain_file = 'pdb_file_chain_{}.pdb'.format(chain)
|
||||||
|
|
||||||
|
io = PDBIO()
|
||||||
|
io.set_structure(structure)
|
||||||
|
io.save('{}'.format(pdb_chain_file), ChainSelect(chain))
|
||||||
|
|
||||||
|
# If run from command line...
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
45
scripts/scratch/check_ligand.py
Executable file
45
scripts/scratch/check_ligand.py
Executable file
|
@ -0,0 +1,45 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import os
|
||||||
|
from biopandas.pdb import PandasPdb
|
||||||
|
|
||||||
|
#%%
|
||||||
|
homedir = os.path.expanduser('~')
|
||||||
|
os.chdir(homedir + '/git/LSHTM_analysis/scripts/examples')
|
||||||
|
|
||||||
|
#%%
|
||||||
|
file_list = ['7bvf_b.pdb', 'pnca_complex.pdb', 'alr_complex.pdb']
|
||||||
|
|
||||||
|
file_list = ['7bvf_b.pdb']
|
||||||
|
#file_list = ['pnca_complex.pdb']
|
||||||
|
file_list = ['alr_complex.pdb']
|
||||||
|
BORING_LIGANDS = ["HOH","CA","SO4","IOD","NA","CL","GOL","PO4"]
|
||||||
|
#%% df with list
|
||||||
|
ligands_dict = {}
|
||||||
|
for pdb_id in file_list:
|
||||||
|
ppdb = PandasPdb()
|
||||||
|
pdb_file = ppdb.read_pdb(pdb_id)
|
||||||
|
het = pdb_file.df['HETATM']
|
||||||
|
het_list = list(set(het['residue_name']))
|
||||||
|
ligands = [ l for l in het_list if l not in BORING_LIGANDS]
|
||||||
|
lig_dict = {pdb_id:ligands}
|
||||||
|
#lig_dict = {pdb_id:het_list} # include BORING_LIGANDS
|
||||||
|
|
||||||
|
ligands_dict.update(lig_dict)
|
||||||
|
print(ligands_dict)
|
||||||
|
print('pdb_code:', pdb_file.code) # works only in case of valid pdb
|
||||||
|
print('pdb_code:', pdb_file.pdb_path) #works for bespoke pdb but prints the full path
|
||||||
|
print('pdb_code:', os.path.basename(pdb_file.pdb_path)) # prints only the last part i.e filename
|
||||||
|
#%% test with one
|
||||||
|
ppdb = PandasPdb()
|
||||||
|
pdb_file = ppdb.read_pdb('7bvf_b.pdb')
|
||||||
|
het = pdb_file.df['HETATM']
|
||||||
|
het_list = list(set(het['residue_name']))
|
||||||
|
print(het_list)
|
||||||
|
ligands = [ l for l in het_list if l not in BORING_LIGANDS]
|
||||||
|
print(ligands)
|
||||||
|
|
||||||
|
#%% extract last part from file path
|
||||||
|
print(os.path.basename(homedir + '/git/LSHTM_analysis/scripts/examples'))
|
||||||
|
print(os.path.basename('alr_complex.pdb'))
|
||||||
|
foo = os.path.basename(pdb_file.pdb_path)
|
||||||
|
print(foo)
|
1907
scripts/scratch/pnca_complex.pdb
Normal file
1907
scripts/scratch/pnca_complex.pdb
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue