125 lines
No EOL
3.1 KiB
Python
Executable file
125 lines
No EOL
3.1 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
# useful links
|
|
#https://towardsdatascience.com/pairwise-sequence-alignment-using-biopython-d1a9d0ba861f
|
|
#https://www.biostars.org/p/265338/
|
|
|
|
from Bio import SeqIO
|
|
from Bio import pairwise2
|
|
from Bio.pairwise2 import format_alignment
|
|
import re
|
|
import os
|
|
|
|
#%%
|
|
os.chdir('/home/tanu/git/LSHTM_analysis/scripts/examples')
|
|
#%%
|
|
def myalign(ref_seq, pdb_seq):
|
|
myalign_dict = {}
|
|
|
|
alignments = pairwise2.align.globalxx(ref_seq, pdb_seq)
|
|
#alignments = pairwise2.align.localxx(ref, struct)
|
|
|
|
match = []
|
|
|
|
for a, b in zip(alignments[0][0], alignments[0][1]):
|
|
if a == b:
|
|
match.append('|')
|
|
else:
|
|
match.append(' ')
|
|
|
|
|
|
#print(match)
|
|
print(alignments[0][0])
|
|
print("".join(match))
|
|
print(alignments[0][1])
|
|
|
|
result_align = alignments[0][1]
|
|
#print(result_align)
|
|
print('===============================================================\n')
|
|
|
|
# update dict
|
|
myalign_dict.update({'aligned_fasta': result_align})
|
|
|
|
|
|
aa_regex = '\w'
|
|
m = re.search(aa_regex, result_align)
|
|
#m = my_match.span()
|
|
offset = m.start()
|
|
offset_end = m.end()
|
|
|
|
print('start of match:', offset
|
|
, '\nend of match:', offset_end)
|
|
print('===============================================================\n')
|
|
|
|
# update dict
|
|
myalign_dict.update({'start_match' : offset})
|
|
myalign_dict.update({'end_match' : offset_end})
|
|
|
|
return myalign_dict
|
|
|
|
|
|
def main():
|
|
"""
|
|
read file containing reference and pdb_sequence to align
|
|
"""
|
|
my_dict = {}
|
|
align_fastas_to_align = open('align_fastas.txt', 'r')
|
|
for record in SeqIO.parse(align_fastas_to_align,"fasta"):
|
|
myid = record.id
|
|
seq = str(record.seq)
|
|
my_dict.update({myid : seq})
|
|
|
|
my_keys = list(my_dict.keys())
|
|
my_ref_seq = my_dict[my_keys[0]] # ref_seq
|
|
my_pdb_seq = my_dict[my_keys[1]] # pdb_seq
|
|
|
|
fasta_alignment = myalign(my_ref_seq, my_pdb_seq)
|
|
print('this is my result:', fasta_alignment)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|
|
|
|
#%% debug: individually
|
|
my_dict = {}
|
|
align_fastas_to_align = open('align_fastas.txt', 'r')
|
|
for record in SeqIO.parse(align_fastas_to_align,"fasta"):
|
|
myid =record.id
|
|
seq=str(record.seq)
|
|
#print(myid, seq)
|
|
my_dict.update({myid: seq})
|
|
|
|
print(my_dict)
|
|
print(my_dict.keys())
|
|
|
|
my_keys = list(my_dict.keys())
|
|
alignments = pairwise2.align.globalxx(my_dict[my_keys[0]], my_dict[my_keys[1]])
|
|
match = []
|
|
|
|
for a, b in zip(alignments[0][0], alignments[0][1]):
|
|
if a == b:
|
|
match.append('|')
|
|
else:
|
|
match.append(' ')
|
|
|
|
#print(match)
|
|
print(alignments[0][0])
|
|
print("".join(match))
|
|
print(alignments[0][1])
|
|
|
|
result_align = alignments[0][1]
|
|
#print(result_align)
|
|
print('===============================================================\n')
|
|
|
|
#offset = ''
|
|
aa_regex = '\w'
|
|
m = re.search(aa_regex, result_align)
|
|
#m = my_match.span()
|
|
offset = m.start()
|
|
offset_end = m.end()
|
|
|
|
print('start of match:', offset, '\nend of match:', offset_end)
|
|
print('===============================================================\n')
|
|
|
|
|
|
|