added script for pairwise alignment
This commit is contained in:
parent
548d9a5192
commit
15dea0cbf6
2 changed files with 85 additions and 0 deletions
51
scripts/align.py
Executable file
51
scripts/align.py
Executable file
|
@ -0,0 +1,51 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
from Bio import SeqIO
|
||||||
|
from Bio import pairwise2
|
||||||
|
from Bio.pairwise2 import format_alignment
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
#%%
|
||||||
|
def myalign(ref_seq, pdb_seq):
    """Globally align two sequences and report where the second one lands.

    Runs ``pairwise2.align.globalxx`` on the two sequences, takes the first
    alignment returned, prints it (first aligned string, a match line, second
    aligned string) and returns the second aligned string together with the
    span its residues occupy within that string.

    Args:
        ref_seq: reference sequence, passed as the first sequence to
            ``globalxx``.
        pdb_seq: sequence to align against the reference, passed as the
            second sequence to ``globalxx``.

    Returns:
        dict with three keys:
            'aligned_fasta': second aligned string of the first alignment.
            'start_match': index of the first word character in it.
            'end_match': index one past the last word character in it.

    Raises:
        ValueError: if no alignment is returned, or if the aligned string
            contains no word characters at all.
    """
    alignments = pairwise2.align.globalxx(ref_seq, pdb_seq)
    #alignments = pairwise2.align.localxx(ref, struct)
    if not alignments:
        raise ValueError('no alignment produced for the given sequences')

    # Only the first alignment is used; its first two fields are the two
    # aligned strings.
    aligned_ref, result_align = alignments[0][0], alignments[0][1]

    # '|' under every column where both aligned strings agree, ' ' elsewhere.
    match_line = "".join(
        '|' if a == b else ' ' for a, b in zip(aligned_ref, result_align)
    )

    print(aligned_ref)
    print(match_line)
    print(result_align)
    print('===============================================================\n')

    # find start and end of match
    # A single '\w' only ever matches one character, which made the reported
    # end always start + 1. Match greedily from the first to the last word
    # character instead, so internal non-word padding is spanned and
    # leading/trailing padding is excluded.
    aa_regex = r'\w(?:.*\w)?'
    m = re.search(aa_regex, result_align)
    if m is None:
        raise ValueError('aligned sequence contains no residue characters')

    offset = m.start()
    offset_end = m.end()  # exclusive, as with any regex span

    print('start of match:', offset
          , '\nend of match:', offset_end)
    print('===============================================================\n')

    return {
        'aligned_fasta': result_align,
        'start_match': offset,
        'end_match': offset_end,
    }
|
||||||
|
|
34
scripts/pdb_align.py
Executable file
34
scripts/pdb_align.py
Executable file
|
@ -0,0 +1,34 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
from Bio import SeqIO
|
||||||
|
from Bio import pairwise2
|
||||||
|
from Bio.pairwise2 import format_alignment
|
||||||
|
from align import myalign
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
# NOTE(review): this runs at import time and switches the process working
# directory to a hard-coded absolute path; main() opens 'align_fastas.txt'
# relative to it, so the script only works where this directory exists.
os.chdir('/home/tanu/git/LSHTM_analysis/scripts/examples')
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Align the first two sequences found in ``align_fastas.txt``.

    Reads FASTA records from ``align_fastas.txt`` in the current working
    directory, keyed by record id. The first id's sequence is used as the
    reference and the second id's sequence as the pdb sequence; both are
    passed to ``myalign`` and the returned dict and its type are printed.

    # FIXME: pass command line args i.e filename

    Raises:
        ValueError: if the file yields fewer than two distinct record ids.
    """
    my_dict = {}
    # The context manager closes the handle even if parsing raises.
    with open('align_fastas.txt', 'r') as align_fastas_to_align:
        for record in SeqIO.parse(align_fastas_to_align, "fasta"):
            # Records sharing an id overwrite each other (dict semantics).
            my_dict[record.id] = str(record.seq)

    my_keys = list(my_dict)
    if len(my_keys) < 2:
        raise ValueError(
            'align_fastas.txt must contain at least two FASTA records '
            'with distinct ids, found %d' % len(my_keys)
        )

    # Dicts keep insertion order, so these are the first two ids in the file.
    my_ref_seq = my_dict[my_keys[0]]
    my_pdb_seq = my_dict[my_keys[1]]

    fasta_alignment = myalign(my_ref_seq, my_pdb_seq)
    print(fasta_alignment)
    print('class:', type(fasta_alignment))
|
||||||
|
print('class:', type(fasta_alignment))
|
||||||
|
|
||||||
|
|
||||||
|
# Entry point: run main() only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue