#!/usr/bin/env python3
# scripts/align.py -- pairwise alignment helper used by pdb_align.py
import re


def myalign(ref_seq, pdb_seq):
    """Globally align two sequences and locate the second one in the result.

    The first alignment returned by ``pairwise2.align.globalxx`` is printed
    as three lines (aligned reference, a ``|`` under every identical column,
    aligned pdb sequence), followed by the start/end positions of the pdb
    sequence within its gapped, aligned form.

    Args:
        ref_seq: reference sequence, passed as the first argument to
            ``globalxx``.
        pdb_seq: sequence to align against the reference, passed as the
            second argument to ``globalxx``.

    Returns:
        dict with three keys:
            'aligned_fasta': the aligned (gapped) form of ``pdb_seq``,
            'start_match': index of its first word character,
            'end_match': index one past its last word character.

    Raises:
        ValueError: if either input is empty, if no alignment is produced,
            or if the aligned pdb sequence contains no word characters.
    """
    if not ref_seq or not pdb_seq:
        raise ValueError('myalign() requires two non-empty sequences')

    # Imported here so the argument check above does not need Biopython.
    from Bio import pairwise2

    alignments = pairwise2.align.globalxx(ref_seq, pdb_seq)
    if not alignments:
        raise ValueError('no alignment could be produced for the given sequences')

    # Only the first reported alignment is used.
    first_alignment = alignments[0]
    aligned_ref = first_alignment[0]
    result_align = first_alignment[1]

    # '|' under identical columns, blank under mismatches and gaps.
    match_line = ''.join(
        '|' if a == b else ' ' for a, b in zip(aligned_ref, result_align)
    )

    separator = '===============================================================\n'

    print(aligned_ref)
    print(match_line)
    print(result_align)
    print(separator)

    # Positions of every non-gap (word) character in the aligned pdb sequence.
    # The span runs from the first such character to one past the last one;
    # taking the end of a single-character match would always give start + 1.
    residue_positions = [m.start() for m in re.finditer(r'\w', result_align)]
    if not residue_positions:
        raise ValueError('aligned pdb sequence contains no residue characters')

    offset = residue_positions[0]
    offset_end = residue_positions[-1] + 1

    print('start of match:', offset, '\nend of match:', offset_end)
    print(separator)

    return {
        'aligned_fasta': result_align,
        'start_match': offset,
        'end_match': offset_end,
    }
#!/usr/bin/env python3
# scripts/pdb_align.py -- align the first two records of a FASTA file
from Bio import SeqIO
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
from align import myalign
import re
import os

# NOTE(review): hard-coded, machine-specific working directory that is applied
# at import time; relative paths given to main() are resolved against it.
os.chdir('/home/tanu/git/LSHTM_analysis/scripts/examples')


def main(fasta_file='align_fastas.txt'):
    """Align the first two sequences found in a FASTA file.

    The records are read with ``SeqIO.parse`` and keyed by record id (a
    repeated id overwrites the earlier sequence). The first id is treated as
    the reference sequence and the second as the pdb sequence; both are
    handed to ``myalign`` and the returned value and its type are printed.

    Args:
        fasta_file: path of the FASTA file to read. Defaults to
            'align_fastas.txt' in the current working directory.

    Raises:
        ValueError: if the file holds fewer than two distinct record ids.

    # FIXME: pass command line args i.e filename
    """
    # The context manager closes the handle even if parsing fails.
    with open(fasta_file, 'r') as fasta_handle:
        sequences_by_id = {
            record.id: str(record.seq)
            for record in SeqIO.parse(fasta_handle, "fasta")
        }

    if len(sequences_by_id) < 2:
        raise ValueError(
            'expected at least two FASTA records in %r, found %d'
            % (fasta_file, len(sequences_by_id))
        )

    # Dicts keep insertion order, so these are the first two distinct ids.
    my_ref_seq, my_pdb_seq = list(sequences_by_id.values())[:2]

    fasta_alignment = myalign(my_ref_seq, my_pdb_seq)
    print(fasta_alignment)
    print('class:', type(fasta_alignment))


if __name__ == '__main__':
    main()