added scratch/

This commit is contained in:
Tanushree Tunstall 2020-05-22 12:03:11 +01:00
parent 3fe1d35df5
commit f6fc6e47ab
10 changed files with 42018 additions and 0 deletions

125
scripts/scratch/align_template.py Executable file
View file

@ -0,0 +1,125 @@
#!/usr/bin/env python3
# useful links
#https://towardsdatascience.com/pairwise-sequence-alignment-using-biopython-d1a9d0ba861f
#https://www.biostars.org/p/265338/
from Bio import SeqIO
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
import re
import os
#%%
os.chdir('/home/tanu/git/LSHTM_analysis/scripts/examples')
#%%
def myalign(ref_seq, pdb_seq):
myalign_dict = {}
alignments = pairwise2.align.globalxx(ref_seq, pdb_seq)
#alignments = pairwise2.align.localxx(ref, struct)
match = []
for a, b in zip(alignments[0][0], alignments[0][1]):
if a == b:
match.append('|')
else:
match.append(' ')
#print(match)
print(alignments[0][0])
print("".join(match))
print(alignments[0][1])
result_align = alignments[0][1]
#print(result_align)
print('===============================================================\n')
# update dict
myalign_dict.update({'aligned_fasta': result_align})
aa_regex = '\w'
m = re.search(aa_regex, result_align)
#m = my_match.span()
offset = m.start()
offset_end = m.end()
print('start of match:', offset
, '\nend of match:', offset_end)
print('===============================================================\n')
# update dict
myalign_dict.update({'start_match' : offset})
myalign_dict.update({'end_match' : offset_end})
return myalign_dict
def main():
"""
read file containing reference and pdb_sequence to align
"""
my_dict = {}
align_fastas_to_align = open('align_fastas.txt', 'r')
for record in SeqIO.parse(align_fastas_to_align,"fasta"):
myid = record.id
seq = str(record.seq)
my_dict.update({myid : seq})
my_keys = list(my_dict.keys())
my_ref_seq = my_dict[my_keys[0]] # ref_seq
my_pdb_seq = my_dict[my_keys[1]] # pdb_seq
fasta_alignment = myalign(my_ref_seq, my_pdb_seq)
print('this is my result:', fasta_alignment)
if __name__ == '__main__':
main()
#%% debug: individually
my_dict = {}
align_fastas_to_align = open('align_fastas.txt', 'r')
for record in SeqIO.parse(align_fastas_to_align,"fasta"):
myid =record.id
seq=str(record.seq)
#print(myid, seq)
my_dict.update({myid: seq})
print(my_dict)
print(my_dict.keys())
my_keys = list(my_dict.keys())
alignments = pairwise2.align.globalxx(my_dict[my_keys[0]], my_dict[my_keys[1]])
match = []
for a, b in zip(alignments[0][0], alignments[0][1]):
if a == b:
match.append('|')
else:
match.append(' ')
#print(match)
print(alignments[0][0])
print("".join(match))
print(alignments[0][1])
result_align = alignments[0][1]
#print(result_align)
print('===============================================================\n')
#offset = ''
aa_regex = '\w'
m = re.search(aa_regex, result_align)
#m = my_match.span()
offset = m.start()
offset_end = m.end()
print('start of match:', offset, '\nend of match:', offset_end)
print('===============================================================\n')