renamed aa_index folder to aa_index_scripts

This commit is contained in:
Tanushree Tunstall 2022-05-30 02:24:54 +01:00
parent 650d357afc
commit 0c316e4a41
9 changed files with 4452 additions and 0 deletions

File diff suppressed because it is too large Load diff

Binary file not shown.

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -0,0 +1,90 @@
from collections import defaultdict
import os
import pickle
# Directory that main() reads its two input files from; the files are
# expected to be named "aaindex2" and "aaindex3".
# NOTE(review): hard-coded absolute path inside one user's home directory --
# presumably needs adjusting before the script is run on another machine.
DATA_FOLDER = "/home/chmrodrigues/Documents/ppi2/reverse_mutations/data/aaindex"
def main(data_folder=None, output_folder="."):
    """Convert the ``aaindex2`` and ``aaindex3`` flat files into pickled dicts.

    Each input file is parsed into a mapping ``{entry_name: matrix}`` where
    ``matrix`` is a dict of dicts indexed first by row label and then by
    column label. The mappings are written to ``index2.p`` and ``index3.p``
    respectively, and the number of entries in each is printed.

    Args:
        data_folder: Directory containing the ``aaindex2`` and ``aaindex3``
            files. Defaults to the module-level ``DATA_FOLDER``.
        output_folder: Directory the pickle files are written to. Defaults to
            the current working directory.

    Returns:
        True once both pickle files have been written.

    Raises:
        ValueError: If a non-blank entry lacks its ``H`` or ``M rows =`` line.
        OSError: If an input file cannot be read or an output file written.
    """
    if data_folder is None:
        # Looked up at call time rather than bound as a default argument so
        # that the function does not capture a stale value of the constant.
        data_folder = DATA_FOLDER

    jobs = (("aaindex2", "index2.p"), ("aaindex3", "index3.p"))

    # Parse both inputs before writing anything, so a bad or missing input
    # does not leave only one of the two output files behind.
    parsed = [
        (pickle_name, _load_matrices(os.path.join(data_folder, source_name)))
        for source_name, pickle_name in jobs
    ]

    for pickle_name, matrices in parsed:
        print(len(matrices))
        with open(os.path.join(output_folder, pickle_name), "wb") as handle:
            # Protocol 2 is kept from the original script; it is the newest
            # protocol Python 2 can read -- presumably required by a consumer.
            pickle.dump(matrices, handle, protocol=2)
    return True


def _load_matrices(path):
    """Parse every matrix entry of the flat file at *path* into one dict."""
    with open(path, "r") as handle:
        lines = [line.rstrip("\n") for line in handle]

    matrices = {}
    for record in _split_records(lines):
        # Blank records (e.g. trailing empty lines after the final "//")
        # carry no entry and are skipped instead of crashing the parser.
        if not any(line.strip() for line in record):
            continue
        name, matrix = _parse_record(record)
        matrices[name] = matrix
    return matrices


def _split_records(lines):
    """Yield lists of lines, one list per entry, split on ``//`` lines."""
    record = []
    for line in lines:
        if line.strip() == "//":
            yield record
            record = []
        else:
            record.append(line)
    if record:
        # Final entry that is not followed by a "//" terminator.
        yield record


def _parse_record(record):
    """Return ``(name, matrix)`` for the lines of a single entry."""
    header = next(
        (line for line in record if line.strip().startswith("H ")), None
    )
    layout_at = next(
        (pos for pos, line in enumerate(record)
         if line.startswith("M rows =")),
        None,
    )
    if header is None or layout_at is None:
        raise ValueError(
            "malformed entry (missing 'H' or 'M rows =' line): %r"
            % (record[:1],)
        )

    name = header.split()[-1]

    # Layout line looks like: "M rows = <labels>, cols = <labels>"; every
    # character of each label string is one row / column key.
    layout = record[layout_at].split()
    rows = layout[3].replace(",", "")
    columns = layout[-1]

    # Every cell starts as None so cells absent from the file stay None.
    matrix = {row: dict.fromkeys(columns) for row in rows}

    # zip() stops at the shorter input, so surplus lines after the last row
    # and surplus values after the last column are ignored.
    for row, line in zip(rows, record[layout_at + 1:]):
        for col, token in zip(columns, line.split()):
            matrix[row][col] = _coerce(token)
    return name, matrix


def _coerce(token):
    """Return *token* as a float, or unchanged when it is not numeric."""
    try:
        return float(token)
    except ValueError:
        return token
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()