renamed aa_index folder to aa_index_scripts
This commit is contained in:
parent
650d357afc
commit
0c316e4a41
9 changed files with 4452 additions and 0 deletions
90
scripts/aa_index_scripts/aaindex/data/parse_aaindex.py
Normal file
90
scripts/aa_index_scripts/aaindex/data/parse_aaindex.py
Normal file
|
@ -0,0 +1,90 @@
|
|||
from collections import defaultdict
|
||||
|
||||
import os
|
||||
import pickle
|
||||
|
||||
DATA_FOLDER = "/home/chmrodrigues/Documents/ppi2/reverse_mutations/data/aaindex"
|
||||
|
||||
def _has_content(lines):
    """Return True when at least one line holds a non-whitespace character."""
    return any(line.strip() for line in lines)


def _split_records(handle):
    """Yield each AAindex record in *handle* as a list of its lines.

    A record ends at a line containing only ``//``. Blank-only records (for
    example a trailing empty line after the last terminator) are skipped, and
    a final record without a terminator is still yielded.
    """
    record = []
    for raw_line in handle:
        line = raw_line.rstrip("\n")
        if line.strip() == "//":
            if _has_content(record):
                yield record
            record = []
        else:
            record.append(line)
    if _has_content(record):
        yield record


def _parse_record(lines):
    """Return ``(name, matrix)`` for a single AAindex record.

    The name is the last token of the first ``H`` line. The ``M rows = ...,
    cols = ...`` line supplies the row and column labels, one character each.
    ``matrix[row][col]`` is a float when the token parses as one, the raw
    token otherwise (e.g. ``NA``), and ``None`` for cells the record does not
    provide (such as the upper half of a triangular matrix).

    Raises:
        ValueError: if the record has no ``H`` line or no ``M rows =`` line.
    """
    name = None
    header_at = None
    for position, line in enumerate(lines):
        if name is None and line.strip().startswith("H "):
            name = line.split()[-1]
        if header_at is None and line.startswith("M rows ="):
            header_at = position

    if name is None or header_at is None:
        raise ValueError(
            "malformed AAindex record (missing 'H' or 'M rows =' line): %r"
            % (lines[:1],)
        )

    fields = lines[header_at].split()
    row_labels = fields[3].replace(",", "")
    col_labels = fields[-1]

    matrix = {row: {col: None for col in col_labels} for row in row_labels}

    # zip() stops at the shorter input, so data lines beyond the declared rows
    # and values beyond the declared columns are ignored.
    for row, data_line in zip(row_labels, lines[header_at + 1:]):
        for col, token in zip(col_labels, data_line.split()):
            try:
                matrix[row][col] = float(token)
            except ValueError:
                matrix[row][col] = token

    return name, matrix


def _load_matrices(path):
    """Parse every record of the AAindex file at *path* into a dict by name."""
    with open(path, "r") as handle:
        return dict(_parse_record(record) for record in _split_records(handle))


def main(data_folder=None, output_folder=None):
    """Convert the ``aaindex2`` and ``aaindex3`` files into pickled dicts.

    Args:
        data_folder: directory holding ``aaindex2`` and ``aaindex3``.
            Defaults to the module-level ``DATA_FOLDER``.
        output_folder: directory that receives ``index2.p`` and ``index3.p``.
            Defaults to the current working directory.

    Returns:
        True once both pickles have been written.

    Raises:
        ValueError: if a record lacks its ``H`` or ``M rows =`` line.
    """
    if data_folder is None:
        data_folder = DATA_FOLDER
    if output_folder is None:
        output_folder = ""

    jobs = (("aaindex2", "index2.p"), ("aaindex3", "index3.p"))

    # Parse both inputs before writing anything, so a missing or malformed
    # input does not leave a partial set of output files behind.
    parsed = [
        (target, _load_matrices(os.path.join(data_folder, source)))
        for source, target in jobs
    ]

    for target, matrices in parsed:
        with open(os.path.join(output_folder, target), "wb") as out:
            # Protocol 2 keeps the pickles readable from Python 2.
            pickle.dump(matrices, out, protocol=2)
        print(len(matrices))

    return True


if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue