renamed aa_index folder to aa_index_scripts

This commit is contained in:
Tanushree Tunstall 2022-05-30 02:24:54 +01:00
parent 650d357afc
commit 0c316e4a41
9 changed files with 4452 additions and 0 deletions

File diff suppressed because it is too large Load diff

Binary file not shown.

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -0,0 +1,90 @@
from collections import defaultdict
import os
import pickle
DATA_FOLDER = "/home/chmrodrigues/Documents/ppi2/reverse_mutations/data/aaindex"
def main():
aaindex2_file = os.path.join(DATA_FOLDER,"aaindex2")
aaindex3_file = os.path.join(DATA_FOLDER,"aaindex3")
lines_index2 = ' '.join([item for item in open(aaindex2_file,'r').readlines()])
lines_index3 = ' '.join([item for item in open(aaindex3_file,'r').readlines()])
attrs_index2 = [item for item in lines_index2.split('//\n') if len(item) != 0]
attrs_index3 = [item for item in lines_index3.split('//\n') if len(item) != 0]
attr_name = str()
all_matrices = dict()
for line in attrs_index2:
attr_elements = line.split('\n')
attr_name = [item for item in attr_elements if item.strip().startswith("H ")][0].split()[-1]
rows_columns_index = [attr_elements.index(item) for item in attr_elements if item.startswith(" M rows =")][0]
rows = attr_elements[rows_columns_index].split()[3].replace(",","")
columns = attr_elements[rows_columns_index].split()[-1]
attr_dict = dict()
for row in rows:
attr_dict[row] = dict()
for col in columns:
attr_dict[row][col] = None
for i in range(rows_columns_index+1,len(attr_elements)):
values = attr_elements[i].split()
try:
row = rows[i-(rows_columns_index+1)]
for idx,value in enumerate(values):
col = columns[idx]
try:
attr_dict[row][col] = float(value)
except ValueError:
attr_dict[row][col] = value
except IndexError:
pass
all_matrices[attr_name] = attr_dict
print(len(all_matrices))
pickle.dump(all_matrices, open('index2.p','wb'),protocol=2)
attr_name = str()
all_matrices = dict()
for line in attrs_index3:
attr_elements = line.split('\n')
attr_name = [item for item in attr_elements if item.strip().startswith("H ")][0].split()[-1]
rows_columns_index = [attr_elements.index(item) for item in attr_elements if item.startswith(" M rows =")][0]
rows = attr_elements[rows_columns_index].split()[3].replace(",","")
columns = attr_elements[rows_columns_index].split()[-1]
attr_dict = dict()
for row in rows:
attr_dict[row] = dict()
for col in columns:
attr_dict[row][col] = None
for i in range(rows_columns_index+1,len(attr_elements)):
values = attr_elements[i].split()
try:
row = rows[i-(rows_columns_index+1)]
for idx,value in enumerate(values):
col = columns[idx]
try:
attr_dict[row][col] = float(value)
except ValueError:
attr_dict[row][col] = value
except IndexError:
pass
all_matrices[attr_name] = attr_dict
pickle.dump(all_matrices, open('index3.p','wb'),protocol=2)
print(len(all_matrices))
return True
if __name__ == "__main__":
main()

View file

@ -0,0 +1,161 @@
"""
RSA <= 0.2 Buried (Inaccessible)
RSA > 0.2 Exposed (Accessible)
SST = [H,I,G] - Helix
SST = [B,E] - Beta
SST = [T] - Turn
SST = [S,-] - Coil
"""
from Bio.PDB import PDBParser, DSSP
import pickle
import os
import sys
import warnings
warnings.filterwarnings("ignore")
CURRENT_FOLDER = '/home/local/BHRI/sportelli/Desktop/Important_Code/structural/aaindex'
DATA_FOLDER = os.path.join(CURRENT_FOLDER,'data')
RSA_SST_DEPENDENT = {
'exposed_helix' : 'KOSJ950101',
'exposed_beta' : 'KOSJ950102',
'exposed_turn' : 'KOSJ950103',
'exposed_coil' : 'KOSJ950104',
'buried_helix' : 'KOSJ950105',
'buried_beta' : 'KOSJ950106',
'buried_turn' : 'KOSJ950107',
'buried_coil' : 'KOSJ950108',
}
SST_DEPENDENT = {
'helix' : 'KOSJ950109',
'beta' : 'KOSJ950110',
'turn' : 'KOSJ950111',
'coil' : 'KOSJ950112',
}
RSA_DEPENDENT1 = {
'exposed' : 'KOSJ950113',
'buried' : 'KOSJ950114',
}
RSA_DEPENDENT2 = {
'exposed' : 'OVEJ920104',
'buried' : 'OVEJ920105',
}
def get_environment(pdb_file, chain, position, insertion_code=' '):
parser = PDBParser()
structure = parser.get_structure(pdb_file, pdb_file)
model = structure[0]
dssp = DSSP(model, pdb_file)
dssp_key = [item for item in dssp.keys() if item[0] == chain and item[1][1] == int(position) and item[1][2] == insertion_code]
dssp_key = dssp_key[0]
sst = dssp[dssp_key][2]
rsa = float(dssp[dssp_key][3])
return{'sst':sst, 'rsa':rsa}
def main():
"""
READ IMPUT
"""
pdb_file = sys.argv[1]
chain_id = sys.argv[2]
mutation_code = sys.argv[3]
aa_from = mutation_code[0]
aa_to = mutation_code[-1]
position = mutation_code[1:-1]
insertion_code = ' '
if not position[-1].isdigit():
insertion_code = position[-1]
position = position[:-1]
"""
READ DATABASES
index2 - Amino acid substitution indexes
index3 - Statistical protein contact potentials
"""
index2 = pickle.load(open('{}/aaindex2.p'.format(DATA_FOLDER),'rb'))
index3 = pickle.load(open('{}/aaindex3.p'.format(DATA_FOLDER),'rb'))
"""
LOOP THROUGH TABLES AND EXTRACT VALUES
"""
results_index2 = dict()
results_index3 = dict()
for key in index2.keys():
if index2[key][aa_from][aa_to] != None:
results_index2[key] = index2[key][aa_from][aa_to]
else:
results_index2[key] = index2[key][aa_to][aa_from]
for key in index3.keys():
if index3[key][aa_from][aa_to] != None:
results_index3[key] = index3[key][aa_from][aa_to]
else:
results_index3[key] = index3[key][aa_to][aa_from]
"""
GET ENVIRONMENT CHARACTERISTICS
"""
environment = get_environment(pdb_file, chain_id, position, insertion_code)
buried = 'buried'
sst = str()
if environment['rsa'] <= 0.2:
buried = 'exposed'
if environment['sst'] in ['H','I','G']:
sst = 'helix'
elif environment['sst'] in ['B','E']:
sst = 'beta'
elif environment['sst'] in ['T']:
sst = 'turn'
else:
sst = 'coil'
results_index2['KOSJ950100_RSA_SST'] = results_index2[RSA_SST_DEPENDENT['{}_{}'.format(buried,sst)]]
results_index2['KOSJ950100_SST'] = results_index2[SST_DEPENDENT[sst]]
results_index2['KOSJ950110_RSA'] = results_index2[RSA_DEPENDENT1[buried]]
results_index2['OVEJ920100_RSA'] = results_index2[RSA_DEPENDENT2[buried]]
for value in RSA_SST_DEPENDENT.values():
results_index2.pop(value)
for value in SST_DEPENDENT.values():
results_index2.pop(value)
for value in RSA_DEPENDENT1.values():
results_index2.pop(value)
for value in RSA_DEPENDENT2.values():
results_index2.pop(value)
"""
PRINT RESULTS
"""
output_dict = dict()
output_dict.update(results_index2)
output_dict.update(results_index3)
keys = list(output_dict.keys())
keys.sort()
values = [str(output_dict[item]) for item in keys]
# print(",".join(keys))
print(",".join(values))
return True
if __name__ == "__main__":
if len(sys.argv) != 4:
print("Error on parsing argument list")
print("Please provide a one letter code for wild-type and mutant residues")
print("Eg.: python get_scores.py pdb_file chain_id mutation_code")
sys.exit(1)
main()