Compare commits
475 commits
Author | SHA1 | Date | |
---|---|---|---|
727ca1ee76 | |||
6550be3350 | |||
7fd5e2710d | |||
69b8ba9d08 | |||
45c48485f1 | |||
1ddc5045d5 | |||
34ee2519d3 | |||
246cd636a1 | |||
80f73a3697 | |||
7d6087c82e | |||
9c37dbee31 | |||
1e3670f935 | |||
0c16937b68 | |||
c0c30fd527 | |||
9cb33ed67b | |||
067fc85163 | |||
9b1d1d009d | |||
1ea42097ae | |||
2e9d142184 | |||
2eee69ee80 | |||
938dba7fcc | |||
59a370b45a | |||
3086972480 | |||
a641347f63 | |||
e48f215227 | |||
96277d78f6 | |||
7c0824d0f2 | |||
656639e871 | |||
718f92d7ff | |||
6759649c61 | |||
5eb07cdf86 | |||
4bf4650c88 | |||
a6f0832a42 | |||
b679068a5e | |||
c599d28377 | |||
9f5b983bc0 | |||
89e6b03673 | |||
a9f9cec494 | |||
29d9717abe | |||
20976c31bb | |||
0f983d2889 | |||
b614962e45 | |||
639ccf1cd7 | |||
f1a8fb583a | |||
e75cfd2665 | |||
71d874e350 | |||
5eba273a55 | |||
506e639a7b | |||
762b1a3931 | |||
e822f9f690 | |||
1c27bbff11 | |||
8f4daba98d | |||
7e6affea84 | |||
2aec79af31 | |||
c6d1260f74 | |||
13c61e7813 | |||
ac383165ec | |||
04a7cf15dc | |||
e10ab6a7c6 | |||
064182d784 | |||
920007cc83 | |||
8a301e8bb1 | |||
9534fc57d4 | |||
f79aea254e | |||
f6a2e029cb | |||
86ed1805fc | |||
ddb1a7a7aa | |||
57e4d8cd1e | |||
81ab3fe5ba | |||
ca1a0e10ca | |||
687adf0ec7 | |||
8fa9faa17d | |||
f88e2665e9 | |||
7686aa39b4 | |||
931f8ec2f9 | |||
b6df47a0cd | |||
acda9f13e5 | |||
e78707067c | |||
a2431b59e5 | |||
f6259aa517 | |||
0c3645705d | |||
dccdfe9742 | |||
5c018e23be | |||
4bee48f545 | |||
786eaabe1a | |||
225360fb93 | |||
3f58a5c64c | |||
776c4e0279 | |||
d45a9499a2 | |||
6f24fc1fac | |||
ce8abafdfe | |||
b25511a239 | |||
b8d0bc416a | |||
d21605b31f | |||
4f60e93abb | |||
7242b3516b | |||
d52534f676 | |||
18af246c24 | |||
8009c3fe3d | |||
c59e3f178d | |||
bbec97b00c | |||
9062751790 | |||
77efd0b76d | |||
88dad2696f | |||
34c0b808ea | |||
05562399ce | |||
9f03e6a6fd | |||
2995299179 | |||
f9249d7bf2 | |||
d683e971d4 | |||
8dc3a790c0 | |||
69b62e54a5 | |||
cfdd18086a | |||
9a0e98eb24 | |||
2168007f12 | |||
19d89230f5 | |||
a9a4483aee | |||
cd06a83e13 | |||
013bba2503 | |||
b69d9d729a | |||
7a74fecbda | |||
322979406c | |||
1f72001689 | |||
c99f1cac92 | |||
b2397ea99d | |||
9c221e6786 | |||
7f75b92553 | |||
56f5479c0b | |||
80f7e039ab | |||
4e19961283 | |||
7116b45bf8 | |||
28521104f8 | |||
1d8e6f0d75 | |||
2e047fd548 | |||
5d6ddb7639 | |||
cfe9028a9c | |||
2eab17cb9e | |||
d159a81cfb | |||
fad1526ce5 | |||
0fd3e75ab0 | |||
600f829972 | |||
d139342074 | |||
491b317752 | |||
98287b3c20 | |||
ab7bed9f4b | |||
56ca9db40d | |||
5e735af323 | |||
0c95b3a512 | |||
bcf4467c44 | |||
64018cce4c | |||
6b6921d45f | |||
534a6754cd | |||
4163ede798 | |||
8302d01867 | |||
725e9b53ca | |||
56150ae3c8 | |||
ca68996264 | |||
86670bbac3 | |||
9df3913a84 | |||
99b77434b5 | |||
fa25a30dcf | |||
1f8cfc2403 | |||
7a9b16255a | |||
08ad16adbb | |||
fc4313045f | |||
20bba2ad70 | |||
802522d1c6 | |||
ac5b86a9cd | |||
2ac4ea8f5c | |||
ccdd6029be | |||
f9fd74812a | |||
b0b9e91af7 | |||
b2284f7216 | |||
1f9ea3f789 | |||
59911687c8 | |||
2f1f02e1de | |||
667804ad83 | |||
7f5ca7f5a4 | |||
69f3629cc0 | |||
be50636b15 | |||
4285bbd59f | |||
18b6407539 | |||
9784cba232 | |||
e60b4c5492 | |||
9d2d6cfd84 | |||
a549e52825 | |||
5f441d09d9 | |||
f240c969ec | |||
07104a8c8e | |||
74c4ef16ae | |||
4c345ea9f4 | |||
9597997741 | |||
8a6c7968f5 | |||
a77b472dfa | |||
d2093e7a4c | |||
81796df71a | |||
c58fa8cd4d | |||
48050752db | |||
a3aab4556a | |||
6d08b646fc | |||
5579e9527b | |||
|
f7280ceada | ||
|
807876d919 | ||
|
baedea8c5b | ||
|
0eca5cf859 | ||
|
ac3c8a8086 | ||
5ceea2e7b7 | |||
63fa0c596a | |||
7239ab220b | |||
2297617af2 | |||
be8fa7e639 | |||
7e8d5c869e | |||
edabe0d776 | |||
771995d1ab | |||
093ae0d832 | |||
369c906a33 | |||
24b1cc2440 | |||
5e1c920a0c | |||
b8575c6e69 | |||
40e4ddd70a | |||
8ddca4a8b1 | |||
883207bc4b | |||
ea5d5bda44 | |||
f0ee1ff6c9 | |||
1b5280145b | |||
fb0646373b | |||
5f335a5051 | |||
63e04ae600 | |||
375cdc2068 | |||
a5b03e53e8 | |||
351e472e73 | |||
b36bfc9e9d | |||
25f2f9e4a2 | |||
ba02107e23 | |||
0f6bf3875d | |||
83deb64e1c | |||
445f3e2047 | |||
44d1f64e88 | |||
645827570f | |||
ee69445f11 | |||
09e20cf7b3 | |||
3612ef0f2d | |||
a5fdf01d25 | |||
e1da853cf1 | |||
968b57105f | |||
431e606448 | |||
fadd61bf57 | |||
7e4be21575 | |||
8d9ede186c | |||
ecbc7541e9 | |||
1262df40c9 | |||
078644c322 | |||
c124f49041 | |||
26d0d7f42d | |||
c1041ad273 | |||
e690f5beba | |||
c4225cec4f | |||
d4e75d5f64 | |||
8be1418a32 | |||
6934faca10 | |||
5102bbea1b | |||
f415b0b239 | |||
cf732a3bcc | |||
65841e4f5b | |||
68050a93b4 | |||
fdecc944fc | |||
d43ecfa1dc | |||
1708194912 | |||
fc47c58f91 | |||
9bee97052e | |||
f3f86d6651 | |||
2c2c2c1a60 | |||
f85b1bd902 | |||
e570454cf2 | |||
5025e47983 | |||
f424f4e2d6 | |||
080cd6375d | |||
19a984f228 | |||
31b98fb3d3 | |||
774b34ef00 | |||
09e4f7bfbd | |||
b7c7ffc018 | |||
46b43cf261 | |||
eb5491aad9 | |||
42986bb119 | |||
fe49a45447 | |||
5d9561f88a | |||
648be02665 | |||
b4affa0c94 | |||
2ef767f046 | |||
db87f98d32 | |||
7460c7c97f | |||
dd1158a66c | |||
645868ea27 | |||
ddefcd7841 | |||
bba3487829 | |||
3f8d6695a4 | |||
0220960975 | |||
89e881b5d4 | |||
0e3f9e584b | |||
482eeadb9a | |||
ed739aeb71 | |||
b754f26f9b | |||
73877942f4 | |||
75273cebbf | |||
54f9fd073b | |||
d76345c3de | |||
f468554427 | |||
a448d9276b | |||
d78626048c | |||
acd0b8355b | |||
841d18d10b | |||
48773a19ef | |||
f8f33abad8 | |||
2d8cb01cb7 | |||
dcd9a985ec | |||
13203e6fe0 | |||
61e41f1697 | |||
efe0178f4e | |||
7d1ecbb660 | |||
5e1b39cea0 | |||
1f44f8ec0a | |||
1e785a08a1 | |||
3cb33df009 | |||
55f03bc343 | |||
e41fb78e37 | |||
e4270b67c8 | |||
2bc5be20b9 | |||
7d36e0e36b | |||
46b1505fdf | |||
83383b4493 | |||
9e8469abe3 | |||
57a966c7c4 | |||
f9500d5324 | |||
5677175423 | |||
c80faef0bf | |||
aaf3f5e084 | |||
d3d82623d2 | |||
e4a7deae7b | |||
0379d3e241 | |||
91348aaae2 | |||
f8e345f5bc | |||
6402990154 | |||
01fbc2a87b | |||
0e71b23759 | |||
1fa0dc6ad4 | |||
c958cc1081 | |||
a4670b9944 | |||
a7f21cfb14 | |||
943513a338 | |||
5addb85851 | |||
a220288c5f | |||
262bd79204 | |||
90cbb49560 | |||
f758c01159 | |||
4d686e2933 | |||
af65a86ff9 | |||
3c6122a296 | |||
b82cc11dbe | |||
626ed3a57b | |||
a298071309 | |||
003b22ce3f | |||
a1cc7ee33d | |||
1e43ca8136 | |||
18998092f4 | |||
8f272bdc17 | |||
ada205962b | |||
0c3c6fd143 | |||
3497d1ef54 | |||
fa2bcb5f05 | |||
76ecb65a1a | |||
6c2c7e0a90 | |||
b33419c939 | |||
010ef133dd | |||
fdba990b80 | |||
8d1daabff4 | |||
e21635fe02 | |||
e2f319ba42 | |||
f6fc6e47ab | |||
3fe1d35df5 | |||
ca36e004c1 | |||
15dea0cbf6 | |||
548d9a5192 | |||
f7e371a585 | |||
01a7cbf26e | |||
65db4a090e | |||
3425d8fa2b | |||
7f66d5d19e | |||
b28d866237 | |||
a405aa17c3 | |||
e94da61871 | |||
e50466da39 | |||
7aafa72e10 | |||
45889990e7 | |||
7d2241ad81 | |||
398eccd246 | |||
f5241048b4 | |||
0550cfe0e2 | |||
7cee9b21e2 | |||
7a8bbc6595 | |||
fe3d431a3d | |||
c025a22343 | |||
30aa64fd2b | |||
49a38dd1ae | |||
569b7c6c7f | |||
811027e34d | |||
02488ea23e | |||
6afe202931 | |||
44577b4a0c | |||
24c7ade7c4 | |||
f690c75ca0 | |||
d161fcd0f3 | |||
b0e56328ef | |||
cc9cdbcad5 | |||
b5aa524914 | |||
34a2057d29 | |||
b1e4dcd376 | |||
e7f2a3aada | |||
ab541aa3de | |||
d1da203df0 | |||
82e96fcdba | |||
afd6ca8881 | |||
69e2567ffc | |||
c0bac6fd7b | |||
5bab99c15f | |||
0b7a938fbd | |||
4c2fa2b600 | |||
87a847109a | |||
de1822f491 | |||
96ebb85069 | |||
c184841951 | |||
dd91692673 | |||
22a0d38563 | |||
d42e6fbdb3 | |||
b4dbad7e54 | |||
b331227023 | |||
eb021349fe | |||
8df0b7d920 | |||
77cc5bf42c | |||
95e8205189 | |||
f9837b474c | |||
e9a95e9d3a | |||
ed8fc4d488 | |||
d7ef8ef51e | |||
b56c0b8b68 | |||
4ef68bdc1b | |||
b97712edb0 | |||
9e4b3c5dce | |||
0653a8c1e3 | |||
d12ef0ef00 | |||
d9519b6262 | |||
134dea609d | |||
8c7c389562 | |||
632b78320a | |||
c15d1a8a95 | |||
3390f80168 | |||
1d80186ab9 | |||
15daa6dfc1 | |||
ac34de9e79 | |||
f1584bddb1 | |||
6cbef0c3d7 | |||
1edfe3f8f8 | |||
|
8d2456f7f2 | ||
15391a5700 | |||
c3c50f65f2 | |||
4d2d03f634 | |||
bcf822d6e4 | |||
4f06e42ee4 | |||
4bcb81e9be | |||
be213cb7e9 | |||
cae9c550a4 | |||
2df031c02a | |||
c1ea688c5c | |||
|
ec37e3c1f6 | ||
|
50ade050c2 |
329 changed files with 2472 additions and 1761372 deletions
|
@ -34,12 +34,6 @@ subdirs within this repo
|
|||
*.py
|
||||
*.sh
|
||||
|
||||
```
|
||||
## ML\_analysis:
|
||||
|
||||
located in:
|
||||
```
|
||||
scripts/ml
|
||||
```
|
||||
More docs here as I write them.
|
||||
|
||||
|
|
176
config/alr.R
176
config/alr.R
|
@ -1,176 +0,0 @@
|
|||
gene = "alr"
|
||||
drug = "cycloserine"
|
||||
|
||||
#==========
|
||||
# LIGPLUS
|
||||
#===========
|
||||
aa_ligplus_dcs = c(66, 64, 70, 112, 196
|
||||
, 236, 237, 252, 253
|
||||
, 254, 255, 388)
|
||||
|
||||
aa_ligplus_dcs_hbond = c(255, 254, 237, 66, 196)
|
||||
aa_ligplus_dcs_other = aa_ligplus_dcs[!aa_ligplus_dcs%in%aa_ligplus_dcs_hbond]
|
||||
|
||||
c1 = length(aa_ligplus_dcs_other) == length(aa_ligplus_dcs) - length(aa_ligplus_dcs_hbond)
|
||||
|
||||
#==========
|
||||
# PLIP
|
||||
#===========
|
||||
aa_plip_dcs = c(66, 70, 112, 196, 237
|
||||
, 252, 254, 255, 295
|
||||
, 314, 343)
|
||||
aa_plip_dcs_hbond = c(66, 70, 196, 237
|
||||
, 252, 254, 255, 295
|
||||
, 314, 343)
|
||||
|
||||
aa_plip_dcs_other = aa_plip_dcs[!aa_plip_dcs%in%aa_plip_dcs_hbond]
|
||||
|
||||
c2 = length(aa_plip_dcs_other) == length(aa_plip_dcs) - length(aa_plip_dcs_hbond)
|
||||
|
||||
|
||||
#==========
|
||||
# Arpeggio
|
||||
#===========
|
||||
aa_arpeg_dcs = c(64, 66, 70, 112, 157, 164
|
||||
, 194, 196, 200, 236, 237, 252, 253
|
||||
, 254, 255, 256, 295, 314, 342, 343
|
||||
, 344, 386, 388)
|
||||
|
||||
aa_arpeg_dcs_other = aa_arpeg_dcs[!aa_arpeg_dcs%in%c(aa_ligplus_dcs_other
|
||||
, aa_plip_dcs_other)]
|
||||
|
||||
c3 = length(aa_arpeg_dcs_other) == length(aa_arpeg_dcs) - ( length(aa_ligplus_dcs_other) + length(aa_plip_dcs_other) )
|
||||
|
||||
#######################################################################
|
||||
#NEW AFTER ADDING PLP to structure! huh
|
||||
# ADDED: 18 Aug 2022
|
||||
# PLIP server for co factor PLP (CONFUSING!)
|
||||
#and 2019 lit:lys42, M319, and Y364 : OFFSET is 24
|
||||
#K42: K66, Y271:Y295, M319:M343, W89: W113, W203: W227, H209:H233, Q321:Q345
|
||||
aa_pos_paper = sort(unique(c(66,70,112,113,164,196,227,233,237,252,254,255,295,342,343,344,345,388)))
|
||||
plp_pos_paper = sort(unique(c(66, 70, 112, 196, 227, 237, 252, 254, 255, 388)))
|
||||
|
||||
#active_aa_pos = sort(unique(c(aa_pos_paper, active_aa_pos)))
|
||||
aa_pos_plp = sort(unique(c(plp_pos_paper, 66, 70, 112, 237, 252, 254, 255, 196)))
|
||||
|
||||
#######################################################################
|
||||
# this is post inspection on chimera
|
||||
#remove_pos = c(295, 314, 342, 343, 344)
|
||||
remove_pos = c(0)
|
||||
#select :295.A, 314.A, 342.A, 343.A, 344.A
|
||||
#===============
|
||||
# Active site aa
|
||||
#===============
|
||||
active_aa_pos = sort(unique(c(aa_ligplus_dcs
|
||||
, aa_plip_dcs
|
||||
, aa_arpeg_dcs
|
||||
, aa_pos_plp)))
|
||||
|
||||
active_aa_pos = active_aa_pos[!active_aa_pos%in%remove_pos]
|
||||
#=================
|
||||
# Drug binding aa
|
||||
#=================
|
||||
aa_pos_dcs = sort(unique(c(aa_ligplus_dcs
|
||||
, aa_plip_dcs
|
||||
, aa_arpeg_dcs)))
|
||||
|
||||
aa_pos_dcs = aa_pos_dcs[!aa_pos_dcs%in%remove_pos]
|
||||
aa_pos_drug = aa_pos_dcs
|
||||
|
||||
#===============
|
||||
# Co-factor: PLP aa
|
||||
#===============
|
||||
aa_pos_plp = aa_pos_plp
|
||||
|
||||
#aa_pos_plp = aa_pos_plp[!aa_pos_plp%in%remove_pos]
|
||||
|
||||
#===============
|
||||
# Hbond aa
|
||||
#===============
|
||||
aa_pos_dcs_hbond = sort(unique(c(aa_ligplus_dcs_hbond
|
||||
, aa_plip_dcs_hbond)))
|
||||
|
||||
aa_pos_dcs_hbond = aa_pos_dcs_hbond[!aa_pos_dcs_hbond%in%remove_pos]
|
||||
|
||||
#=======================
|
||||
# Other interactions aa
|
||||
#=======================
|
||||
aa_pos_dcs_other = active_aa_pos[!active_aa_pos%in%aa_pos_dcs_hbond]
|
||||
|
||||
aa_pos_dcs_other = aa_pos_dcs_other[!aa_pos_dcs_other%in%remove_pos]
|
||||
|
||||
c3 = length(aa_pos_dcs_other) == length(active_aa_pos) - length(aa_pos_dcs_hbond)
|
||||
|
||||
#######################################################################
|
||||
if ( all(c1, c2, c3) ) {
|
||||
|
||||
cat("\nPASS:All active site residues and interctions checked and identified for"
|
||||
, "\ngene:", gene
|
||||
, "\ndrug:", drug
|
||||
, "\n==================================================="
|
||||
, "\nActive site residues for:", length(active_aa_pos)
|
||||
, "\n==================================================="
|
||||
, "\n"
|
||||
, active_aa_pos
|
||||
|
||||
, "\n=================================================="
|
||||
, "\nDrug binding residues:", length(aa_pos_drug)
|
||||
, "\n==================================================="
|
||||
, "\n"
|
||||
#, aa_pos_dcs
|
||||
, aa_pos_drug
|
||||
|
||||
, "\n==================================================="
|
||||
, "\nHbond residues:", length(aa_pos_dcs_hbond)
|
||||
, "\n==================================================="
|
||||
, "\n"
|
||||
, aa_pos_dcs_hbond
|
||||
|
||||
, "\n=================================================="
|
||||
, "\nOther interaction residues:", length(aa_pos_dcs_other)
|
||||
, "\n==================================================="
|
||||
, "\n"
|
||||
, aa_pos_dcs_other
|
||||
, "\n\nNO other co-factors or ligands present\n")
|
||||
|
||||
}
|
||||
######################################################################
|
||||
#NEW
|
||||
# PLIP server for co factor PLP (CONFUSING!)
|
||||
#and 2019 lit:lys42, M319, and Y364 : OFFSET is 24
|
||||
#K42: K66, Y271:Y295, M319:M343, W89: W113, W203: W227, H209:H233, Q321:Q345
|
||||
aa_pos_paper = sort(unique(c(66,70,112,113,164,196,227,233,237,252,254,255,295,342,343,344,345,388)))
|
||||
plp_pos_paper = sort(unique(c(66, 70, 112, 196, 227, 237, 252, 254, 255, 388)))
|
||||
#add_to_dcs = c(113, 227, 233, 345)
|
||||
#add_to_plp = c(113, 227, 233, 345) # 227 not in plp and 227, 233 and 345 not with snp
|
||||
|
||||
#active_aa_pos = sort(unique(c(aa_pos_paper, active_aa_pos)))
|
||||
#aa_pos_plp = sort(unique(c(plp_pos_paper, 66, 70, 112, 237, 252, 254, 255, 196, add_to_plp)))
|
||||
aa_pos_plp = sort(unique(c(plp_pos_paper, 66, 70, 112, 237, 252, 254, 255, 196)))
|
||||
#aa_pos_dcs = sort(unique(c(aa_pos_dcs, add_to_dcs)))
|
||||
#aa_pos_drug = aa_pos_dcs
|
||||
|
||||
# add two key residues
|
||||
#aa_pos_drug = sort(unique(c(319, 364, aa_pos_drug)))
|
||||
#active_aa_pos = sort(unique(c(319, 364, active_aa_pos, aa_pos_plp)))
|
||||
|
||||
# FIXME: these should be populated!
|
||||
aa_pos_lig1 = aa_pos_plp
|
||||
aa_pos_lig2 = NULL
|
||||
aa_pos_lig3 = NULL
|
||||
|
||||
tile_map=data.frame(tile=c("DCS","PLP"),
|
||||
tile_colour=c("green","navyblue")) #darkslategrey
|
||||
|
||||
|
||||
######
|
||||
chain_suffix = ".A"
|
||||
|
||||
toString(paste0(aa_pos_drug, chain_suffix))
|
||||
toString(paste0(aa_pos_plp, chain_suffix))
|
||||
toString(paste0(active_aa_pos, chain_suffix))
|
||||
|
||||
common_pos = aa_pos_drug[aa_pos_drug%in%aa_pos_plp]
|
||||
cat("\nCommon interacting partners:", length(common_pos))
|
||||
common_pos
|
||||
toString(paste0(common_pos, chain_suffix))
|
123
config/embb.R
123
config/embb.R
|
@ -1,123 +0,0 @@
|
|||
gene = "embB"
|
||||
drug = "ethambutol"
|
||||
|
||||
# interacting chain B
|
||||
#==========
|
||||
# LIGPLUS
|
||||
#===========
|
||||
aa_ligplus_emb = c(299, 302, 303, 306, 334, 594, 988, 1028)
|
||||
aa_ligplus_emb_hbond = c(299, 594)
|
||||
|
||||
aa_ligplus_ca = c(952, 954, 959)
|
||||
aa_ligplus_ca_hbond = c(952, 954, 959)
|
||||
|
||||
aa_ligplus_cdl = c(460, 665, 568, 601, 572, 579, 580, 583)
|
||||
aa_ligplus_cdl_hbond = c(601, 568, 665)
|
||||
|
||||
aa_ligplus_dsl = c(435, 442, 489, 452, 330, 589, 509, 446, 445, 506, 592, 590, 514, 403, 515)
|
||||
aa_ligplus_dsl_hbond = c(445, 590, 592, 403)
|
||||
|
||||
#==========
|
||||
# PLIP
|
||||
#===========
|
||||
aa_plip_emb = c(299, 302, 303, 327, 594, 988, 1028)
|
||||
aa_plip_emb_hbond = c(299, 327, 594)
|
||||
|
||||
aa_plip_ca = c(952, 954, 959)
|
||||
|
||||
aa_plip_cdl = c(456, 572, 579, 583, 568)
|
||||
#aa_plip_cdl_sb = c(537, 568, 601, 665)
|
||||
|
||||
aa_plip_dsl = c(330, 435, 446, 452, 489, 506, 589, 590, 445, 403, 595)
|
||||
aa_plip_dsl_hbond = c(445, 590)
|
||||
#aa_plip_dsl_sb = c(403, 595)
|
||||
|
||||
#==========
|
||||
# Arpeggio
|
||||
#===========
|
||||
# emb:1402, 1403
|
||||
aa_arpeg_emb = c(298, 299, 302, 303, 306, 318, 327, 334, 403, 445, 592, 594, 988, 1028)
|
||||
aa_arpeg_ca = c(847, 853, 854, 952, 954, 955, 956, 959, 960)
|
||||
aa_arpeg_cdl = c(456, 457, 460, 461, 521, 525, 533, 537, 554, 558, 568
|
||||
, 569, 572, 573, 575, 576, 579, 580, 582, 583, 586, 601, 605, 616, 658
|
||||
, 661, 662, 665)
|
||||
aa_arpeg_dsl = c(299, 322, 329, 330, 403, 435, 438, 439, 442, 445, 446
|
||||
, 449, 452, 455, 486, 489, 490, 493, 506, 509, 510, 513, 514
|
||||
, 515, 587, 589, 590, 592, 595)
|
||||
|
||||
##############################################################
|
||||
active_aa_pos = sort(unique(c(aa_ligplus_emb
|
||||
, aa_plip_emb
|
||||
, aa_arpeg_emb
|
||||
|
||||
, aa_ligplus_ca
|
||||
, aa_plip_ca
|
||||
, aa_arpeg_ca
|
||||
|
||||
, aa_ligplus_cdl
|
||||
, aa_plip_cdl
|
||||
, aa_arpeg_cdl
|
||||
|
||||
, aa_ligplus_dsl
|
||||
, aa_plip_dsl
|
||||
, aa_arpeg_dsl)))
|
||||
##############################################################
|
||||
cat("\nNo. of active site residues for gene"
|
||||
, gene, ":"
|
||||
, length(active_aa_pos)
|
||||
, "\nThese are:\n"
|
||||
, active_aa_pos)
|
||||
|
||||
##############################################################
|
||||
aa_pos_emb = sort(unique(c( aa_ligplus_emb
|
||||
, aa_plip_emb
|
||||
, aa_arpeg_emb)))
|
||||
aa_pos_drug = aa_pos_emb
|
||||
|
||||
aa_pos_emb_hbond = sort(unique(c( aa_ligplus_emb_hbond
|
||||
, aa_plip_emb_hbond)))
|
||||
|
||||
aa_pos_ca = sort(unique(c( aa_ligplus_ca
|
||||
, aa_plip_ca
|
||||
, aa_arpeg_ca)))
|
||||
|
||||
aa_pos_cdl = sort(unique(c( aa_ligplus_cdl
|
||||
, aa_plip_cdl
|
||||
, aa_arpeg_cdl )))
|
||||
|
||||
aa_pos_cdl_hbond = sort(unique(c( aa_ligplus_cdl_hbond )))
|
||||
|
||||
aa_pos_dsl = sort(unique(c( aa_ligplus_dsl
|
||||
, aa_plip_dsl
|
||||
, aa_arpeg_dsl)))
|
||||
|
||||
aa_pos_dsl_hbond = sort(unique(c( aa_ligplus_dsl_hbond
|
||||
, aa_plip_dsl_hbond)))
|
||||
|
||||
|
||||
cat("\n==================================================="
|
||||
, "\nActive site residues for", gene, "comprise of..."
|
||||
, "\n==================================================="
|
||||
, "\nNo. of", drug, "binding residues:" , length(aa_pos_emb), "\n"
|
||||
, aa_pos_emb
|
||||
, "\nNo. of co-factor 'Ca' binding residues:", length(aa_pos_ca) , "\n"
|
||||
, aa_pos_ca
|
||||
, "\nNo. of ligand 'CDL' binding residues:" , length(aa_pos_cdl), "\n"
|
||||
, aa_pos_cdl
|
||||
, "\nNo. of ligand 'DPA' binding residues:" , length(aa_pos_dsl), "\n"
|
||||
, aa_pos_dsl, "\n"
|
||||
)
|
||||
##############################################################
|
||||
# var for position customisation for plots
|
||||
# aa_pos_lig1 = aa_pos_ca
|
||||
# aa_pos_lig2 = aa_pos_cdl
|
||||
# aa_pos_lig3 = aa_pos_dsl
|
||||
|
||||
aa_pos_lig1 = aa_pos_dsl #slategray
|
||||
aa_pos_lig2 = aa_pos_cdl #navy blue
|
||||
aa_pos_lig3 = aa_pos_ca #purple
|
||||
|
||||
tile_map=data.frame(tile=c("EMB","DPA","CDL","Ca"),
|
||||
tile_colour=c("green","darkslategrey","navyblue","purple"))
|
||||
|
||||
drug_main_res = c(299 , 302, 303 , 306 , 327 , 592 , 594, 988, 1028)
|
143
config/gid.R
143
config/gid.R
|
@ -1,143 +0,0 @@
|
|||
gene = "gid"
|
||||
drug = "streptomycin"
|
||||
|
||||
#rna_site = G518
|
||||
#rna_bind_aa_pos = c(96, 97, 118, 163)
|
||||
#binding_aa_pos = c(48, 51, 137, 200)
|
||||
|
||||
# SAM: 226
|
||||
# SRY: 1601
|
||||
#==========
|
||||
# LIGPLUS
|
||||
#===========
|
||||
aa_ligplus_sry = c(118, 220, 223) # 526 (rna) and 7mg527
|
||||
aa_ligplus_sry_hbond = c(118, 220, 223)
|
||||
|
||||
aa_ligplus_sam = c(148, 137, 138, 139
|
||||
, 93, 69, 119, 120
|
||||
, 220, 219, 118, 223)
|
||||
aa_ligplus_sam_hbond = c(220, 223)
|
||||
|
||||
aa_ligplus_amp = c(123, 125, 213, 214)
|
||||
aa_ligplus_amp_hbond = c(125, 123, 213)
|
||||
|
||||
aa_ligplus_rna = c(137, 47, 48, 38, 35, 36, 37, 94, 33, 97, 139, 138, 163, 165, 164, 199)
|
||||
aa_ligplus_rna_hbond = c(33, 97, 37, 47, 137)
|
||||
|
||||
#==========
|
||||
# PLIP
|
||||
#===========
|
||||
aa_plip_sry = c(118, 220, 223)
|
||||
aa_plip_sry_hbond = c(118, 220, 223)
|
||||
|
||||
aa_plip_sam = c(92, 118, 119, 120, 139, 220, 223, 148)
|
||||
aa_plip_sam_hbond = c(92, 118, 119, 120, 139, 220, 223)
|
||||
|
||||
aa_plip_amp = c(123, 125, 213)
|
||||
aa_plip_amp_hbond = c(123, 125, 213)
|
||||
|
||||
aa_plip_rna = c(33, 34, 36, 37, 47, 48, 51, 97, 137, 199)
|
||||
aa_plip_rna_hbond = c(33, 34, 36, 37, 47, 51, 137, 199)
|
||||
|
||||
#==========
|
||||
# Arpeggio
|
||||
#===========
|
||||
aa_arpeg_sry = c(118, 148, 220, 223, 224)
|
||||
aa_arpeg_sam = c(68, 69, 92, 93, 97, 117
|
||||
, 118, 119, 120, 136, 137
|
||||
, 138, 139, 140, 148, 218
|
||||
, 219, 220, 221, 222, 223)
|
||||
aa_arpeg_amp = c(123, 125, 213)
|
||||
##############################################################
|
||||
#=============
|
||||
# Active site
|
||||
#=============
|
||||
active_aa_pos = sort(unique(c(
|
||||
#rna_bind_aa_pos
|
||||
#, binding_aa_pos
|
||||
aa_ligplus_sry
|
||||
, aa_ligplus_sam
|
||||
, aa_ligplus_amp
|
||||
, aa_ligplus_rna
|
||||
, aa_plip_sry
|
||||
, aa_plip_sam
|
||||
, aa_plip_amp
|
||||
, aa_plip_rna
|
||||
, aa_arpeg_sry
|
||||
, aa_arpeg_sam
|
||||
, aa_arpeg_amp
|
||||
)))
|
||||
|
||||
##############################################################
|
||||
cat("\nNo. of active site residues for gene"
|
||||
, gene, ":"
|
||||
, length(active_aa_pos)
|
||||
, "\nThese are:\n"
|
||||
, active_aa_pos)
|
||||
|
||||
##############################################################
|
||||
aa_pos_sry = sort(unique(c(
|
||||
aa_ligplus_sry
|
||||
, aa_plip_sry
|
||||
, aa_arpeg_sry)))
|
||||
|
||||
aa_pos_drug = aa_pos_sry
|
||||
|
||||
aa_pos_sry_hbond = sort(unique(c(
|
||||
aa_ligplus_sry_hbond
|
||||
, aa_plip_sry_hbond)))
|
||||
|
||||
|
||||
aa_pos_rna = sort(unique(c(
|
||||
aa_ligplus_rna
|
||||
, aa_plip_rna)))
|
||||
|
||||
aa_pos_rna_hbond = sort(unique(c(
|
||||
aa_ligplus_rna_hbond
|
||||
, aa_plip_rna_hbond)))
|
||||
|
||||
aa_pos_sam = sort(unique(c(
|
||||
aa_ligplus_sam
|
||||
, aa_plip_sam
|
||||
, aa_arpeg_sam)))
|
||||
|
||||
aa_pos_sam_hbond = sort(unique(c(
|
||||
aa_ligplus_sam_hbond
|
||||
, aa_plip_sam_hbond)))
|
||||
|
||||
aa_pos_amp = sort(unique(c(
|
||||
aa_ligplus_amp
|
||||
, aa_plip_amp
|
||||
, aa_arpeg_amp)))
|
||||
|
||||
aa_pos_amp_hbond = sort(unique(c(
|
||||
aa_ligplus_amp_hbond
|
||||
, aa_plip_amp_hbond)))
|
||||
|
||||
|
||||
cat("\n==================================================="
|
||||
, "\nActive site residues for", gene, "comprise of..."
|
||||
, "\n==================================================="
|
||||
, "\nNo. of", drug, "binding residues:" , length(aa_pos_sry), "\n"
|
||||
, aa_pos_sry
|
||||
, "\nNo. of RNA binding residues:" , length(aa_pos_rna), "\n"
|
||||
, aa_pos_rna
|
||||
, "\nNo. of ligand 'SAM' binding residues:", length(aa_pos_sam), "\n"
|
||||
, aa_pos_sam
|
||||
, "\nNo. of ligand 'AMP' binding residues:", length(aa_pos_amp), "\n"
|
||||
, aa_pos_amp, "\n")
|
||||
|
||||
##############################################################
|
||||
# var for position customisation for plots
|
||||
#aa_pos_drug = #00ff00 # green # as STR doesn't bind
|
||||
aa_pos_lig1 = aa_pos_sam #2f4f4f # darkslategrey
|
||||
aa_pos_lig2 = aa_pos_rna #ff1493 #deeppink
|
||||
aa_pos_lig3 = aa_pos_amp #000080 #navyblue
|
||||
|
||||
tile_map=data.frame(tile=c("STR","SAM","RNA","AMP"),
|
||||
tile_colour=c("#00ff00","#2f4f4f","#ff1493","#000080"))
|
||||
|
||||
# green: #00ff00
|
||||
# darkslategrey : #2f4f4f
|
||||
# deeppink : #ff1493
|
||||
# navyblue :#000080
|
116
config/katg.R
116
config/katg.R
|
@ -1,116 +0,0 @@
|
|||
gene = "katG"
|
||||
drug = "isoniazid"
|
||||
|
||||
#==========
|
||||
# LIGPLUS
|
||||
#===========
|
||||
# hem (1500)
|
||||
aa_ligplus_inh = c(107, 108, 137, 229, 230)
|
||||
#aa_ligplus_inh_hbond # none
|
||||
|
||||
aa_ligplus_hem = c(94, 276, 315, 274, 270, 381, 273, 104, 314, 275,
|
||||
100, 101, 321, 103, 269, 107, 266, 230, 380, 275, 314)
|
||||
|
||||
aa_ligplus_hem_hbond = c(94, 276, 315, 274, 270, 381)
|
||||
aa_ligplus_hem_other = aa_ligplus_hem[!aa_ligplus_hem%in%aa_ligplus_hem_hbond]
|
||||
|
||||
c1 = length(aa_ligplus_hem_other) == length(aa_ligplus_hem) - length(aa_ligplus_hem_hbond)
|
||||
|
||||
#==========
|
||||
# PLIP
|
||||
#===========
|
||||
aa_plip_inh = c(104, 229, 230)
|
||||
aa_plip_inh_hbond = c(104, 229, 230)
|
||||
|
||||
aa_plip_hem = c(104, 107, 248, 252, 265, 275, 321, 412, 274, 276, 315)
|
||||
aa_plip_hem_hbond = c(274, 276, 315)
|
||||
#aa_plip_hem_sb = c(104, 276)
|
||||
#aa_plip_hem_pi = c(107)
|
||||
aa_plip_hem_other = aa_plip_hem[!aa_plip_hem%in%aa_plip_hem_hbond]
|
||||
|
||||
c2 = length(aa_plip_hem_other) == length(aa_plip_hem) - length(aa_plip_hem_hbond)
|
||||
|
||||
#==========
|
||||
# Arpeggio
|
||||
#===========
|
||||
aa_arpeg_inh = c(104, 107, 108, 136, 137, 228, 229, 230, 232, 315)
|
||||
aa_arpeg_inh_hbond = c(104, 137)
|
||||
|
||||
aa_arpeg_hem = c(94, 100, 101, 103, 104, 107, 230, 231, 232, 248
|
||||
, 252, 265, 266, 269, 270, 272, 273, 274, 275, 276, 314, 315
|
||||
, 317, 321, 378, 380, 408, 412)
|
||||
|
||||
#from here
|
||||
|
||||
##############################################################
|
||||
#===============
|
||||
# Active site aa
|
||||
#===============
|
||||
active_aa_pos = sort(unique(c(aa_ligplus_inh
|
||||
, aa_plip_inh
|
||||
, aa_arpeg_inh
|
||||
|
||||
, aa_ligplus_hem
|
||||
, aa_plip_hem
|
||||
, aa_arpeg_hem
|
||||
)))
|
||||
cat("\nNo. of active site residues for gene"
|
||||
, gene, ":"
|
||||
, length(active_aa_pos)
|
||||
, "\nThese are:\n"
|
||||
, active_aa_pos)
|
||||
|
||||
#=================
|
||||
# Drug binding aa
|
||||
#=================
|
||||
aa_pos_inh = sort(unique(c( aa_ligplus_inh
|
||||
, aa_plip_inh
|
||||
, aa_arpeg_inh)))
|
||||
aa_pos_drug = aa_pos_inh
|
||||
|
||||
|
||||
#===============
|
||||
# Hbond aa
|
||||
#===============
|
||||
aa_pos_inh_hbond = sort(unique(c( aa_plip_inh_hbond
|
||||
, aa_arpeg_inh_hbond)))
|
||||
|
||||
#=======================
|
||||
# Other interactions aa
|
||||
#=======================
|
||||
|
||||
|
||||
|
||||
#---------------------------------------------
|
||||
|
||||
aa_pos_hem = sort(unique(c( aa_ligplus_hem
|
||||
, aa_plip_hem
|
||||
, aa_arpeg_hem)))
|
||||
|
||||
aa_pos_hem_hbond = sort(unique(c( aa_ligplus_hem_hbond
|
||||
, aa_plip_hem_hbond
|
||||
#, aa_arpeg_hem_hbond
|
||||
)))
|
||||
|
||||
|
||||
cat("\n==================================================="
|
||||
, "\nActive site residues for", gene, "comprise of..."
|
||||
, "\n==================================================="
|
||||
, "\nNo. of", drug, "binding residues:" , length(aa_pos_inh) , "\n"
|
||||
, aa_pos_inh
|
||||
, "\nNo. of 'HEM' binding residues:" , length(aa_pos_hem) , "\n"
|
||||
, aa_pos_hem, "\n")
|
||||
|
||||
##############################################################
|
||||
# var for position customisation for plots
|
||||
aa_pos_lig1 = aa_pos_hem
|
||||
aa_pos_lig2 = NULL
|
||||
aa_pos_lig3 = NULL
|
||||
tile_map=data.frame(tile=c("INH","HEME"),
|
||||
tile_colour=c("green","darkslategrey"))
|
||||
|
||||
|
||||
|
||||
#toString(aa_pos_hem)
|
||||
#toString(aa_pos_drug)
|
||||
#toString(active_aa_pos)
|
|
@ -1,61 +0,0 @@
|
|||
gene = "pncA"
|
||||
drug = "pyrazinamide"
|
||||
|
||||
#===================================
|
||||
#Iron centre --> purple
|
||||
#Catalytic triad --> yellow
|
||||
#Substrate binding --> teal and blue
|
||||
#H-bond --> green
|
||||
#====================================
|
||||
#aa_plip = c(49, 51, 57, 71, 96 , 133, 134, 138)
|
||||
#aa_ligplus = c(8, 13 , 49 , 133, 134 , 138, 137)
|
||||
#active_aa_pos = sort(unique(c(aa_plip, aa_ligplus)))
|
||||
|
||||
#aa_pos_substrate = c(13, 68, 103, 137)
|
||||
aa_pos_pza = c(13, 68, 103, 137)
|
||||
aa_pos_fe = c(49, 51, 57, 71)
|
||||
aa_pos_catalytic = c(8, 96, 138)
|
||||
aa_pos_hbond = c(133, 134, 8, 138)
|
||||
|
||||
aa_pos_drug = aa_pos_pza
|
||||
#==========
|
||||
# Arpeggio
|
||||
#===========
|
||||
# all same except one extra
|
||||
aa_arpeg = c(102)
|
||||
|
||||
##############################################################
|
||||
active_aa_pos = sort(unique(c(aa_pos_pza
|
||||
, aa_pos_fe
|
||||
, aa_pos_catalytic
|
||||
, aa_pos_hbond
|
||||
, aa_arpeg)))
|
||||
##############################################################
|
||||
cat("\nNo. of active site residues for gene"
|
||||
, gene, ":"
|
||||
, length(active_aa_pos)
|
||||
, "\nThese are:\n"
|
||||
, active_aa_pos)
|
||||
|
||||
cat("\n==================================================="
|
||||
, "\nActive site residues for", gene, "comprise of..."
|
||||
, "\n==================================================="
|
||||
, "\nNo. of", drug, "binding residues:" , length(aa_pos_pza) , "\n"
|
||||
, aa_pos_pza
|
||||
, "\nMetal coordination centre residues:" , length(aa_pos_fe) , "\n"
|
||||
, aa_pos_fe
|
||||
, "\nCatalytic triad residues:" , length(aa_pos_catalytic) , "\n"
|
||||
, aa_pos_catalytic
|
||||
, "\nH-bonding residues:" , length(aa_pos_hbond) , "\n"
|
||||
, aa_pos_hbond , "\n")
|
||||
|
||||
##############################################################
|
||||
# var for position customisation for plots
|
||||
aa_pos_lig1 = aa_pos_fe
|
||||
aa_pos_lig2 = NULL
|
||||
aa_pos_lig3 = NULL
|
||||
#aa_pos_lig2 = aa_pos_catalytic
|
||||
#aa_pos_lig3 = aa_pos_hbond
|
||||
tile_map=data.frame(tile=c("PZA","DPA","CDL","Ca"),
|
||||
tile_colour=c("green","darkslategrey","navyblue","purple"))
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
gene = "rpoB"
|
||||
drug = "rifampicin"
|
||||
|
||||
#==========
|
||||
# LIGPLUS
|
||||
#===========
|
||||
# Error! No atom records found!
|
||||
|
||||
#==========
|
||||
# PLIP
|
||||
#===========
|
||||
aa_plip_rfp = c(429, 432, 491, 487)
|
||||
aa_plip_rfp_hbond = c(429, 432, 487)
|
||||
|
||||
# chainC: equivalent with offset (-6 from 5uhc) accounted
|
||||
aa_plip_5uhc_rfp = c(430, 452, 483
|
||||
, 491, 432, 433
|
||||
, 448, 450, 459, 487)
|
||||
aa_plip_5uhc_rfp_hbond = c(432, 433, 448, 450, 459, 487)
|
||||
|
||||
#==========
|
||||
# Arpeggio
|
||||
#===========
|
||||
# rfp: 1894
|
||||
aa_arpeg_rfp = c(170, 428, 429, 430, 431, 432
|
||||
, 433, 435, 445, 448, 450, 452
|
||||
, 453, 458, 483, 487, 491, 604
|
||||
, 607, 674)
|
||||
|
||||
##############################################################
|
||||
remove_pos = c(170, 674, 604)
|
||||
active_aa_pos = sort(unique(c(aa_plip_rfp
|
||||
, aa_plip_5uhc_rfp
|
||||
, aa_arpeg_rfp)))
|
||||
|
||||
active_aa_pos = active_aa_pos[!active_aa_pos%in%remove_pos]
|
||||
##############################################################
|
||||
cat("\nNo. of active site residues for gene"
|
||||
, gene, ":"
|
||||
, length(active_aa_pos)
|
||||
, "\nThese are:\n"
|
||||
, active_aa_pos)
|
||||
##############################################################
|
||||
aa_pos_rfp = sort(unique(c(aa_plip_rfp
|
||||
, aa_plip_5uhc_rfp
|
||||
, aa_arpeg_rfp)))
|
||||
|
||||
aa_pos_rfp = aa_pos_rfp[!aa_pos_rfp%in%remove_pos]
|
||||
aa_pos_drug = aa_pos_rfp
|
||||
|
||||
aa_pos_rfp_hbond = sort(unique(c(aa_plip_rfp_hbond
|
||||
, aa_plip_5uhc_rfp_hbond)))
|
||||
|
||||
aa_pos_rfp_hbond = aa_pos_rfp_hbond[!aa_pos_rfp_hbond%in%remove_pos]
|
||||
|
||||
cat("\n==================================================="
|
||||
, "\nActive site residues for", gene, "comprise of..."
|
||||
, "\n==================================================="
|
||||
, "\nNo. of", drug, "binding residues:" , length(aa_pos_rfp), "\n"
|
||||
, aa_pos_rfp
|
||||
, "\n\nNO other co-factors or ligands present\n")
|
||||
|
||||
##############################################################
|
||||
# FIXME: these should be populated!
|
||||
aa_pos_lig1 = NULL
|
||||
aa_pos_lig2 = NULL
|
||||
aa_pos_lig3 = NULL
|
||||
tile_map=data.frame(tile=c("RFP"),
|
||||
tile_colour=c("green"))
|
||||
|
||||
|
||||
####
|
||||
chain_suffix = ".C"
|
||||
print(toString(paste0(aa_pos_drug, chain_suffix)))
|
||||
|
||||
# # equivalent resiudes on 5uhc:
|
||||
# active_aa_pos_5uhc = active_aa_pos+6
|
||||
# active_aa_pos_5uhc
|
||||
# print(toString(paste0(active_aa_pos_5uhc, chain_suffix)))
|
||||
|
0
dynamut/format_results_dynamut.py
Normal file → Executable file
0
dynamut/format_results_dynamut.py
Normal file → Executable file
0
dynamut/format_results_dynamut2.py
Normal file → Executable file
0
dynamut/format_results_dynamut2.py
Normal file → Executable file
|
@ -1,817 +0,0 @@
|
|||
A G24V
|
||||
A K27I
|
||||
A K27E
|
||||
A Y28L
|
||||
A Y28H
|
||||
A P29S
|
||||
A V30A
|
||||
A G32S
|
||||
A G33S
|
||||
A G34V
|
||||
A G34A
|
||||
A Q36P
|
||||
A Q36H
|
||||
A D37G
|
||||
A P40T
|
||||
A L43R
|
||||
A L43P
|
||||
A K46N
|
||||
A V47I
|
||||
A L48P
|
||||
A L48R
|
||||
A P52S
|
||||
A D56H
|
||||
A P57S
|
||||
A A61S
|
||||
A F62L
|
||||
A D63G
|
||||
A Y64C
|
||||
A A65T
|
||||
A A66T
|
||||
A V68G
|
||||
A I71F
|
||||
A I71S
|
||||
A V73A
|
||||
A V73G
|
||||
A A75P
|
||||
A L76P
|
||||
A T77R
|
||||
A R78P
|
||||
A R78G
|
||||
A E81V
|
||||
A E82D
|
||||
A V83L
|
||||
A V83G
|
||||
A M84I
|
||||
A M84T
|
||||
A M84L
|
||||
A T85A
|
||||
A T85P
|
||||
A T86P
|
||||
A T86N
|
||||
A S87L
|
||||
A Q88P
|
||||
A Q88E
|
||||
A P89D
|
||||
A W90R
|
||||
A W90C
|
||||
A W91G
|
||||
A W91R
|
||||
A W91L
|
||||
A W91S
|
||||
A P92T
|
||||
A A93G
|
||||
A A93D
|
||||
A A93T
|
||||
A D94N
|
||||
A Y95F
|
||||
A Y95S
|
||||
A H97N
|
||||
A H97P
|
||||
A H97S
|
||||
A Y98C
|
||||
A Y98D
|
||||
A Y98N
|
||||
A G99R
|
||||
A G99E
|
||||
A P100T
|
||||
A L101F
|
||||
A L101M
|
||||
A F102M
|
||||
A F102S
|
||||
A F102I
|
||||
A I103N
|
||||
A I103V
|
||||
A I103T
|
||||
A R104Q
|
||||
A R104W
|
||||
A M105I
|
||||
A A106S
|
||||
A A106V
|
||||
A A106T
|
||||
A A106R
|
||||
A A106G
|
||||
A A109T
|
||||
A A109V
|
||||
A A109S
|
||||
A A109D
|
||||
A A110V
|
||||
A A110T
|
||||
A G111D
|
||||
A T112I
|
||||
A Y113C
|
||||
A I115V
|
||||
A I115S
|
||||
A I115T
|
||||
A H116T
|
||||
A H116E
|
||||
A H116L
|
||||
A H116G
|
||||
A H116A
|
||||
A H116Q
|
||||
A H116F
|
||||
A H116S
|
||||
A H116P
|
||||
A D117E
|
||||
A G120S
|
||||
A G121A
|
||||
A G121S
|
||||
A A122G
|
||||
A A122D
|
||||
A A122T
|
||||
A A122V
|
||||
A G123R
|
||||
A G123E
|
||||
A G124A
|
||||
A G124Q
|
||||
A G124D
|
||||
A G124S
|
||||
A G124H
|
||||
A G124E
|
||||
A G124R
|
||||
A G124T
|
||||
A G125D
|
||||
A G125S
|
||||
A M126Q
|
||||
A M126I
|
||||
A M126A
|
||||
A M126L
|
||||
A M126S
|
||||
A Q127P
|
||||
A R128Q
|
||||
A R128L
|
||||
A R128G
|
||||
A R128W
|
||||
A F129S
|
||||
A A130E
|
||||
A P131Q
|
||||
A P131A
|
||||
A P131L
|
||||
A P131S
|
||||
A L132R
|
||||
A N133S
|
||||
A N133D
|
||||
A S134R
|
||||
A W135S
|
||||
A P136L
|
||||
A N138S
|
||||
A N138H
|
||||
A N138D
|
||||
A A139V
|
||||
A A139P
|
||||
A A139G
|
||||
A S140N
|
||||
A S140G
|
||||
A S140I
|
||||
A L141S
|
||||
A L141F
|
||||
A L141I
|
||||
A L141V
|
||||
A D142G
|
||||
A D142N
|
||||
A K143N
|
||||
A K143E
|
||||
A A144T
|
||||
A A144V
|
||||
A R145H
|
||||
A R145C
|
||||
A R145S
|
||||
A R146L
|
||||
A L148I
|
||||
A W149R
|
||||
A W149L
|
||||
A W149G
|
||||
A W149C
|
||||
A V151L
|
||||
A V151I
|
||||
A K152E
|
||||
A K152T
|
||||
A K153Q
|
||||
A Y155C
|
||||
A Y155S
|
||||
A Y155H
|
||||
A G156D
|
||||
A G156S
|
||||
A K157N
|
||||
A K157R
|
||||
A K157Q
|
||||
A K158S
|
||||
A K158N
|
||||
A L159I
|
||||
A L159F
|
||||
A L159P
|
||||
A W161C
|
||||
A W161R
|
||||
A A162V
|
||||
A A162E
|
||||
A A162T
|
||||
A D163N
|
||||
A D163A
|
||||
A L164R
|
||||
A I165M
|
||||
A I165L
|
||||
A I165Y
|
||||
A I165T
|
||||
A V166I
|
||||
A V166T
|
||||
A F167S
|
||||
A F167L
|
||||
A F167C
|
||||
A A168V
|
||||
A A168T
|
||||
A A168G
|
||||
A G169S
|
||||
A N170K
|
||||
A C171V
|
||||
A C171G
|
||||
A A172T
|
||||
A A172V
|
||||
A L173R
|
||||
A M176T
|
||||
A M176I
|
||||
A F178I
|
||||
A F178S
|
||||
A K179E
|
||||
A T180M
|
||||
A T180K
|
||||
A G182R
|
||||
A G182E
|
||||
A F183L
|
||||
A F183S
|
||||
A G184D
|
||||
A G184A
|
||||
A G184C
|
||||
A G186A
|
||||
A G186S
|
||||
A G186D
|
||||
A R187P
|
||||
A D189N
|
||||
A D189G
|
||||
A D189A
|
||||
A D189Y
|
||||
A W191R
|
||||
A W191G
|
||||
A E192A
|
||||
A E192D
|
||||
A D194N
|
||||
A E195K
|
||||
A V196G
|
||||
A Y197D
|
||||
A W204S
|
||||
A L205R
|
||||
A G206R
|
||||
A E208K
|
||||
A R209C
|
||||
A S211N
|
||||
A S211T
|
||||
A K213E
|
||||
A K213N
|
||||
A R214L
|
||||
A D215H
|
||||
A D215E
|
||||
A N218S
|
||||
A P219L
|
||||
A A222T
|
||||
A Q224R
|
||||
A M225V
|
||||
A I228L
|
||||
A N231K
|
||||
A P232S
|
||||
A P232R
|
||||
A P232T
|
||||
A P232A
|
||||
A E233G
|
||||
A E233Q
|
||||
A G234R
|
||||
A N236D
|
||||
A G237A
|
||||
A G237D
|
||||
A P241H
|
||||
A M242V
|
||||
A M242T
|
||||
A M242I
|
||||
A A243T
|
||||
A A244G
|
||||
A V246R
|
||||
A V246G
|
||||
A I248T
|
||||
A R249G
|
||||
A R249C
|
||||
A R249H
|
||||
A T251K
|
||||
A T251M
|
||||
A F252L
|
||||
A R253G
|
||||
A R253W
|
||||
A R254S
|
||||
A R254C
|
||||
A R254H
|
||||
A R254L
|
||||
A A256T
|
||||
A A256V
|
||||
A A256G
|
||||
A M257I
|
||||
A M257T
|
||||
A M257V
|
||||
A D259G
|
||||
A D259E
|
||||
A D259Y
|
||||
A V260I
|
||||
A V260E
|
||||
A T262P
|
||||
A A264V
|
||||
A A264T
|
||||
A V267A
|
||||
A G268S
|
||||
A G269S
|
||||
A G269D
|
||||
A T271P
|
||||
A T271S
|
||||
A T271I
|
||||
A T271A
|
||||
A F272L
|
||||
A F272S
|
||||
A F272V
|
||||
A G273R
|
||||
A G273C
|
||||
A T275P
|
||||
A T275A
|
||||
A H276Q
|
||||
A G277S
|
||||
A G279D
|
||||
A P280S
|
||||
A P280Q
|
||||
A A281V
|
||||
A A281G
|
||||
A A281T
|
||||
A D282G
|
||||
A G285C
|
||||
A G285S
|
||||
A G285V
|
||||
A G285D
|
||||
A G285A
|
||||
A P286L
|
||||
A P288H
|
||||
A P288L
|
||||
A E289A
|
||||
A E289K
|
||||
A A290V
|
||||
A A290P
|
||||
A A291D
|
||||
A P292A
|
||||
A Q295A
|
||||
A Q295P
|
||||
A Q295E
|
||||
A M296V
|
||||
A M296T
|
||||
A G297V
|
||||
A G297L
|
||||
A L298S
|
||||
A G299S
|
||||
A G299C
|
||||
A G299V
|
||||
A G299A
|
||||
A G299D
|
||||
A W300S
|
||||
A W300G
|
||||
A W300R
|
||||
A W300C
|
||||
A S302R
|
||||
A S302T
|
||||
A G305C
|
||||
A G305A
|
||||
A T306A
|
||||
A T306S
|
||||
A G307R
|
||||
A T308P
|
||||
A T308S
|
||||
A T308K
|
||||
A T308A
|
||||
A T308V
|
||||
A T308I
|
||||
A D311G
|
||||
A A312P
|
||||
A A312E
|
||||
A A312V
|
||||
A T314S
|
||||
A T314N
|
||||
A T314A
|
||||
A S315T
|
||||
A S315N
|
||||
A S315I
|
||||
A S315G
|
||||
A S315R
|
||||
A I317L
|
||||
A I317V
|
||||
A I317T
|
||||
A E318K
|
||||
A V320L
|
||||
A V320A
|
||||
A T322A
|
||||
A T322M
|
||||
A N323P
|
||||
A N323S
|
||||
A N323H
|
||||
A T324N
|
||||
A T324P
|
||||
A T324S
|
||||
A T324L
|
||||
A P325S
|
||||
A P325T
|
||||
A T326P
|
||||
A T326M
|
||||
A K327T
|
||||
A W328L
|
||||
A W328S
|
||||
A W328R
|
||||
A W328C
|
||||
A D329A
|
||||
A D329E
|
||||
A D329H
|
||||
A S331T
|
||||
A S331I
|
||||
A S331R
|
||||
A L333F
|
||||
A L333C
|
||||
A E334K
|
||||
A I335V
|
||||
A I335T
|
||||
A I335N
|
||||
A L336M
|
||||
A Y337C
|
||||
A Y337H
|
||||
A Y337F
|
||||
A Y337S
|
||||
A G338S
|
||||
A Y339N
|
||||
A Y339C
|
||||
A Y339S
|
||||
A E340D
|
||||
A E342G
|
||||
A T344L
|
||||
A T344K
|
||||
A T344S
|
||||
A T344M
|
||||
A A348V
|
||||
A A348G
|
||||
A G349D
|
||||
A Q352Y
|
||||
A Y353H
|
||||
A Y353F
|
||||
A T354I
|
||||
A D357H
|
||||
A I364N
|
||||
A D366N
|
||||
A P367L
|
||||
A F368L
|
||||
A S374A
|
||||
A S374P
|
||||
A L378P
|
||||
A L378M
|
||||
A A379V
|
||||
A A379T
|
||||
A T380S
|
||||
A T380P
|
||||
A T380I
|
||||
A T380A
|
||||
A T380N
|
||||
A D381A
|
||||
A L382I
|
||||
A L382R
|
||||
A S383W
|
||||
A S383A
|
||||
A L384R
|
||||
A R385P
|
||||
A V386M
|
||||
A V386E
|
||||
A D387N
|
||||
A Y390C
|
||||
A R392W
|
||||
A T394P
|
||||
A T394M
|
||||
A T394A
|
||||
A R395C
|
||||
A L398R
|
||||
A E399D
|
||||
A E399K
|
||||
A H400Y
|
||||
A H400P
|
||||
A E402A
|
||||
A E402K
|
||||
A L404W
|
||||
A D406A
|
||||
A D406E
|
||||
A D406G
|
||||
A E407A
|
||||
A E407K
|
||||
A F408Y
|
||||
A F408S
|
||||
A F408L
|
||||
A F408V
|
||||
A A411D
|
||||
A Y413C
|
||||
A Y413F
|
||||
A Y413H
|
||||
A Y413S
|
||||
A K414R
|
||||
A I416M
|
||||
A I416T
|
||||
A I416L
|
||||
A I416V
|
||||
A D419H
|
||||
A D419G
|
||||
A D419Y
|
||||
A D419V
|
||||
A P422H
|
||||
A P422L
|
||||
A V423I
|
||||
A A424V
|
||||
A A424G
|
||||
A R425K
|
||||
A L427P
|
||||
A L427R
|
||||
A L427F
|
||||
A L430A
|
||||
A P432L
|
||||
A P432T
|
||||
A K433T
|
||||
A Q434P
|
||||
A L437R
|
||||
A W438G
|
||||
A Q439K
|
||||
A Q439H
|
||||
A Q439R
|
||||
A Q439T
|
||||
A D440G
|
||||
A P441L
|
||||
A V442L
|
||||
A V442A
|
||||
A V445I
|
||||
A S446N
|
||||
A D448A
|
||||
A D448E
|
||||
A V450I
|
||||
A V450A
|
||||
A G451D
|
||||
A E452Q
|
||||
A I455L
|
||||
A L458H
|
||||
A K459T
|
||||
A S460N
|
||||
A Q461P
|
||||
A Q461R
|
||||
A Q461E
|
||||
A I462S
|
||||
A R463L
|
||||
A R463W
|
||||
A S465P
|
||||
A T468P
|
||||
A V469L
|
||||
A V469I
|
||||
A Q471R
|
||||
A V473L
|
||||
A V473F
|
||||
A S474Q
|
||||
A T475I
|
||||
A T475A
|
||||
A A476E
|
||||
A A476V
|
||||
A A478R
|
||||
A A479P
|
||||
A A479G
|
||||
A A479V
|
||||
A A479Q
|
||||
A A480Q
|
||||
A A480S
|
||||
A S481A
|
||||
A S481L
|
||||
A S482T
|
||||
A F483L
|
||||
A R484H
|
||||
A R484G
|
||||
A K488E
|
||||
A R489C
|
||||
A G490D
|
||||
A G490C
|
||||
A G490S
|
||||
A G491S
|
||||
A A492V
|
||||
A A492D
|
||||
A N493K
|
||||
A G494S
|
||||
A G494A
|
||||
A G495S
|
||||
A G495A
|
||||
A G495C
|
||||
A R496L
|
||||
A R496C
|
||||
A R498S
|
||||
A P501S
|
||||
A V503A
|
||||
A V503S
|
||||
A W505L
|
||||
A V507I
|
||||
A N508D
|
||||
A D509E
|
||||
A D509N
|
||||
A P510A
|
||||
A D511N
|
||||
A D513N
|
||||
A L514P
|
||||
A L514V
|
||||
A R515H
|
||||
A K516R
|
||||
A R519H
|
||||
A T520A
|
||||
A L521P
|
||||
A E522K
|
||||
A E523D
|
||||
A Q525P
|
||||
A Q525A
|
||||
A Q525K
|
||||
A Q525S
|
||||
A E526D
|
||||
A S527L
|
||||
A N529T
|
||||
A A532P
|
||||
A A532V
|
||||
A P533L
|
||||
A G534A
|
||||
A G534R
|
||||
A K537E
|
||||
A V538A
|
||||
A F540S
|
||||
A A541T
|
||||
A D542E
|
||||
A L546F
|
||||
A C549S
|
||||
A A550D
|
||||
A A551S
|
||||
A A555P
|
||||
A A556S
|
||||
A K557N
|
||||
A G560R
|
||||
A G560A
|
||||
A G560S
|
||||
A H561R
|
||||
A N562H
|
||||
A V565G
|
||||
A P566L
|
||||
A F567S
|
||||
A F567L
|
||||
A F567V
|
||||
A T568P
|
||||
A P569L
|
||||
A G570F
|
||||
A R571L
|
||||
A A574V
|
||||
A T579A
|
||||
A T579S
|
||||
A S583P
|
||||
A F584V
|
||||
A V586M
|
||||
A L587R
|
||||
A L587P
|
||||
A E588G
|
||||
A A591T
|
||||
A G593C
|
||||
A F594I
|
||||
A F594L
|
||||
A N596S
|
||||
A Y597H
|
||||
A Y597S
|
||||
A Y597D
|
||||
A L598F
|
||||
A L598R
|
||||
A G599R
|
||||
A K600Q
|
||||
A N602D
|
||||
A P603L
|
||||
A P605S
|
||||
A A606P
|
||||
A A606T
|
||||
A E607D
|
||||
A Y608D
|
||||
A M609T
|
||||
A L611R
|
||||
A D612G
|
||||
A A614T
|
||||
A A614G
|
||||
A A614E
|
||||
A L616S
|
||||
A T618M
|
||||
A S620T
|
||||
A A621T
|
||||
A A621D
|
||||
A M624V
|
||||
A M624K
|
||||
A M624I
|
||||
A T625A
|
||||
A T625K
|
||||
A L627P
|
||||
A V628I
|
||||
A G629D
|
||||
A G629C
|
||||
A G630R
|
||||
A G630V
|
||||
A V633A
|
||||
A V633I
|
||||
A L634I
|
||||
A A636T
|
||||
A N637D
|
||||
A N637H
|
||||
A N637K
|
||||
A Y638C
|
||||
A Y638H
|
||||
A G644D
|
||||
A G644S
|
||||
A G644V
|
||||
A E648D
|
||||
A A649T
|
||||
A A649G
|
||||
A S650F
|
||||
A S650P
|
||||
A E651D
|
||||
A L653Q
|
||||
A T654S
|
||||
A N655D
|
||||
A F657S
|
||||
A F657L
|
||||
A N660D
|
||||
A L661M
|
||||
A L662V
|
||||
A D663G
|
||||
A D663Y
|
||||
A I666V
|
||||
A T667P
|
||||
A T667I
|
||||
A W668C
|
||||
A W668L
|
||||
A A673V
|
||||
A D675Y
|
||||
A D675G
|
||||
A D675H
|
||||
A T677P
|
||||
A Y678C
|
||||
A Q679E
|
||||
A Q679Y
|
||||
A G680D
|
||||
A K681Q
|
||||
A K681T
|
||||
A S684R
|
||||
A K686E
|
||||
A W689G
|
||||
A W689R
|
||||
A T690I
|
||||
A T690P
|
||||
A G691D
|
||||
A S692R
|
||||
A R693C
|
||||
A R693H
|
||||
A D695A
|
||||
A L696Q
|
||||
A L696P
|
||||
A V697A
|
||||
A F698V
|
||||
A G699E
|
||||
A G699V
|
||||
A S700P
|
||||
A S700F
|
||||
A E703Q
|
||||
A L704W
|
||||
A L704S
|
||||
A R705L
|
||||
A R705G
|
||||
A R705W
|
||||
A L707R
|
||||
A L707F
|
||||
A E709A
|
||||
A E709G
|
||||
A V710I
|
||||
A V710A
|
||||
A Y711D
|
||||
A A713S
|
||||
A D714E
|
||||
A D714N
|
||||
A D714G
|
||||
A P718S
|
||||
A F720S
|
||||
A D723N
|
||||
A D723A
|
||||
A A726T
|
||||
A A727S
|
||||
A A727T
|
||||
A W728R
|
||||
A D729N
|
||||
A D729V
|
||||
A D729G
|
||||
A D729T
|
||||
A V731M
|
||||
A V731A
|
||||
A N733S
|
||||
A L734R
|
||||
A D735A
|
||||
A R736K
|
||||
A R736S
|
||||
A V739M
|
||||
A R740S
|
|
|
@ -1,11 +0,0 @@
|
|||
Dynamut was painfully run for gid, part manually, part programmatically!
|
||||
|
||||
However, it was decided to ditch that and only run Dynamut2 for future targets
|
||||
|
||||
Dynamut2 was run through the website in batches of 50 for
|
||||
katG: 17 batches (00..16)
|
||||
rpoB: 23 batches (00..22)
|
||||
alr: 6 batches (00..05)
|
||||
|
||||
However, the API was used for rpoB batches (09-22) from 13 Oct 2021
|
||||
as jobs started to flake and fail through the website!
|
5
dynamut/run_format_results_dynamut.py
Normal file → Executable file
5
dynamut/run_format_results_dynamut.py
Normal file → Executable file
|
@ -26,6 +26,7 @@ arg_parser.add_argument('-i', '--input_dir' , help = 'Input dir containing pdb f
|
|||
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
|
||||
#arg_parser.add_argument('--mkdir_name' , help = 'Output dir for processed results. This will be created if it does not exist')
|
||||
arg_parser.add_argument('-m', '--make_dirs' , help = 'Make dir for input and output', action='store_true')
|
||||
|
||||
arg_parser.add_argument('--debug' , action = 'store_true' , help = 'Debug Mode')
|
||||
|
||||
args = arg_parser.parse_args()
|
||||
|
@ -56,8 +57,8 @@ outdir_dynamut = outdir + 'dynamut_results/'
|
|||
outdir_dynamut2 = outdir + 'dynamut_results/dynamut2/'
|
||||
|
||||
# Input file
|
||||
#infile_dynamut = outdir_dynamut + gene.lower() + '_dynamut_all_output_clean.csv'
|
||||
infile_dynamut2 = outdir_dynamut2 + gene.lower() + '_dynamut2_output_combined_clean.csv'
|
||||
infile_dynamut = outdir_dynamut + gene + '_dynamut_all_output_clean.csv'
|
||||
infile_dynamut2 = outdir_dynamut2 + gene + '_dynamut2_output_combined_clean.csv'
|
||||
|
||||
# Formatted output filename
|
||||
outfile_dynamut_f = outdir_dynamut2 + gene + '_dynamut_norm.csv'
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/bash
|
||||
|
||||
# FIXME: This is written for expediency to kickstart running dynamut, mcsm-PPI2 (batch of 50) and mCSM-NA (batch of 20)
|
||||
# FIXME: This is written for expediency to kickstart running dynamut and mcsm-NA
|
||||
|
||||
# Usage: ~/git/LSHTM_analysis/dynamut/split_csv.sh <input file> <output dir> <chunk size in lines>
|
||||
# copy your snp file to split into the dynamut dir
|
||||
|
@ -12,13 +12,8 @@ CHUNK=$3
|
|||
mkdir -p ${OUTDIR}/${CHUNK}
|
||||
cd ${OUTDIR}/${CHUNK}
|
||||
|
||||
# makes the 2 dirs, hence ../..
|
||||
split ../../${INFILE} -l ${CHUNK} -d snp_batch_
|
||||
|
||||
# use case
|
||||
#~/git/LSHTM_analysis/dynamut/split_csv.sh gid_mcsm_formatted_snps.csv snp_batches 50
|
||||
#~/git/LSHTM_analysis/dynamut/split_csv.sh embb_mcsm_formatted_snps.csv snp_batches 50
|
||||
#~/git/LSHTM_analysis/dynamut/split_csv.sh pnca_mcsm_formatted_snps.csv snp_batches 50
|
||||
#~/git/LSHTM_analysis/dynamut/split_csv.sh katg_mcsm_formatted_snps.csv snp_batches 50 #Date: 20/09/2021
|
||||
|
||||
# add .txt to the files
|
||||
|
|
|
@ -1,41 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# FIXME: This is written for expediency to kickstart running dynamut, mcsm-PPI2 (batch of 50) and mCSM-NA (batch of 20)
|
||||
|
||||
# Usage: ~/git/LSHTM_analysis/dynamut/split_csv.sh <input file> <output dir> <chunk size in lines>
|
||||
# copy your snp file to split into the dynamut dir
|
||||
# use sed to add chain ID to snp file and then split to avoid post processing
|
||||
|
||||
INFILE=$1
|
||||
OUTDIR=$2
|
||||
CHUNK=$3
|
||||
|
||||
mkdir -p ${OUTDIR}/${CHUNK}/chain_added
|
||||
cd ${OUTDIR}/${CHUNK}/chain_added
|
||||
|
||||
# makes the 3 dirs, hence ../../..
|
||||
split ../../../${INFILE} -l ${CHUNK} -d snp_batch_
|
||||
|
||||
########################################################################
|
||||
# use cases
|
||||
# Date: 20/09/2021
|
||||
# sed -e 's/^/A /g' katg_mcsm_formatted_snps.csv > katg_mcsm_formatted_snps_chain.csv
|
||||
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh katg_mcsm_formatted_snps_chain.csv snp_batches 50
|
||||
|
||||
# Date: 01/10/2021
|
||||
# sed -e 's/^/A /g' rpob_mcsm_formatted_snps.csv > rpob_mcsm_formatted_snps_chain.csv
|
||||
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh rpob_mcsm_formatted_snps_chain.csv snp_batches 50
|
||||
|
||||
# Date: 02/10/2021
|
||||
# sed -e 's/^/A /g' alr_mcsm_formatted_snps.csv > alr_mcsm_formatted_snps_chain.csv
|
||||
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh alr_mcsm_formatted_snps_chain.csv snp_batches 50
|
||||
|
||||
# Date: 05/10/2021
|
||||
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh alr_mcsm_formatted_snps_chain.csv snp_batches 20
|
||||
|
||||
# Date: 30/11/2021
|
||||
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh katg_mcsm_formatted_snps_chain.csv snp_batches 20
|
||||
for i in {00..40}; do mv snp_batch_${i} snp_batch_${i}.txt; done
|
||||
|
||||
# add .txt to the files
|
||||
########################################################################
|
|
@ -41,7 +41,7 @@ arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By
|
|||
arg_parser.add_argument('-p', '--process_dir', help = 'Temp processing dir for running foldX. By default, it assmes homedir + <drug> + processing. Make sure it is somewhere with LOTS of storage as it writes all output!') #FIXME
|
||||
|
||||
arg_parser.add_argument('-P', '--pdb_file', help = 'PDB File to process. By default, it assmumes a file called <gene>_complex.pdb in input_dir')
|
||||
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_formatted_snps.csv exists')
|
||||
arg_parser.add_argument('-m', '--mutation_file', help = 'Mutation list. By default, assumes a file called <gene>_mcsm_snps.csv exists')
|
||||
|
||||
# FIXME: Doesn't work with 2 chains yet!
|
||||
arg_parser.add_argument('-c1', '--chain1', help = 'Chain1 ID', default = 'A') # case sensitive
|
||||
|
@ -148,16 +148,6 @@ print('Arguments being passed:'
|
|||
, '\noutput file:', outfile_foldx
|
||||
, '\n=============================================================')
|
||||
|
||||
|
||||
# make sure rotabase.txt exists in the process_dir
|
||||
rotabase_file = process_dir + '/' + 'rotabase.txt'
|
||||
|
||||
if Path(rotabase_file).is_file():
|
||||
print(f'rotabase file: {rotabase_file} exists')
|
||||
else:
|
||||
print(f'ERROR: rotabase file: {rotabase_file} does not exist. Please download it and put it in {process_dir}')
|
||||
sys.exit()
|
||||
|
||||
#### Delay for 10 seconds to check the params ####
|
||||
print('Sleeping for 10 seconds to give you time to cancel')
|
||||
time.sleep(10)
|
||||
|
@ -245,13 +235,6 @@ def main():
|
|||
nmuts = len(mutlist)
|
||||
print(nmuts)
|
||||
print(mutlist)
|
||||
print('start')
|
||||
#subprocess.check_output(['bash','repairPDB.sh', pdbname, process_dir])
|
||||
print('\033[95mSTAGE: repair PDB\033[0m')
|
||||
print('EXECUTING: repairPDB.sh %s %s %s' % (indir, actual_pdb_filename, process_dir))
|
||||
#subprocess.check_output(['bash','repairPDB.sh', indir, actual_pdb_filename, process_dir])
|
||||
# once you decide to use the function
|
||||
# repairPDB(pdbname)
|
||||
|
||||
print('start')
|
||||
# some common parameters for foldX
|
||||
|
@ -259,74 +242,61 @@ def main():
|
|||
|
||||
print('\033[95mSTAGE: repair PDB (foldx subprocess) \033[0m')
|
||||
print('Running foldx RepairPDB for WT')
|
||||
|
||||
fold_RepairDB = ['foldx'
|
||||
subprocess.call(['foldx'
|
||||
, '--command=RepairPDB'
|
||||
, foldx_common
|
||||
# , '--pdb-dir=' + os.path.dirname(pdb_filename)
|
||||
, '--pdb-dir=' + indir
|
||||
, '--pdb-dir=' + os.path.dirname(pdb_filename)
|
||||
, '--pdb=' + actual_pdb_filename
|
||||
, 'outPDB=true'
|
||||
, '--output-dir=' + process_dir]
|
||||
print('CMD:', fold_RepairDB)
|
||||
subprocess.call(fold_RepairDB)
|
||||
, '--output-dir=' + process_dir])
|
||||
print('\033[95mCOMPLETED STAGE: repair PDB\033[0m')
|
||||
print('\n==========================================================')
|
||||
|
||||
|
||||
print('\033[95mSTAGE: Foldx commands BM, PN and SD (foldx subprocess) for WT\033[0m')
|
||||
print('Running foldx BuildModel for WT')
|
||||
|
||||
foldx_BuildModel = ['foldx'
|
||||
subprocess.call(['foldx'
|
||||
, '--command=BuildModel'
|
||||
, foldx_common
|
||||
, '--pdb-dir=' + process_dir
|
||||
, '--pdb=' + pdbname + '_Repair.pdb'
|
||||
, '--mutant-file=' + process_dir + '/' + 'individual_list_' + pdbname +'.txt'
|
||||
, '--mutant-file="individual_list_' + pdbname +'.txt"'
|
||||
, 'outPDB=true'
|
||||
, '--numberOfRuns=1'
|
||||
, '--output-dir=' + process_dir]
|
||||
print('CMD:', foldx_BuildModel)
|
||||
subprocess.call( foldx_BuildModel, cwd=process_dir)
|
||||
, '--output-dir=' + process_dir], cwd=process_dir)
|
||||
|
||||
print('Running foldx PrintNetworks for WT')
|
||||
foldx_PrintNetworks = ['foldx'
|
||||
subprocess.call(['foldx'
|
||||
, '--command=PrintNetworks'
|
||||
, '--pdb-dir=' + process_dir
|
||||
, '--pdb=' + pdbname + '_Repair.pdb'
|
||||
, '--water=PREDICT'
|
||||
, '--vdwDesign=1'
|
||||
, '--output-dir=' + process_dir]
|
||||
print('CMD:', foldx_PrintNetworks)
|
||||
subprocess.call(foldx_PrintNetworks, cwd=process_dir)
|
||||
, '--output-dir=' + process_dir], cwd=process_dir)
|
||||
|
||||
print('Running foldx SequenceDetail for WT')
|
||||
foldx_SequenceDetail = ['foldx'
|
||||
subprocess.call(['foldx'
|
||||
, '--command=SequenceDetail'
|
||||
, '--pdb-dir=' + process_dir
|
||||
, '--pdb=' + pdbname + '_Repair.pdb'
|
||||
, '--water=PREDICT'
|
||||
, '--vdwDesign=1'
|
||||
, '--output-dir=' + process_dir]
|
||||
print('CMD:', foldx_SequenceDetail)
|
||||
subprocess.call(foldx_SequenceDetail , cwd=process_dir)
|
||||
|
||||
, '--output-dir=' + process_dir], cwd=process_dir)
|
||||
print('\033[95mCOMPLETED STAGE: Foldx commands BM, PN and SD\033[0m')
|
||||
print('\n==========================================================')
|
||||
|
||||
|
||||
print('\033[95mSTAGE: Print Networks (foldx subprocess) for MT\033[0m')
|
||||
for n in range(1,nmuts+1):
|
||||
print('\033[95mNETWORK:\033[0m', n)
|
||||
print('Running foldx PrintNetworks for mutation', n)
|
||||
foldx_PrintNetworksMT = ['foldx'
|
||||
subprocess.call(['foldx'
|
||||
, '--command=PrintNetworks'
|
||||
, '--pdb-dir=' + process_dir
|
||||
, '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
|
||||
, '--water=PREDICT'
|
||||
, '--vdwDesign=1'
|
||||
, '--output-dir=' + process_dir]
|
||||
print('CMD:', foldx_PrintNetworksMT)
|
||||
subprocess.call( foldx_PrintNetworksMT , cwd=process_dir)
|
||||
, '--output-dir=' + process_dir], cwd=process_dir)
|
||||
print('\033[95mCOMPLETED STAGE: Print Networks (foldx subprocess) for MT\033[0m')
|
||||
print('\n==========================================================')
|
||||
|
||||
|
@ -353,16 +323,14 @@ def main():
|
|||
print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for WT\033[0m')
|
||||
chain1=chainA
|
||||
chain2=chainB
|
||||
foldx_AnalyseComplex = ['foldx'
|
||||
subprocess.call(['foldx'
|
||||
, '--command=AnalyseComplex'
|
||||
, '--pdb-dir=' + process_dir
|
||||
, '--pdb=' + pdbname + '_Repair.pdb'
|
||||
, '--analyseComplexChains=' + chain1 + ',' + chain2
|
||||
, '--water=PREDICT'
|
||||
, '--vdwDesign=1'
|
||||
, '--output-dir=' + process_dir]
|
||||
print('CMD:',foldx_AnalyseComplex)
|
||||
subprocess.call(foldx_AnalyseComplex, cwd=process_dir)
|
||||
, '--output-dir=' + process_dir], cwd=process_dir)
|
||||
|
||||
# FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
|
||||
ac_source = process_dir + '/Summary_' + pdbname + '_Repair_AC.fxout'
|
||||
|
@ -372,16 +340,14 @@ def main():
|
|||
|
||||
for n in range(1,nmuts+1):
|
||||
print('\033[95mSTAGE: Running foldx AnalyseComplex (foldx subprocess) for mutation:\033[0m', n)
|
||||
foldx_AnalyseComplex = ['foldx'
|
||||
subprocess.call(['foldx'
|
||||
, '--command=AnalyseComplex'
|
||||
, '--pdb-dir=' + process_dir
|
||||
, '--pdb=' + pdbname + '_Repair_' + str(n) + '.pdb'
|
||||
, '--analyseComplexChains=' + chain1 + ',' + chain2
|
||||
, '--water=PREDICT'
|
||||
, '--vdwDesign=1'
|
||||
, '--output-dir=' + process_dir]
|
||||
print('CMD:', foldx_AnalyseComplex)
|
||||
subprocess.call( foldx_AnalyseComplex , cwd=process_dir)
|
||||
, '--output-dir=' + process_dir], cwd=process_dir)
|
||||
|
||||
# FIXME why would we ever need to do this?!? Cargo-culted from runcomplex.sh
|
||||
ac_mut_source = process_dir + '/Summary_' + pdbname + '_Repair_' + str(n) +'_AC.fxout'
|
||||
|
|
0
mcsm_na/examples.py
Normal file → Executable file
0
mcsm_na/examples.py
Normal file → Executable file
7
mcsm_na/format_results_mcsm_na.py
Normal file → Executable file
7
mcsm_na/format_results_mcsm_na.py
Normal file → Executable file
|
@ -51,7 +51,7 @@ def format_mcsm_na_output(mcsm_na_output_tsv):
|
|||
print('Assigning meaningful colnames'
|
||||
, '\n=======================================================')
|
||||
my_colnames_dict = {'PDB_FILE': 'pdb_file' # relevant info from this col will be extracted and the column discarded
|
||||
, 'CHAIN': 'chain'
|
||||
, 'CHAIN': 'chain' # {wild_type}<position>{mutant_type}
|
||||
, 'WILD_RES': 'wild_type' # one letter amino acid code
|
||||
, 'RES_POS': 'position' # number
|
||||
, 'MUT_RES': 'mutant_type' # one letter amino acid code
|
||||
|
@ -65,8 +65,8 @@ def format_mcsm_na_output(mcsm_na_output_tsv):
|
|||
#############
|
||||
# create mutationinformation column
|
||||
#############
|
||||
#mcsm_na_data['mutationinformation'] = mcsm_na_data['wild_type'] + mcsm_na_data.position.map(str) + mcsm_na_data['mutant_type']
|
||||
mcsm_na_data['mutationinformation'] = mcsm_na_data.loc[:,'wild_type'] + mcsm_na_data.loc[:,'position'].astype(int).apply(str) + mcsm_na_data.loc[:,'mutant_type']
|
||||
mcsm_na_data['mutationinformation'] = mcsm_na_data['wild_type'] + mcsm_na_data.position.map(str) + mcsm_na_data['mutant_type']
|
||||
|
||||
#%%=====================================================================
|
||||
#############
|
||||
# Create col: mcsm_na_outcome
|
||||
|
@ -132,3 +132,4 @@ def format_mcsm_na_output(mcsm_na_output_tsv):
|
|||
, 'pdb_file']]
|
||||
return(mcsm_na_dataf)
|
||||
#%%#####################################################################
|
||||
|
||||
|
|
0
mcsm_na/get_results_mcsm_na.py
Normal file → Executable file
0
mcsm_na/get_results_mcsm_na.py
Normal file → Executable file
0
mcsm_na/run_format_results_mcsm_na.py
Normal file → Executable file
0
mcsm_na/run_format_results_mcsm_na.py
Normal file → Executable file
|
@ -1,27 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# FIXME: This is written for expediency to kickstart running dynamut, mcsm-PPI2 (batch of 50) and mCSM-NA (batch of 20)
|
||||
|
||||
# Usage: ~/git/LSHTM_analysis/dynamut/split_csv.sh <input file> <output dir> <chunk size in lines>
|
||||
# copy your snp file to split into the dynamut dir
|
||||
# use sed to add chain ID to snp file and then split to avoid post processing
|
||||
|
||||
INFILE=$1
|
||||
OUTDIR=$2
|
||||
CHUNK=$3
|
||||
|
||||
mkdir -p ${OUTDIR}/${CHUNK}/chain_added
|
||||
cd ${OUTDIR}/${CHUNK}/chain_added
|
||||
|
||||
# makes the 3 dirs, hence ../../..
|
||||
split ../../../${INFILE} -l ${CHUNK} -d snp_batch_
|
||||
|
||||
########################################################################
|
||||
# use cases
|
||||
|
||||
# Date: 29/10/2021, 5UHC (for rifampicin)
|
||||
~/git/LSHTM_analysis/mcsm_na/split_csv_chain.sh rpob_mcsm_formatted_snps_chain.csv snp_batches 20
|
||||
|
||||
# add .txt to the files
|
||||
for i in {00..56}; do mv snp_batch_${i} snp_batch_${i}_chain.txt; done
|
||||
########################################################################
|
0
mcsm_na/submit_mcsm_na.py
Normal file → Executable file
0
mcsm_na/submit_mcsm_na.py
Normal file → Executable file
|
@ -24,7 +24,7 @@ from reference_dict import up_3letter_aa_dict
|
|||
from reference_dict import oneletter_aa_dict
|
||||
#%%============================================================================
|
||||
|
||||
def format_mcsm_ppi2_output(mcsm_ppi2_output_csv, gene_name):
|
||||
def format_mcsm_ppi2_output(mcsm_ppi2_output_csv):
|
||||
"""
|
||||
@param mcsm_ppi2_output_csv: file containing mcsm_ppi2_results for all mcsm snps
|
||||
which is the result of combining all mcsm_ppi2 batch results, and using
|
||||
|
@ -79,21 +79,7 @@ def format_mcsm_ppi2_output(mcsm_ppi2_output_csv, gene_name):
|
|||
# # check
|
||||
# mcsm_ppi2_data['wild-type'].equals(mcsm_ppi2_data['WILD'])
|
||||
# mcsm_ppi2_data['mutant'].equals(mcsm_ppi2_data['MUT'])
|
||||
#%%=====================================================================
|
||||
# add offset specified position number for rpob since 5uhc with chain 'C' was
|
||||
# used to run the analysis
|
||||
|
||||
geneL_sp = ['rpob']
|
||||
if gene_name.lower() in geneL_sp:
|
||||
offset = 6
|
||||
chain_orig = 'A'
|
||||
|
||||
# Add offset corrected position number. matching with rpob nsSNPs used for mCSM-lig
|
||||
# and also add corresponding chain id matching with rpob nsSNPs used for mCSM-lig
|
||||
mcsm_ppi2_data['position'] = mcsm_ppi2_data['res-number'] - offset
|
||||
mcsm_ppi2_data['chain'] = chain_orig
|
||||
mcsm_ppi2_data['5uhc_offset'] = offset
|
||||
|
||||
#%%============================================================================
|
||||
#############
|
||||
# rename cols
|
||||
#############
|
||||
|
@ -102,30 +88,17 @@ def format_mcsm_ppi2_output(mcsm_ppi2_output_csv, gene_name):
|
|||
print('Assigning meaningful colnames'
|
||||
, '\n=======================================================')
|
||||
|
||||
my_colnames_dict = {'chain' : 'chain'
|
||||
, 'position' : 'position'
|
||||
, '5uhc_offset' : '5uhc_offset'
|
||||
, 'wild-type' : 'wt_upper'
|
||||
, 'res-number' : '5uhc_position'
|
||||
, 'mutant' : 'mut_upper'
|
||||
my_colnames_dict = {'chain': 'chain'
|
||||
, 'wild-type': 'wt_upper'
|
||||
, 'res-number': 'position'
|
||||
, 'mutant': 'mut_upper'
|
||||
, 'distance-to-interface': 'interface_dist'
|
||||
, 'mcsm-ppi2-prediction' : 'mcsm_ppi2_affinity'
|
||||
, 'affinity' : 'mcsm_ppi2_outcome'
|
||||
, 'w_type' : 'wild_type' # one letter amino acid code
|
||||
, 'm_type' : 'mutant_type' # one letter amino acid code
|
||||
}
|
||||
else:
|
||||
my_colnames_dict = {'chain' : 'chain'
|
||||
, 'wild-type' : 'wt_upper'
|
||||
, 'res-number' : 'position'
|
||||
, 'mutant' : 'mut_upper'
|
||||
, 'distance-to-interface': 'interface_dist'
|
||||
, 'mcsm-ppi2-prediction' : 'mcsm_ppi2_affinity'
|
||||
, 'affinity' : 'mcsm_ppi2_outcome'
|
||||
, 'w_type' : 'wild_type' # one letter amino acid code
|
||||
, 'm_type' : 'mutant_type' # one letter amino acid code
|
||||
}
|
||||
#%%==============================================================================
|
||||
, 'mcsm-ppi2-prediction': 'mcsm_ppi2_affinity'
|
||||
, 'affinity': 'mcsm_ppi2_outcome'
|
||||
, 'w_type': 'wild_type' # one letter amino acid code
|
||||
, 'm_type': 'mutant_type' # one letter amino acid code
|
||||
}
|
||||
|
||||
mcsm_ppi2_data.rename(columns = my_colnames_dict, inplace = True)
|
||||
mcsm_ppi2_data.columns
|
||||
|
||||
|
@ -164,17 +137,13 @@ def format_mcsm_ppi2_output(mcsm_ppi2_output_csv, gene_name):
|
|||
, '\nExpected number:', mcsm_ppi2_pos
|
||||
, '\nGot:', mcsm_ppi2_pos2
|
||||
, '\n======================================================')
|
||||
|
||||
#%%=====================================================================
|
||||
###################
|
||||
#############
|
||||
# reorder columns
|
||||
###################
|
||||
#############
|
||||
mcsm_ppi2_data.columns
|
||||
|
||||
#---------------------
|
||||
# Determine col order
|
||||
#---------------------
|
||||
|
||||
core_cols = ['mutationinformation'
|
||||
mcsm_ppi2_dataf = mcsm_ppi2_data[['mutationinformation'
|
||||
, 'mcsm_ppi2_affinity'
|
||||
, 'mcsm_ppi2_scaled'
|
||||
, 'mcsm_ppi2_outcome'
|
||||
|
@ -184,27 +153,6 @@ def format_mcsm_ppi2_output(mcsm_ppi2_output_csv, gene_name):
|
|||
, 'mutant_type'
|
||||
, 'wt_upper'
|
||||
, 'mut_upper'
|
||||
, 'chain']
|
||||
|
||||
if gene_name.lower() in geneL_sp:
|
||||
|
||||
column_order = core_cols + ['5uhc_offset', '5uhc_position']
|
||||
|
||||
else:
|
||||
|
||||
column_order = core_cols.copy()
|
||||
|
||||
#--------------
|
||||
# reorder now
|
||||
#--------------
|
||||
mcsm_ppi2_dataf = mcsm_ppi2_data[column_order]
|
||||
|
||||
#%%============================================================================
|
||||
###################
|
||||
# Sort df based on
|
||||
# position columns
|
||||
###################
|
||||
mcsm_ppi2_dataf.sort_values(by = ['position', 'mutant_type'], inplace = True, ascending = True)
|
||||
|
||||
, 'chain']]
|
||||
return(mcsm_ppi2_dataf)
|
||||
#%%#####################################################################
|
|
@ -67,7 +67,7 @@ outfile_mcsm_ppi2_f = outdir_ppi2 + gene.lower() + '_complex_mcsm_ppi2_norm.csv'
|
|||
# Data: gid+streptomycin
|
||||
#==========================
|
||||
print('Formatting results for:', infile_mcsm_ppi2)
|
||||
mcsm_ppi2_df_f = format_mcsm_ppi2_output(mcsm_ppi2_output_csv = infile_mcsm_ppi2, gene_name = gene)
|
||||
mcsm_ppi2_df_f = format_mcsm_ppi2_output(mcsm_ppi2_output_csv = infile_mcsm_ppi2)
|
||||
|
||||
# writing file
|
||||
print('Writing formatted df to csv')
|
||||
|
|
109
my_header.R
109
my_header.R
|
@ -1,31 +1,21 @@
|
|||
#########################################################
|
||||
# A) Installing and loading required packages
|
||||
# B) My functions
|
||||
#########################################################
|
||||
|
||||
### A) Installing and loading required packages
|
||||
#########################################################
|
||||
#lib_loc = "/usr/local/lib/R/site-library")
|
||||
|
||||
require("getopt", quietly = TRUE) # cmd parse arguments
|
||||
#if (!require("gplots")) {
|
||||
# install.packages("gplots", dependencies = TRUE)
|
||||
# library(gplots)
|
||||
#}
|
||||
|
||||
if (!require("tidyverse")) {
|
||||
install.packages("tidyverse", dependencies = TRUE)
|
||||
library(tidyverse)
|
||||
}
|
||||
#if (!require("tidyverse")) {
|
||||
# install.packages("tidyverse", dependencies = TRUE)
|
||||
# library(tidyverse)
|
||||
#}
|
||||
|
||||
if (!require("shiny")) {
|
||||
install.packages("shiny", dependencies = TRUE)
|
||||
library(shiny)
|
||||
}
|
||||
|
||||
if (!require("shinyBS")) {
|
||||
install.packages("shinyBS", dependencies = TRUE)
|
||||
library(shinyBS)
|
||||
}
|
||||
|
||||
if (!require("gridExtra")) {
|
||||
install.packages("gridExtra", dependencies = TRUE)
|
||||
library(gridExtra)
|
||||
if (!require("ggplot2")) {
|
||||
install.packages("ggplot2", dependencies = TRUE)
|
||||
library(ggplot2)
|
||||
}
|
||||
|
||||
if (!require("ggridges")) {
|
||||
|
@ -33,35 +23,6 @@ if (!require("ggridges")) {
|
|||
library(ggridges)
|
||||
}
|
||||
|
||||
# if (!require("ggplot2")) {
|
||||
# install.packages("ggplot2", dependencies = TRUE)
|
||||
# library(ggplot2)
|
||||
# }
|
||||
|
||||
# if (!require ("dplyr")){
|
||||
# install.packages("dplyr")
|
||||
# library(dplyr)
|
||||
# }
|
||||
|
||||
if (!require ("DT")){
|
||||
install.packages("DT")
|
||||
library(DT)
|
||||
}
|
||||
|
||||
if (!require ("plyr")){
|
||||
install.packages("plyr")
|
||||
library(plyr)
|
||||
}
|
||||
|
||||
# Install
|
||||
#if(!require(devtools)) install.packages("devtools")
|
||||
#devtools::install_github("kassambara/ggcorrplot")
|
||||
|
||||
if (!require ("ggbeeswarm")){
|
||||
install.packages("ggbeeswarm")
|
||||
library(ggbeeswarm)
|
||||
}
|
||||
|
||||
if (!require("plotly")) {
|
||||
install.packages("plotly", dependencies = TRUE)
|
||||
library(plotly)
|
||||
|
@ -124,7 +85,7 @@ install.packages("data.table")
|
|||
|
||||
if (!require("PerformanceAnalytics")){
|
||||
install.packages("PerformanceAnalytics", dependencies = T)
|
||||
library(PerformanceAnalytics)
|
||||
library(PerformaceAnalytics)
|
||||
}
|
||||
|
||||
if (!require ("GGally")){
|
||||
|
@ -142,6 +103,11 @@ if (!require ("psych")){
|
|||
library(psych)
|
||||
}
|
||||
|
||||
if (!require ("dplyr")){
|
||||
install.packages("dplyr")
|
||||
library(dplyr)
|
||||
}
|
||||
|
||||
if (!require ("compare")){
|
||||
install.packages("compare")
|
||||
library(compare)
|
||||
|
@ -152,37 +118,18 @@ if (!require ("arsenal")){
|
|||
library(arsenal)
|
||||
}
|
||||
|
||||
if(!require(ggseqlogo)){
|
||||
install.packages("ggseqlogo")
|
||||
library(ggseqlogo)
|
||||
}
|
||||
|
||||
# for PDB files
|
||||
####TIDYVERSE
|
||||
# Install
|
||||
#if(!require(devtools)) install.packages("devtools")
|
||||
#devtools::install_github("kassambara/ggcorrplot")
|
||||
|
||||
#library(ggcorrplot)
|
||||
|
||||
|
||||
###for PDB files
|
||||
#install.packages("bio3d")
|
||||
if(!require(bio3d)){
|
||||
install.packages("bio3d")
|
||||
library(bio3d)
|
||||
}
|
||||
|
||||
library(protr)
|
||||
if(!require(protr)){
|
||||
install.packages("protr")
|
||||
library(protr)
|
||||
}
|
||||
|
||||
#if (!requireNamespace("BiocManager", quietly = TRUE))
|
||||
# install.packages("BiocManager")
|
||||
|
||||
#BiocManager::install("Logolas")
|
||||
library("Logolas")
|
||||
|
||||
|
||||
####################################
|
||||
# Load all my functions:
|
||||
# only works if tidyverse is loaded
|
||||
# hence included it here!
|
||||
####################################
|
||||
|
||||
func_path = "~/git/LSHTM_analysis/scripts/functions/"
|
||||
source_files <- list.files(func_path, "\\.R$") # locate all .R files
|
||||
map(paste0(func_path, source_files), source) # source all your R scripts!
|
||||
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
./combining_dfs.py -d cycloserine -g alr
|
||||
|
||||
./combining_dfs.py -d ethambutol -g embB
|
||||
|
||||
./combining_dfs.py -d streptomycin -g gid
|
||||
|
||||
./combining_dfs.py -d isoniazid -g katG
|
||||
|
||||
./combining_dfs.py -d pyrazinamide -g pncA
|
||||
|
||||
./combining_dfs.py -d rifampicin -g rpoB
|
||||
|
BIN
scripts/.swp
BIN
scripts/.swp
Binary file not shown.
1590
scripts/DE_CHECK_DEL
1590
scripts/DE_CHECK_DEL
File diff suppressed because it is too large
Load diff
|
@ -1,277 +0,0 @@
|
|||
#########################################################
|
||||
# A) Installing and loading required packages
|
||||
# B) My functions
|
||||
#########################################################
|
||||
check = function(x) tryCatch(if(class(x) == 'logical') 1 else 1, error = function(e) 0)
|
||||
|
||||
#########################################################
|
||||
#lib_loc = "/usr/local/lib/R/site-library")
|
||||
|
||||
require("getopt", quietly = TRUE) # cmd parse arguments
|
||||
|
||||
if (!require ("DT")){
|
||||
install.packages("DT")
|
||||
library(DT)
|
||||
}
|
||||
|
||||
if (!require ("plyr")){
|
||||
install.packages("plyr")
|
||||
library(plyr)
|
||||
}
|
||||
|
||||
if (!require("tidyverse")) {
|
||||
install.packages("tidyverse", dependencies = TRUE)
|
||||
library(tidyverse)
|
||||
}
|
||||
|
||||
#---------------------------
|
||||
# covered by tidyverse
|
||||
|
||||
# if (!require("ggplot2")) {
|
||||
# install.packages("ggplot2", dependencies = TRUE)
|
||||
# library(ggplot2)
|
||||
# }
|
||||
|
||||
# if (!require ("dplyr")){
|
||||
# install.packages("dplyr")
|
||||
# library(dplyr)
|
||||
# }
|
||||
#-----------------------------
|
||||
|
||||
if (!require("shiny")) {
|
||||
install.packages("shiny", dependencies = TRUE)
|
||||
library(shiny)
|
||||
}
|
||||
|
||||
if (!require("shinyBS")) {
|
||||
install.packages("shinyBS", dependencies = TRUE)
|
||||
library(shinyBS)
|
||||
}
|
||||
|
||||
if (!require("shinydashboard")) {
|
||||
install.packages("shinydashboard", dependencies = TRUE)
|
||||
library(shinydashboard)
|
||||
}
|
||||
|
||||
if (!require("gridExtra")) {
|
||||
install.packages("gridExtra", dependencies = TRUE)
|
||||
library(gridExtra)
|
||||
}
|
||||
|
||||
if (!require("ggridges")) {
|
||||
install.packages("ggridges", dependencies = TRUE)
|
||||
library(ggridges)
|
||||
}
|
||||
|
||||
# Install
|
||||
#if(!require(devtools)) install.packages("devtools")
|
||||
#devtools::install_github("kassambara/ggcorrplot")
|
||||
|
||||
if (!require ("ggbeeswarm")){
|
||||
install.packages("ggbeeswarm")
|
||||
library(ggbeeswarm)
|
||||
}
|
||||
|
||||
if (!require("plotly")) {
|
||||
install.packages("plotly", dependencies = TRUE)
|
||||
library(plotly)
|
||||
}
|
||||
|
||||
if (!require("cowplot")) {
|
||||
install.packages("copwplot", dependencies = TRUE)
|
||||
library(cowplot)
|
||||
}
|
||||
|
||||
if (!require("ggcorrplot")) {
|
||||
install.packages("ggcorrplot", dependencies = TRUE)
|
||||
library(ggcorrplot)
|
||||
}
|
||||
|
||||
if (!require("ggpubr")) {
|
||||
install.packages("ggpubr", dependencies = TRUE)
|
||||
library(ggpubr)
|
||||
}
|
||||
|
||||
if (!require("RColorBrewer")) {
|
||||
install.packages("RColorBrewer", dependencies = TRUE)
|
||||
library(RColorBrewer)
|
||||
}
|
||||
|
||||
if (!require ("GOplot")) {
|
||||
install.packages("GOplot")
|
||||
library(GOplot)
|
||||
}
|
||||
|
||||
if(!require("VennDiagram")) {
|
||||
install.packages("VennDiagram", dependencies = T)
|
||||
library(VennDiagram)
|
||||
}
|
||||
|
||||
if(!require("scales")) {
|
||||
install.packages("scales", dependencies = T)
|
||||
library(scales)
|
||||
}
|
||||
|
||||
if(!require("plotrix")) {
|
||||
install.packages("plotrix", dependencies = T)
|
||||
library(plotrix)
|
||||
}
|
||||
|
||||
if(!require("stats")) {
|
||||
install.packages("stats", dependencies = T)
|
||||
library(stats)
|
||||
}
|
||||
|
||||
if(!require("stats4")) {
|
||||
install.packages("stats4", dependencies = T)
|
||||
library(stats4)
|
||||
}
|
||||
|
||||
if(!require("data.table")) {
|
||||
install.packages("data.table")
|
||||
library(data.table)
|
||||
}
|
||||
|
||||
if (!require("PerformanceAnalytics")){
|
||||
install.packages("PerformanceAnalytics", dependencies = T)
|
||||
library(PerformanceAnalytics)
|
||||
}
|
||||
|
||||
if (!require ("GGally")){
|
||||
install.packages("GGally")
|
||||
library(GGally)
|
||||
}
|
||||
|
||||
if (!require ("corrr")){
|
||||
install.packages("corrr")
|
||||
library(corrr)
|
||||
}
|
||||
|
||||
if (!require ("psych")){
|
||||
install.packages("psych")
|
||||
library(psych)
|
||||
}
|
||||
|
||||
if (!require ("compare")){
|
||||
install.packages("compare")
|
||||
library(compare)
|
||||
}
|
||||
|
||||
if (!require ("arsenal")){
|
||||
install.packages("arsenal")
|
||||
library(arsenal)
|
||||
}
|
||||
|
||||
if(!require(ggseqlogo)){
|
||||
install.packages("ggseqlogo")
|
||||
library(ggseqlogo)
|
||||
}
|
||||
|
||||
# for PDB files
|
||||
if(!require(bio3d)){
|
||||
install.packages("bio3d")
|
||||
library(bio3d)
|
||||
}
|
||||
|
||||
library(protr)
|
||||
if(!require(protr)){
|
||||
install.packages("protr")
|
||||
library(protr)
|
||||
}
|
||||
|
||||
# if (!requireNamespace("BiocManager", quietly = TRUE))
|
||||
# install.packages("BiocManager")
|
||||
|
||||
#BiocManager::install("Logolas")
|
||||
#library("Logolas")
|
||||
library("Biostrings")
|
||||
|
||||
####################################
|
||||
# Load all my functions:
|
||||
# only works if tidyverse is loaded
|
||||
# hence included it here!
|
||||
####################################
|
||||
|
||||
func_path = "~/git/LSHTM_analysis/scripts/functions/"
|
||||
source_files <- list.files(func_path, "\\.R$") # locate all .R files
|
||||
map(paste0(func_path, source_files), source) # source all your R scripts!
|
||||
|
||||
# set plot script dir
|
||||
plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/"
|
||||
|
||||
####################################################
|
||||
consurf_palette1 = c("0" = "yellow2"
|
||||
, "1" = "cyan1"
|
||||
, "2" = "steelblue2"
|
||||
, "3" = "cadetblue2"
|
||||
, "4" = "paleturquoise2"
|
||||
, "5" = "thistle3"
|
||||
, "6" = "thistle2"
|
||||
, "7" = "plum2"
|
||||
, "8" = "maroon"
|
||||
, "9" = "violetred2")
|
||||
|
||||
consurf_palette2 = c("0" = "yellow2"
|
||||
, "1" = "forestgreen"
|
||||
, "2" = "seagreen3"
|
||||
, "3" = "palegreen1"
|
||||
, "4" = "darkseagreen2"
|
||||
, "5" = "thistle3"
|
||||
, "6" = "lightpink1"
|
||||
, "7" = "orchid3"
|
||||
, "8" = "orchid4"
|
||||
, "9" = "darkorchid4")
|
||||
|
||||
# decreasing levels mess legend
|
||||
# consurf_colours_LEVEL = c(
|
||||
# "0" = rgb(1.00,1.00,0.59)
|
||||
# , "9" = rgb(0.63,0.16,0.37)
|
||||
# , "8" = rgb(0.94,0.49,0.67)
|
||||
# , "7" = rgb(0.98,0.78,0.86)
|
||||
# , "6" = rgb(0.98,0.92,0.96)
|
||||
# , "5" = rgb(1.00,1.00,1.00)
|
||||
# , "4" = rgb(0.84,0.94,0.94)
|
||||
# , "3" = rgb(0.65,0.86,0.90)
|
||||
# , "2" = rgb(0.29,0.69,0.75)
|
||||
# , "1" = rgb(0.04,0.49,0.51)
|
||||
# )
|
||||
|
||||
consurf_colours = c(
|
||||
"0" = rgb(1.00,1.00,0.59)
|
||||
, "1" = rgb(0.04,0.49,0.51)
|
||||
, "2" = rgb(0.29,0.69,0.75)
|
||||
, "3" = rgb(0.65,0.86,0.90)
|
||||
, "4" = rgb(0.84,0.94,0.94)
|
||||
, "5" = rgb(1.00,1.00,1.00)
|
||||
, "6" = rgb(0.98,0.92,0.96)
|
||||
, "7" = rgb(0.98,0.78,0.86)
|
||||
, "8" = rgb(0.94,0.49,0.67)
|
||||
, "9" = rgb(0.63,0.16,0.37)
|
||||
)
|
||||
|
||||
consurf_colours_no_isd = c(
|
||||
#"0" = rgb(1.00,1.00,0.59)
|
||||
"1" = rgb(0.04,0.49,0.51)
|
||||
, "2" = rgb(0.29,0.69,0.75)
|
||||
, "3" = rgb(0.65,0.86,0.90)
|
||||
, "4" = rgb(0.84,0.94,0.94)
|
||||
, "5" = rgb(1.00,1.00,1.00)
|
||||
, "6" = rgb(0.98,0.92,0.96)
|
||||
, "7" = rgb(0.98,0.78,0.86)
|
||||
, "8" = rgb(0.94,0.49,0.67)
|
||||
, "9" = rgb(0.63,0.16,0.37)
|
||||
)
|
||||
|
||||
##################################################
|
||||
|
||||
# Function name clashes with plyr and dplyr
|
||||
# # loading dplyr after plyr causes issues
|
||||
# if("dplyr" %in% (.packages())){
|
||||
# detach("package:dplyr", unload=TRUE)
|
||||
# detach("package:plyr", unload=TRUE)
|
||||
# }
|
||||
# library(plyr)
|
||||
# library(dplyr)
|
||||
|
||||
# another solution is to requireNamespace() instead of library()
|
||||
# so its function names don't collide with dplyr's.
|
85
scripts/aa_index/aa_index.R
Normal file
85
scripts/aa_index/aa_index.R
Normal file
|
@ -0,0 +1,85 @@
|
|||
library(bio3d)
|
||||
library(seqinr)
|
||||
library(bios2mds)
|
||||
library(protr)
|
||||
#############################################################
|
||||
#%% TASK
|
||||
# use this to return df for AA index and mutation properties
|
||||
|
||||
source()
|
||||
|
||||
##############################################################
|
||||
my_fasta_file = "~/git/Data/streptomycin/input/gid_complex.fasta"
|
||||
my_mcsmf_snps = "~/git/Data/streptomycin/output/gid_mcsm_formatted_snps.csv"
|
||||
###############################################################
|
||||
#%% fasta as vector
|
||||
gid_aa_seq_v= read.fasta(my_fasta_file
|
||||
, seqtype = "AA"
|
||||
, as.string = F)
|
||||
|
||||
gid_aa_v = as.character(gid_aa_seq_v[[1]]); gid_aa_v
|
||||
|
||||
#%% fasta as string
|
||||
gid_aa_seq_s = read.fasta(my_fasta_file
|
||||
, seqtype = "AA"
|
||||
, as.string = T)
|
||||
|
||||
gid_aa_s = as.character(gid_aa_seq_s[[1]]); gid_aa_s
|
||||
###############################################################
|
||||
#===================
|
||||
# AA indices
|
||||
# https://www.genome.jp/aaindex/AAindex/list_of_indices
|
||||
#===================
|
||||
data(aa.index)
|
||||
|
||||
# default
|
||||
aai_kd = aa2index(gid_aa_v, index = "KYTJ820101") # Hydropathy, KD
|
||||
|
||||
aai_rv = aa2index(gid_aa_v, index = "BIGC670101") # Residue volume, Bigelow, 1967
|
||||
aai_rv2 = aa2index(gid_aa_v, index = "GOLD730102") # Residue volume (Goldsack-Chalifoux, 1973)
|
||||
aai_b = aa2index(gid_aa_v, index = "VENT840101") # Bitterness (Venanzi, 1984)
|
||||
|
||||
par(mfrow = c(1,1))
|
||||
barplot(aai_kd)
|
||||
barplot(aai_rv)
|
||||
barplot(aai_rv2)
|
||||
#barplot(aai_b, col = c("black", "yellow"))
|
||||
|
||||
##########################################################
|
||||
#===================
|
||||
# mutation matrices
|
||||
#===================
|
||||
data(sub.mat)
|
||||
snps = read.csv(my_mcsmf_snps
|
||||
, header = 0)
|
||||
snps
|
||||
colnames(snps) <- "mutationinformation"
|
||||
|
||||
# run using all matrices
|
||||
sub_mat_names = as.character(unlist(attributes(sub.mat)))
|
||||
#sub_mat_names = "BLOSUM80"
|
||||
|
||||
for (j in sub_mat_names){
|
||||
print(j)
|
||||
snps[[j]] <- NA
|
||||
for (i in 1:nrow(snps)) {
|
||||
curr_snp = snps$mutationinformation[i]
|
||||
m1 = str_match(curr_snp, "^([A-Z]{1})[0-9]*([A-Z]{1})")
|
||||
aa1 = m1[,2]
|
||||
aa2 = m1[,3]
|
||||
#snps$blosum_80[i]
|
||||
snps[[j]][i] = sub.mat[[j]][aa1,aa2]
|
||||
}
|
||||
|
||||
}
|
||||
snps
|
||||
##########################################################
|
||||
gid_aac = extractAAC(gid_aa_s)
|
||||
gid_dc = extractDC(gid_aa_s)
|
||||
gid_tc = extractTC(gid_aa_s)
|
||||
|
||||
par(mfrow = c(1, 3))
|
||||
barplot(gid_aac)
|
||||
barplot(gid_dc)
|
||||
barplot(gid_tc)
|
||||
###########################################################
|
101
scripts/aa_index/run_aa_index.R
Normal file
101
scripts/aa_index/run_aa_index.R
Normal file
|
@ -0,0 +1,101 @@
|
|||
#!/usr/bin/env Rscript
|
||||
library(bio3d)
|
||||
library(seqinr)
|
||||
library(bios2mds)
|
||||
library(protr)
|
||||
library(stringr)
|
||||
####################################################################
|
||||
# TASK: use this to return df for AA index and mutation properties
|
||||
# useful for dfs
|
||||
|
||||
|
||||
#####################################################################
|
||||
# working dir and loading libraries
|
||||
getwd()
|
||||
setwd("~/git/LSHTM_analysis/scripts/")
|
||||
getwd()
|
||||
|
||||
drug = "streptomycin"
|
||||
gene = "gid"
|
||||
|
||||
source("functions/plotting_globals.R")
|
||||
import_dirs(drug_name = drug, gene_name = gene)
|
||||
|
||||
##############################################################
|
||||
my_fasta_file = paste0(indir, "/", gene, "_complex.fasta")
|
||||
|
||||
my_mcsmf_snps = paste0(outdir, "/", gene, "_mcsm_formatted_snps.csv")
|
||||
|
||||
###############################################################
|
||||
#%% fasta as vector
|
||||
gid_aa_seq_v= read.fasta(my_fasta_file
|
||||
, seqtype = "AA"
|
||||
, as.string = F)
|
||||
|
||||
gid_aa_v = as.character(gid_aa_seq_v[[1]]); gid_aa_v
|
||||
|
||||
#%% fasta as string
|
||||
gid_aa_seq_s = read.fasta(my_fasta_file
|
||||
, seqtype = "AA"
|
||||
, as.string = T)
|
||||
|
||||
gid_aa_s = as.character(gid_aa_seq_s[[1]]); gid_aa_s
|
||||
###############################################################
|
||||
#===================
|
||||
# AA indices
|
||||
# https://www.genome.jp/aaindex/AAindex/list_of_indices
|
||||
#===================
|
||||
data(aa.index)
|
||||
|
||||
# default
|
||||
aai_kd = aa2index(gid_aa_v, index = "KYTJ820101") # Hydropathy, KD
|
||||
|
||||
aai_rv = aa2index(gid_aa_v, index = "BIGC670101") # Residue volume, Bigelow, 1967
|
||||
aai_rv2 = aa2index(gid_aa_v, index = "GOLD730102") # Residue volume (Goldsack-Chalifoux, 1973)
|
||||
aai_b = aa2index(gid_aa_v, index = "VENT840101") # Bitterness (Venanzi, 1984)
|
||||
##########################################################
|
||||
#===================
|
||||
# mutation matrices
|
||||
#===================
|
||||
data(sub.mat)
|
||||
snps = read.csv(my_mcsmf_snps
|
||||
, header = 0)
|
||||
snps
|
||||
colnames(snps) <- "mutationinformation"
|
||||
|
||||
# run using all matrices
|
||||
sub_mat_names = as.character(unlist(attributes(sub.mat)))
|
||||
#sub_mat_names = "BLOSUM80"
|
||||
|
||||
for (j in sub_mat_names){
|
||||
print(j)
|
||||
snps[[j]] <- NA
|
||||
for (i in 1:nrow(snps)) {
|
||||
curr_snp = snps$mutationinformation[i]
|
||||
m1 = str_match(curr_snp, "^([A-Z]{1})[0-9]*([A-Z]{1})")
|
||||
aa1 = m1[,2]
|
||||
aa2 = m1[,3]
|
||||
#snps$blosum_80[i]
|
||||
snps[[j]][i] = sub.mat[[j]][aa1,aa2]
|
||||
}
|
||||
|
||||
}
|
||||
snps
|
||||
##########################################################
|
||||
gid_aac = extractAAC(gid_aa_s)
|
||||
gid_dc = extractDC(gid_aa_s)
|
||||
gid_tc = extractTC(gid_aa_s)
|
||||
|
||||
##########################################################
|
||||
# Plots
|
||||
par(mfrow = c(3,2))
|
||||
|
||||
barplot(aai_kd , main = "AA index: KD")
|
||||
#barplot(aai_rv , main = "AA index: Residue Volume, 1967")
|
||||
barplot(aai_rv2 , main = "AA index: Residue Volume") #1973
|
||||
barplot(aai_b , main = "AA index: Bitterness")
|
||||
|
||||
barplot(gid_aac , main = "AA: composition")
|
||||
barplot(gid_dc , main = "AA: Dipeptide composition")
|
||||
barplot(gid_tc , main = "AA: Tripeptide composition")
|
||||
###########################################################
|
|
@ -1 +0,0 @@
|
|||
mutationinformation,ALTS910101,AZAE970101,AZAE970102,BASU010101,BENS940101,BENS940102,BENS940103,BENS940104,BETM990101,BLAJ010101,BONM030101,BONM030102,BONM030103,BONM030104,BONM030105,BONM030106,BRYS930101,CROG050101,CSEM940101,DAYM780301,DAYM780302,DOSZ010101,DOSZ010102,DOSZ010103,DOSZ010104,FEND850101,FITW660101,GEOD900101,GIAG010101,GODA950101,GONG920101,GRAR740104,HENS920101,HENS920102,HENS920103,HENS920104,JOHM930101,JOND920103,JOND940101,KANM000101,KAPO950101,KESO980101,KESO980102,KOLA920101,KOLA930101,KOSJ950100_RSA_SST,KOSJ950100_SST,KOSJ950110_RSA,KOSJ950115,LEVJ860101,LINK010101,LIWA970101,LUTR910101,LUTR910102,LUTR910103,LUTR910104,LUTR910105,LUTR910106,LUTR910107,LUTR910108,LUTR910109,MCLA710101,MCLA720101,MEHP950101,MEHP950102,MEHP950103,MICC010101,MIRL960101,MIYS850102,MIYS850103,MIYS930101,MIYS960101,MIYS960102,MIYS960103,MIYS990106,MIYS990107,MIYT790101,MOHR870101,MOOG990101,MUET010101,MUET020101,MUET020102,NAOD960101,NGPC000101,NIEK910101,NIEK910102,OGAK980101,OVEJ920100_RSA,OVEJ920101,OVEJ920102,OVEJ920103,PARB960101,PARB960102,PRLA000101,PRLA000102,QUIB020101,QU_C930101,QU_C930102,QU_C930103,RIER950101,RISJ880101,ROBB790102,RUSR970101,RUSR970102,RUSR970103,SIMK990101,SIMK990102,SIMK990103,SIMK990104,SIMK990105,SKOJ000101,SKOJ000102,SKOJ970101,TANS760101,TANS760102,THOP960101,TOBD000101,TOBD000102,TUDE900101,VENM980101,VOGG950101,WEIL970101,WEIL970102,ZHAC000101,ZHAC000102,ZHAC000103,ZHAC000104,ZHAC000105,ZHAC000106
|
|
|
@ -1,142 +0,0 @@
|
|||
# Name Version Build Channel
|
||||
_libgcc_mutex 0.1 main
|
||||
_py-xgboost-mutex 2.0 cpu_0
|
||||
_r-mutex 1.0.0 anacondar_1
|
||||
agate 1.6.1 py38_2
|
||||
agate-dbf 0.2.1 py_0
|
||||
agate-excel 0.2.3 py_0
|
||||
agate-sql 0.5.4 py_0
|
||||
babel 2.8.0 py_0
|
||||
beautifulsoup4 4.9.0 py38_0
|
||||
binutils_impl_linux-64 2.33.1 he6710b0_7
|
||||
binutils_linux-64 2.33.1 h9595d00_15
|
||||
biopython 1.76 py38h7b6447c_0
|
||||
blas 1.0 mkl
|
||||
brotlipy 0.7.0 py38h7b6447c_1000
|
||||
bwidget 1.9.11 1
|
||||
bzip2 1.0.8 h7b6447c_0
|
||||
ca-certificates 2020.11.8 ha878542_0 conda-forge
|
||||
cairo 1.14.12 h8948797_3
|
||||
certifi 2020.11.8 py38h578d9bd_0 conda-forge
|
||||
cffi 1.14.0 py38h2e261b9_0
|
||||
chardet 3.0.4 py38_1003
|
||||
cryptography 2.9.2 py38h1ba5d50_0
|
||||
csvkit 1.0.4 py38_0 anaconda
|
||||
curl 7.67.0 hbc83047_0
|
||||
cycler 0.10.0 py38_0
|
||||
dbfread 2.0.7 py38_0
|
||||
dbus 1.13.16 hb2f20db_0
|
||||
dssp 3.0.0 hf484d3e_3 salilab
|
||||
et_xmlfile 1.0.1 py_1001
|
||||
expat 2.2.9 he6710b0_2
|
||||
fontconfig 2.13.0 h9420a91_0
|
||||
freetype 2.10.2 h5ab3b9f_0
|
||||
fribidi 1.0.9 h7b6447c_0
|
||||
gcc_impl_linux-64 7.3.0 habb00fd_1
|
||||
gcc_linux-64 7.3.0 h553295d_15
|
||||
gfortran_impl_linux-64 7.3.0 hdf63c60_1
|
||||
gfortran_linux-64 7.3.0 h553295d_15
|
||||
glib 2.63.1 h5a9c865_0
|
||||
glob2 0.7 py_0 conda-forge
|
||||
graphite2 1.3.14 h23475e2_0
|
||||
gsl 2.4 h14c3975_4
|
||||
gst-plugins-base 1.14.0 hbbd80ab_1
|
||||
gstreamer 1.14.0 hb453b48_1
|
||||
gxx_impl_linux-64 7.3.0 hdf63c60_1
|
||||
gxx_linux-64 7.3.0 h553295d_15
|
||||
harfbuzz 1.8.8 hffaf4a1_0
|
||||
icu 58.2 he6710b0_3
|
||||
idna 2.10 py_0
|
||||
intel-openmp 2020.1 217
|
||||
isodate 0.6.0 py_1
|
||||
jdcal 1.4.1 py_0
|
||||
joblib 0.16.0 py_0
|
||||
jpeg 9b h024ee3a_2
|
||||
kiwisolver 1.2.0 py38hfd86e86_0
|
||||
krb5 1.16.4 h173b8e3_0
|
||||
ld_impl_linux-64 2.33.1 h53a641e_7
|
||||
leather 0.3.3 py38_0
|
||||
libboost 1.67.0 h46d08c1_4
|
||||
libcurl 7.67.0 h20c2e04_0
|
||||
libedit 3.1.20191231 h14c3975_1
|
||||
libffi 3.2.1 hd88cf55_4
|
||||
libgcc-ng 9.1.0 hdf63c60_0
|
||||
libgfortran-ng 7.3.0 hdf63c60_0
|
||||
libpng 1.6.37 hbc83047_0
|
||||
libssh2 1.9.0 h1ba5d50_1
|
||||
libstdcxx-ng 9.1.0 hdf63c60_0
|
||||
libtiff 4.1.0 h2733197_1
|
||||
libuuid 1.0.3 h1bed415_2
|
||||
libxcb 1.14 h7b6447c_0
|
||||
libxgboost 0.90 he1b5a44_4 conda-forge
|
||||
libxml2 2.9.10 he19cac6_1
|
||||
lz4-c 1.9.2 he6710b0_1
|
||||
make 4.2.1 h1bed415_1
|
||||
matplotlib 3.1.3 py38_0
|
||||
matplotlib-base 3.1.3 py38hef1b27d_0
|
||||
mkl 2020.1 217
|
||||
mkl-service 2.3.0 py38he904b0f_0
|
||||
mkl_fft 1.1.0 py38h23d657b_0
|
||||
mkl_random 1.1.1 py38h0573a6f_0
|
||||
ncurses 6.2 he6710b0_1
|
||||
numpy 1.19.1 py38hbc911f0_0
|
||||
numpy-base 1.19.1 py38hfa32c7d_0
|
||||
openpyxl 3.0.4 py_0
|
||||
openssl 1.1.1h h516909a_0 conda-forge
|
||||
os 0.1.4 0 jmcmurray
|
||||
pandas 1.0.2 py38h0573a6f_0
|
||||
pango 1.42.4 h049681c_0
|
||||
parsedatetime 2.4 py38_0
|
||||
pcre 8.44 he6710b0_0
|
||||
perl 5.26.2 h14c3975_0
|
||||
perl-perlio-utf8_strict 0.007 pl526h6bb024c_1 bioconda
|
||||
perl-test-warnings 0.026 pl526_1 bioconda
|
||||
perl-xsloader 0.24 pl526_0 bioconda
|
||||
pip 20.1.1 py38_1
|
||||
pixman 0.40.0 h7b6447c_0
|
||||
py-xgboost 0.90 py38_4 conda-forge
|
||||
pycparser 2.20 py_2
|
||||
pyopenssl 19.1.0 py_1
|
||||
pyparsing 2.4.7 py_0
|
||||
pyqt 5.9.2 py38h05f1152_4
|
||||
pysocks 1.7.1 py38_0
|
||||
python 3.8.2 h191fe78_0
|
||||
python-dateutil 2.8.1 py_0
|
||||
python-slugify 3.0.4 py_0
|
||||
python_abi 3.8 1_cp38 conda-forge
|
||||
pytimeparse 1.1.8 py38_0
|
||||
pytz 2020.1 py_0
|
||||
qt 5.9.7 h5867ecd_1
|
||||
qutil 3.2.1 6 jmcmurray
|
||||
r-base 3.6.1 h9bb98a2_1
|
||||
r-sys 3.2 r36h96ca727_0 r
|
||||
readline 7.0 h7b6447c_5
|
||||
requests 2.23.0 py38_0 prometeia
|
||||
scikit-learn 0.22.1 py38hd81dba3_0
|
||||
scipy 1.4.1 py38h0b6359f_0
|
||||
seaborn 0.10.1 py_0
|
||||
setuptools 49.2.0 py38_0
|
||||
sip 4.19.13 py38he6710b0_0
|
||||
six 1.15.0 py_0
|
||||
soupsieve 2.0.1 py_0
|
||||
sqlalchemy 1.3.18 py38h7b6447c_0
|
||||
sqlite 3.32.3 h62c20be_0
|
||||
terminalplot 0.3.0 pypi_0 pypi
|
||||
text-unidecode 1.3 py_0
|
||||
tk 8.6.10 hbc83047_0
|
||||
tktable 2.10 h14c3975_0
|
||||
tornado 6.0.4 py38h7b6447c_1
|
||||
unidecode 1.1.1 py_0
|
||||
urllib3 1.25.9 py_0
|
||||
wheel 0.34.2 py38_0
|
||||
xgboost 0.90 py38he1b5a44_4 conda-forge
|
||||
xlrd 1.2.0 py_0
|
||||
xz 5.2.5 h7b6447c_0
|
||||
zlib 1.2.11 h7b6447c_3
|
||||
zstd 1.4.5 h9ceee32_0
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1 +0,0 @@
|
|||
ALTS910101,AZAE970101,AZAE970102,BASU010101,BENS940101,BENS940102,BENS940103,BENS940104,BETM990101,BLAJ010101,BONM030101,BONM030102,BONM030103,BONM030104,BONM030105,BONM030106,BRYS930101,CROG050101,CSEM940101,DAYM780301,DAYM780302,DOSZ010101,DOSZ010102,DOSZ010103,DOSZ010104,FEND850101,FITW660101,GEOD900101,GIAG010101,GODA950101,GONG920101,GRAR740104,HENS920101,HENS920102,HENS920103,HENS920104,JOHM930101,JOND920103,JOND940101,KANM000101,KAPO950101,KESO980101,KESO980102,KOLA920101,KOLA930101,KOSJ950100_RSA_SST,KOSJ950100_SST,KOSJ950110_RSA,KOSJ950115,LEVJ860101,LINK010101,LIWA970101,LUTR910101,LUTR910102,LUTR910103,LUTR910104,LUTR910105,LUTR910106,LUTR910107,LUTR910108,LUTR910109,MCLA710101,MCLA720101,MEHP950101,MEHP950102,MEHP950103,MICC010101,MIRL960101,MIYS850102,MIYS850103,MIYS930101,MIYS960101,MIYS960102,MIYS960103,MIYS990106,MIYS990107,MIYT790101,MOHR870101,MOOG990101,MUET010101,MUET020101,MUET020102,NAOD960101,NGPC000101,NIEK910101,NIEK910102,OGAK980101,OVEJ920100_RSA,OVEJ920101,OVEJ920102,OVEJ920103,PARB960101,PARB960102,PRLA000101,PRLA000102,QUIB020101,QU_C930101,QU_C930102,QU_C930103,RIER950101,RISJ880101,ROBB790102,RUSR970101,RUSR970102,RUSR970103,SIMK990101,SIMK990102,SIMK990103,SIMK990104,SIMK990105,SKOJ000101,SKOJ000102,SKOJ970101,TANS760101,TANS760102,THOP960101,TOBD000101,TOBD000102,TUDE900101,VENM980101,VOGG950101,WEIL970101,WEIL970102,ZHAC000101,ZHAC000102,ZHAC000103,ZHAC000104,ZHAC000105,ZHAC000106
|
|
|
@ -1,10 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
# get the list of AA indices and then combine these into one file
|
||||
wget -c https://www.genome.jp/aaindex/AAindex/list_of_indices https://www.genome.jp/aaindex/AAindex/list_of_potentials https://www.genome.jp/aaindex/AAindex/list_of_matrices
|
||||
cat list_of_* > combined_aa_list
|
||||
|
||||
# get the description for the header used in our script
|
||||
for i in $(cat aa_headerT.csv); do
|
||||
grep $i combined_aa_list >> aa_headerNames
|
||||
done
|
|
@ -1,125 +0,0 @@
|
|||
ALTS910101 The PAM-120 matrix (Altschul, 1991)
|
||||
AZAE970101 The single residue substitution matrix from interchanges of spatially neighbouring residues (Azarya-Sprinzak et al., 1997)
|
||||
AZAE970102 The substitution matrix derived from spatially conserved motifs (Azarya-Sprinzak et al., 1997)
|
||||
BASU010101 Optimization-based potential derived by the modified perceptron criterion
|
||||
BENS940101 Log-odds scoring matrix collected in 6.4-8.7 PAM (Benner et al., 1994)
|
||||
BENS940102 Log-odds scoring matrix collected in 22-29 PAM (Benner et al., 1994)
|
||||
BENS940103 Log-odds scoring matrix collected in 74-100 PAM (Benner et al., 1994)
|
||||
BENS940104 Genetic code matrix (Benner et al., 1994)
|
||||
BETM990101 Modified version of the Miyazawa-Jernigan transfer energy
|
||||
BLAJ010101 Matrix built from structural superposition data for identifying potential remote homologues (Blake-Cohen, 2001)
|
||||
BONM030101 Quasichemical statistical potential for the antiparallel orientation of interacting side groups
|
||||
BONM030102 Quasichemical statistical potential for the intermediate orientation of interacting side groups
|
||||
BONM030103 Quasichemical statistical potential for the parallel orientation of interacting side groups
|
||||
BONM030104 Distances between centers of interacting side chains in the antiparallel orientation
|
||||
BONM030105 Distances between centers of interacting side chains in the intermediate orientation
|
||||
BONM030106 Distances between centers of interacting side chains in the parallel orientation
|
||||
BRYS930101 Distance-dependent statistical potential (only energies of contacts within 0-5 Angstrooms are included)
|
||||
CROG050101 Substitution matrix computed from the Dirichlet Mixture Model (Crooks-Brenner, 2005)
|
||||
CSEM940101 Residue replace ability matrix (Cserzo et al., 1994)
|
||||
DAYM780301 Log odds matrix for 250 PAMs (Dayhoff et al., 1978)
|
||||
DAYM780302 Log odds matrix for 40 PAMs (Dayhoff et al., 1978)
|
||||
DOSZ010101 Amino acid similarity matrix based on the sausage force field (Dosztanyi-Torda, 2001)
|
||||
DOSZ010102 Normalised version of SM_SAUSAGE (Dosztanyi-Torda, 2001)
|
||||
DOSZ010103 An amino acid similarity matrix based on the THREADER force field (Dosztanyi-Torda, 2001)
|
||||
DOSZ010104 Normalised version of SM_THREADER (Dosztanyi-Torda, 2001)
|
||||
FEND850101 Structure-Genetic matrix (Feng et al., 1985)
|
||||
FITW660101 Mutation values for the interconversion of amino acid pairs (Fitch, 1966)
|
||||
GEOD900101 Hydrophobicity scoring matrix (George et al., 1990)
|
||||
GIAG010101 Residue substitutions matrix from thermo/mesophilic to psychrophilic enzymes (Gianese et al., 2001)
|
||||
GODA950101 Quasichemical statistical potential derived from buried contacts
|
||||
GONG920101 The mutation matrix for initially aligning (Gonnet et al., 1992)
|
||||
GRAR740104 Chemical distance (Grantham, 1974)
|
||||
HENS920101 BLOSUM45 substitution matrix (Henikoff-Henikoff, 1992)
|
||||
HENS920102 BLOSUM62 substitution matrix (Henikoff-Henikoff, 1992)
|
||||
HENS920103 BLOSUM80 substitution matrix (Henikoff-Henikoff, 1992)
|
||||
HENS920104 BLOSUM50 substitution matrix (Henikoff-Henikoff, 1992)
|
||||
JOHM930101 Structure-based amino acid scoring table (Johnson-Overington, 1993)
|
||||
JOND920103 The 250 PAM PET91 matrix (Jones et al., 1992)
|
||||
JOND940101 The 250 PAM transmembrane protein exchange matrix (Jones et al., 1994)
|
||||
KANM000101 Substitution matrix (OPTIMA) derived by maximizing discrimination between homologs and non-homologs (Kann et al., 2000)
|
||||
KAPO950101 (Kapp et al., 1995)
|
||||
KESO980101 Quasichemical transfer energy derived from interfacial regions of protein-protein complexes
|
||||
KESO980102 Quasichemical energy in an average protein environment derived from interfacial regions of protein-protein complexes
|
||||
KOLA920101 Conformational similarity weight matrix (Kolaskar-Kulkarni-Kale, 1992)
|
||||
KOLA930101 Statistical potential derived by the quasichemical approximation
|
||||
KOSJ950115 Context-dependent optimal substitution matrices for all residues (Koshi-Goldstein, 1995)
|
||||
LEVJ860101 The secondary structure similarity matrix (Levin et al., 1986)
|
||||
LINK010101 Substitution matrices from an neural network model (Lin et al., 2001)
|
||||
LIWA970101 Modified version of the Miyazawa-Jernigan transfer energy
|
||||
LUTR910101 Structure-based comparison table for outside other class (Luthy et al., 1991)
|
||||
LUTR910102 Structure-based comparison table for inside other class (Luthy et al., 1991)
|
||||
LUTR910103 Structure-based comparison table for outside alpha class (Luthy et al., 1991)
|
||||
LUTR910104 Structure-based comparison table for inside alpha class (Luthy et al., 1991)
|
||||
LUTR910105 Structure-based comparison table for outside beta class (Luthy et al., 1991)
|
||||
LUTR910106 Structure-based comparison table for inside beta class (Luthy et al., 1991)
|
||||
LUTR910107 Structure-based comparison table for other class (Luthy et al., 1991)
|
||||
LUTR910108 Structure-based comparison table for alpha helix class (Luthy et al., 1991)
|
||||
LUTR910109 Structure-based comparison table for beta strand class (Luthy et al., 1991)
|
||||
MCLA710101 The similarity of pairs of amino acids (McLachlan, 1971)
|
||||
MCLA720101 Chemical similarity scores (McLachlan, 1972)
|
||||
MEHP950101 (Mehta et al., 1995)
|
||||
MEHP950102 (Mehta et al., 1995)
|
||||
MEHP950103 (Mehta et al., 1995)
|
||||
MICC010101 Optimization-derived potential
|
||||
MIRL960101 Statistical potential derived by the maximization of the harmonic mean of Z scores
|
||||
MIYS850102 Quasichemical energy of transfer of amino acids from water to the protein environment
|
||||
MIYS850103 Quasichemical energy of interactions in an average buried environment
|
||||
MIYS930101 Base-substitution-protein-stability matrix (Miyazawa-Jernigan, 1993)
|
||||
MIYS960101 Quasichemical energy of transfer of amino acids from water to the protein environment
|
||||
MIYS960102 Quasichemical energy of interactions in an average buried environment
|
||||
MIYS960103 Number of contacts between side chains derived from 1168 x-ray protein structures
|
||||
MIYS990106 Quasichemical energy of transfer of amino acids from water to the protein environment
|
||||
MIYS990107 Quasichemical energy of interactions in an average buried environment
|
||||
MIYT790101 Amino acid pair distance (Miyata et al., 1979)
|
||||
MOHR870101 EMPAR matrix (Mohana Rao, 1987)
|
||||
MOOG990101 Quasichemical potential derived from interfacial regions of protein-protein complexes
|
||||
MUET010101 Non-symmetric substitution matrix (SLIM) for detection of homologous transmembrane proteins (Mueller et al., 2001)
|
||||
MUET020101 Substitution matrix (VTML160) obtained by maximum likelihood estimation (Mueller et al., 2002)
|
||||
MUET020102 Substitution matrix (VTML250) obtained by maximum likelihood estimation (Mueller et al., 2002)
|
||||
NAOD960101 Substitution matrix derived from the single residue interchanges at spatially conserved regions of proteins (Naor et al., 1996)
|
||||
NGPC000101 Substitution matrix (PHAT) built from hydrophobic and transmembrane regions of the Blocks database (Ng et al., 2000)
|
||||
NIEK910101 Structure-derived correlation matrix 1 (Niefind-Schomburg, 1991)
|
||||
NIEK910102 Structure-derived correlation matrix 2 (Niefind-Schomburg, 1991)
|
||||
OGAK980101 Substitution matrix derived from structural alignments by maximizing entropy (Ogata et al., 1998)
|
||||
OVEJ920101 STR matrix from structure-based alignments (Overington et al., 1992)
|
||||
OVEJ920102 Environment-specific amino acid substitution matrix for alpha residues (Overington et al., 1992)
|
||||
OVEJ920103 Environment-specific amino acid substitution matrix for beta residues (Overington et al., 1992)
|
||||
PARB960101 Statistical contact potential derived by the quasichemical approximation
|
||||
PARB960102 Modified version of the Miyazawa-Jernigan transfer energy
|
||||
PRLA000101 Structure derived matrix (SDM) for alignment of distantly related sequences (Prlic et al., 2000)
|
||||
PRLA000102 Homologous structure dereived matrix (HSDM) for alignment of distantly related sequences (Prlic et al., 2000)
|
||||
QUIB020101 STROMA score matrix for the alignment of known distant homologs (Qian-Goldstein, 2002)
|
||||
QU_C930101 Cross-correlation coefficients of preference factors main chain (Qu et al., 1993)
|
||||
QU_C930102 Cross-correlation coefficients of preference factors side chain (Qu et al., 1993)
|
||||
QU_C930103 The mutant distance based on spatial preference factor (Qu et al., 1993)
|
||||
RIER950101 Hydrophobicity scoring matrix (Riek et al., 1995)
|
||||
RISJ880101 Scoring matrix (Risler et al., 1988)
|
||||
ROBB790102 Interaction energies derived from side chain contacts in the interiors of known protein structures
|
||||
RUSR970101 Substitution matrix based on structural alignments of analogous proteins (Russell et al., 1997)
|
||||
RUSR970102 Substitution matrix based on structural alignments of remote homolous proteins (Russell et al., 1997)
|
||||
RUSR970103 Substitution matrix based on structural alignments of analogous and remote homolous proteins (Russell et al., 1997)
|
||||
SIMK990101 Distance-dependent statistical potential (contacts within 0-5 Angstrooms)
|
||||
SIMK990102 Distance-dependent statistical potential (contacts within 5-7.5 Angstrooms)
|
||||
SIMK990103 Distance-dependent statistical potential (contacts within 7.5-10 Angstrooms)
|
||||
SIMK990104 Distance-dependent statistical potential (contacts within 10-12 Angstrooms)
|
||||
SIMK990105 Distance-dependent statistical potential (contacts longer than 12 Angstrooms)
|
||||
SKOJ000101 Statistical quasichemical potential with the partially composition-corrected pair scale
|
||||
SKOJ000102 Statistical quasichemical potential with the composition-corrected pair scale
|
||||
SKOJ970101 Statistical potential derived by the quasichemical approximation
|
||||
TANS760101 Statistical contact potential derived from 25 x-ray protein structures
|
||||
TANS760102 Number of contacts between side chains derived from 25 x-ray protein structures
|
||||
THOP960101 Mixed quasichemical and optimization-based protein contact potential
|
||||
TOBD000101 Optimization-derived potential obtained for small set of decoys
|
||||
TOBD000102 Optimization-derived potential obtained for large set of decoys
|
||||
TUDE900101 isomorphicity of replacements (Tudos et al., 1990)
|
||||
VENM980101 Statistical potential derived by the maximization of the perceptron criterion
|
||||
VOGG950101 (Vogt et al., 1995)
|
||||
WEIL970101 WAC matrix constructed from amino acid comparative profiles (Wei et al., 1997)
|
||||
WEIL970102 Difference matrix obtained by subtracting the BLOSUM62 from the WAC matrix (Wei et al., 1997)
|
||||
ZHAC000101 Environment-dependent residue contact energies (rows = helix, cols = helix)
|
||||
ZHAC000102 Environment-dependent residue contact energies (rows = helix, cols = strand)
|
||||
ZHAC000103 Environment-dependent residue contact energies (rows = helix, cols = coil)
|
||||
ZHAC000104 Environment-dependent residue contact energies (rows = strand, cols = strand)
|
||||
ZHAC000105 Environment-dependent residue contact energies (rows = strand, cols = coil)
|
||||
ZHAC000106 Environment-dependent residue contact energies (rows = coil, cols = coil)
|
|
@ -1,129 +0,0 @@
|
|||
ALTS910101
|
||||
AZAE970101
|
||||
AZAE970102
|
||||
BASU010101
|
||||
BENS940101
|
||||
BENS940102
|
||||
BENS940103
|
||||
BENS940104
|
||||
BETM990101
|
||||
BLAJ010101
|
||||
BONM030101
|
||||
BONM030102
|
||||
BONM030103
|
||||
BONM030104
|
||||
BONM030105
|
||||
BONM030106
|
||||
BRYS930101
|
||||
CROG050101
|
||||
CSEM940101
|
||||
DAYM780301
|
||||
DAYM780302
|
||||
DOSZ010101
|
||||
DOSZ010102
|
||||
DOSZ010103
|
||||
DOSZ010104
|
||||
FEND850101
|
||||
FITW660101
|
||||
GEOD900101
|
||||
GIAG010101
|
||||
GODA950101
|
||||
GONG920101
|
||||
GRAR740104
|
||||
HENS920101
|
||||
HENS920102
|
||||
HENS920103
|
||||
HENS920104
|
||||
JOHM930101
|
||||
JOND920103
|
||||
JOND940101
|
||||
KANM000101
|
||||
KAPO950101
|
||||
KESO980101
|
||||
KESO980102
|
||||
KOLA920101
|
||||
KOLA930101
|
||||
KOSJ950100_RSA_SST
|
||||
KOSJ950100_SST
|
||||
KOSJ950110_RSA
|
||||
KOSJ950115
|
||||
LEVJ860101
|
||||
LINK010101
|
||||
LIWA970101
|
||||
LUTR910101
|
||||
LUTR910102
|
||||
LUTR910103
|
||||
LUTR910104
|
||||
LUTR910105
|
||||
LUTR910106
|
||||
LUTR910107
|
||||
LUTR910108
|
||||
LUTR910109
|
||||
MCLA710101
|
||||
MCLA720101
|
||||
MEHP950101
|
||||
MEHP950102
|
||||
MEHP950103
|
||||
MICC010101
|
||||
MIRL960101
|
||||
MIYS850102
|
||||
MIYS850103
|
||||
MIYS930101
|
||||
MIYS960101
|
||||
MIYS960102
|
||||
MIYS960103
|
||||
MIYS990106
|
||||
MIYS990107
|
||||
MIYT790101
|
||||
MOHR870101
|
||||
MOOG990101
|
||||
MUET010101
|
||||
MUET020101
|
||||
MUET020102
|
||||
NAOD960101
|
||||
NGPC000101
|
||||
NIEK910101
|
||||
NIEK910102
|
||||
OGAK980101
|
||||
OVEJ920100_RSA
|
||||
OVEJ920101
|
||||
OVEJ920102
|
||||
OVEJ920103
|
||||
PARB960101
|
||||
PARB960102
|
||||
PRLA000101
|
||||
PRLA000102
|
||||
QUIB020101
|
||||
QU_C930101
|
||||
QU_C930102
|
||||
QU_C930103
|
||||
RIER950101
|
||||
RISJ880101
|
||||
ROBB790102
|
||||
RUSR970101
|
||||
RUSR970102
|
||||
RUSR970103
|
||||
SIMK990101
|
||||
SIMK990102
|
||||
SIMK990103
|
||||
SIMK990104
|
||||
SIMK990105
|
||||
SKOJ000101
|
||||
SKOJ000102
|
||||
SKOJ970101
|
||||
TANS760101
|
||||
TANS760102
|
||||
THOP960101
|
||||
TOBD000101
|
||||
TOBD000102
|
||||
TUDE900101
|
||||
VENM980101
|
||||
VOGG950101
|
||||
WEIL970101
|
||||
WEIL970102
|
||||
ZHAC000101
|
||||
ZHAC000102
|
||||
ZHAC000103
|
||||
ZHAC000104
|
||||
ZHAC000105
|
||||
ZHAC000106
|
|
|
@ -1,2 +0,0 @@
|
|||
ALTS910101,AZAE970101,AZAE970102,BASU010101,BENS940101,BENS940102,BENS940103,BENS940104,BETM990101,BLAJ010101,BONM030101,BONM030102,BONM030103,BONM030104,BONM030105,BONM030106,BRYS930101,CROG050101,CSEM940101,DAYM780301,DAYM780302,DOSZ010101,DOSZ010102,DOSZ010103,DOSZ010104,FEND850101,FITW660101,GEOD900101,GIAG010101,GODA950101,GONG920101,GRAR740104,HENS920101,HENS920102,HENS920103,HENS920104,JOHM930101,JOND920103,JOND940101,KANM000101,KAPO950101,KESO980101,KESO980102,KOLA920101,KOLA930101,KOSJ950100_RSA_SST,KOSJ950100_SST,KOSJ950110_RSA,KOSJ950115,LEVJ860101,LINK010101,LIWA970101,LUTR910101,LUTR910102,LUTR910103,LUTR910104,LUTR910105,LUTR910106,LUTR910107,LUTR910108,LUTR910109,MCLA710101,MCLA720101,MEHP950101,MEHP950102,MEHP950103,MICC010101,MIRL960101,MIYS850102,MIYS850103,MIYS930101,MIYS960101,MIYS960102,MIYS960103,MIYS990106,MIYS990107,MIYT790101,MOHR870101,MOOG990101,MUET010101,MUET020101,MUET020102,NAOD960101,NGPC000101,NIEK910101,NIEK910102,OGAK980101,OVEJ920100_RSA,OVEJ920101,OVEJ920102,OVEJ920103,PARB960101,PARB960102,PRLA000101,PRLA000102,QUIB020101,QU_C930101,QU_C930102,QU_C930103,RIER950101,RISJ880101,ROBB790102,RUSR970101,RUSR970102,RUSR970103,SIMK990101,SIMK990102,SIMK990103,SIMK990104,SIMK990105,SKOJ000101,SKOJ000102,SKOJ970101,TANS760101,TANS760102,THOP960101,TOBD000101,TOBD000102,TUDE900101,VENM980101,VOGG950101,WEIL970101,WEIL970102,ZHAC000101,ZHAC000102,ZHAC000103,ZHAC000104,ZHAC000105,ZHAC000106
|
||||
1.0,2.0,-1.0,0.1462,1.1,0.8,0.4,0.8,0.07,-1.0,0.4,0.5,0.4,5.2,5.3,4.9,0.022,-1.0,-0.07,1.0,-1.0,4.9,-5.95,0.3,-1.73,5.0,1.0,9.0,2.0,0.0,0.3,27.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,0.0,-5.0,17.7,-1.78,0.19,0.0,0.3,3.7,5.1,4.6,3.5,-1.0,0.056,-2.81,1.0,-5.0,-2.0,5.0,6.0,4.0,0.0,1.0,5.0,4.0,1.0,0.94,0.77,1.69,-0.005081,0.1,-1.81,0.1,0.17,-2.03,0.08,6368.0,0.15,0.06,0.06,6.0,-0.56,-4.0,0.0,0.0,0.0,-3.0,0.1,0.11,-6.8,0.014,-1.0,0.022,0.014,0.6,-2.3,-0.53,-1.11,0.7,0.183,0.656,3.0,89.0,-0.2,-1.47,2.0,0.0,0.0,0.03615,0.08,0.04566,0.02263,0.00258,0.8,0.7,0.6,-3.4,33.0,0.41,0.87,0.08,-2.0,0.07816,5.5,0.0,1.0,-0.26,0.63,0.78,-1.64,0.17,0.48
|
|
|
@ -1,6 +0,0 @@
|
|||
BENS940104 Genetic code matrix (Benner et al., 1994)
|
||||
DOSZ010103 An amino acid similarity matrix based on the THREADER force field (Dosztanyi-Torda, 2001)
|
||||
GIAG010101 Residue substitutions matrix from thermo/mesophilic to psychrophilic enzymes (Gianese et al., 2001)
|
||||
MIYT790101 Amino acid pair distance (Miyata et al., 1979)
|
||||
OVEJ920102 Environment-specific amino acid substitution matrix for alpha residues (Overington et al., 1992)
|
||||
RISJ880101 Scoring matrix (Risler et al., 1988)
|
Binary file not shown.
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
Binary file not shown.
|
@ -1,90 +0,0 @@
|
|||
from collections import defaultdict
|
||||
|
||||
import os
|
||||
import pickle
|
||||
|
||||
# Directory containing the raw "aaindex2" and "aaindex3" files read by main().
# The AAINDEX_DATA_FOLDER environment variable overrides the original
# hard-coded location, so the script can run on machines where that path
# does not exist; without the variable the behaviour is unchanged.
DATA_FOLDER = os.environ.get(
    "AAINDEX_DATA_FOLDER",
    "/home/chmrodrigues/Documents/ppi2/reverse_mutations/data/aaindex",
)
|
||||
|
||||
def _read_joined(path):
    """Return the lines of *path* joined by a single space.

    The join means every line except the very first one starts with a
    space; the entry parser below relies on that (" M rows =" prefix).
    """
    with open(path, 'r') as handle:
        return ' '.join(handle)


def _parse_entry(entry):
    """Parse one '//'-terminated entry into ``(name, matrix)``.

    ``matrix`` is a dict of dicts keyed ``matrix[row][col]``. Cells that the
    entry does not provide stay ``None``; tokens that are not valid floats
    are stored unchanged as strings.

    Raises:
        ValueError: if the entry has no "H " line or no " M rows =" line.
    """
    lines = entry.split('\n')

    header = next((ln for ln in lines if ln.strip().startswith("H ")), None)
    layout_at = next(
        (pos for pos, ln in enumerate(lines) if ln.startswith(" M rows =")),
        None,
    )
    if header is None or layout_at is None:
        raise ValueError(
            "Malformed entry (missing 'H' or 'M rows =' line): {!r}".format(entry[:60])
        )

    name = header.split()[-1]

    # Layout line looks like "M rows = <letters>, cols = <letters>": the
    # fourth token is the row labels (with a trailing comma), the last token
    # the column labels. Each label is a single character.
    layout = lines[layout_at].split()
    rows = layout[3].replace(",", "")
    columns = layout[-1]

    matrix = {row: {col: None for col in columns} for row in rows}

    # The n-th line after the layout line holds the values of the n-th row.
    # zip() stops at the shorter input, so surplus lines or surplus values
    # are ignored, exactly as the former IndexError guard did.
    for row, raw in zip(rows, lines[layout_at + 1:]):
        for col, value in zip(columns, raw.split()):
            try:
                matrix[row][col] = float(value)
            except ValueError:
                matrix[row][col] = value

    return name, matrix


def _parse_matrices(path):
    """Parse every entry of the file at *path* into ``{name: matrix}``."""
    matrices = dict()
    for entry in _read_joined(path).split('//\n'):
        # Skip the empty/whitespace-only remainder after the last terminator.
        if not entry.strip():
            continue
        name, matrix = _parse_entry(entry)
        matrices[name] = matrix
    return matrices


def _dump(obj, path):
    """Pickle *obj* to *path* with protocol 2, closing the file afterwards."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=2)


def main():
    """Convert the "aaindex2" and "aaindex3" files in DATA_FOLDER to pickles.

    Writes 'index2.p' and 'index3.p' to the current working directory and
    prints the number of matrices parsed from each file.

    Returns:
        True on completion.
    """
    # Parse both inputs before writing anything, so a broken or missing
    # input file does not leave a half-updated set of pickles behind.
    index2 = _parse_matrices(os.path.join(DATA_FOLDER, "aaindex2"))
    index3 = _parse_matrices(os.path.join(DATA_FOLDER, "aaindex3"))

    print(len(index2))
    _dump(index2, 'index2.p')

    _dump(index3, 'index3.p')
    print(len(index3))

    return True


if __name__ == "__main__":
    main()
|
|
@ -1,162 +0,0 @@
|
|||
"""
|
||||
RSA <= 0.2 Buried (Inaccessible)
|
||||
RSA > 0.2 Exposed (Accessible)
|
||||
|
||||
SST = [H,I,G] - Helix
|
||||
SST = [B,E] - Beta
|
||||
SST = [T] - Turn
|
||||
SST = [S,-] - Coil
|
||||
"""
|
||||
from Bio.PDB import PDBParser, DSSP
|
||||
import pickle
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
# Suppress all warnings for the whole process
# (presumably to hide Biopython's PDB-parsing noise — TODO confirm).
warnings.filterwarnings("ignore")

#CURRENT_FOLDER = '/home/local/BHRI/sportelli/Desktop/Important_Code/structural/aaindex'
# Base folder of the aaindex scripts. The AAINDEX_FOLDER environment variable
# overrides the hard-coded default so the script is usable on other machines;
# without the variable the behaviour is unchanged.
CURRENT_FOLDER = os.environ.get(
    'AAINDEX_FOLDER',
    '/home/tanu/git/LSHTM_analysis/scripts/aa_index_scripts/aaindex',
)
# Folder holding the pickled matrices (aaindex2.p / aaindex3.p) loaded by main().
DATA_FOLDER = os.path.join(CURRENT_FOLDER,'data')
|
||||
|
||||
# Lookup tables mapping a residue environment to the accession of the
# environment-specific matrix to report for it.

# Keyed by "<burial>_<secondary structure>".
RSA_SST_DEPENDENT = dict(
    exposed_helix='KOSJ950101',
    exposed_beta='KOSJ950102',
    exposed_turn='KOSJ950103',
    exposed_coil='KOSJ950104',
    buried_helix='KOSJ950105',
    buried_beta='KOSJ950106',
    buried_turn='KOSJ950107',
    buried_coil='KOSJ950108',
)

# Keyed by secondary structure only.
SST_DEPENDENT = dict(
    helix='KOSJ950109',
    beta='KOSJ950110',
    turn='KOSJ950111',
    coil='KOSJ950112',
)

# Keyed by burial only (KOSJ series).
RSA_DEPENDENT1 = dict(
    exposed='KOSJ950113',
    buried='KOSJ950114',
)

# Keyed by burial only (OVEJ series).
RSA_DEPENDENT2 = dict(
    exposed='OVEJ920104',
    buried='OVEJ920105',
)
|
||||
|
||||
def get_environment(pdb_file, chain, position, insertion_code=' '):
    """Return the DSSP-derived environment of a single residue.

    Runs DSSP (the 'mkdssp' executable) on the first model of *pdb_file* and
    looks up the residue identified by chain, residue number and insertion
    code.

    Args:
        pdb_file: path to the PDB file; also used as the structure id.
        chain: chain identifier of the residue.
        position: residue number (anything accepted by ``int()``).
        insertion_code: insertion code of the residue, ' ' when there is none.

    Returns:
        dict with 'sst' (element 2 of the DSSP record) and 'rsa' (element 3
        of the DSSP record converted to float).

    Raises:
        IndexError: if DSSP reports no residue matching the given chain,
            position and insertion code.
    """
    parser = PDBParser()
    structure = parser.get_structure(pdb_file, pdb_file)
    model = structure[0]

    dssp = DSSP(model, pdb_file, dssp='mkdssp')

    # Convert once instead of once per DSSP key.
    residue_number = int(position)
    matches = [
        key for key in dssp.keys()
        if key[0] == chain
        and key[1][1] == residue_number
        and key[1][2] == insertion_code
    ]
    if not matches:
        # Same exception type as the former bare `[0]` lookup, but with a
        # message that says which residue could not be found.
        raise IndexError(
            "No DSSP record for chain {!r}, position {}{} in {}".format(
                chain, residue_number, insertion_code.strip(), pdb_file
            )
        )

    record = dssp[matches[0]]
    sst = record[2]
    # NOTE(review): float() fails if DSSP yields a non-numeric value here —
    # confirm whether that can happen for the structures in use.
    rsa = float(record[3])

    return {'sst': sst, 'rsa': rsa}
|
||||
|
||||
def _load_pickle(path):
    """Load and return the pickled object stored at *path*.

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _pair_scores(index, aa_from, aa_to):
    """Return ``{matrix name: value}`` for the aa_from -> aa_to substitution.

    When a matrix has no value (None) at [aa_from][aa_to], the transposed
    cell [aa_to][aa_from] is used instead.
    """
    scores = dict()
    for name, matrix in index.items():
        value = matrix[aa_from][aa_to]
        if value is None:
            value = matrix[aa_to][aa_from]
        scores[name] = value
    return scores


def _classify_environment(environment):
    """Map a get_environment() result to ``(burial, secondary structure)``.

    Thresholds follow the header of this file: RSA <= 0.2 is buried, anything
    above is exposed; H/I/G -> helix, B/E -> beta, T -> turn, else coil.
    """
    # The previous code labelled RSA <= 0.2 as 'exposed' and everything else
    # as 'buried', i.e. the opposite of the documented definition.
    burial = 'buried' if environment['rsa'] <= 0.2 else 'exposed'

    code = environment['sst']
    if code in ('H', 'I', 'G'):
        sst = 'helix'
    elif code in ('B', 'E'):
        sst = 'beta'
    elif code == 'T':
        sst = 'turn'
    else:
        sst = 'coil'
    return burial, sst


def main():
    """Print the comma-separated substitution scores for one mutation.

    Command-line arguments (sys.argv[1:4]): pdb_file, chain_id and
    mutation_code, the latter being <wild-type letter><position>[insertion
    code]<mutant letter>.

    The values are printed in order of sorted matrix name. The
    environment-specific matrices are collapsed into four summary columns
    chosen from the residue's DSSP environment.

    Returns:
        True on completion.

    Raises:
        ValueError: if mutation_code is too short to hold two residue
            letters and a position.
    """
    # --- Read input -------------------------------------------------------
    pdb_file = sys.argv[1]
    chain_id = sys.argv[2]
    mutation_code = sys.argv[3]

    if len(mutation_code) < 3:
        raise ValueError(
            "Invalid mutation code {!r}".format(mutation_code)
        )

    aa_from = mutation_code[0]
    aa_to = mutation_code[-1]
    position = mutation_code[1:-1]
    insertion_code = ' '
    # A trailing non-digit in the position part is the insertion code.
    if not position[-1].isdigit():
        insertion_code = position[-1]
        position = position[:-1]

    # --- Read databases ---------------------------------------------------
    # index2 - Amino acid substitution indexes
    # index3 - Statistical protein contact potentials
    index2 = _load_pickle('{}/aaindex2.p'.format(DATA_FOLDER))
    index3 = _load_pickle('{}/aaindex3.p'.format(DATA_FOLDER))

    # --- Loop through tables and extract values ---------------------------
    results_index2 = _pair_scores(index2, aa_from, aa_to)
    results_index3 = _pair_scores(index3, aa_from, aa_to)

    # --- Get environment characteristics ----------------------------------
    environment = get_environment(pdb_file, chain_id, position, insertion_code)
    buried, sst = _classify_environment(environment)

    # One summary column per family of environment-specific matrices.
    results_index2['KOSJ950100_RSA_SST'] = results_index2[RSA_SST_DEPENDENT['{}_{}'.format(buried,sst)]]
    results_index2['KOSJ950100_SST'] = results_index2[SST_DEPENDENT[sst]]
    results_index2['KOSJ950110_RSA'] = results_index2[RSA_DEPENDENT1[buried]]
    results_index2['OVEJ920100_RSA'] = results_index2[RSA_DEPENDENT2[buried]]

    # Drop the individual environment-specific matrices now that the
    # relevant one of each family is available under its summary name.
    for table in (RSA_SST_DEPENDENT, SST_DEPENDENT, RSA_DEPENDENT1, RSA_DEPENDENT2):
        for accession in table.values():
            results_index2.pop(accession)

    # --- Print results ----------------------------------------------------
    output_dict = dict()
    output_dict.update(results_index2)
    output_dict.update(results_index3)

    keys = sorted(output_dict)
    values = [str(output_dict[item]) for item in keys]

    # print(",".join(keys))  # uncomment to emit the matching header row
    print(",".join(values))

    return True


if __name__ == "__main__":

    if len(sys.argv) != 4:
        print("Error on parsing argument list")
        print("Please provide a one letter code for wild-type and mutant residues")
        print("Eg.: python get_scores.py pdb_file chain_id mutation_code")
        sys.exit(1)
    main()
|
|
@ -1,722 +0,0 @@
|
|||
List of 566 Amino Acid Indices in AAindex ver.9.2
|
||||
|
||||
The columns correspond to the AAindex accession number and the description of
|
||||
each index.
|
||||
|
||||
ANDN920101 alpha-CH chemical shifts (Andersen et al., 1992)
|
||||
ARGP820101 Hydrophobicity index (Argos et al., 1982)
|
||||
ARGP820102 Signal sequence helical potential (Argos et al., 1982)
|
||||
ARGP820103 Membrane-buried preference parameters (Argos et al., 1982)
|
||||
BEGF750101 Conformational parameter of inner helix (Beghin-Dirkx, 1975)
|
||||
BEGF750102 Conformational parameter of beta-structure (Beghin-Dirkx, 1975)
|
||||
BEGF750103 Conformational parameter of beta-turn (Beghin-Dirkx, 1975)
|
||||
BHAR880101 Average flexibility indices (Bhaskaran-Ponnuswamy, 1988)
|
||||
BIGC670101 Residue volume (Bigelow, 1967)
|
||||
BIOV880101 Information value for accessibility; average fraction 35% (Biou et al., 1988)
|
||||
BIOV880102 Information value for accessibility; average fraction 23% (Biou et al., 1988)
|
||||
BROC820101 Retention coefficient in TFA (Browne et al., 1982)
|
||||
BROC820102 Retention coefficient in HFBA (Browne et al., 1982)
|
||||
BULH740101 Transfer free energy to surface (Bull-Breese, 1974)
|
||||
BULH740102 Apparent partial specific volume (Bull-Breese, 1974)
|
||||
BUNA790101 alpha-NH chemical shifts (Bundi-Wuthrich, 1979)
|
||||
BUNA790102 alpha-CH chemical shifts (Bundi-Wuthrich, 1979)
|
||||
BUNA790103 Spin-spin coupling constants 3JHalpha-NH (Bundi-Wuthrich, 1979)
|
||||
BURA740101 Normalized frequency of alpha-helix (Burgess et al., 1974)
|
||||
BURA740102 Normalized frequency of extended structure (Burgess et al., 1974)
|
||||
CHAM810101 Steric parameter (Charton, 1981)
|
||||
CHAM820101 Polarizability parameter (Charton-Charton, 1982)
|
||||
CHAM820102 Free energy of solution in water, kcal/mole (Charton-Charton, 1982)
|
||||
CHAM830101 The Chou-Fasman parameter of the coil conformation (Charton-Charton, 1983)
|
||||
CHAM830102 A parameter defined from the residuals obtained from the best correlation of the Chou-Fasman parameter of beta-sheet (Charton-Charton, 1983)
|
||||
CHAM830103 The number of atoms in the side chain labelled 1+1 (Charton-Charton, 1983)
|
||||
CHAM830104 The number of atoms in the side chain labelled 2+1 (Charton-Charton, 1983)
|
||||
CHAM830105 The number of atoms in the side chain labelled 3+1 (Charton-Charton, 1983)
|
||||
CHAM830106 The number of bonds in the longest chain (Charton-Charton, 1983)
|
||||
CHAM830107 A parameter of charge transfer capability (Charton-Charton, 1983)
|
||||
CHAM830108 A parameter of charge transfer donor capability (Charton-Charton, 1983)
|
||||
CHOC750101 Average volume of buried residue (Chothia, 1975)
|
||||
CHOC760101 Residue accessible surface area in tripeptide (Chothia, 1976)
|
||||
CHOC760102 Residue accessible surface area in folded protein (Chothia, 1976)
|
||||
CHOC760103 Proportion of residues 95% buried (Chothia, 1976)
|
||||
CHOC760104 Proportion of residues 100% buried (Chothia, 1976)
|
||||
CHOP780101 Normalized frequency of beta-turn (Chou-Fasman, 1978a)
|
||||
CHOP780201 Normalized frequency of alpha-helix (Chou-Fasman, 1978b)
|
||||
CHOP780202 Normalized frequency of beta-sheet (Chou-Fasman, 1978b)
|
||||
CHOP780203 Normalized frequency of beta-turn (Chou-Fasman, 1978b)
|
||||
CHOP780204 Normalized frequency of N-terminal helix (Chou-Fasman, 1978b)
|
||||
CHOP780205 Normalized frequency of C-terminal helix (Chou-Fasman, 1978b)
|
||||
CHOP780206 Normalized frequency of N-terminal non helical region (Chou-Fasman, 1978b)
|
||||
CHOP780207 Normalized frequency of C-terminal non helical region (Chou-Fasman, 1978b)
|
||||
CHOP780208 Normalized frequency of N-terminal beta-sheet (Chou-Fasman, 1978b)
|
||||
CHOP780209 Normalized frequency of C-terminal beta-sheet (Chou-Fasman, 1978b)
|
||||
CHOP780210 Normalized frequency of N-terminal non beta region (Chou-Fasman, 1978b)
|
||||
CHOP780211 Normalized frequency of C-terminal non beta region (Chou-Fasman, 1978b)
|
||||
CHOP780212 Frequency of the 1st residue in turn (Chou-Fasman, 1978b)
|
||||
CHOP780213 Frequency of the 2nd residue in turn (Chou-Fasman, 1978b)
|
||||
CHOP780214 Frequency of the 3rd residue in turn (Chou-Fasman, 1978b)
|
||||
CHOP780215 Frequency of the 4th residue in turn (Chou-Fasman, 1978b)
|
||||
CHOP780216 Normalized frequency of the 2nd and 3rd residues in turn (Chou-Fasman, 1978b)
|
||||
CIDH920101 Normalized hydrophobicity scales for alpha-proteins (Cid et al., 1992)
|
||||
CIDH920102 Normalized hydrophobicity scales for beta-proteins (Cid et al., 1992)
|
||||
CIDH920103 Normalized hydrophobicity scales for alpha+beta-proteins (Cid et al., 1992)
|
||||
CIDH920104 Normalized hydrophobicity scales for alpha/beta-proteins (Cid et al., 1992)
|
||||
CIDH920105 Normalized average hydrophobicity scales (Cid et al., 1992)
|
||||
COHE430101 Partial specific volume (Cohn-Edsall, 1943)
|
||||
CRAJ730101 Normalized frequency of middle helix (Crawford et al., 1973)
|
||||
CRAJ730102 Normalized frequency of beta-sheet (Crawford et al., 1973)
|
||||
CRAJ730103 Normalized frequency of turn (Crawford et al., 1973)
|
||||
DAWD720101 Size (Dawson, 1972)
|
||||
DAYM780101 Amino acid composition (Dayhoff et al., 1978a)
|
||||
DAYM780201 Relative mutability (Dayhoff et al., 1978b)
|
||||
DESM900101 Membrane preference for cytochrome b: MPH89 (Degli Esposti et al., 1990)
|
||||
DESM900102 Average membrane preference: AMP07 (Degli Esposti et al., 1990)
|
||||
EISD840101 Consensus normalized hydrophobicity scale (Eisenberg, 1984)
|
||||
EISD860101 Solvation free energy (Eisenberg-McLachlan, 1986)
|
||||
EISD860102 Atom-based hydrophobic moment (Eisenberg-McLachlan, 1986)
|
||||
EISD860103 Direction of hydrophobic moment (Eisenberg-McLachlan, 1986)
|
||||
FASG760101 Molecular weight (Fasman, 1976)
|
||||
FASG760102 Melting point (Fasman, 1976)
|
||||
FASG760103 Optical rotation (Fasman, 1976)
|
||||
FASG760104 pK-N (Fasman, 1976)
|
||||
FASG760105 pK-C (Fasman, 1976)
|
||||
FAUJ830101 Hydrophobic parameter pi (Fauchere-Pliska, 1983)
|
||||
FAUJ880101 Graph shape index (Fauchere et al., 1988)
|
||||
FAUJ880102 Smoothed upsilon steric parameter (Fauchere et al., 1988)
|
||||
FAUJ880103 Normalized van der Waals volume (Fauchere et al., 1988)
|
||||
FAUJ880104 STERIMOL length of the side chain (Fauchere et al., 1988)
|
||||
FAUJ880105 STERIMOL minimum width of the side chain (Fauchere et al., 1988)
|
||||
FAUJ880106 STERIMOL maximum width of the side chain (Fauchere et al., 1988)
|
||||
FAUJ880107 N.m.r. chemical shift of alpha-carbon (Fauchere et al., 1988)
|
||||
FAUJ880108 Localized electrical effect (Fauchere et al., 1988)
|
||||
FAUJ880109 Number of hydrogen bond donors (Fauchere et al., 1988)
|
||||
FAUJ880110 Number of full nonbonding orbitals (Fauchere et al., 1988)
|
||||
FAUJ880111 Positive charge (Fauchere et al., 1988)
|
||||
FAUJ880112 Negative charge (Fauchere et al., 1988)
|
||||
FAUJ880113 pK-a(RCOOH) (Fauchere et al., 1988)
|
||||
FINA770101 Helix-coil equilibrium constant (Finkelstein-Ptitsyn, 1977)
|
||||
FINA910101 Helix initiation parameter at position i-1 (Finkelstein et al., 1991)
|
||||
FINA910102 Helix initiation parameter at position i,i+1,i+2 (Finkelstein et al., 1991)
|
||||
FINA910103 Helix termination parameter at position j-2,j-1,j (Finkelstein et al., 1991)
|
||||
FINA910104 Helix termination parameter at position j+1 (Finkelstein et al., 1991)
|
||||
GARJ730101 Partition coefficient (Garel et al., 1973)
|
||||
GEIM800101 Alpha-helix indices (Geisow-Roberts, 1980)
|
||||
GEIM800102 Alpha-helix indices for alpha-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800103 Alpha-helix indices for beta-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800104 Alpha-helix indices for alpha/beta-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800105 Beta-strand indices (Geisow-Roberts, 1980)
|
||||
GEIM800106 Beta-strand indices for beta-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800107 Beta-strand indices for alpha/beta-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800108 Aperiodic indices (Geisow-Roberts, 1980)
|
||||
GEIM800109 Aperiodic indices for alpha-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800110 Aperiodic indices for beta-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800111 Aperiodic indices for alpha/beta-proteins (Geisow-Roberts, 1980)
|
||||
GOLD730101 Hydrophobicity factor (Goldsack-Chalifoux, 1973)
|
||||
GOLD730102 Residue volume (Goldsack-Chalifoux, 1973)
|
||||
GRAR740101 Composition (Grantham, 1974)
|
||||
GRAR740102 Polarity (Grantham, 1974)
|
||||
GRAR740103 Volume (Grantham, 1974)
|
||||
GUYH850101 Partition energy (Guy, 1985)
|
||||
HOPA770101 Hydration number (Hopfinger, 1971), Cited by Charton-Charton (1982)
|
||||
HOPT810101 Hydrophilicity value (Hopp-Woods, 1981)
|
||||
HUTJ700101 Heat capacity (Hutchens, 1970)
|
||||
HUTJ700102 Absolute entropy (Hutchens, 1970)
|
||||
HUTJ700103 Entropy of formation (Hutchens, 1970)
|
||||
ISOY800101 Normalized relative frequency of alpha-helix (Isogai et al., 1980)
|
||||
ISOY800102 Normalized relative frequency of extended structure (Isogai et al., 1980)
|
||||
ISOY800103 Normalized relative frequency of bend (Isogai et al., 1980)
|
||||
ISOY800104 Normalized relative frequency of bend R (Isogai et al., 1980)
|
||||
ISOY800105 Normalized relative frequency of bend S (Isogai et al., 1980)
|
||||
ISOY800106 Normalized relative frequency of helix end (Isogai et al., 1980)
|
||||
ISOY800107 Normalized relative frequency of double bend (Isogai et al., 1980)
|
||||
ISOY800108 Normalized relative frequency of coil (Isogai et al., 1980)
|
||||
JANJ780101 Average accessible surface area (Janin et al., 1978)
|
||||
JANJ780102 Percentage of buried residues (Janin et al., 1978)
|
||||
JANJ780103 Percentage of exposed residues (Janin et al., 1978)
|
||||
JANJ790101 Ratio of buried and accessible molar fractions (Janin, 1979)
|
||||
JANJ790102 Transfer free energy (Janin, 1979)
|
||||
JOND750101 Hydrophobicity (Jones, 1975)
|
||||
JOND750102 pK (-COOH) (Jones, 1975)
|
||||
JOND920101 Relative frequency of occurrence (Jones et al., 1992)
|
||||
JOND920102 Relative mutability (Jones et al., 1992)
|
||||
JUKT750101 Amino acid distribution (Jukes et al., 1975)
|
||||
JUNJ780101 Sequence frequency (Jungck, 1978)
|
||||
KANM800101 Average relative probability of helix (Kanehisa-Tsong, 1980)
|
||||
KANM800102 Average relative probability of beta-sheet (Kanehisa-Tsong, 1980)
|
||||
KANM800103 Average relative probability of inner helix (Kanehisa-Tsong, 1980)
|
||||
KANM800104 Average relative probability of inner beta-sheet (Kanehisa-Tsong, 1980)
|
||||
KARP850101 Flexibility parameter for no rigid neighbors (Karplus-Schulz, 1985)
|
||||
KARP850102 Flexibility parameter for one rigid neighbor (Karplus-Schulz, 1985)
|
||||
KARP850103 Flexibility parameter for two rigid neighbors (Karplus-Schulz, 1985)
|
||||
KHAG800101 The Kerr-constant increments (Khanarian-Moore, 1980)
|
||||
KLEP840101 Net charge (Klein et al., 1984)
|
||||
KRIW710101 Side chain interaction parameter (Krigbaum-Rubin, 1971)
|
||||
KRIW790101 Side chain interaction parameter (Krigbaum-Komoriya, 1979)
|
||||
KRIW790102 Fraction of site occupied by water (Krigbaum-Komoriya, 1979)
|
||||
KRIW790103 Side chain volume (Krigbaum-Komoriya, 1979)
|
||||
KYTJ820101 Hydropathy index (Kyte-Doolittle, 1982)
|
||||
LAWE840101 Transfer free energy, CHP/water (Lawson et al., 1984)
|
||||
LEVM760101 Hydrophobic parameter (Levitt, 1976)
|
||||
LEVM760102 Distance between C-alpha and centroid of side chain (Levitt, 1976)
|
||||
LEVM760103 Side chain angle theta(AAR) (Levitt, 1976)
|
||||
LEVM760104 Side chain torsion angle phi(AAAR) (Levitt, 1976)
|
||||
LEVM760105 Radius of gyration of side chain (Levitt, 1976)
|
||||
LEVM760106 van der Waals parameter R0 (Levitt, 1976)
|
||||
LEVM760107 van der Waals parameter epsilon (Levitt, 1976)
|
||||
LEVM780101 Normalized frequency of alpha-helix, with weights (Levitt, 1978)
|
||||
LEVM780102 Normalized frequency of beta-sheet, with weights (Levitt, 1978)
|
||||
LEVM780103 Normalized frequency of reverse turn, with weights (Levitt, 1978)
|
||||
LEVM780104 Normalized frequency of alpha-helix, unweighted (Levitt, 1978)
|
||||
LEVM780105 Normalized frequency of beta-sheet, unweighted (Levitt, 1978)
|
||||
LEVM780106 Normalized frequency of reverse turn, unweighted (Levitt, 1978)
|
||||
LEWP710101 Frequency of occurrence in beta-bends (Lewis et al., 1971)
|
||||
LIFS790101 Conformational preference for all beta-strands (Lifson-Sander, 1979)
|
||||
LIFS790102 Conformational preference for parallel beta-strands (Lifson-Sander, 1979)
|
||||
LIFS790103 Conformational preference for antiparallel beta-strands (Lifson-Sander, 1979)
|
||||
MANP780101 Average surrounding hydrophobicity (Manavalan-Ponnuswamy, 1978)
|
||||
MAXF760101 Normalized frequency of alpha-helix (Maxfield-Scheraga, 1976)
|
||||
MAXF760102 Normalized frequency of extended structure (Maxfield-Scheraga, 1976)
|
||||
MAXF760103 Normalized frequency of zeta R (Maxfield-Scheraga, 1976)
|
||||
MAXF760104 Normalized frequency of left-handed alpha-helix (Maxfield-Scheraga, 1976)
|
||||
MAXF760105 Normalized frequency of zeta L (Maxfield-Scheraga, 1976)
|
||||
MAXF760106 Normalized frequency of alpha region (Maxfield-Scheraga, 1976)
|
||||
MCMT640101 Refractivity (McMeekin et al., 1964), Cited by Jones (1975)
|
||||
MEEJ800101 Retention coefficient in HPLC, pH7.4 (Meek, 1980)
|
||||
MEEJ800102 Retention coefficient in HPLC, pH2.1 (Meek, 1980)
|
||||
MEEJ810101 Retention coefficient in NaClO4 (Meek-Rossetti, 1981)
|
||||
MEEJ810102 Retention coefficient in NaH2PO4 (Meek-Rossetti, 1981)
|
||||
MEIH800101 Average reduced distance for C-alpha (Meirovitch et al., 1980)
|
||||
MEIH800102 Average reduced distance for side chain (Meirovitch et al., 1980)
|
||||
MEIH800103 Average side chain orientation angle (Meirovitch et al., 1980)
|
||||
MIYS850101 Effective partition energy (Miyazawa-Jernigan, 1985)
|
||||
NAGK730101 Normalized frequency of alpha-helix (Nagano, 1973)
|
||||
NAGK730102 Normalized frequency of beta-structure (Nagano, 1973)
|
||||
NAGK730103 Normalized frequency of coil (Nagano, 1973)
|
||||
NAKH900101 AA composition of total proteins (Nakashima et al., 1990)
|
||||
NAKH900102 SD of AA composition of total proteins (Nakashima et al., 1990)
|
||||
NAKH900103 AA composition of mt-proteins (Nakashima et al., 1990)
|
||||
NAKH900104 Normalized composition of mt-proteins (Nakashima et al., 1990)
|
||||
NAKH900105 AA composition of mt-proteins from animal (Nakashima et al., 1990)
|
||||
NAKH900106 Normalized composition from animal (Nakashima et al., 1990)
|
||||
NAKH900107 AA composition of mt-proteins from fungi and plant (Nakashima et al., 1990)
|
||||
NAKH900108 Normalized composition from fungi and plant (Nakashima et al., 1990)
|
||||
NAKH900109 AA composition of membrane proteins (Nakashima et al., 1990)
|
||||
NAKH900110 Normalized composition of membrane proteins (Nakashima et al., 1990)
|
||||
NAKH900111 Transmembrane regions of non-mt-proteins (Nakashima et al., 1990)
|
||||
NAKH900112 Transmembrane regions of mt-proteins (Nakashima et al., 1990)
|
||||
NAKH900113 Ratio of average and computed composition (Nakashima et al., 1990)
|
||||
NAKH920101 AA composition of CYT of single-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920102 AA composition of CYT2 of single-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920103 AA composition of EXT of single-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920104 AA composition of EXT2 of single-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920105 AA composition of MEM of single-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920106 AA composition of CYT of multi-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920107 AA composition of EXT of multi-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920108 AA composition of MEM of multi-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NISK800101 8 A contact number (Nishikawa-Ooi, 1980)
|
||||
NISK860101 14 A contact number (Nishikawa-Ooi, 1986)
|
||||
NOZY710101 Transfer energy, organic solvent/water (Nozaki-Tanford, 1971)
|
||||
OOBM770101 Average non-bonded energy per atom (Oobatake-Ooi, 1977)
|
||||
OOBM770102 Short and medium range non-bonded energy per atom (Oobatake-Ooi, 1977)
|
||||
OOBM770103 Long range non-bonded energy per atom (Oobatake-Ooi, 1977)
|
||||
OOBM770104 Average non-bonded energy per residue (Oobatake-Ooi, 1977)
|
||||
OOBM770105 Short and medium range non-bonded energy per residue (Oobatake-Ooi, 1977)
|
||||
OOBM850101 Optimized beta-structure-coil equilibrium constant (Oobatake et al., 1985)
|
||||
OOBM850102 Optimized propensity to form reverse turn (Oobatake et al., 1985)
|
||||
OOBM850103 Optimized transfer energy parameter (Oobatake et al., 1985)
|
||||
OOBM850104 Optimized average non-bonded energy per atom (Oobatake et al., 1985)
|
||||
OOBM850105 Optimized side chain interaction parameter (Oobatake et al., 1985)
|
||||
PALJ810101 Normalized frequency of alpha-helix from LG (Palau et al., 1981)
|
||||
PALJ810102 Normalized frequency of alpha-helix from CF (Palau et al., 1981)
|
||||
PALJ810103 Normalized frequency of beta-sheet from LG (Palau et al., 1981)
|
||||
PALJ810104 Normalized frequency of beta-sheet from CF (Palau et al., 1981)
|
||||
PALJ810105 Normalized frequency of turn from LG (Palau et al., 1981)
|
||||
PALJ810106 Normalized frequency of turn from CF (Palau et al., 1981)
|
||||
PALJ810107 Normalized frequency of alpha-helix in all-alpha class (Palau et al., 1981)
|
||||
PALJ810108 Normalized frequency of alpha-helix in alpha+beta class (Palau et al., 1981)
|
||||
PALJ810109 Normalized frequency of alpha-helix in alpha/beta class (Palau et al., 1981)
|
||||
PALJ810110 Normalized frequency of beta-sheet in all-beta class (Palau et al., 1981)
|
||||
PALJ810111 Normalized frequency of beta-sheet in alpha+beta class (Palau et al., 1981)
|
||||
PALJ810112 Normalized frequency of beta-sheet in alpha/beta class (Palau et al., 1981)
|
||||
PALJ810113 Normalized frequency of turn in all-alpha class (Palau et al., 1981)
|
||||
PALJ810114 Normalized frequency of turn in all-beta class (Palau et al., 1981)
|
||||
PALJ810115 Normalized frequency of turn in alpha+beta class (Palau et al., 1981)
|
||||
PALJ810116 Normalized frequency of turn in alpha/beta class (Palau et al., 1981)
|
||||
PARJ860101 HPLC parameter (Parker et al., 1986)
|
||||
PLIV810101 Partition coefficient (Pliska et al., 1981)
|
||||
PONP800101 Surrounding hydrophobicity in folded form (Ponnuswamy et al., 1980)
|
||||
PONP800102 Average gain in surrounding hydrophobicity (Ponnuswamy et al., 1980)
|
||||
PONP800103 Average gain ratio in surrounding hydrophobicity (Ponnuswamy et al., 1980)
|
||||
PONP800104 Surrounding hydrophobicity in alpha-helix (Ponnuswamy et al., 1980)
|
||||
PONP800105 Surrounding hydrophobicity in beta-sheet (Ponnuswamy et al., 1980)
|
||||
PONP800106 Surrounding hydrophobicity in turn (Ponnuswamy et al., 1980)
|
||||
PONP800107 Accessibility reduction ratio (Ponnuswamy et al., 1980)
|
||||
PONP800108 Average number of surrounding residues (Ponnuswamy et al., 1980)
|
||||
PRAM820101 Intercept in regression analysis (Prabhakaran-Ponnuswamy, 1982)
|
||||
PRAM820102 Slope in regression analysis x 1.0E1 (Prabhakaran-Ponnuswamy, 1982)
|
||||
PRAM820103 Correlation coefficient in regression analysis (Prabhakaran-Ponnuswamy, 1982)
|
||||
PRAM900101 Hydrophobicity (Prabhakaran, 1990)
|
||||
PRAM900102 Relative frequency in alpha-helix (Prabhakaran, 1990)
|
||||
PRAM900103 Relative frequency in beta-sheet (Prabhakaran, 1990)
|
||||
PRAM900104 Relative frequency in reverse-turn (Prabhakaran, 1990)
|
||||
PTIO830101 Helix-coil equilibrium constant (Ptitsyn-Finkelstein, 1983)
|
||||
PTIO830102 Beta-coil equilibrium constant (Ptitsyn-Finkelstein, 1983)
|
||||
QIAN880101 Weights for alpha-helix at the window position of -6 (Qian-Sejnowski, 1988)
|
||||
QIAN880102 Weights for alpha-helix at the window position of -5 (Qian-Sejnowski, 1988)
|
||||
QIAN880103 Weights for alpha-helix at the window position of -4 (Qian-Sejnowski, 1988)
|
||||
QIAN880104 Weights for alpha-helix at the window position of -3 (Qian-Sejnowski, 1988)
|
||||
QIAN880105 Weights for alpha-helix at the window position of -2 (Qian-Sejnowski, 1988)
|
||||
QIAN880106 Weights for alpha-helix at the window position of -1 (Qian-Sejnowski, 1988)
|
||||
QIAN880107 Weights for alpha-helix at the window position of 0 (Qian-Sejnowski, 1988)
|
||||
QIAN880108 Weights for alpha-helix at the window position of 1 (Qian-Sejnowski, 1988)
|
||||
QIAN880109 Weights for alpha-helix at the window position of 2 (Qian-Sejnowski, 1988)
|
||||
QIAN880110 Weights for alpha-helix at the window position of 3 (Qian-Sejnowski, 1988)
|
||||
QIAN880111 Weights for alpha-helix at the window position of 4 (Qian-Sejnowski, 1988)
|
||||
QIAN880112 Weights for alpha-helix at the window position of 5 (Qian-Sejnowski, 1988)
|
||||
QIAN880113 Weights for alpha-helix at the window position of 6 (Qian-Sejnowski, 1988)
|
||||
QIAN880114 Weights for beta-sheet at the window position of -6 (Qian-Sejnowski, 1988)
|
||||
QIAN880115 Weights for beta-sheet at the window position of -5 (Qian-Sejnowski, 1988)
|
||||
QIAN880116 Weights for beta-sheet at the window position of -4 (Qian-Sejnowski, 1988)
|
||||
QIAN880117 Weights for beta-sheet at the window position of -3 (Qian-Sejnowski, 1988)
|
||||
QIAN880118 Weights for beta-sheet at the window position of -2 (Qian-Sejnowski, 1988)
|
||||
QIAN880119 Weights for beta-sheet at the window position of -1 (Qian-Sejnowski, 1988)
|
||||
QIAN880120 Weights for beta-sheet at the window position of 0 (Qian-Sejnowski, 1988)
|
||||
QIAN880121 Weights for beta-sheet at the window position of 1 (Qian-Sejnowski, 1988)
|
||||
QIAN880122 Weights for beta-sheet at the window position of 2 (Qian-Sejnowski, 1988)
|
||||
QIAN880123 Weights for beta-sheet at the window position of 3 (Qian-Sejnowski, 1988)
|
||||
QIAN880124 Weights for beta-sheet at the window position of 4 (Qian-Sejnowski, 1988)
|
||||
QIAN880125 Weights for beta-sheet at the window position of 5 (Qian-Sejnowski, 1988)
|
||||
QIAN880126 Weights for beta-sheet at the window position of 6 (Qian-Sejnowski, 1988)
|
||||
QIAN880127 Weights for coil at the window position of -6 (Qian-Sejnowski, 1988)
|
||||
QIAN880128 Weights for coil at the window position of -5 (Qian-Sejnowski, 1988)
|
||||
QIAN880129 Weights for coil at the window position of -4 (Qian-Sejnowski, 1988)
|
||||
QIAN880130 Weights for coil at the window position of -3 (Qian-Sejnowski, 1988)
|
||||
QIAN880131 Weights for coil at the window position of -2 (Qian-Sejnowski, 1988)
|
||||
QIAN880132 Weights for coil at the window position of -1 (Qian-Sejnowski, 1988)
|
||||
QIAN880133 Weights for coil at the window position of 0 (Qian-Sejnowski, 1988)
|
||||
QIAN880134 Weights for coil at the window position of 1 (Qian-Sejnowski, 1988)
|
||||
QIAN880135 Weights for coil at the window position of 2 (Qian-Sejnowski, 1988)
|
||||
QIAN880136 Weights for coil at the window position of 3 (Qian-Sejnowski, 1988)
|
||||
QIAN880137 Weights for coil at the window position of 4 (Qian-Sejnowski, 1988)
|
||||
QIAN880138 Weights for coil at the window position of 5 (Qian-Sejnowski, 1988)
|
||||
QIAN880139 Weights for coil at the window position of 6 (Qian-Sejnowski, 1988)
|
||||
RACS770101 Average reduced distance for C-alpha (Rackovsky-Scheraga, 1977)
|
||||
RACS770102 Average reduced distance for side chain (Rackovsky-Scheraga, 1977)
|
||||
RACS770103 Side chain orientational preference (Rackovsky-Scheraga, 1977)
|
||||
RACS820101 Average relative fractional occurrence in A0(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820102 Average relative fractional occurrence in AR(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820103 Average relative fractional occurrence in AL(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820104 Average relative fractional occurrence in EL(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820105 Average relative fractional occurrence in E0(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820106 Average relative fractional occurrence in ER(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820107 Average relative fractional occurrence in A0(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RACS820108 Average relative fractional occurrence in AR(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RACS820109 Average relative fractional occurrence in AL(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RACS820110 Average relative fractional occurrence in EL(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RACS820111 Average relative fractional occurrence in E0(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RACS820112 Average relative fractional occurrence in ER(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RACS820113 Value of theta(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820114 Value of theta(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RADA880101 Transfer free energy from chx to wat (Radzicka-Wolfenden, 1988)
|
||||
RADA880102 Transfer free energy from oct to wat (Radzicka-Wolfenden, 1988)
|
||||
RADA880103 Transfer free energy from vap to chx (Radzicka-Wolfenden, 1988)
|
||||
RADA880104 Transfer free energy from chx to oct (Radzicka-Wolfenden, 1988)
|
||||
RADA880105 Transfer free energy from vap to oct (Radzicka-Wolfenden, 1988)
|
||||
RADA880106 Accessible surface area (Radzicka-Wolfenden, 1988)
|
||||
RADA880107 Energy transfer from out to in(95%buried) (Radzicka-Wolfenden, 1988)
|
||||
RADA880108 Mean polarity (Radzicka-Wolfenden, 1988)
|
||||
RICJ880101 Relative preference value at N" (Richardson-Richardson, 1988)
|
||||
RICJ880102 Relative preference value at N' (Richardson-Richardson, 1988)
|
||||
RICJ880103 Relative preference value at N-cap (Richardson-Richardson, 1988)
|
||||
RICJ880104 Relative preference value at N1 (Richardson-Richardson, 1988)
|
||||
RICJ880105 Relative preference value at N2 (Richardson-Richardson, 1988)
|
||||
RICJ880106 Relative preference value at N3 (Richardson-Richardson, 1988)
|
||||
RICJ880107 Relative preference value at N4 (Richardson-Richardson, 1988)
|
||||
RICJ880108 Relative preference value at N5 (Richardson-Richardson, 1988)
|
||||
RICJ880109 Relative preference value at Mid (Richardson-Richardson, 1988)
|
||||
RICJ880110 Relative preference value at C5 (Richardson-Richardson, 1988)
|
||||
RICJ880111 Relative preference value at C4 (Richardson-Richardson, 1988)
|
||||
RICJ880112 Relative preference value at C3 (Richardson-Richardson, 1988)
|
||||
RICJ880113 Relative preference value at C2 (Richardson-Richardson, 1988)
|
||||
RICJ880114 Relative preference value at C1 (Richardson-Richardson, 1988)
|
||||
RICJ880115 Relative preference value at C-cap (Richardson-Richardson, 1988)
|
||||
RICJ880116 Relative preference value at C' (Richardson-Richardson, 1988)
|
||||
RICJ880117 Relative preference value at C" (Richardson-Richardson, 1988)
|
||||
ROBB760101 Information measure for alpha-helix (Robson-Suzuki, 1976)
|
||||
ROBB760102 Information measure for N-terminal helix (Robson-Suzuki, 1976)
|
||||
ROBB760103 Information measure for middle helix (Robson-Suzuki, 1976)
|
||||
ROBB760104 Information measure for C-terminal helix (Robson-Suzuki, 1976)
|
||||
ROBB760105 Information measure for extended (Robson-Suzuki, 1976)
|
||||
ROBB760106 Information measure for pleated-sheet (Robson-Suzuki, 1976)
|
||||
ROBB760107 Information measure for extended without H-bond (Robson-Suzuki, 1976)
|
||||
ROBB760108 Information measure for turn (Robson-Suzuki, 1976)
|
||||
ROBB760109 Information measure for N-terminal turn (Robson-Suzuki, 1976)
|
||||
ROBB760110 Information measure for middle turn (Robson-Suzuki, 1976)
|
||||
ROBB760111 Information measure for C-terminal turn (Robson-Suzuki, 1976)
|
||||
ROBB760112 Information measure for coil (Robson-Suzuki, 1976)
|
||||
ROBB760113 Information measure for loop (Robson-Suzuki, 1976)
|
||||
ROBB790101 Hydration free energy (Robson-Osguthorpe, 1979)
|
||||
ROSG850101 Mean area buried on transfer (Rose et al., 1985)
|
||||
ROSG850102 Mean fractional area loss (Rose et al., 1985)
|
||||
ROSM880101 Side chain hydropathy, uncorrected for solvation (Roseman, 1988)
|
||||
ROSM880102 Side chain hydropathy, corrected for solvation (Roseman, 1988)
|
||||
ROSM880103 Loss of Side chain hydropathy by helix formation (Roseman, 1988)
|
||||
SIMZ760101 Transfer free energy (Simon, 1976), Cited by Charton-Charton (1982)
|
||||
SNEP660101 Principal component I (Sneath, 1966)
|
||||
SNEP660102 Principal component II (Sneath, 1966)
|
||||
SNEP660103 Principal component III (Sneath, 1966)
|
||||
SNEP660104 Principal component IV (Sneath, 1966)
|
||||
SUEM840101 Zimm-Bragg parameter s at 20 C (Sueki et al., 1984)
|
||||
SUEM840102 Zimm-Bragg parameter sigma x 1.0E4 (Sueki et al., 1984)
|
||||
SWER830101 Optimal matching hydrophobicity (Sweet-Eisenberg, 1983)
|
||||
TANS770101 Normalized frequency of alpha-helix (Tanaka-Scheraga, 1977)
|
||||
TANS770102 Normalized frequency of isolated helix (Tanaka-Scheraga, 1977)
|
||||
TANS770103 Normalized frequency of extended structure (Tanaka-Scheraga, 1977)
|
||||
TANS770104 Normalized frequency of chain reversal R (Tanaka-Scheraga, 1977)
|
||||
TANS770105 Normalized frequency of chain reversal S (Tanaka-Scheraga, 1977)
|
||||
TANS770106 Normalized frequency of chain reversal D (Tanaka-Scheraga, 1977)
|
||||
TANS770107 Normalized frequency of left-handed helix (Tanaka-Scheraga, 1977)
|
||||
TANS770108 Normalized frequency of zeta R (Tanaka-Scheraga, 1977)
|
||||
TANS770109 Normalized frequency of coil (Tanaka-Scheraga, 1977)
|
||||
TANS770110 Normalized frequency of chain reversal (Tanaka-Scheraga, 1977)
|
||||
VASM830101 Relative population of conformational state A (Vasquez et al., 1983)
|
||||
VASM830102 Relative population of conformational state C (Vasquez et al., 1983)
|
||||
VASM830103 Relative population of conformational state E (Vasquez et al., 1983)
|
||||
VELV850101 Electron-ion interaction potential (Veljkovic et al., 1985)
|
||||
VENT840101 Bitterness (Venanzi, 1984)
|
||||
VHEG790101 Transfer free energy to lipophilic phase (von Heijne-Blomberg, 1979)
|
||||
WARP780101 Average interactions per side chain atom (Warme-Morgan, 1978)
|
||||
WEBA780101 RF value in high salt chromatography (Weber-Lacey, 1978)
|
||||
WERD780101 Propensity to be buried inside (Wertz-Scheraga, 1978)
|
||||
WERD780102 Free energy change of epsilon(i) to epsilon(ex) (Wertz-Scheraga, 1978)
|
||||
WERD780103 Free energy change of alpha(Ri) to alpha(Rh) (Wertz-Scheraga, 1978)
|
||||
WERD780104 Free energy change of epsilon(i) to alpha(Rh) (Wertz-Scheraga, 1978)
|
||||
WOEC730101 Polar requirement (Woese, 1973)
|
||||
WOLR810101 Hydration potential (Wolfenden et al., 1981)
|
||||
WOLS870101 Principal property value z1 (Wold et al., 1987)
|
||||
WOLS870102 Principal property value z2 (Wold et al., 1987)
|
||||
WOLS870103 Principal property value z3 (Wold et al., 1987)
|
||||
YUTK870101 Unfolding Gibbs energy in water, pH7.0 (Yutani et al., 1987)
|
||||
YUTK870102 Unfolding Gibbs energy in water, pH9.0 (Yutani et al., 1987)
|
||||
YUTK870103 Activation Gibbs energy of unfolding, pH7.0 (Yutani et al., 1987)
|
||||
YUTK870104 Activation Gibbs energy of unfolding, pH9.0 (Yutani et al., 1987)
|
||||
ZASB820101 Dependence of partition coefficient on ionic strength (Zaslavsky et al., 1982)
|
||||
ZIMJ680101 Hydrophobicity (Zimmerman et al., 1968)
|
||||
ZIMJ680102 Bulkiness (Zimmerman et al., 1968)
|
||||
ZIMJ680103 Polarity (Zimmerman et al., 1968)
|
||||
ZIMJ680104 Isoelectric point (Zimmerman et al., 1968)
|
||||
ZIMJ680105 RF rank (Zimmerman et al., 1968)
|
||||
AURR980101 Normalized positional residue frequency at helix termini N4' (Aurora-Rose, 1998)
|
||||
AURR980102 Normalized positional residue frequency at helix termini N"' (Aurora-Rose, 1998)
|
||||
AURR980103 Normalized positional residue frequency at helix termini N" (Aurora-Rose, 1998)
|
||||
AURR980104 Normalized positional residue frequency at helix termini N' (Aurora-Rose, 1998)
|
||||
AURR980105 Normalized positional residue frequency at helix termini Nc (Aurora-Rose, 1998)
|
||||
AURR980106 Normalized positional residue frequency at helix termini N1 (Aurora-Rose, 1998)
|
||||
AURR980107 Normalized positional residue frequency at helix termini N2 (Aurora-Rose, 1998)
|
||||
AURR980108 Normalized positional residue frequency at helix termini N3 (Aurora-Rose, 1998)
|
||||
AURR980109 Normalized positional residue frequency at helix termini N4 (Aurora-Rose, 1998)
|
||||
AURR980110 Normalized positional residue frequency at helix termini N5 (Aurora-Rose, 1998)
|
||||
AURR980111 Normalized positional residue frequency at helix termini C5 (Aurora-Rose, 1998)
|
||||
AURR980112 Normalized positional residue frequency at helix termini C4 (Aurora-Rose, 1998)
|
||||
AURR980113 Normalized positional residue frequency at helix termini C3 (Aurora-Rose, 1998)
|
||||
AURR980114 Normalized positional residue frequency at helix termini C2 (Aurora-Rose, 1998)
|
||||
AURR980115 Normalized positional residue frequency at helix termini C1 (Aurora-Rose, 1998)
|
||||
AURR980116 Normalized positional residue frequency at helix termini Cc (Aurora-Rose, 1998)
|
||||
AURR980117 Normalized positional residue frequency at helix termini C' (Aurora-Rose, 1998)
|
||||
AURR980118 Normalized positional residue frequency at helix termini C" (Aurora-Rose, 1998)
|
||||
AURR980119 Normalized positional residue frequency at helix termini C"' (Aurora-Rose, 1998)
|
||||
AURR980120 Normalized positional residue frequency at helix termini C4' (Aurora-Rose, 1998)
|
||||
ONEK900101 Delta G values for the peptides extrapolated to 0 M urea (O'Neil-DeGrado, 1990)
|
||||
ONEK900102 Helix formation parameters (delta delta G) (O'Neil-DeGrado, 1990)
|
||||
VINM940101 Normalized flexibility parameters (B-values), average (Vihinen et al., 1994)
|
||||
VINM940102 Normalized flexibility parameters (B-values) for each residue surrounded by no rigid neighbours (Vihinen et al., 1994)
|
||||
VINM940103 Normalized flexibility parameters (B-values) for each residue surrounded by one rigid neighbour (Vihinen et al., 1994)
|
||||
VINM940104 Normalized flexibility parameters (B-values) for each residue surrounded by two rigid neighbours (Vihinen et al., 1994)
|
||||
MUNV940101 Free energy in alpha-helical conformation (Munoz-Serrano, 1994)
|
||||
MUNV940102 Free energy in alpha-helical region (Munoz-Serrano, 1994)
|
||||
MUNV940103 Free energy in beta-strand conformation (Munoz-Serrano, 1994)
|
||||
MUNV940104 Free energy in beta-strand region (Munoz-Serrano, 1994)
|
||||
MUNV940105 Free energy in beta-strand region (Munoz-Serrano, 1994)
|
||||
WIMW960101 Free energies of transfer of AcWl-X-LL peptides from bilayer interface to water (Wimley-White, 1996)
|
||||
KIMC930101 Thermodynamic beta sheet propensity (Kim-Berg, 1993)
|
||||
MONM990101 Turn propensity scale for transmembrane helices (Monne et al., 1999)
|
||||
BLAM930101 Alpha helix propensity of position 44 in T4 lysozyme (Blaber et al., 1993)
|
||||
PARS000101 p-Values of mesophilic proteins based on the distributions of B values (Parthasarathy-Murthy, 2000)
|
||||
PARS000102 p-Values of thermophilic proteins based on the distributions of B values (Parthasarathy-Murthy, 2000)
|
||||
KUMS000101 Distribution of amino acid residues in the 18 non-redundant families of thermophilic proteins (Kumar et al., 2000)
|
||||
KUMS000102 Distribution of amino acid residues in the 18 non-redundant families of mesophilic proteins (Kumar et al., 2000)
|
||||
KUMS000103 Distribution of amino acid residues in the alpha-helices in thermophilic proteins (Kumar et al., 2000)
|
||||
KUMS000104 Distribution of amino acid residues in the alpha-helices in mesophilic proteins (Kumar et al., 2000)
|
||||
TAKK010101 Side-chain contribution to protein stability (kJ/mol) (Takano-Yutani, 2001)
|
||||
FODM020101 Propensity of amino acids within pi-helices (Fodje-Al-Karadaghi, 2002)
|
||||
NADH010101 Hydropathy scale based on self-information values in the two-state model (5% accessibility) (Naderi-Manesh et al., 2001)
|
||||
NADH010102 Hydropathy scale based on self-information values in the two-state model (9% accessibility) (Naderi-Manesh et al., 2001)
|
||||
NADH010103 Hydropathy scale based on self-information values in the two-state model (16% accessibility) (Naderi-Manesh et al., 2001)
|
||||
NADH010104 Hydropathy scale based on self-information values in the two-state model (20% accessibility) (Naderi-Manesh et al., 2001)
|
||||
NADH010105 Hydropathy scale based on self-information values in the two-state model (25% accessibility) (Naderi-Manesh et al., 2001)
|
||||
NADH010106 Hydropathy scale based on self-information values in the two-state model (36% accessibility) (Naderi-Manesh et al., 2001)
|
||||
NADH010107 Hydropathy scale based on self-information values in the two-state model (50% accessibility) (Naderi-Manesh et al., 2001)
|
||||
MONM990201 Averaged turn propensities in a transmembrane helix (Monne et al., 1999)
|
||||
KOEP990101 Alpha-helix propensity derived from designed sequences (Koehl-Levitt, 1999)
|
||||
KOEP990102 Beta-sheet propensity derived from designed sequences (Koehl-Levitt, 1999)
|
||||
CEDJ970101 Composition of amino acids in extracellular proteins (percent) (Cedano et al., 1997)
|
||||
CEDJ970102 Composition of amino acids in anchored proteins (percent) (Cedano et al., 1997)
|
||||
CEDJ970103 Composition of amino acids in membrane proteins (percent) (Cedano et al., 1997)
|
||||
CEDJ970104 Composition of amino acids in intracellular proteins (percent) (Cedano et al., 1997)
|
||||
CEDJ970105 Composition of amino acids in nuclear proteins (percent) (Cedano et al., 1997)
|
||||
FUKS010101 Surface composition of amino acids in intracellular proteins of thermophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010102 Surface composition of amino acids in intracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010103 Surface composition of amino acids in extracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010104 Surface composition of amino acids in nuclear proteins (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010105 Interior composition of amino acids in intracellular proteins of thermophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010106 Interior composition of amino acids in intracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010107 Interior composition of amino acids in extracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010108 Interior composition of amino acids in nuclear proteins (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010109 Entire chain composition of amino acids in intracellular proteins of thermophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010110 Entire chain composition of amino acids in intracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010111 Entire chain composition of amino acids in extracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010112 Entire chain composition of amino acids in nuclear proteins (percent) (Fukuchi-Nishikawa, 2001)
|
||||
AVBF000101 Screening coefficients gamma, local (Avbelj, 2000)
|
||||
AVBF000102 Screening coefficients gamma, non-local (Avbelj, 2000)
|
||||
AVBF000103 Slopes tripeptide, FDPB VFF neutral (Avbelj, 2000)
|
||||
AVBF000104 Slopes tripeptides, LD VFF neutral (Avbelj, 2000)
|
||||
AVBF000105 Slopes tripeptide, FDPB VFF noside (Avbelj, 2000)
|
||||
AVBF000106 Slopes tripeptide FDPB VFF all (Avbelj, 2000)
|
||||
AVBF000107 Slopes tripeptide FDPB PARSE neutral (Avbelj, 2000)
|
||||
AVBF000108 Slopes dekapeptide, FDPB VFF neutral (Avbelj, 2000)
|
||||
AVBF000109 Slopes proteins, FDPB VFF neutral (Avbelj, 2000)
|
||||
YANJ020101 Side-chain conformation by gaussian evolutionary method (Yang et al., 2002)
|
||||
MITS020101 Amphiphilicity index (Mitaku et al., 2002)
|
||||
TSAJ990101 Volumes including the crystallographic waters using the ProtOr (Tsai et al., 1999)
|
||||
TSAJ990102 Volumes not including the crystallographic waters using the ProtOr (Tsai et al., 1999)
|
||||
COSI940101 Electron-ion interaction potential values (Cosic, 1994)
|
||||
PONP930101 Hydrophobicity scales (Ponnuswamy, 1993)
|
||||
WILM950101 Hydrophobicity coefficient in RP-HPLC, C18 with 0.1%TFA/MeCN/H2O (Wilce et al., 1995)
|
||||
WILM950102 Hydrophobicity coefficient in RP-HPLC, C8 with 0.1%TFA/MeCN/H2O (Wilce et al., 1995)
|
||||
WILM950103 Hydrophobicity coefficient in RP-HPLC, C4 with 0.1%TFA/MeCN/H2O (Wilce et al., 1995)
|
||||
WILM950104 Hydrophobicity coefficient in RP-HPLC, C18 with 0.1%TFA/2-PrOH/MeCN/H2O (Wilce et al., 1995)
|
||||
KUHL950101 Hydrophilicity scale (Kuhn et al., 1995)
|
||||
GUOD860101 Retention coefficient at pH 2 (Guo et al., 1986)
|
||||
JURD980101 Modified Kyte-Doolittle hydrophobicity scale (Juretic et al., 1998)
|
||||
BASU050101 Interactivity scale obtained from the contact matrix (Bastolla et al., 2005)
|
||||
BASU050102 Interactivity scale obtained by maximizing the mean of correlation coefficient over single-domain globular proteins (Bastolla et al., 2005)
|
||||
BASU050103 Interactivity scale obtained by maximizing the mean of correlation coefficient over pairs of sequences sharing the TIM barrel fold (Bastolla et al., 2005)
|
||||
SUYM030101 Linker propensity index (Suyama-Ohara, 2003)
|
||||
PUNT030101 Knowledge-based membrane-propensity scale from 1D_Helix in MPtopo databases (Punta-Maritan, 2003)
|
||||
PUNT030102 Knowledge-based membrane-propensity scale from 3D_Helix in MPtopo databases (Punta-Maritan, 2003)
|
||||
GEOR030101 Linker propensity from all dataset (George-Heringa, 2003)
|
||||
GEOR030102 Linker propensity from 1-linker dataset (George-Heringa, 2003)
|
||||
GEOR030103 Linker propensity from 2-linker dataset (George-Heringa, 2003)
|
||||
GEOR030104 Linker propensity from 3-linker dataset (George-Heringa, 2003)
|
||||
GEOR030105 Linker propensity from small dataset (linker length is less than six residues) (George-Heringa, 2003)
|
||||
GEOR030106 Linker propensity from medium dataset (linker length is between six and 14 residues) (George-Heringa, 2003)
|
||||
GEOR030107 Linker propensity from long dataset (linker length is greater than 14 residues) (George-Heringa, 2003)
|
||||
GEOR030108 Linker propensity from helical (annotated by DSSP) dataset (George-Heringa, 2003)
|
||||
GEOR030109 Linker propensity from non-helical (annotated by DSSP) dataset (George-Heringa, 2003)
|
||||
ZHOH040101 The stability scale from the knowledge-based atom-atom potential (Zhou-Zhou, 2004)
|
||||
ZHOH040102 The relative stability scale extracted from mutation experiments (Zhou-Zhou, 2004)
|
||||
ZHOH040103 Buriability (Zhou-Zhou, 2004)
|
||||
BAEK050101 Linker index (Bae et al., 2005)
|
||||
HARY940101 Mean volumes of residues buried in protein interiors (Harpaz et al., 1994)
|
||||
PONJ960101 Average volumes of residues (Pontius et al., 1996)
|
||||
DIGM050101 Hydrostatic pressure asymmetry index, PAI (Di Giulio, 2005)
|
||||
WOLR790101 Hydrophobicity index (Wolfenden et al., 1979)
|
||||
OLSK800101 Average internal preferences (Olsen, 1980)
|
||||
KIDA850101 Hydrophobicity-related index (Kidera et al., 1985)
|
||||
GUYH850102 Apparent partition energies calculated from Wertz-Scheraga index (Guy, 1985)
|
||||
GUYH850103 Apparent partition energies calculated from Robson-Osguthorpe index (Guy, 1985)
|
||||
GUYH850104 Apparent partition energies calculated from Janin index (Guy, 1985)
|
||||
GUYH850105 Apparent partition energies calculated from Chothia index (Guy, 1985)
|
||||
ROSM880104 Hydropathies of amino acid side chains, neutral form (Roseman, 1988)
|
||||
ROSM880105 Hydropathies of amino acid side chains, pi-values in pH 7.0 (Roseman, 1988)
|
||||
JACR890101 Weights from the IFH scale (Jacobs-White, 1989)
|
||||
COWR900101 Hydrophobicity index, 3.0 pH (Cowan-Whittaker, 1990)
|
||||
BLAS910101 Scaled side chain hydrophobicity values (Black-Mould, 1991)
|
||||
CASG920101 Hydrophobicity scale from native protein structures (Casari-Sippl, 1992)
|
||||
CORJ870101 NNEIG index (Cornette et al., 1987)
|
||||
CORJ870102 SWEIG index (Cornette et al., 1987)
|
||||
CORJ870103 PRIFT index (Cornette et al., 1987)
|
||||
CORJ870104 PRILS index (Cornette et al., 1987)
|
||||
CORJ870105 ALTFT index (Cornette et al., 1987)
|
||||
CORJ870106 ALTLS index (Cornette et al., 1987)
|
||||
CORJ870107 TOTFT index (Cornette et al., 1987)
|
||||
CORJ870108 TOTLS index (Cornette et al., 1987)
|
||||
MIYS990101 Relative partition energies derived by the Bethe approximation (Miyazawa-Jernigan, 1999)
|
||||
MIYS990102 Optimized relative partition energies - method A (Miyazawa-Jernigan, 1999)
|
||||
MIYS990103 Optimized relative partition energies - method B (Miyazawa-Jernigan, 1999)
|
||||
MIYS990104 Optimized relative partition energies - method C (Miyazawa-Jernigan, 1999)
|
||||
MIYS990105 Optimized relative partition energies - method D (Miyazawa-Jernigan, 1999)
|
||||
ENGD860101 Hydrophobicity index (Engelman et al., 1986)
|
||||
FASG890101 Hydrophobicity index (Fasman, 1989)
|
||||
KARS160101 Number of vertices (order of the graph) (Karkbara-Knisley, 2016)
|
||||
KARS160102 Number of edges (size of the graph) (Karkbara-Knisley, 2016)
|
||||
KARS160103 Total weighted degree of the graph (obtained by adding all the weights of all the vertices) (Karkbara-Knisley, 2016)
|
||||
KARS160104 Weighted domination number (Karkbara-Knisley, 2016)
|
||||
KARS160105 Average eccentricity (Karkbara-Knisley, 2016)
|
||||
KARS160106 Radius (minimum eccentricity) (Karkbara-Knisley, 2016)
|
||||
KARS160107 Diameter (maximum eccentricity) (Karkbara-Knisley, 2016)
|
||||
KARS160108 Average weighted degree (total degree, divided by the number of vertices) (Karkbara-Knisley, 2016)
|
||||
KARS160109 Maximum eigenvalue of the weighted Laplacian matrix of the graph (Karkbara-Knisley, 2016)
|
||||
KARS160110 Minimum eigenvalue of the weighted Laplacian matrix of the graph (Karkbara-Knisley, 2016)
|
||||
KARS160111 Average eigenvalue of the Laplacian matrix of the graph (Karkbara-Knisley, 2016)
|
||||
KARS160112 Second smallest eigenvalue of the Laplacian matrix of the graph (Karkbara-Knisley, 2016)
|
||||
KARS160113 Weighted domination number using the atomic number (Karkbara-Knisley, 2016)
|
||||
KARS160114 Average weighted eccentricity based on the atomic number (Karkbara-Knisley, 2016)
|
||||
KARS160115 Weighted radius based on the atomic number (minimum eccentricity) (Karkbara-Knisley, 2016)
|
||||
KARS160116 Weighted diameter based on the atomic number (maximum eccentricity) (Karkbara-Knisley, 2016)
|
||||
KARS160117 Total weighted atomic number of the graph (obtained by summing all the atomic number of each of the vertices in the graph) (Karkbara-Knisley, 2016)
|
||||
KARS160118 Average weighted atomic number or degree based on atomic number in the graph (Karkbara-Knisley, 2016)
|
||||
KARS160119 Weighted maximum eigenvalue based on the atomic numbers (Karkbara-Knisley, 2016)
|
||||
KARS160120 Weighted minimum eigenvalue based on the atomic numbers (Karkbara-Knisley, 2016)
|
||||
KARS160121 Weighted average eigenvalue based on the atomic numbers (Karkbara-Knisley, 2016)
|
||||
KARS160122 Weighted second smallest eigenvalue of the weighted Laplacian matrix (Karkbara-Knisley, 2016)
|
||||
List of 94 Amino Acid Matrices in AAindex ver.9.2
|
||||
|
||||
The columns correspond to the AAindex accession number and the description of
|
||||
each matrix.
|
||||
|
||||
ALTS910101 The PAM-120 matrix (Altschul, 1991)
|
||||
BENS940101 Log-odds scoring matrix collected in 6.4-8.7 PAM (Benner et al., 1994)
|
||||
BENS940102 Log-odds scoring matrix collected in 22-29 PAM (Benner et al., 1994)
|
||||
BENS940103 Log-odds scoring matrix collected in 74-100 PAM (Benner et al., 1994)
|
||||
BENS940104 Genetic code matrix (Benner et al., 1994)
|
||||
CSEM940101 Residue replace ability matrix (Cserzo et al., 1994)
|
||||
DAYM780301 Log odds matrix for 250 PAMs (Dayhoff et al., 1978)
|
||||
FEND850101 Structure-Genetic matrix (Feng et al., 1985)
|
||||
FITW660101 Mutation values for the interconversion of amino acid pairs (Fitch, 1966)
|
||||
GEOD900101 Hydrophobicity scoring matrix (George et al., 1990)
|
||||
GONG920101 The mutation matrix for initially aligning (Gonnet et al., 1992)
|
||||
GRAR740104 Chemical distance (Grantham, 1974)
|
||||
HENS920101 BLOSUM45 substitution matrix (Henikoff-Henikoff, 1992)
|
||||
HENS920102 BLOSUM62 substitution matrix (Henikoff-Henikoff, 1992)
|
||||
HENS920103 BLOSUM80 substitution matrix (Henikoff-Henikoff, 1992)
|
||||
JOHM930101 Structure-based amino acid scoring table (Johnson-Overington, 1993)
|
||||
JOND920103 The 250 PAM PET91 matrix (Jones et al., 1992)
|
||||
JOND940101 The 250 PAM transmembrane protein exchange matrix (Jones et al., 1994)
|
||||
KOLA920101 Conformational similarity weight matrix (Kolaskar-Kulkarni-Kale, 1992)
|
||||
LEVJ860101 The secondary structure similarity matrix (Levin et al., 1986)
|
||||
LUTR910101 Structure-based comparison table for outside other class (Luthy et al., 1991)
|
||||
LUTR910102 Structure-based comparison table for inside other class (Luthy et al., 1991)
|
||||
LUTR910103 Structure-based comparison table for outside alpha class (Luthy et al., 1991)
|
||||
LUTR910104 Structure-based comparison table for inside alpha class (Luthy et al., 1991)
|
||||
LUTR910105 Structure-based comparison table for outside beta class (Luthy et al., 1991)
|
||||
LUTR910106 Structure-based comparison table for inside beta class (Luthy et al., 1991)
|
||||
LUTR910107 Structure-based comparison table for other class (Luthy et al., 1991)
|
||||
LUTR910108 Structure-based comparison table for alpha helix class (Luthy et al., 1991)
|
||||
LUTR910109 Structure-based comparison table for beta strand class (Luthy et al., 1991)
|
||||
MCLA710101 The similarity of pairs of amino acids (McLachlan, 1971)
|
||||
MCLA720101 Chemical similarity scores (McLachlan, 1972)
|
||||
MIYS930101 Base-substitution-protein-stability matrix (Miyazawa-Jernigan, 1993)
|
||||
MIYT790101 Amino acid pair distance (Miyata et al., 1979)
|
||||
MOHR870101 EMPAR matrix (Mohana Rao, 1987)
|
||||
NIEK910101 Structure-derived correlation matrix 1 (Niefind-Schomburg, 1991)
|
||||
NIEK910102 Structure-derived correlation matrix 2 (Niefind-Schomburg, 1991)
|
||||
OVEJ920101 STR matrix from structure-based alignments (Overington et al., 1992)
|
||||
QU_C930101 Cross-correlation coefficients of preference factors main chain (Qu et al., 1993)
|
||||
QU_C930102 Cross-correlation coefficients of preference factors side chain (Qu et al., 1993)
|
||||
QU_C930103 The mutant distance based on spatial preference factor (Qu et al., 1993)
|
||||
RISJ880101 Scoring matrix (Risler et al., 1988)
|
||||
TUDE900101 Isomorphicity of replacements (Tudos et al., 1990)
|
||||
AZAE970101 The single residue substitution matrix from interchanges of spatially neighbouring residues (Azarya-Sprinzak et al., 1997)
|
||||
AZAE970102 The substitution matrix derived from spatially conserved motifs (Azarya-Sprinzak et al., 1997)
|
||||
RIER950101 Hydrophobicity scoring matrix (Riek et al., 1995)
|
||||
WEIL970101 WAC matrix constructed from amino acid comparative profiles (Wei et al., 1997)
|
||||
WEIL970102 Difference matrix obtained by subtracting the BLOSUM62 from the WAC matrix (Wei et al., 1997)
|
||||
MEHP950101 (Mehta et al., 1995)
|
||||
MEHP950102 (Mehta et al., 1995)
|
||||
MEHP950103 (Mehta et al., 1995)
|
||||
KAPO950101 (Kapp et al., 1995)
|
||||
VOGG950101 (Vogt et al., 1995)
|
||||
KOSJ950101 Context-dependent optimal substitution matrices for exposed helix (Koshi-Goldstein, 1995)
|
||||
KOSJ950102 Context-dependent optimal substitution matrices for exposed beta (Koshi-Goldstein, 1995)
|
||||
KOSJ950103 Context-dependent optimal substitution matrices for exposed turn (Koshi-Goldstein, 1995)
|
||||
KOSJ950104 Context-dependent optimal substitution matrices for exposed coil (Koshi-Goldstein, 1995)
|
||||
KOSJ950105 Context-dependent optimal substitution matrices for buried helix (Koshi-Goldstein, 1995)
|
||||
KOSJ950106 Context-dependent optimal substitution matrices for buried beta (Koshi-Goldstein, 1995)
|
||||
KOSJ950107 Context-dependent optimal substitution matrices for buried turn (Koshi-Goldstein, 1995)
|
||||
KOSJ950108 Context-dependent optimal substitution matrices for buried coil (Koshi-Goldstein, 1995)
|
||||
KOSJ950109 Context-dependent optimal substitution matrices for alpha helix (Koshi-Goldstein, 1995)
|
||||
KOSJ950110 Context-dependent optimal substitution matrices for beta sheet (Koshi-Goldstein, 1995)
|
||||
KOSJ950111 Context-dependent optimal substitution matrices for turn (Koshi-Goldstein, 1995)
|
||||
KOSJ950112 Context-dependent optimal substitution matrices for coil (Koshi-Goldstein, 1995)
|
||||
KOSJ950113 Context-dependent optimal substitution matrices for exposed residues (Koshi-Goldstein, 1995)
|
||||
KOSJ950114 Context-dependent optimal substitution matrices for buried residues (Koshi-Goldstein, 1995)
|
||||
KOSJ950115 Context-dependent optimal substitution matrices for all residues (Koshi-Goldstein, 1995)
|
||||
OVEJ920102 Environment-specific amino acid substitution matrix for alpha residues (Overington et al., 1992)
|
||||
OVEJ920103 Environment-specific amino acid substitution matrix for beta residues (Overington et al., 1992)
|
||||
OVEJ920104 Environment-specific amino acid substitution matrix for accessible residues (Overington et al., 1992)
|
||||
OVEJ920105 Environment-specific amino acid substitution matrix for inaccessible residues (Overington et al., 1992)
|
||||
LINK010101 Substitution matrices from a neural network model (Lin et al., 2001)
|
||||
BLAJ010101 Matrix built from structural superposition data for identifying potential remote homologues (Blake-Cohen, 2001)
|
||||
PRLA000101 Structure derived matrix (SDM) for alignment of distantly related sequences (Prlic et al., 2000)
|
||||
PRLA000102 Homologous structure derived matrix (HSDM) for alignment of distantly related sequences (Prlic et al., 2000)
|
||||
DOSZ010101 Amino acid similarity matrix based on the sausage force field (Dosztanyi-Torda, 2001)
|
||||
DOSZ010102 Normalised version of SM_SAUSAGE (Dosztanyi-Torda, 2001)
|
||||
DOSZ010103 An amino acid similarity matrix based on the THREADER force field (Dosztanyi-Torda, 2001)
|
||||
DOSZ010104 Normalised version of SM_THREADER (Dosztanyi-Torda, 2001)
|
||||
GIAG010101 Residue substitutions matrix from thermo/mesophilic to psychrophilic enzymes (Gianese et al., 2001)
|
||||
DAYM780302 Log odds matrix for 40 PAMs (Dayhoff et al., 1978)
|
||||
HENS920104 BLOSUM50 substitution matrix (Henikoff-Henikoff, 1992)
|
||||
QUIB020101 STROMA score matrix for the alignment of known distant homologs (Qian-Goldstein, 2002)
|
||||
NAOD960101 Substitution matrix derived from the single residue interchanges at spatially conserved regions of proteins (Naor et al., 1996)
|
||||
RUSR970101 Substitution matrix based on structural alignments of analogous proteins (Russell et al., 1997)
|
||||
RUSR970102 Substitution matrix based on structural alignments of remote homologous proteins (Russell et al., 1997)
|
||||
RUSR970103 Substitution matrix based on structural alignments of analogous and remote homologous proteins (Russell et al., 1997)
|
||||
OGAK980101 Substitution matrix derived from structural alignments by maximizing entropy (Ogata et al., 1998)
|
||||
KANM000101 Substitution matrix (OPTIMA) derived by maximizing discrimination between homologs and non-homologs (Kann et al., 2000)
|
||||
NGPC000101 Substitution matrix (PHAT) built from hydrophobic and transmembrane regions of the Blocks database (Ng et al., 2000)
|
||||
MUET010101 Non-symmetric substitution matrix (SLIM) for detection of homologous transmembrane proteins (Mueller et al., 2001)
|
||||
MUET020101 Substitution matrix (VTML160) obtained by maximum likelihood estimation (Mueller et al., 2002)
|
||||
MUET020102 Substitution matrix (VTML250) obtained by maximum likelihood estimation (Mueller et al., 2002)
|
||||
CROG050101 Substitution matrix computed from the Dirichlet Mixture Model (Crooks-Brenner, 2005)
|
||||
List of 47 Amino Acid Contact Potential Matrices in AAindex ver.9.2
|
||||
|
||||
The columns correspond to the AAindex accession number and the description of
|
||||
each contact potential matrix.
|
||||
|
||||
TANS760101 Statistical contact potential derived from 25 x-ray protein structures
|
||||
TANS760102 Number of contacts between side chains derived from 25 x-ray protein structures
|
||||
ROBB790102 Interaction energies derived from side chain contacts in the interiors of known protein structures
|
||||
BRYS930101 Distance-dependent statistical potential (only energies of contacts within 0-5 Angstroms are included)
|
||||
THOP960101 Mixed quasichemical and optimization-based protein contact potential
|
||||
MIRL960101 Statistical potential derived by the maximization of the harmonic mean of Z scores
|
||||
VENM980101 Statistical potential derived by the maximization of the perceptron criterion
|
||||
BASU010101 Optimization-based potential derived by the modified perceptron criterion
|
||||
MIYS850102 Quasichemical energy of transfer of amino acids from water to the protein environment
|
||||
MIYS850103 Quasichemical energy of interactions in an average buried environment
|
||||
MIYS960101 Quasichemical energy of transfer of amino acids from water to the protein environment
|
||||
MIYS960102 Quasichemical energy of interactions in an average buried environment
|
||||
MIYS960103 Number of contacts between side chains derived from 1168 x-ray protein structures
|
||||
MIYS990106 Quasichemical energy of transfer of amino acids from water to the protein environment
|
||||
MIYS990107 Quasichemical energy of interactions in an average buried environment
|
||||
LIWA970101 Modified version of the Miyazawa-Jernigan transfer energy
|
||||
KESO980101 Quasichemical transfer energy derived from interfacial regions of protein-protein complexes
|
||||
KESO980102 Quasichemical energy in an average protein environment derived from interfacial regions of protein-protein complexes
|
||||
MOOG990101 Quasichemical potential derived from interfacial regions of protein-protein complexes
|
||||
BETM990101 Modified version of the Miyazawa-Jernigan transfer energy
|
||||
TOBD000101 Optimization-derived potential obtained for small set of decoys
|
||||
TOBD000102 Optimization-derived potential obtained for large set of decoys
|
||||
PARB960101 Statistical contact potential derived by the quasichemical approximation
|
||||
PARB960102 Modified version of the Miyazawa-Jernigan transfer energy
|
||||
KOLA930101 Statistical potential derived by the quasichemical approximation
|
||||
GODA950101 Quasichemical statistical potential derived from buried contacts
|
||||
SKOJ970101 Statistical potential derived by the quasichemical approximation
|
||||
SKOJ000101 Statistical quasichemical potential with the partially composition-corrected pair scale
|
||||
SKOJ000102 Statistical quasichemical potential with the composition-corrected pair scale
|
||||
BONM030101 Quasichemical statistical potential for the antiparallel orientation of interacting side groups
|
||||
BONM030102 Quasichemical statistical potential for the intermediate orientation of interacting side groups
|
||||
BONM030103 Quasichemical statistical potential for the parallel orientation of interacting side groups
|
||||
BONM030104 Distances between centers of interacting side chains in the antiparallel orientation
|
||||
BONM030105 Distances between centers of interacting side chains in the intermediate orientation
|
||||
BONM030106 Distances between centers of interacting side chains in the parallel orientation
|
||||
MICC010101 Optimization-derived potential
|
||||
SIMK990101 Distance-dependent statistical potential (contacts within 0-5 Angstroms)
|
||||
SIMK990102 Distance-dependent statistical potential (contacts within 5-7.5 Angstroms)
|
||||
SIMK990103 Distance-dependent statistical potential (contacts within 7.5-10 Angstroms)
|
||||
SIMK990104 Distance-dependent statistical potential (contacts within 10-12 Angstroms)
|
||||
SIMK990105 Distance-dependent statistical potential (contacts longer than 12 Angstroms)
|
||||
ZHAC000101 Environment-dependent residue contact energies (rows = helix, cols = helix)
|
||||
ZHAC000102 Environment-dependent residue contact energies (rows = helix, cols = strand)
|
||||
ZHAC000103 Environment-dependent residue contact energies (rows = helix, cols = coil)
|
||||
ZHAC000104 Environment-dependent residue contact energies (rows = strand, cols = strand)
|
||||
ZHAC000105 Environment-dependent residue contact energies (rows = strand, cols = coil)
|
||||
ZHAC000106 Environment-dependent residue contact energies (rows = coil, cols = coil)
|
|
@ -1,571 +0,0 @@
|
|||
List of 566 Amino Acid Indices in AAindex ver.9.2
|
||||
|
||||
The columns correspond to the AAindex accession number and the description of
|
||||
each index.
|
||||
|
||||
ANDN920101 alpha-CH chemical shifts (Andersen et al., 1992)
|
||||
ARGP820101 Hydrophobicity index (Argos et al., 1982)
|
||||
ARGP820102 Signal sequence helical potential (Argos et al., 1982)
|
||||
ARGP820103 Membrane-buried preference parameters (Argos et al., 1982)
|
||||
BEGF750101 Conformational parameter of inner helix (Beghin-Dirkx, 1975)
|
||||
BEGF750102 Conformational parameter of beta-structure (Beghin-Dirkx, 1975)
|
||||
BEGF750103 Conformational parameter of beta-turn (Beghin-Dirkx, 1975)
|
||||
BHAR880101 Average flexibility indices (Bhaskaran-Ponnuswamy, 1988)
|
||||
BIGC670101 Residue volume (Bigelow, 1967)
|
||||
BIOV880101 Information value for accessibility; average fraction 35% (Biou et al., 1988)
|
||||
BIOV880102 Information value for accessibility; average fraction 23% (Biou et al., 1988)
|
||||
BROC820101 Retention coefficient in TFA (Browne et al., 1982)
|
||||
BROC820102 Retention coefficient in HFBA (Browne et al., 1982)
|
||||
BULH740101 Transfer free energy to surface (Bull-Breese, 1974)
|
||||
BULH740102 Apparent partial specific volume (Bull-Breese, 1974)
|
||||
BUNA790101 alpha-NH chemical shifts (Bundi-Wuthrich, 1979)
|
||||
BUNA790102 alpha-CH chemical shifts (Bundi-Wuthrich, 1979)
|
||||
BUNA790103 Spin-spin coupling constants 3JHalpha-NH (Bundi-Wuthrich, 1979)
|
||||
BURA740101 Normalized frequency of alpha-helix (Burgess et al., 1974)
|
||||
BURA740102 Normalized frequency of extended structure (Burgess et al., 1974)
|
||||
CHAM810101 Steric parameter (Charton, 1981)
|
||||
CHAM820101 Polarizability parameter (Charton-Charton, 1982)
|
||||
CHAM820102 Free energy of solution in water, kcal/mole (Charton-Charton, 1982)
|
||||
CHAM830101 The Chou-Fasman parameter of the coil conformation (Charton-Charton, 1983)
|
||||
CHAM830102 A parameter defined from the residuals obtained from the best correlation of the Chou-Fasman parameter of beta-sheet (Charton-Charton, 1983)
|
||||
CHAM830103 The number of atoms in the side chain labelled 1+1 (Charton-Charton, 1983)
|
||||
CHAM830104 The number of atoms in the side chain labelled 2+1 (Charton-Charton, 1983)
|
||||
CHAM830105 The number of atoms in the side chain labelled 3+1 (Charton-Charton, 1983)
|
||||
CHAM830106 The number of bonds in the longest chain (Charton-Charton, 1983)
|
||||
CHAM830107 A parameter of charge transfer capability (Charton-Charton, 1983)
|
||||
CHAM830108 A parameter of charge transfer donor capability (Charton-Charton, 1983)
|
||||
CHOC750101 Average volume of buried residue (Chothia, 1975)
|
||||
CHOC760101 Residue accessible surface area in tripeptide (Chothia, 1976)
|
||||
CHOC760102 Residue accessible surface area in folded protein (Chothia, 1976)
|
||||
CHOC760103 Proportion of residues 95% buried (Chothia, 1976)
|
||||
CHOC760104 Proportion of residues 100% buried (Chothia, 1976)
|
||||
CHOP780101 Normalized frequency of beta-turn (Chou-Fasman, 1978a)
|
||||
CHOP780201 Normalized frequency of alpha-helix (Chou-Fasman, 1978b)
|
||||
CHOP780202 Normalized frequency of beta-sheet (Chou-Fasman, 1978b)
|
||||
CHOP780203 Normalized frequency of beta-turn (Chou-Fasman, 1978b)
|
||||
CHOP780204 Normalized frequency of N-terminal helix (Chou-Fasman, 1978b)
|
||||
CHOP780205 Normalized frequency of C-terminal helix (Chou-Fasman, 1978b)
|
||||
CHOP780206 Normalized frequency of N-terminal non helical region (Chou-Fasman, 1978b)
|
||||
CHOP780207 Normalized frequency of C-terminal non helical region (Chou-Fasman, 1978b)
|
||||
CHOP780208 Normalized frequency of N-terminal beta-sheet (Chou-Fasman, 1978b)
|
||||
CHOP780209 Normalized frequency of C-terminal beta-sheet (Chou-Fasman, 1978b)
|
||||
CHOP780210 Normalized frequency of N-terminal non beta region (Chou-Fasman, 1978b)
|
||||
CHOP780211 Normalized frequency of C-terminal non beta region (Chou-Fasman, 1978b)
|
||||
CHOP780212 Frequency of the 1st residue in turn (Chou-Fasman, 1978b)
|
||||
CHOP780213 Frequency of the 2nd residue in turn (Chou-Fasman, 1978b)
|
||||
CHOP780214 Frequency of the 3rd residue in turn (Chou-Fasman, 1978b)
|
||||
CHOP780215 Frequency of the 4th residue in turn (Chou-Fasman, 1978b)
|
||||
CHOP780216 Normalized frequency of the 2nd and 3rd residues in turn (Chou-Fasman, 1978b)
|
||||
CIDH920101 Normalized hydrophobicity scales for alpha-proteins (Cid et al., 1992)
|
||||
CIDH920102 Normalized hydrophobicity scales for beta-proteins (Cid et al., 1992)
|
||||
CIDH920103 Normalized hydrophobicity scales for alpha+beta-proteins (Cid et al., 1992)
|
||||
CIDH920104 Normalized hydrophobicity scales for alpha/beta-proteins (Cid et al., 1992)
|
||||
CIDH920105 Normalized average hydrophobicity scales (Cid et al., 1992)
|
||||
COHE430101 Partial specific volume (Cohn-Edsall, 1943)
|
||||
CRAJ730101 Normalized frequency of middle helix (Crawford et al., 1973)
|
||||
CRAJ730102 Normalized frequency of beta-sheet (Crawford et al., 1973)
|
||||
CRAJ730103 Normalized frequency of turn (Crawford et al., 1973)
|
||||
DAWD720101 Size (Dawson, 1972)
|
||||
DAYM780101 Amino acid composition (Dayhoff et al., 1978a)
|
||||
DAYM780201 Relative mutability (Dayhoff et al., 1978b)
|
||||
DESM900101 Membrane preference for cytochrome b: MPH89 (Degli Esposti et al., 1990)
|
||||
DESM900102 Average membrane preference: AMP07 (Degli Esposti et al., 1990)
|
||||
EISD840101 Consensus normalized hydrophobicity scale (Eisenberg, 1984)
|
||||
EISD860101 Solvation free energy (Eisenberg-McLachlan, 1986)
|
||||
EISD860102 Atom-based hydrophobic moment (Eisenberg-McLachlan, 1986)
|
||||
EISD860103 Direction of hydrophobic moment (Eisenberg-McLachlan, 1986)
|
||||
FASG760101 Molecular weight (Fasman, 1976)
|
||||
FASG760102 Melting point (Fasman, 1976)
|
||||
FASG760103 Optical rotation (Fasman, 1976)
|
||||
FASG760104 pK-N (Fasman, 1976)
|
||||
FASG760105 pK-C (Fasman, 1976)
|
||||
FAUJ830101 Hydrophobic parameter pi (Fauchere-Pliska, 1983)
|
||||
FAUJ880101 Graph shape index (Fauchere et al., 1988)
|
||||
FAUJ880102 Smoothed upsilon steric parameter (Fauchere et al., 1988)
|
||||
FAUJ880103 Normalized van der Waals volume (Fauchere et al., 1988)
|
||||
FAUJ880104 STERIMOL length of the side chain (Fauchere et al., 1988)
|
||||
FAUJ880105 STERIMOL minimum width of the side chain (Fauchere et al., 1988)
|
||||
FAUJ880106 STERIMOL maximum width of the side chain (Fauchere et al., 1988)
|
||||
FAUJ880107 N.m.r. chemical shift of alpha-carbon (Fauchere et al., 1988)
|
||||
FAUJ880108 Localized electrical effect (Fauchere et al., 1988)
|
||||
FAUJ880109 Number of hydrogen bond donors (Fauchere et al., 1988)
|
||||
FAUJ880110 Number of full nonbonding orbitals (Fauchere et al., 1988)
|
||||
FAUJ880111 Positive charge (Fauchere et al., 1988)
|
||||
FAUJ880112 Negative charge (Fauchere et al., 1988)
|
||||
FAUJ880113 pK-a(RCOOH) (Fauchere et al., 1988)
|
||||
FINA770101 Helix-coil equilibrium constant (Finkelstein-Ptitsyn, 1977)
|
||||
FINA910101 Helix initiation parameter at position i-1 (Finkelstein et al., 1991)
|
||||
FINA910102 Helix initiation parameter at position i,i+1,i+2 (Finkelstein et al., 1991)
|
||||
FINA910103 Helix termination parameter at position j-2,j-1,j (Finkelstein et al., 1991)
|
||||
FINA910104 Helix termination parameter at position j+1 (Finkelstein et al., 1991)
|
||||
GARJ730101 Partition coefficient (Garel et al., 1973)
|
||||
GEIM800101 Alpha-helix indices (Geisow-Roberts, 1980)
|
||||
GEIM800102 Alpha-helix indices for alpha-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800103 Alpha-helix indices for beta-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800104 Alpha-helix indices for alpha/beta-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800105 Beta-strand indices (Geisow-Roberts, 1980)
|
||||
GEIM800106 Beta-strand indices for beta-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800107 Beta-strand indices for alpha/beta-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800108 Aperiodic indices (Geisow-Roberts, 1980)
|
||||
GEIM800109 Aperiodic indices for alpha-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800110 Aperiodic indices for beta-proteins (Geisow-Roberts, 1980)
|
||||
GEIM800111 Aperiodic indices for alpha/beta-proteins (Geisow-Roberts, 1980)
|
||||
GOLD730101 Hydrophobicity factor (Goldsack-Chalifoux, 1973)
|
||||
GOLD730102 Residue volume (Goldsack-Chalifoux, 1973)
|
||||
GRAR740101 Composition (Grantham, 1974)
|
||||
GRAR740102 Polarity (Grantham, 1974)
|
||||
GRAR740103 Volume (Grantham, 1974)
|
||||
GUYH850101 Partition energy (Guy, 1985)
|
||||
HOPA770101 Hydration number (Hopfinger, 1971), Cited by Charton-Charton (1982)
|
||||
HOPT810101 Hydrophilicity value (Hopp-Woods, 1981)
|
||||
HUTJ700101 Heat capacity (Hutchens, 1970)
|
||||
HUTJ700102 Absolute entropy (Hutchens, 1970)
|
||||
HUTJ700103 Entropy of formation (Hutchens, 1970)
|
||||
ISOY800101 Normalized relative frequency of alpha-helix (Isogai et al., 1980)
|
||||
ISOY800102 Normalized relative frequency of extended structure (Isogai et al., 1980)
|
||||
ISOY800103 Normalized relative frequency of bend (Isogai et al., 1980)
|
||||
ISOY800104 Normalized relative frequency of bend R (Isogai et al., 1980)
|
||||
ISOY800105 Normalized relative frequency of bend S (Isogai et al., 1980)
|
||||
ISOY800106 Normalized relative frequency of helix end (Isogai et al., 1980)
|
||||
ISOY800107 Normalized relative frequency of double bend (Isogai et al., 1980)
|
||||
ISOY800108 Normalized relative frequency of coil (Isogai et al., 1980)
|
||||
JANJ780101 Average accessible surface area (Janin et al., 1978)
|
||||
JANJ780102 Percentage of buried residues (Janin et al., 1978)
|
||||
JANJ780103 Percentage of exposed residues (Janin et al., 1978)
|
||||
JANJ790101 Ratio of buried and accessible molar fractions (Janin, 1979)
|
||||
JANJ790102 Transfer free energy (Janin, 1979)
|
||||
JOND750101 Hydrophobicity (Jones, 1975)
|
||||
JOND750102 pK (-COOH) (Jones, 1975)
|
||||
JOND920101 Relative frequency of occurrence (Jones et al., 1992)
|
||||
JOND920102 Relative mutability (Jones et al., 1992)
|
||||
JUKT750101 Amino acid distribution (Jukes et al., 1975)
|
||||
JUNJ780101 Sequence frequency (Jungck, 1978)
|
||||
KANM800101 Average relative probability of helix (Kanehisa-Tsong, 1980)
|
||||
KANM800102 Average relative probability of beta-sheet (Kanehisa-Tsong, 1980)
|
||||
KANM800103 Average relative probability of inner helix (Kanehisa-Tsong, 1980)
|
||||
KANM800104 Average relative probability of inner beta-sheet (Kanehisa-Tsong, 1980)
|
||||
KARP850101 Flexibility parameter for no rigid neighbors (Karplus-Schulz, 1985)
|
||||
KARP850102 Flexibility parameter for one rigid neighbor (Karplus-Schulz, 1985)
|
||||
KARP850103 Flexibility parameter for two rigid neighbors (Karplus-Schulz, 1985)
|
||||
KHAG800101 The Kerr-constant increments (Khanarian-Moore, 1980)
|
||||
KLEP840101 Net charge (Klein et al., 1984)
|
||||
KRIW710101 Side chain interaction parameter (Krigbaum-Rubin, 1971)
|
||||
KRIW790101 Side chain interaction parameter (Krigbaum-Komoriya, 1979)
|
||||
KRIW790102 Fraction of site occupied by water (Krigbaum-Komoriya, 1979)
|
||||
KRIW790103 Side chain volume (Krigbaum-Komoriya, 1979)
|
||||
KYTJ820101 Hydropathy index (Kyte-Doolittle, 1982)
|
||||
LAWE840101 Transfer free energy, CHP/water (Lawson et al., 1984)
|
||||
LEVM760101 Hydrophobic parameter (Levitt, 1976)
|
||||
LEVM760102 Distance between C-alpha and centroid of side chain (Levitt, 1976)
|
||||
LEVM760103 Side chain angle theta(AAR) (Levitt, 1976)
|
||||
LEVM760104 Side chain torsion angle phi(AAAR) (Levitt, 1976)
|
||||
LEVM760105 Radius of gyration of side chain (Levitt, 1976)
|
||||
LEVM760106 van der Waals parameter R0 (Levitt, 1976)
|
||||
LEVM760107 van der Waals parameter epsilon (Levitt, 1976)
|
||||
LEVM780101 Normalized frequency of alpha-helix, with weights (Levitt, 1978)
|
||||
LEVM780102 Normalized frequency of beta-sheet, with weights (Levitt, 1978)
|
||||
LEVM780103 Normalized frequency of reverse turn, with weights (Levitt, 1978)
|
||||
LEVM780104 Normalized frequency of alpha-helix, unweighted (Levitt, 1978)
|
||||
LEVM780105 Normalized frequency of beta-sheet, unweighted (Levitt, 1978)
|
||||
LEVM780106 Normalized frequency of reverse turn, unweighted (Levitt, 1978)
|
||||
LEWP710101 Frequency of occurrence in beta-bends (Lewis et al., 1971)
|
||||
LIFS790101 Conformational preference for all beta-strands (Lifson-Sander, 1979)
|
||||
LIFS790102 Conformational preference for parallel beta-strands (Lifson-Sander, 1979)
|
||||
LIFS790103 Conformational preference for antiparallel beta-strands (Lifson-Sander, 1979)
|
||||
MANP780101 Average surrounding hydrophobicity (Manavalan-Ponnuswamy, 1978)
|
||||
MAXF760101 Normalized frequency of alpha-helix (Maxfield-Scheraga, 1976)
|
||||
MAXF760102 Normalized frequency of extended structure (Maxfield-Scheraga, 1976)
|
||||
MAXF760103 Normalized frequency of zeta R (Maxfield-Scheraga, 1976)
|
||||
MAXF760104 Normalized frequency of left-handed alpha-helix (Maxfield-Scheraga, 1976)
|
||||
MAXF760105 Normalized frequency of zeta L (Maxfield-Scheraga, 1976)
|
||||
MAXF760106 Normalized frequency of alpha region (Maxfield-Scheraga, 1976)
|
||||
MCMT640101 Refractivity (McMeekin et al., 1964), Cited by Jones (1975)
|
||||
MEEJ800101 Retention coefficient in HPLC, pH7.4 (Meek, 1980)
|
||||
MEEJ800102 Retention coefficient in HPLC, pH2.1 (Meek, 1980)
|
||||
MEEJ810101 Retention coefficient in NaClO4 (Meek-Rossetti, 1981)
|
||||
MEEJ810102 Retention coefficient in NaH2PO4 (Meek-Rossetti, 1981)
|
||||
MEIH800101 Average reduced distance for C-alpha (Meirovitch et al., 1980)
|
||||
MEIH800102 Average reduced distance for side chain (Meirovitch et al., 1980)
|
||||
MEIH800103 Average side chain orientation angle (Meirovitch et al., 1980)
|
||||
MIYS850101 Effective partition energy (Miyazawa-Jernigan, 1985)
|
||||
NAGK730101 Normalized frequency of alpha-helix (Nagano, 1973)
|
||||
NAGK730102 Normalized frequency of beta-structure (Nagano, 1973)
|
||||
NAGK730103 Normalized frequency of coil (Nagano, 1973)
|
||||
NAKH900101 AA composition of total proteins (Nakashima et al., 1990)
|
||||
NAKH900102 SD of AA composition of total proteins (Nakashima et al., 1990)
|
||||
NAKH900103 AA composition of mt-proteins (Nakashima et al., 1990)
|
||||
NAKH900104 Normalized composition of mt-proteins (Nakashima et al., 1990)
|
||||
NAKH900105 AA composition of mt-proteins from animal (Nakashima et al., 1990)
|
||||
NAKH900106 Normalized composition from animal (Nakashima et al., 1990)
|
||||
NAKH900107 AA composition of mt-proteins from fungi and plant (Nakashima et al., 1990)
|
||||
NAKH900108 Normalized composition from fungi and plant (Nakashima et al., 1990)
|
||||
NAKH900109 AA composition of membrane proteins (Nakashima et al., 1990)
|
||||
NAKH900110 Normalized composition of membrane proteins (Nakashima et al., 1990)
|
||||
NAKH900111 Transmembrane regions of non-mt-proteins (Nakashima et al., 1990)
|
||||
NAKH900112 Transmembrane regions of mt-proteins (Nakashima et al., 1990)
|
||||
NAKH900113 Ratio of average and computed composition (Nakashima et al., 1990)
|
||||
NAKH920101 AA composition of CYT of single-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920102 AA composition of CYT2 of single-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920103 AA composition of EXT of single-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920104 AA composition of EXT2 of single-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920105 AA composition of MEM of single-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920106 AA composition of CYT of multi-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920107 AA composition of EXT of multi-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NAKH920108 AA composition of MEM of multi-spanning proteins (Nakashima-Nishikawa, 1992)
|
||||
NISK800101 8 A contact number (Nishikawa-Ooi, 1980)
|
||||
NISK860101 14 A contact number (Nishikawa-Ooi, 1986)
|
||||
NOZY710101 Transfer energy, organic solvent/water (Nozaki-Tanford, 1971)
|
||||
OOBM770101 Average non-bonded energy per atom (Oobatake-Ooi, 1977)
|
||||
OOBM770102 Short and medium range non-bonded energy per atom (Oobatake-Ooi, 1977)
|
||||
OOBM770103 Long range non-bonded energy per atom (Oobatake-Ooi, 1977)
|
||||
OOBM770104 Average non-bonded energy per residue (Oobatake-Ooi, 1977)
|
||||
OOBM770105 Short and medium range non-bonded energy per residue (Oobatake-Ooi, 1977)
|
||||
OOBM850101 Optimized beta-structure-coil equilibrium constant (Oobatake et al., 1985)
|
||||
OOBM850102 Optimized propensity to form reverse turn (Oobatake et al., 1985)
|
||||
OOBM850103 Optimized transfer energy parameter (Oobatake et al., 1985)
|
||||
OOBM850104 Optimized average non-bonded energy per atom (Oobatake et al., 1985)
|
||||
OOBM850105 Optimized side chain interaction parameter (Oobatake et al., 1985)
|
||||
PALJ810101 Normalized frequency of alpha-helix from LG (Palau et al., 1981)
|
||||
PALJ810102 Normalized frequency of alpha-helix from CF (Palau et al., 1981)
|
||||
PALJ810103 Normalized frequency of beta-sheet from LG (Palau et al., 1981)
|
||||
PALJ810104 Normalized frequency of beta-sheet from CF (Palau et al., 1981)
|
||||
PALJ810105 Normalized frequency of turn from LG (Palau et al., 1981)
|
||||
PALJ810106 Normalized frequency of turn from CF (Palau et al., 1981)
|
||||
PALJ810107 Normalized frequency of alpha-helix in all-alpha class (Palau et al., 1981)
|
||||
PALJ810108 Normalized frequency of alpha-helix in alpha+beta class (Palau et al., 1981)
|
||||
PALJ810109 Normalized frequency of alpha-helix in alpha/beta class (Palau et al., 1981)
|
||||
PALJ810110 Normalized frequency of beta-sheet in all-beta class (Palau et al., 1981)
|
||||
PALJ810111 Normalized frequency of beta-sheet in alpha+beta class (Palau et al., 1981)
|
||||
PALJ810112 Normalized frequency of beta-sheet in alpha/beta class (Palau et al., 1981)
|
||||
PALJ810113 Normalized frequency of turn in all-alpha class (Palau et al., 1981)
|
||||
PALJ810114 Normalized frequency of turn in all-beta class (Palau et al., 1981)
|
||||
PALJ810115 Normalized frequency of turn in alpha+beta class (Palau et al., 1981)
|
||||
PALJ810116 Normalized frequency of turn in alpha/beta class (Palau et al., 1981)
|
||||
PARJ860101 HPLC parameter (Parker et al., 1986)
|
||||
PLIV810101 Partition coefficient (Pliska et al., 1981)
|
||||
PONP800101 Surrounding hydrophobicity in folded form (Ponnuswamy et al., 1980)
|
||||
PONP800102 Average gain in surrounding hydrophobicity (Ponnuswamy et al., 1980)
|
||||
PONP800103 Average gain ratio in surrounding hydrophobicity (Ponnuswamy et al., 1980)
|
||||
PONP800104 Surrounding hydrophobicity in alpha-helix (Ponnuswamy et al., 1980)
|
||||
PONP800105 Surrounding hydrophobicity in beta-sheet (Ponnuswamy et al., 1980)
|
||||
PONP800106 Surrounding hydrophobicity in turn (Ponnuswamy et al., 1980)
|
||||
PONP800107 Accessibility reduction ratio (Ponnuswamy et al., 1980)
|
||||
PONP800108 Average number of surrounding residues (Ponnuswamy et al., 1980)
|
||||
PRAM820101 Intercept in regression analysis (Prabhakaran-Ponnuswamy, 1982)
|
||||
PRAM820102 Slope in regression analysis x 1.0E1 (Prabhakaran-Ponnuswamy, 1982)
|
||||
PRAM820103 Correlation coefficient in regression analysis (Prabhakaran-Ponnuswamy, 1982)
|
||||
PRAM900101 Hydrophobicity (Prabhakaran, 1990)
|
||||
PRAM900102 Relative frequency in alpha-helix (Prabhakaran, 1990)
|
||||
PRAM900103 Relative frequency in beta-sheet (Prabhakaran, 1990)
|
||||
PRAM900104 Relative frequency in reverse-turn (Prabhakaran, 1990)
|
||||
PTIO830101 Helix-coil equilibrium constant (Ptitsyn-Finkelstein, 1983)
|
||||
PTIO830102 Beta-coil equilibrium constant (Ptitsyn-Finkelstein, 1983)
|
||||
QIAN880101 Weights for alpha-helix at the window position of -6 (Qian-Sejnowski, 1988)
|
||||
QIAN880102 Weights for alpha-helix at the window position of -5 (Qian-Sejnowski, 1988)
|
||||
QIAN880103 Weights for alpha-helix at the window position of -4 (Qian-Sejnowski, 1988)
|
||||
QIAN880104 Weights for alpha-helix at the window position of -3 (Qian-Sejnowski, 1988)
|
||||
QIAN880105 Weights for alpha-helix at the window position of -2 (Qian-Sejnowski, 1988)
|
||||
QIAN880106 Weights for alpha-helix at the window position of -1 (Qian-Sejnowski, 1988)
|
||||
QIAN880107 Weights for alpha-helix at the window position of 0 (Qian-Sejnowski, 1988)
|
||||
QIAN880108 Weights for alpha-helix at the window position of 1 (Qian-Sejnowski, 1988)
|
||||
QIAN880109 Weights for alpha-helix at the window position of 2 (Qian-Sejnowski, 1988)
|
||||
QIAN880110 Weights for alpha-helix at the window position of 3 (Qian-Sejnowski, 1988)
|
||||
QIAN880111 Weights for alpha-helix at the window position of 4 (Qian-Sejnowski, 1988)
|
||||
QIAN880112 Weights for alpha-helix at the window position of 5 (Qian-Sejnowski, 1988)
|
||||
QIAN880113 Weights for alpha-helix at the window position of 6 (Qian-Sejnowski, 1988)
|
||||
QIAN880114 Weights for beta-sheet at the window position of -6 (Qian-Sejnowski, 1988)
|
||||
QIAN880115 Weights for beta-sheet at the window position of -5 (Qian-Sejnowski, 1988)
|
||||
QIAN880116 Weights for beta-sheet at the window position of -4 (Qian-Sejnowski, 1988)
|
||||
QIAN880117 Weights for beta-sheet at the window position of -3 (Qian-Sejnowski, 1988)
|
||||
QIAN880118 Weights for beta-sheet at the window position of -2 (Qian-Sejnowski, 1988)
|
||||
QIAN880119 Weights for beta-sheet at the window position of -1 (Qian-Sejnowski, 1988)
|
||||
QIAN880120 Weights for beta-sheet at the window position of 0 (Qian-Sejnowski, 1988)
|
||||
QIAN880121 Weights for beta-sheet at the window position of 1 (Qian-Sejnowski, 1988)
|
||||
QIAN880122 Weights for beta-sheet at the window position of 2 (Qian-Sejnowski, 1988)
|
||||
QIAN880123 Weights for beta-sheet at the window position of 3 (Qian-Sejnowski, 1988)
|
||||
QIAN880124 Weights for beta-sheet at the window position of 4 (Qian-Sejnowski, 1988)
|
||||
QIAN880125 Weights for beta-sheet at the window position of 5 (Qian-Sejnowski, 1988)
|
||||
QIAN880126 Weights for beta-sheet at the window position of 6 (Qian-Sejnowski, 1988)
|
||||
QIAN880127 Weights for coil at the window position of -6 (Qian-Sejnowski, 1988)
|
||||
QIAN880128 Weights for coil at the window position of -5 (Qian-Sejnowski, 1988)
|
||||
QIAN880129 Weights for coil at the window position of -4 (Qian-Sejnowski, 1988)
|
||||
QIAN880130 Weights for coil at the window position of -3 (Qian-Sejnowski, 1988)
|
||||
QIAN880131 Weights for coil at the window position of -2 (Qian-Sejnowski, 1988)
|
||||
QIAN880132 Weights for coil at the window position of -1 (Qian-Sejnowski, 1988)
|
||||
QIAN880133 Weights for coil at the window position of 0 (Qian-Sejnowski, 1988)
|
||||
QIAN880134 Weights for coil at the window position of 1 (Qian-Sejnowski, 1988)
|
||||
QIAN880135 Weights for coil at the window position of 2 (Qian-Sejnowski, 1988)
|
||||
QIAN880136 Weights for coil at the window position of 3 (Qian-Sejnowski, 1988)
|
||||
QIAN880137 Weights for coil at the window position of 4 (Qian-Sejnowski, 1988)
|
||||
QIAN880138 Weights for coil at the window position of 5 (Qian-Sejnowski, 1988)
|
||||
QIAN880139 Weights for coil at the window position of 6 (Qian-Sejnowski, 1988)
|
||||
RACS770101 Average reduced distance for C-alpha (Rackovsky-Scheraga, 1977)
|
||||
RACS770102 Average reduced distance for side chain (Rackovsky-Scheraga, 1977)
|
||||
RACS770103 Side chain orientational preference (Rackovsky-Scheraga, 1977)
|
||||
RACS820101 Average relative fractional occurrence in A0(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820102 Average relative fractional occurrence in AR(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820103 Average relative fractional occurrence in AL(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820104 Average relative fractional occurrence in EL(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820105 Average relative fractional occurrence in E0(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820106 Average relative fractional occurrence in ER(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820107 Average relative fractional occurrence in A0(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RACS820108 Average relative fractional occurrence in AR(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RACS820109 Average relative fractional occurrence in AL(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RACS820110 Average relative fractional occurrence in EL(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RACS820111 Average relative fractional occurrence in E0(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RACS820112 Average relative fractional occurrence in ER(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RACS820113 Value of theta(i) (Rackovsky-Scheraga, 1982)
|
||||
RACS820114 Value of theta(i-1) (Rackovsky-Scheraga, 1982)
|
||||
RADA880101 Transfer free energy from chx to wat (Radzicka-Wolfenden, 1988)
|
||||
RADA880102 Transfer free energy from oct to wat (Radzicka-Wolfenden, 1988)
|
||||
RADA880103 Transfer free energy from vap to chx (Radzicka-Wolfenden, 1988)
|
||||
RADA880104 Transfer free energy from chx to oct (Radzicka-Wolfenden, 1988)
|
||||
RADA880105 Transfer free energy from vap to oct (Radzicka-Wolfenden, 1988)
|
||||
RADA880106 Accessible surface area (Radzicka-Wolfenden, 1988)
|
||||
RADA880107 Energy transfer from out to in(95%buried) (Radzicka-Wolfenden, 1988)
|
||||
RADA880108 Mean polarity (Radzicka-Wolfenden, 1988)
|
||||
RICJ880101 Relative preference value at N" (Richardson-Richardson, 1988)
|
||||
RICJ880102 Relative preference value at N' (Richardson-Richardson, 1988)
|
||||
RICJ880103 Relative preference value at N-cap (Richardson-Richardson, 1988)
|
||||
RICJ880104 Relative preference value at N1 (Richardson-Richardson, 1988)
|
||||
RICJ880105 Relative preference value at N2 (Richardson-Richardson, 1988)
|
||||
RICJ880106 Relative preference value at N3 (Richardson-Richardson, 1988)
|
||||
RICJ880107 Relative preference value at N4 (Richardson-Richardson, 1988)
|
||||
RICJ880108 Relative preference value at N5 (Richardson-Richardson, 1988)
|
||||
RICJ880109 Relative preference value at Mid (Richardson-Richardson, 1988)
|
||||
RICJ880110 Relative preference value at C5 (Richardson-Richardson, 1988)
|
||||
RICJ880111 Relative preference value at C4 (Richardson-Richardson, 1988)
|
||||
RICJ880112 Relative preference value at C3 (Richardson-Richardson, 1988)
|
||||
RICJ880113 Relative preference value at C2 (Richardson-Richardson, 1988)
|
||||
RICJ880114 Relative preference value at C1 (Richardson-Richardson, 1988)
|
||||
RICJ880115 Relative preference value at C-cap (Richardson-Richardson, 1988)
|
||||
RICJ880116 Relative preference value at C' (Richardson-Richardson, 1988)
|
||||
RICJ880117 Relative preference value at C" (Richardson-Richardson, 1988)
|
||||
ROBB760101 Information measure for alpha-helix (Robson-Suzuki, 1976)
|
||||
ROBB760102 Information measure for N-terminal helix (Robson-Suzuki, 1976)
|
||||
ROBB760103 Information measure for middle helix (Robson-Suzuki, 1976)
|
||||
ROBB760104 Information measure for C-terminal helix (Robson-Suzuki, 1976)
|
||||
ROBB760105 Information measure for extended (Robson-Suzuki, 1976)
|
||||
ROBB760106 Information measure for pleated-sheet (Robson-Suzuki, 1976)
|
||||
ROBB760107 Information measure for extended without H-bond (Robson-Suzuki, 1976)
|
||||
ROBB760108 Information measure for turn (Robson-Suzuki, 1976)
|
||||
ROBB760109 Information measure for N-terminal turn (Robson-Suzuki, 1976)
|
||||
ROBB760110 Information measure for middle turn (Robson-Suzuki, 1976)
|
||||
ROBB760111 Information measure for C-terminal turn (Robson-Suzuki, 1976)
|
||||
ROBB760112 Information measure for coil (Robson-Suzuki, 1976)
|
||||
ROBB760113 Information measure for loop (Robson-Suzuki, 1976)
|
||||
ROBB790101 Hydration free energy (Robson-Osguthorpe, 1979)
|
||||
ROSG850101 Mean area buried on transfer (Rose et al., 1985)
|
||||
ROSG850102 Mean fractional area loss (Rose et al., 1985)
|
||||
ROSM880101 Side chain hydropathy, uncorrected for solvation (Roseman, 1988)
|
||||
ROSM880102 Side chain hydropathy, corrected for solvation (Roseman, 1988)
|
||||
ROSM880103 Loss of Side chain hydropathy by helix formation (Roseman, 1988)
|
||||
SIMZ760101 Transfer free energy (Simon, 1976), Cited by Charton-Charton (1982)
|
||||
SNEP660101 Principal component I (Sneath, 1966)
|
||||
SNEP660102 Principal component II (Sneath, 1966)
|
||||
SNEP660103 Principal component III (Sneath, 1966)
|
||||
SNEP660104 Principal component IV (Sneath, 1966)
|
||||
SUEM840101 Zimm-Bragg parameter s at 20 C (Sueki et al., 1984)
|
||||
SUEM840102 Zimm-Bragg parameter sigma x 1.0E4 (Sueki et al., 1984)
|
||||
SWER830101 Optimal matching hydrophobicity (Sweet-Eisenberg, 1983)
|
||||
TANS770101 Normalized frequency of alpha-helix (Tanaka-Scheraga, 1977)
|
||||
TANS770102 Normalized frequency of isolated helix (Tanaka-Scheraga, 1977)
|
||||
TANS770103 Normalized frequency of extended structure (Tanaka-Scheraga, 1977)
|
||||
TANS770104 Normalized frequency of chain reversal R (Tanaka-Scheraga, 1977)
|
||||
TANS770105 Normalized frequency of chain reversal S (Tanaka-Scheraga, 1977)
|
||||
TANS770106 Normalized frequency of chain reversal D (Tanaka-Scheraga, 1977)
|
||||
TANS770107 Normalized frequency of left-handed helix (Tanaka-Scheraga, 1977)
|
||||
TANS770108 Normalized frequency of zeta R (Tanaka-Scheraga, 1977)
|
||||
TANS770109 Normalized frequency of coil (Tanaka-Scheraga, 1977)
|
||||
TANS770110 Normalized frequency of chain reversal (Tanaka-Scheraga, 1977)
|
||||
VASM830101 Relative population of conformational state A (Vasquez et al., 1983)
|
||||
VASM830102 Relative population of conformational state C (Vasquez et al., 1983)
|
||||
VASM830103 Relative population of conformational state E (Vasquez et al., 1983)
|
||||
VELV850101 Electron-ion interaction potential (Veljkovic et al., 1985)
|
||||
VENT840101 Bitterness (Venanzi, 1984)
|
||||
VHEG790101 Transfer free energy to lipophilic phase (von Heijne-Blomberg, 1979)
|
||||
WARP780101 Average interactions per side chain atom (Warme-Morgan, 1978)
|
||||
WEBA780101 RF value in high salt chromatography (Weber-Lacey, 1978)
|
||||
WERD780101 Propensity to be buried inside (Wertz-Scheraga, 1978)
|
||||
WERD780102 Free energy change of epsilon(i) to epsilon(ex) (Wertz-Scheraga, 1978)
|
||||
WERD780103 Free energy change of alpha(Ri) to alpha(Rh) (Wertz-Scheraga, 1978)
|
||||
WERD780104 Free energy change of epsilon(i) to alpha(Rh) (Wertz-Scheraga, 1978)
|
||||
WOEC730101 Polar requirement (Woese, 1973)
|
||||
WOLR810101 Hydration potential (Wolfenden et al., 1981)
|
||||
WOLS870101 Principal property value z1 (Wold et al., 1987)
|
||||
WOLS870102 Principal property value z2 (Wold et al., 1987)
|
||||
WOLS870103 Principal property value z3 (Wold et al., 1987)
|
||||
YUTK870101 Unfolding Gibbs energy in water, pH7.0 (Yutani et al., 1987)
|
||||
YUTK870102 Unfolding Gibbs energy in water, pH9.0 (Yutani et al., 1987)
|
||||
YUTK870103 Activation Gibbs energy of unfolding, pH7.0 (Yutani et al., 1987)
|
||||
YUTK870104 Activation Gibbs energy of unfolding, pH9.0 (Yutani et al., 1987)
|
||||
ZASB820101 Dependence of partition coefficient on ionic strength (Zaslavsky et al., 1982)
|
||||
ZIMJ680101 Hydrophobicity (Zimmerman et al., 1968)
|
||||
ZIMJ680102 Bulkiness (Zimmerman et al., 1968)
|
||||
ZIMJ680103 Polarity (Zimmerman et al., 1968)
|
||||
ZIMJ680104 Isoelectric point (Zimmerman et al., 1968)
|
||||
ZIMJ680105 RF rank (Zimmerman et al., 1968)
|
||||
AURR980101 Normalized positional residue frequency at helix termini N4' (Aurora-Rose, 1998)
|
||||
AURR980102 Normalized positional residue frequency at helix termini N"' (Aurora-Rose, 1998)
|
||||
AURR980103 Normalized positional residue frequency at helix termini N" (Aurora-Rose, 1998)
|
||||
AURR980104 Normalized positional residue frequency at helix termini N' (Aurora-Rose, 1998)
|
||||
AURR980105 Normalized positional residue frequency at helix termini Nc (Aurora-Rose, 1998)
|
||||
AURR980106 Normalized positional residue frequency at helix termini N1 (Aurora-Rose, 1998)
|
||||
AURR980107 Normalized positional residue frequency at helix termini N2 (Aurora-Rose, 1998)
|
||||
AURR980108 Normalized positional residue frequency at helix termini N3 (Aurora-Rose, 1998)
|
||||
AURR980109 Normalized positional residue frequency at helix termini N4 (Aurora-Rose, 1998)
|
||||
AURR980110 Normalized positional residue frequency at helix termini N5 (Aurora-Rose, 1998)
|
||||
AURR980111 Normalized positional residue frequency at helix termini C5 (Aurora-Rose, 1998)
|
||||
AURR980112 Normalized positional residue frequency at helix termini C4 (Aurora-Rose, 1998)
|
||||
AURR980113 Normalized positional residue frequency at helix termini C3 (Aurora-Rose, 1998)
|
||||
AURR980114 Normalized positional residue frequency at helix termini C2 (Aurora-Rose, 1998)
|
||||
AURR980115 Normalized positional residue frequency at helix termini C1 (Aurora-Rose, 1998)
|
||||
AURR980116 Normalized positional residue frequency at helix termini Cc (Aurora-Rose, 1998)
|
||||
AURR980117 Normalized positional residue frequency at helix termini C' (Aurora-Rose, 1998)
|
||||
AURR980118 Normalized positional residue frequency at helix termini C" (Aurora-Rose, 1998)
|
||||
AURR980119 Normalized positional residue frequency at helix termini C"' (Aurora-Rose, 1998)
|
||||
AURR980120 Normalized positional residue frequency at helix termini C4' (Aurora-Rose, 1998)
|
||||
ONEK900101 Delta G values for the peptides extrapolated to 0 M urea (O'Neil-DeGrado, 1990)
|
||||
ONEK900102 Helix formation parameters (delta delta G) (O'Neil-DeGrado, 1990)
|
||||
VINM940101 Normalized flexibility parameters (B-values), average (Vihinen et al., 1994)
|
||||
VINM940102 Normalized flexibility parameters (B-values) for each residue surrounded by none rigid neighbours (Vihinen et al., 1994)
|
||||
VINM940103 Normalized flexibility parameters (B-values) for each residue surrounded by one rigid neighbours (Vihinen et al., 1994)
|
||||
VINM940104 Normalized flexibility parameters (B-values) for each residue surrounded by two rigid neighbours (Vihinen et al., 1994)
|
||||
MUNV940101 Free energy in alpha-helical conformation (Munoz-Serrano, 1994)
|
||||
MUNV940102 Free energy in alpha-helical region (Munoz-Serrano, 1994)
|
||||
MUNV940103 Free energy in beta-strand conformation (Munoz-Serrano, 1994)
|
||||
MUNV940104 Free energy in beta-strand region (Munoz-Serrano, 1994)
|
||||
MUNV940105 Free energy in beta-strand region (Munoz-Serrano, 1994)
|
||||
WIMW960101 Free energies of transfer of AcWl-X-LL peptides from bilayer interface to water (Wimley-White, 1996)
|
||||
KIMC930101 Thermodynamic beta sheet propensity (Kim-Berg, 1993)
|
||||
MONM990101 Turn propensity scale for transmembrane helices (Monne et al., 1999)
|
||||
BLAM930101 Alpha helix propensity of position 44 in T4 lysozyme (Blaber et al., 1993)
|
||||
PARS000101 p-Values of mesophilic proteins based on the distributions of B values (Parthasarathy-Murthy, 2000)
|
||||
PARS000102 p-Values of thermophilic proteins based on the distributions of B values (Parthasarathy-Murthy, 2000)
|
||||
KUMS000101 Distribution of amino acid residues in the 18 non-redundant families of thermophilic proteins (Kumar et al., 2000)
|
||||
KUMS000102 Distribution of amino acid residues in the 18 non-redundant families of mesophilic proteins (Kumar et al., 2000)
|
||||
KUMS000103 Distribution of amino acid residues in the alpha-helices in thermophilic proteins (Kumar et al., 2000)
|
||||
KUMS000104 Distribution of amino acid residues in the alpha-helices in mesophilic proteins (Kumar et al., 2000)
|
||||
TAKK010101 Side-chain contribution to protein stability (kJ/mol) (Takano-Yutani, 2001)
|
||||
FODM020101 Propensity of amino acids within pi-helices (Fodje-Al-Karadaghi, 2002)
|
||||
NADH010101 Hydropathy scale based on self-information values in the two-state model (5% accessibility) (Naderi-Manesh et al., 2001)
|
||||
NADH010102 Hydropathy scale based on self-information values in the two-state model (9% accessibility) (Naderi-Manesh et al., 2001)
|
||||
NADH010103 Hydropathy scale based on self-information values in the two-state model (16% accessibility) (Naderi-Manesh et al., 2001)
|
||||
NADH010104 Hydropathy scale based on self-information values in the two-state model (20% accessibility) (Naderi-Manesh et al., 2001)
|
||||
NADH010105 Hydropathy scale based on self-information values in the two-state model (25% accessibility) (Naderi-Manesh et al., 2001)
|
||||
NADH010106 Hydropathy scale based on self-information values in the two-state model (36% accessibility) (Naderi-Manesh et al., 2001)
|
||||
NADH010107 Hydropathy scale based on self-information values in the two-state model (50% accessibility) (Naderi-Manesh et al., 2001)
|
||||
MONM990201 Averaged turn propensities in a transmembrane helix (Monne et al., 1999)
|
||||
KOEP990101 Alpha-helix propensity derived from designed sequences (Koehl-Levitt, 1999)
|
||||
KOEP990102 Beta-sheet propensity derived from designed sequences (Koehl-Levitt, 1999)
|
||||
CEDJ970101 Composition of amino acids in extracellular proteins (percent) (Cedano et al., 1997)
|
||||
CEDJ970102 Composition of amino acids in anchored proteins (percent) (Cedano et al., 1997)
|
||||
CEDJ970103 Composition of amino acids in membrane proteins (percent) (Cedano et al., 1997)
|
||||
CEDJ970104 Composition of amino acids in intracellular proteins (percent) (Cedano et al., 1997)
|
||||
CEDJ970105 Composition of amino acids in nuclear proteins (percent) (Cedano et al., 1997)
|
||||
FUKS010101 Surface composition of amino acids in intracellular proteins of thermophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010102 Surface composition of amino acids in intracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010103 Surface composition of amino acids in extracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010104 Surface composition of amino acids in nuclear proteins (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010105 Interior composition of amino acids in intracellular proteins of thermophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010106 Interior composition of amino acids in intracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010107 Interior composition of amino acids in extracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010108 Interior composition of amino acids in nuclear proteins (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010109 Entire chain composition of amino acids in intracellular proteins of thermophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010110 Entire chain composition of amino acids in intracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010111 Entire chain composition of amino acids in extracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)
|
||||
FUKS010112 Entire chain composition of amino acids in nuclear proteins (percent) (Fukuchi-Nishikawa, 2001)
|
||||
AVBF000101 Screening coefficients gamma, local (Avbelj, 2000)
|
||||
AVBF000102 Screening coefficients gamma, non-local (Avbelj, 2000)
|
||||
AVBF000103 Slopes tripeptide, FDPB VFF neutral (Avbelj, 2000)
|
||||
AVBF000104 Slopes tripeptides, LD VFF neutral (Avbelj, 2000)
|
||||
AVBF000105 Slopes tripeptide, FDPB VFF noside (Avbelj, 2000)
|
||||
AVBF000106 Slopes tripeptide FDPB VFF all (Avbelj, 2000)
|
||||
AVBF000107 Slopes tripeptide FDPB PARSE neutral (Avbelj, 2000)
|
||||
AVBF000108 Slopes dekapeptide, FDPB VFF neutral (Avbelj, 2000)
|
||||
AVBF000109 Slopes proteins, FDPB VFF neutral (Avbelj, 2000)
|
||||
YANJ020101 Side-chain conformation by gaussian evolutionary method (Yang et al., 2002)
|
||||
MITS020101 Amphiphilicity index (Mitaku et al., 2002)
|
||||
TSAJ990101 Volumes including the crystallographic waters using the ProtOr (Tsai et al., 1999)
|
||||
TSAJ990102 Volumes not including the crystallographic waters using the ProtOr (Tsai et al., 1999)
|
||||
COSI940101 Electron-ion interaction potential values (Cosic, 1994)
|
||||
PONP930101 Hydrophobicity scales (Ponnuswamy, 1993)
|
||||
WILM950101 Hydrophobicity coefficient in RP-HPLC, C18 with 0.1%TFA/MeCN/H2O (Wilce et al. 1995)
|
||||
WILM950102 Hydrophobicity coefficient in RP-HPLC, C8 with 0.1%TFA/MeCN/H2O (Wilce et al. 1995)
|
||||
WILM950103 Hydrophobicity coefficient in RP-HPLC, C4 with 0.1%TFA/MeCN/H2O (Wilce et al. 1995)
|
||||
WILM950104 Hydrophobicity coefficient in RP-HPLC, C18 with 0.1%TFA/2-PrOH/MeCN/H2O (Wilce et al. 1995)
|
||||
KUHL950101 Hydrophilicity scale (Kuhn et al., 1995)
|
||||
GUOD860101 Retention coefficient at pH 2 (Guo et al., 1986)
|
||||
JURD980101 Modified Kyte-Doolittle hydrophobicity scale (Juretic et al., 1998)
|
||||
BASU050101 Interactivity scale obtained from the contact matrix (Bastolla et al., 2005)
|
||||
BASU050102 Interactivity scale obtained by maximizing the mean of correlation coefficient over single-domain globular proteins (Bastolla et al., 2005)
|
||||
BASU050103 Interactivity scale obtained by maximizing the mean of correlation coefficient over pairs of sequences sharing the TIM barrel fold (Bastolla et al., 2005)
|
||||
SUYM030101 Linker propensity index (Suyama-Ohara, 2003)
|
||||
PUNT030101 Knowledge-based membrane-propensity scale from 1D_Helix in MPtopo databases (Punta-Maritan, 2003)
|
||||
PUNT030102 Knowledge-based membrane-propensity scale from 3D_Helix in MPtopo databases (Punta-Maritan, 2003)
|
||||
GEOR030101 Linker propensity from all dataset (George-Heringa, 2003)
|
||||
GEOR030102 Linker propensity from 1-linker dataset (George-Heringa, 2003)
|
||||
GEOR030103 Linker propensity from 2-linker dataset (George-Heringa, 2003)
|
||||
GEOR030104 Linker propensity from 3-linker dataset (George-Heringa, 2003)
|
||||
GEOR030105 Linker propensity from small dataset (linker length is less than six residues) (George-Heringa, 2003)
|
||||
GEOR030106 Linker propensity from medium dataset (linker length is between six and 14 residues) (George-Heringa, 2003)
|
||||
GEOR030107 Linker propensity from long dataset (linker length is greater than 14 residues) (George-Heringa, 2003)
|
||||
GEOR030108 Linker propensity from helical (annotated by DSSP) dataset (George-Heringa, 2003)
|
||||
GEOR030109 Linker propensity from non-helical (annotated by DSSP) dataset (George-Heringa, 2003)
|
||||
ZHOH040101 The stability scale from the knowledge-based atom-atom potential (Zhou-Zhou, 2004)
|
||||
ZHOH040102 The relative stability scale extracted from mutation experiments (Zhou-Zhou, 2004)
|
||||
ZHOH040103 Buriability (Zhou-Zhou, 2004)
|
||||
BAEK050101 Linker index (Bae et al., 2005)
|
||||
HARY940101 Mean volumes of residues buried in protein interiors (Harpaz et al., 1994)
|
||||
PONJ960101 Average volumes of residues (Pontius et al., 1996)
|
||||
DIGM050101 Hydrostatic pressure asymmetry index, PAI (Di Giulio, 2005)
|
||||
WOLR790101 Hydrophobicity index (Wolfenden et al., 1979)
|
||||
OLSK800101 Average internal preferences (Olsen, 1980)
|
||||
KIDA850101 Hydrophobicity-related index (Kidera et al., 1985)
|
||||
GUYH850102 Apparent partition energies calculated from Wertz-Scheraga index (Guy, 1985)
|
||||
GUYH850103 Apparent partition energies calculated from Robson-Osguthorpe index (Guy, 1985)
|
||||
GUYH850104 Apparent partition energies calculated from Janin index (Guy, 1985)
|
||||
GUYH850105 Apparent partition energies calculated from Chothia index (Guy, 1985)
|
||||
ROSM880104 Hydropathies of amino acid side chains, neutral form (Roseman, 1988)
|
||||
ROSM880105 Hydropathies of amino acid side chains, pi-values in pH 7.0 (Roseman, 1988)
|
||||
JACR890101 Weights from the IFH scale (Jacobs-White, 1989)
|
||||
COWR900101 Hydrophobicity index, 3.0 pH (Cowan-Whittaker, 1990)
|
||||
BLAS910101 Scaled side chain hydrophobicity values (Black-Mould, 1991)
|
||||
CASG920101 Hydrophobicity scale from native protein structures (Casari-Sippl, 1992)
|
||||
CORJ870101 NNEIG index (Cornette et al., 1987)
|
||||
CORJ870102 SWEIG index (Cornette et al., 1987)
|
||||
CORJ870103 PRIFT index (Cornette et al., 1987)
|
||||
CORJ870104 PRILS index (Cornette et al., 1987)
|
||||
CORJ870105 ALTFT index (Cornette et al., 1987)
|
||||
CORJ870106 ALTLS index (Cornette et al., 1987)
|
||||
CORJ870107 TOTFT index (Cornette et al., 1987)
|
||||
CORJ870108 TOTLS index (Cornette et al., 1987)
|
||||
MIYS990101 Relative partition energies derived by the Bethe approximation (Miyazawa-Jernigan, 1999)
|
||||
MIYS990102 Optimized relative partition energies - method A (Miyazawa-Jernigan, 1999)
|
||||
MIYS990103 Optimized relative partition energies - method B (Miyazawa-Jernigan, 1999)
|
||||
MIYS990104 Optimized relative partition energies - method C (Miyazawa-Jernigan, 1999)
|
||||
MIYS990105 Optimized relative partition energies - method D (Miyazawa-Jernigan, 1999)
|
||||
ENGD860101 Hydrophobicity index (Engelman et al., 1986)
|
||||
FASG890101 Hydrophobicity index (Fasman, 1989)
|
||||
KARS160101 Number of vertices (order of the graph) (Karkbara-Knisley, 2016)
|
||||
KARS160102 Number of edges (size of the graph) (Karkbara-Knisley, 2016)
|
||||
KARS160103 Total weighted degree of the graph (obtained by adding all the weights of all the vertices) (Karkbara-Knisley, 2016)
|
||||
KARS160104 Weighted domination number (Karkbara-Knisley, 2016)
|
||||
KARS160105 Average eccentricity (Karkbara-Knisley, 2016)
|
||||
KARS160106 Radius (minimum eccentricity) (Karkbara-Knisley, 2016)
|
||||
KARS160107 Diameter (maximum eccentricity) (Karkbara-Knisley, 2016)
|
||||
KARS160108 Average weighted degree (total degree, divided by the number of vertices) (Karkbara-Knisley, 2016)
|
||||
KARS160109 Maximum eigenvalue of the weighted Laplacian matrix of the graph (Karkbara-Knisley, 2016)
|
||||
KARS160110 Minimum eigenvalue of the weighted Laplacian matrix of the graph (Karkbara-Knisley, 2016)
|
||||
KARS160111 Average eigenvalue of the Laplacian matrix of the graph (Karkbara-Knisley, 2016)
|
||||
KARS160112 Second smallest eigenvalue of the Laplacian matrix of the graph (Karkbara-Knisley, 2016)
|
||||
KARS160113 Weighted domination number using the atomic number (Karkbara-Knisley, 2016)
|
||||
KARS160114 Average weighted eccentricity based on the atomic number (Karkbara-Knisley, 2016)
|
||||
KARS160115 Weighted radius based on the atomic number (minimum eccentricity) (Karkbara-Knisley, 2016)
|
||||
KARS160116 Weighted diameter based on the atomic number (maximum eccentricity) (Karkbara-Knisley, 2016)
|
||||
KARS160117 Total weighted atomic number of the graph (obtained by summing all the atomic number of each of the vertices in the graph) (Karkbara-Knisley, 2016)
|
||||
KARS160118 Average weighted atomic number or degree based on atomic number in the graph (Karkbara-Knisley, 2016)
|
||||
KARS160119 Weighted maximum eigenvalue based on the atomic numbers (Karkbara-Knisley, 2016)
|
||||
KARS160120 Weighted minimum eigenvalue based on the atomic numbers (Karkbara-Knisley, 2016)
|
||||
KARS160121 Weighted average eigenvalue based on the atomic numbers (Karkbara-Knisley, 2016)
|
||||
KARS160122 Weighted second smallest eigenvalue of the weighted Laplacian matrix (Karkbara-Knisley, 2016)
|
|
@ -1,99 +0,0 @@
|
|||
List of 94 Amino Acid Matrices in AAindex ver.9.2
|
||||
|
||||
The columns correspond to the AAindex accession number and the description of
|
||||
each matrix.
|
||||
|
||||
ALTS910101 The PAM-120 matrix (Altschul, 1991)
|
||||
BENS940101 Log-odds scoring matrix collected in 6.4-8.7 PAM (Benner et al., 1994)
|
||||
BENS940102 Log-odds scoring matrix collected in 22-29 PAM (Benner et al., 1994)
|
||||
BENS940103 Log-odds scoring matrix collected in 74-100 PAM (Benner et al., 1994)
|
||||
BENS940104 Genetic code matrix (Benner et al., 1994)
|
||||
CSEM940101 Residue replace ability matrix (Cserzo et al., 1994)
|
||||
DAYM780301 Log odds matrix for 250 PAMs (Dayhoff et al., 1978)
|
||||
FEND850101 Structure-Genetic matrix (Feng et al., 1985)
|
||||
FITW660101 Mutation values for the interconversion of amino acid pairs (Fitch, 1966)
|
||||
GEOD900101 Hydrophobicity scoring matrix (George et al., 1990)
|
||||
GONG920101 The mutation matrix for initially aligning (Gonnet et al., 1992)
|
||||
GRAR740104 Chemical distance (Grantham, 1974)
|
||||
HENS920101 BLOSUM45 substitution matrix (Henikoff-Henikoff, 1992)
|
||||
HENS920102 BLOSUM62 substitution matrix (Henikoff-Henikoff, 1992)
|
||||
HENS920103 BLOSUM80 substitution matrix (Henikoff-Henikoff, 1992)
|
||||
JOHM930101 Structure-based amino acid scoring table (Johnson-Overington, 1993)
|
||||
JOND920103 The 250 PAM PET91 matrix (Jones et al., 1992)
|
||||
JOND940101 The 250 PAM transmembrane protein exchange matrix (Jones et al., 1994)
|
||||
KOLA920101 Conformational similarity weight matrix (Kolaskar-Kulkarni-Kale, 1992)
|
||||
LEVJ860101 The secondary structure similarity matrix (Levin et al., 1986)
|
||||
LUTR910101 Structure-based comparison table for outside other class (Luthy et al., 1991)
|
||||
LUTR910102 Structure-based comparison table for inside other class (Luthy et al., 1991)
|
||||
LUTR910103 Structure-based comparison table for outside alpha class (Luthy et al., 1991)
|
||||
LUTR910104 Structure-based comparison table for inside alpha class (Luthy et al., 1991)
|
||||
LUTR910105 Structure-based comparison table for outside beta class (Luthy et al., 1991)
|
||||
LUTR910106 Structure-based comparison table for inside beta class (Luthy et al., 1991)
|
||||
LUTR910107 Structure-based comparison table for other class (Luthy et al., 1991)
|
||||
LUTR910108 Structure-based comparison table for alpha helix class (Luthy et al., 1991)
|
||||
LUTR910109 Structure-based comparison table for beta strand class (Luthy et al., 1991)
|
||||
MCLA710101 The similarity of pairs of amino acids (McLachlan, 1971)
|
||||
MCLA720101 Chemical similarity scores (McLachlan, 1972)
|
||||
MIYS930101 Base-substitution-protein-stability matrix (Miyazawa-Jernigan, 1993)
|
||||
MIYT790101 Amino acid pair distance (Miyata et al., 1979)
|
||||
MOHR870101 EMPAR matrix (Mohana Rao, 1987)
|
||||
NIEK910101 Structure-derived correlation matrix 1 (Niefind-Schomburg, 1991)
|
||||
NIEK910102 Structure-derived correlation matrix 2 (Niefind-Schomburg, 1991)
|
||||
OVEJ920101 STR matrix from structure-based alignments (Overington et al., 1992)
|
||||
QU_C930101 Cross-correlation coefficients of preference factors main chain (Qu et al., 1993)
|
||||
QU_C930102 Cross-correlation coefficients of preference factors side chain (Qu et al., 1993)
|
||||
QU_C930103 The mutant distance based on spatial preference factor (Qu et al., 1993)
|
||||
RISJ880101 Scoring matrix (Risler et al., 1988)
|
||||
TUDE900101 isomorphicity of replacements (Tudos et al., 1990)
|
||||
AZAE970101 The single residue substitution matrix from interchanges of spatially neighbouring residues (Azarya-Sprinzak et al., 1997)
|
||||
AZAE970102 The substitution matrix derived from spatially conserved motifs (Azarya-Sprinzak et al., 1997)
|
||||
RIER950101 Hydrophobicity scoring matrix (Riek et al., 1995)
|
||||
WEIL970101 WAC matrix constructed from amino acid comparative profiles (Wei et al., 1997)
|
||||
WEIL970102 Difference matrix obtained by subtracting the BLOSUM62 from the WAC matrix (Wei et al., 1997)
|
||||
MEHP950101 (Mehta et al., 1995)
|
||||
MEHP950102 (Mehta et al., 1995)
|
||||
MEHP950103 (Mehta et al., 1995)
|
||||
KAPO950101 (Kapp et al., 1995)
|
||||
VOGG950101 (Vogt et al., 1995)
|
||||
KOSJ950101 Context-dependent optimal substitution matrices for exposed helix (Koshi-Goldstein, 1995)
|
||||
KOSJ950102 Context-dependent optimal substitution matrices for exposed beta (Koshi-Goldstein, 1995)
|
||||
KOSJ950103 Context-dependent optimal substitution matrices for exposed turn (Koshi-Goldstein, 1995)
|
||||
KOSJ950104 Context-dependent optimal substitution matrices for exposed coil (Koshi-Goldstein, 1995)
|
||||
KOSJ950105 Context-dependent optimal substitution matrices for buried helix (Koshi-Goldstein, 1995)
|
||||
KOSJ950106 Context-dependent optimal substitution matrices for buried beta (Koshi-Goldstein, 1995)
|
||||
KOSJ950107 Context-dependent optimal substitution matrices for buried turn (Koshi-Goldstein, 1995)
|
||||
KOSJ950108 Context-dependent optimal substitution matrices for buried coil (Koshi-Goldstein, 1995)
|
||||
KOSJ950109 Context-dependent optimal substitution matrices for alpha helix (Koshi-Goldstein, 1995)
|
||||
KOSJ950110 Context-dependent optimal substitution matrices for beta sheet (Koshi-Goldstein, 1995)
|
||||
KOSJ950111 Context-dependent optimal substitution matrices for turn (Koshi-Goldstein, 1995)
|
||||
KOSJ950112 Context-dependent optimal substitution matrices for coil (Koshi-Goldstein, 1995)
|
||||
KOSJ950113 Context-dependent optimal substitution matrices for exposed residues (Koshi-Goldstein, 1995)
|
||||
KOSJ950114 Context-dependent optimal substitution matrices for buried residues (Koshi-Goldstein, 1995)
|
||||
KOSJ950115 Context-dependent optimal substitution matrices for all residues (Koshi-Goldstein, 1995)
|
||||
OVEJ920102 Environment-specific amino acid substitution matrix for alpha residues (Overington et al., 1992)
|
||||
OVEJ920103 Environment-specific amino acid substitution matrix for beta residues (Overington et al., 1992)
|
||||
OVEJ920104 Environment-specific amino acid substitution matrix for accessible residues (Overington et al., 1992)
|
||||
OVEJ920105 Environment-specific amino acid substitution matrix for inaccessible residues (Overington et al., 1992)
|
||||
LINK010101 Substitution matrices from a neural network model (Lin et al., 2001)
|
||||
BLAJ010101 Matrix built from structural superposition data for identifying potential remote homologues (Blake-Cohen, 2001)
|
||||
PRLA000101 Structure derived matrix (SDM) for alignment of distantly related sequences (Prlic et al., 2000)
|
||||
PRLA000102 Homologous structure derived matrix (HSDM) for alignment of distantly related sequences (Prlic et al., 2000)
|
||||
DOSZ010101 Amino acid similarity matrix based on the sausage force field (Dosztanyi-Torda, 2001)
|
||||
DOSZ010102 Normalised version of SM_SAUSAGE (Dosztanyi-Torda, 2001)
|
||||
DOSZ010103 An amino acid similarity matrix based on the THREADER force field (Dosztanyi-Torda, 2001)
|
||||
DOSZ010104 Normalised version of SM_THREADER (Dosztanyi-Torda, 2001)
|
||||
GIAG010101 Residue substitutions matrix from thermo/mesophilic to psychrophilic enzymes (Gianese et al., 2001)
|
||||
DAYM780302 Log odds matrix for 40 PAMs (Dayhoff et al., 1978)
|
||||
HENS920104 BLOSUM50 substitution matrix (Henikoff-Henikoff, 1992)
|
||||
QUIB020101 STROMA score matrix for the alignment of known distant homologs (Qian-Goldstein, 2002)
|
||||
NAOD960101 Substitution matrix derived from the single residue interchanges at spatially conserved regions of proteins (Naor et al., 1996)
|
||||
RUSR970101 Substitution matrix based on structural alignments of analogous proteins (Russell et al., 1997)
|
||||
RUSR970102 Substitution matrix based on structural alignments of remote homologous proteins (Russell et al., 1997)
|
||||
RUSR970103 Substitution matrix based on structural alignments of analogous and remote homologous proteins (Russell et al., 1997)
|
||||
OGAK980101 Substitution matrix derived from structural alignments by maximizing entropy (Ogata et al., 1998)
|
||||
KANM000101 Substitution matrix (OPTIMA) derived by maximizing discrimination between homologs and non-homologs (Kann et al., 2000)
|
||||
NGPC000101 Substitution matrix (PHAT) built from hydrophobic and transmembrane regions of the Blocks database (Ng et al., 2000)
|
||||
MUET010101 Non-symmetric substitution matrix (SLIM) for detection of homologous transmembrane proteins (Mueller et al., 2001)
|
||||
MUET020101 Substitution matrix (VTML160) obtained by maximum likelihood estimation (Mueller et al., 2002)
|
||||
MUET020102 Substitution matrix (VTML250) obtained by maximum likelihood estimation (Mueller et al., 2002)
|
||||
CROG050101 Substitution matrix computed from the Dirichlet Mixture Model (Crooks-Brenner, 2005)
|
|
@ -1,52 +0,0 @@
|
|||
List of 47 Amino Acid Contact Potential Matrices in AAindex ver.9.2
|
||||
|
||||
The columns correspond to the AAindex accession number and the description of
|
||||
each contact potential matrix.
|
||||
|
||||
TANS760101 Statistical contact potential derived from 25 x-ray protein structures
|
||||
TANS760102 Number of contacts between side chains derived from 25 x-ray protein structures
|
||||
ROBB790102 Interaction energies derived from side chain contacts in the interiors of known protein structures
|
||||
BRYS930101 Distance-dependent statistical potential (only energies of contacts within 0-5 Angstroms are included)
|
||||
THOP960101 Mixed quasichemical and optimization-based protein contact potential
|
||||
MIRL960101 Statistical potential derived by the maximization of the harmonic mean of Z scores
|
||||
VENM980101 Statistical potential derived by the maximization of the perceptron criterion
|
||||
BASU010101 Optimization-based potential derived by the modified perceptron criterion
|
||||
MIYS850102 Quasichemical energy of transfer of amino acids from water to the protein environment
|
||||
MIYS850103 Quasichemical energy of interactions in an average buried environment
|
||||
MIYS960101 Quasichemical energy of transfer of amino acids from water to the protein environment
|
||||
MIYS960102 Quasichemical energy of interactions in an average buried environment
|
||||
MIYS960103 Number of contacts between side chains derived from 1168 x-ray protein structures
|
||||
MIYS990106 Quasichemical energy of transfer of amino acids from water to the protein environment
|
||||
MIYS990107 Quasichemical energy of interactions in an average buried environment
|
||||
LIWA970101 Modified version of the Miyazawa-Jernigan transfer energy
|
||||
KESO980101 Quasichemical transfer energy derived from interfacial regions of protein-protein complexes
|
||||
KESO980102 Quasichemical energy in an average protein environment derived from interfacial regions of protein-protein complexes
|
||||
MOOG990101 Quasichemical potential derived from interfacial regions of protein-protein complexes
|
||||
BETM990101 Modified version of the Miyazawa-Jernigan transfer energy
|
||||
TOBD000101 Optimization-derived potential obtained for small set of decoys
|
||||
TOBD000102 Optimization-derived potential obtained for large set of decoys
|
||||
PARB960101 Statistical contact potential derived by the quasichemical approximation
|
||||
PARB960102 Modified version of the Miyazawa-Jernigan transfer energy
|
||||
KOLA930101 Statistical potential derived by the quasichemical approximation
|
||||
GODA950101 Quasichemical statistical potential derived from buried contacts
|
||||
SKOJ970101 Statistical potential derived by the quasichemical approximation
|
||||
SKOJ000101 Statistical quasichemical potential with the partially composition-corrected pair scale
|
||||
SKOJ000102 Statistical quasichemical potential with the composition-corrected pair scale
|
||||
BONM030101 Quasichemical statistical potential for the antiparallel orientation of interacting side groups
|
||||
BONM030102 Quasichemical statistical potential for the intermediate orientation of interacting side groups
|
||||
BONM030103 Quasichemical statistical potential for the parallel orientation of interacting side groups
|
||||
BONM030104 Distances between centers of interacting side chains in the antiparallel orientation
|
||||
BONM030105 Distances between centers of interacting side chains in the intermediate orientation
|
||||
BONM030106 Distances between centers of interacting side chains in the parallel orientation
|
||||
MICC010101 Optimization-derived potential
|
||||
SIMK990101 Distance-dependent statistical potential (contacts within 0-5 Angstroms)
|
||||
SIMK990102 Distance-dependent statistical potential (contacts within 5-7.5 Angstroms)
|
||||
SIMK990103 Distance-dependent statistical potential (contacts within 7.5-10 Angstroms)
|
||||
SIMK990104 Distance-dependent statistical potential (contacts within 10-12 Angstroms)
|
||||
SIMK990105 Distance-dependent statistical potential (contacts longer than 12 Angstroms)
|
||||
ZHAC000101 Environment-dependent residue contact energies (rows = helix, cols = helix)
|
||||
ZHAC000102 Environment-dependent residue contact energies (rows = helix, cols = strand)
|
||||
ZHAC000103 Environment-dependent residue contact energies (rows = helix, cols = coil)
|
||||
ZHAC000104 Environment-dependent residue contact energies (rows = strand, cols = strand)
|
||||
ZHAC000105 Environment-dependent residue contact energies (rows = strand, cols = coil)
|
||||
ZHAC000106 Environment-dependent residue contact energies (rows = coil, cols = coil)
|
|
@ -1,3 +0,0 @@
|
|||
grep -Ei "BENS940104|GIAG010101|DOSZ010103|RISJ880101|MIYT790101|OVEJ920102" aa_headerNames.txt
|
||||
|
||||
grep -Ei "BENS940104|GIAG010101|DOSZ010103|RISJ880101|MIYT790101|OVEJ920102" aaindex/data/*
|
File diff suppressed because it is too large
Load diff
|
@ -1,22 +0,0 @@
|
|||
#!/bin/sh
# Run get_scores.py for one mutation of one chain in one PDB file.
#
# Usage: <this script> [pdb_file] [chain] [mutation]
#   $1  PDB file passed to get_scores.py
#       (default: /home/tanu/git/Data/pyrazinamide/input/pnca_complex.pdb)
#   $2  chain     (default: A)
#   $3  mutation  (default: L4S)
#
# Called with no arguments, this runs exactly the command that used to be
# hard-coded here, so existing usage is unaffected.
#
# Earlier invocations, kept for reference:
#python /home/sportelli/Desktop/Important_Code/structural/aaindex/get_scores.py /home/sportelli/Desktop/Project_2_rpoB/leprae/RMLE_B_RFP.pdb C P28A
#python /home/tanu/git/LSHTM_analysis/scripts/aa_index_scripts/pnca_complex.pdb A L4S

aa_python="/home/tanu/git/LSHTM_analysis/scripts/aa_index_scripts/aaindex/get_scores.py"
pdb_file=${1:-/home/tanu/git/Data/pyrazinamide/input/pnca_complex.pdb}
chain=${2:-A}
mut=${3:-L4S}

# Quote every expansion so paths containing spaces reach python as one argument.
python "$aa_python" "$pdb_file" "$chain" "$mut"

#----------------------
# Planned batch inputs (notes from the original author)
#---------------------
#drug = "pyrazinamide"
#gene = "pncA" # force it to be lowercase
#chain = "A"
#mutfile = "/home/tanu/git/Data/output/<gene>_mcsm_snps.csv"
#mut = for i in mutfile
#
#$1 = "/home/tanu/git/Data/input/<gene>_complex.pdb
#$2 = chain
#$3 = mut

# Batch one-liner over every SNP, kept for reference:
#for i in $(cat /home/tanu/git/Data/pyrazinamide/output/*mcsm_snps*); do echo -n "${i}," >>/home/tanu/git/Data/pyrazinamide/output/aa_index/pnca_aa; python /home/tanu/git/LSHTM_analysis/scripts/aa_index_scripts/aaindex/get_scores.py /home/tanu/git/Data/pyrazinamide/input/pnca_complex.pdb A $i >> /home/tanu/git/Data/pyrazinamide/output/aa_index/pnca_aa; done
|
|
@ -1,31 +0,0 @@
|
|||
#!/bin/sh
# Build a CSV of get_scores.py output for every SNP listed for one gene.
#
# Usage: run_aaindex.sh [drug] [gene] [chain]
#   $1  drug   name of the data sub-directory   (default: pyrazinamide)
#   $2  gene   gene name used in the file names (default: pnca);
#              lowercased here because the file names use lowercase
#   $3  chain  chain argument for get_scores.py (default: A)
#
# Reads:
#   /home/tanu/git/Data/<drug>/input/<gene>_complex.pdb
#   /home/tanu/git/Data/<drug>/output/<gene>_mcsm_formatted_snps.csv
#   ADD_aa_header.csv (looked up in the current working directory)
# Writes (overwriting any previous run):
#   /home/tanu/git/Data/<drug>/output/aa_index/<gene>_aa.csv
#   = the header file, then one row per SNP: "<snp>," + get_scores.py output.

drug=${1:-pyrazinamide}
gene=$(printf '%s' "${2:-pnca}" | tr '[:upper:]' '[:lower:]')
chain=${3:-A}

aa_python="/home/tanu/git/LSHTM_analysis/scripts/aa_index_scripts/aaindex/get_scores.py"
snp_dir="/home/tanu/git/Data/${drug}"
pdb_file="${snp_dir}/input/${gene}_complex.pdb"
snp_file="${snp_dir}/output/${gene}_mcsm_formatted_snps.csv"
aa_header="ADD_aa_header.csv"
aa_outfile="${snp_dir}/output/aa_index/${gene}_aa.csv"

echo "Running for drug: ${drug} and gene ${gene}
Input from: ${snp_dir}/input/${gene}_complex.pdb
Chain: ${chain}
Output to: ${aa_outfile}"

# Fail before touching the output file if any input is missing; previously a
# missing input still truncated the output file and the loop carried on.
for required in "$aa_python" "$aa_header" "$pdb_file" "$snp_file"; do
    if [ ! -r "$required" ]; then
        echo "ERROR: cannot read ${required}" >&2
        exit 1
    fi
done

mkdir -p "$(dirname "$aa_outfile")" || exit 1
cat "$aa_header" > "$aa_outfile" || exit 1

# Read the SNP list on fd 3 so python keeps the caller's stdin. `read -r`
# avoids the globbing and word splitting of `for i in $(cat ...)`; the
# `|| [ -n "$mut" ]` keeps a final line that lacks a trailing newline.
while read -r mut <&3 || [ -n "$mut" ]; do
    [ -n "$mut" ] || continue    # skip blank lines
    # printf instead of `echo -n`, whose behaviour is not defined by POSIX sh.
    printf '%s,' "$mut" >> "$aa_outfile"
    # A failure for one SNP is deliberately not fatal: carry on with the rest.
    python "$aa_python" "$pdb_file" "$chain" "$mut" >> "$aa_outfile"
done 3< "$snp_file"
|
||||
|
||||
|
||||
# TO RUN
|
||||
# gene should be in lowercase
|
||||
# ~/git/LSHTM_analysis/scripts/aa_index_scripts/run_aaindex.sh cycloserine alr A
|
||||
# ~/git/LSHTM_analysis/scripts/aa_index_scripts/run_aaindex.sh ethambutol embb B
|
||||
# ~/git/LSHTM_analysis/scripts/aa_index_scripts/run_aaindex.sh streptomycin gid A
|
||||
# ~/git/LSHTM_analysis/scripts/aa_index_scripts/run_aaindex.sh isoniazid katg A
|
||||
# ~/git/LSHTM_analysis/scripts/aa_index_scripts/run_aaindex.sh pyrazinamide pnca A
|
||||
# ~/git/LSHTM_analysis/scripts/aa_index_scripts/run_aaindex.sh rifampicin rpob A
|
|
@ -1,213 +0,0 @@
|
|||
==== Secondary Structure Definition by the program DSSP, CMBI version 2.0 ==== DATE=2022-05-30 .
|
||||
REFERENCE W. KABSCH AND C.SANDER, BIOPOLYMERS 22 (1983) 2577-2637 .
|
||||
HEADER HYDROLASE 12-NOV-10 3PL1 .
|
||||
COMPND MOL_ID: 1; MOLECULE: PYRAZINAMIDASE/NICOTINAMIDASE PNCA (PZASE); CHAIN .
|
||||
SOURCE MOL_ID: 1; ORGANISM_SCIENTIFIC: MYCOBACTERIUM TUBERCULOSIS; ORGANISM_T .
|
||||
AUTHOR S.PETRELLA,N.GELUS-ZIENTAL,C.MAYER,W.SOUGAKOFF .
|
||||
185 1 0 0 0 TOTAL NUMBER OF RESIDUES, NUMBER OF CHAINS, NUMBER OF SS-BRIDGES(TOTAL,INTRACHAIN,INTERCHAIN) .
|
||||
8635.1 ACCESSIBLE SURFACE OF PROTEIN (ANGSTROM**2) .
|
||||
121 65.4 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(J) , SAME NUMBER PER 100 RESIDUES .
|
||||
35 18.9 TOTAL NUMBER OF HYDROGEN BONDS IN PARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .
|
||||
6 3.2 TOTAL NUMBER OF HYDROGEN BONDS IN ANTIPARALLEL BRIDGES, SAME NUMBER PER 100 RESIDUES .
|
||||
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-5), SAME NUMBER PER 100 RESIDUES .
|
||||
1 0.5 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-4), SAME NUMBER PER 100 RESIDUES .
|
||||
2 1.1 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-3), SAME NUMBER PER 100 RESIDUES .
|
||||
1 0.5 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-2), SAME NUMBER PER 100 RESIDUES .
|
||||
1 0.5 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I-1), SAME NUMBER PER 100 RESIDUES .
|
||||
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+0), SAME NUMBER PER 100 RESIDUES .
|
||||
0 0.0 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+1), SAME NUMBER PER 100 RESIDUES .
|
||||
11 5.9 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+2), SAME NUMBER PER 100 RESIDUES .
|
||||
20 10.8 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+3), SAME NUMBER PER 100 RESIDUES .
|
||||
32 17.3 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+4), SAME NUMBER PER 100 RESIDUES .
|
||||
4 2.2 TOTAL NUMBER OF HYDROGEN BONDS OF TYPE O(I)-->H-N(I+5), SAME NUMBER PER 100 RESIDUES .
|
||||
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 *** HISTOGRAMS OF *** .
|
||||
0 0 0 0 0 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 RESIDUES PER ALPHA HELIX .
|
||||
2 0 1 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 PARALLEL BRIDGES PER LADDER .
|
||||
3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ANTIPARALLEL BRIDGES PER LADDER .
|
||||
0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 LADDERS PER SHEET .
|
||||
# RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA CHAIN
|
||||
1 1 A M 0 0 88 0, 0.0 125,-2.3 0, 0.0 126,-1.7 0.000 360.0 360.0 360.0 -35.2 -7.8 -41.8 12.7
|
||||
2 2 A R E -a 127 0A 55 123,-0.2 40,-1.5 124,-0.2 41,-1.3 -0.954 360.0-178.6-117.9 134.0 -8.0 -38.7 10.3
|
||||
3 3 A A E -ab 128 43A 0 124,-2.2 126,-2.1 -2,-0.4 2,-0.4 -0.960 22.5-135.9-126.5 153.8 -5.3 -35.9 10.2
|
||||
4 4 A L E -ab 129 44A 0 39,-2.2 41,-2.2 -2,-0.3 2,-0.5 -0.914 16.6-158.4-103.2 132.4 -5.0 -32.7 8.2
|
||||
5 5 A I E -ab 130 45A 0 124,-2.8 126,-2.1 -2,-0.4 2,-0.7 -0.967 3.5-157.1-113.7 111.9 -1.5 -32.0 6.7
|
||||
6 6 A I E -ab 131 46A 1 39,-2.5 41,-2.3 -2,-0.5 2,-0.5 -0.814 16.9-146.7 -91.9 112.2 -0.8 -28.3 5.9
|
||||
7 7 A V E -ab 132 47A 0 124,-3.0 126,-2.4 -2,-0.7 41,-0.1 -0.695 52.3 -38.8 -95.6 119.1 1.9 -28.1 3.2
|
||||
8 8 A D + 0 0 5 39,-0.8 2,-1.8 -2,-0.5 -1,-0.2 0.705 62.9 168.0 50.5 45.6 4.5 -25.3 2.9
|
||||
9 9 A V + 0 0 0 38,-0.4 73,-1.9 -3,-0.3 2,-0.2 -0.534 32.8 142.1 -89.3 73.5 2.4 -22.3 3.8
|
||||
10 10 A Q B > -H 81 0B 0 -2,-1.8 3,-1.5 71,-0.3 4,-0.3 -0.749 66.1-107.2-123.8 153.3 5.5 -20.1 4.1
|
||||
11 11 A N G > S+ 0 0 42 69,-2.4 3,-1.7 66,-0.4 6,-0.3 0.827 114.3 56.9 -46.1 -47.9 6.5 -16.5 3.3
|
||||
12 12 A D G 3 S+ 0 0 22 66,-2.0 7,-2.5 1,-0.3 -1,-0.2 0.713 107.2 50.6 -64.4 -17.5 8.8 -17.4 0.3
|
||||
13 13 A F G < S+ 0 0 24 -3,-1.5 8,-1.3 65,-0.3 -1,-0.3 0.337 104.8 70.8-100.9 3.1 5.8 -19.1 -1.4
|
||||
14 14 A C S X S- 0 0 1 -3,-1.7 3,-2.7 -4,-0.3 5,-0.1 -0.677 103.9 -57.2-112.7 171.8 3.4 -16.1 -1.0
|
||||
15 15 A E T 3 S+ 0 0 92 6,-0.3 -1,-0.1 1,-0.3 3,-0.1 -0.153 128.4 23.3 -45.2 128.4 3.2 -12.6 -2.5
|
||||
16 16 A G T 3 S+ 0 0 87 1,-0.3 -1,-0.3 -4,-0.1 -4,-0.1 0.065 100.9 120.4 95.4 -23.9 6.5 -10.7 -1.9
|
||||
17 17 A G S X S- 0 0 18 -3,-2.7 3,-1.4 -6,-0.3 -1,-0.3 -0.219 78.8-112.5 -78.9 164.3 8.4 -14.1 -1.5
|
||||
18 18 A S T 3 S+ 0 0 53 1,-0.3 -5,-0.1 -3,-0.1 -1,-0.1 0.817 122.2 30.5 -65.0 -26.4 11.3 -15.3 -3.6
|
||||
19 19 A L T 3 S- 0 0 27 -7,-2.5 -1,-0.3 -5,-0.1 -4,-0.1 -0.276 91.6-175.4-121.3 42.4 9.0 -18.1 -5.0
|
||||
20 20 A A < - 0 0 52 -3,-1.4 2,-0.6 -6,-0.1 -6,-0.2 -0.119 12.7-162.2 -42.9 122.1 5.7 -16.2 -4.8
|
||||
21 21 A V > - 0 0 9 -8,-1.3 3,-2.0 143,-0.0 -6,-0.3 -0.983 28.1-121.4-105.5 107.8 2.5 -18.2 -5.8
|
||||
22 22 A T T 3 S+ 0 0 127 -2,-0.6 140,-0.1 1,-0.3 -8,-0.0 -0.309 101.4 31.3 -46.5 128.7 -0.3 -15.8 -6.5
|
||||
23 23 A G T 3> S+ 0 0 28 138,-0.1 4,-2.2 4,-0.0 -1,-0.3 0.308 87.1 113.8 98.9 -3.6 -3.2 -16.6 -4.1
|
||||
24 24 A G H <> S+ 0 0 0 -3,-2.0 4,-1.8 2,-0.2 -10,-0.1 0.959 76.1 45.9 -65.4 -50.4 -0.8 -17.7 -1.4
|
||||
25 25 A A H > S+ 0 0 26 -4,-0.2 4,-1.4 1,-0.2 3,-0.5 0.949 115.7 46.8 -56.8 -56.6 -1.6 -14.9 1.1
|
||||
26 26 A A H > S+ 0 0 56 1,-0.2 4,-2.0 2,-0.2 -1,-0.2 0.871 109.9 54.9 -48.4 -47.4 -5.4 -15.4 0.6
|
||||
27 27 A L H X S+ 0 0 11 -4,-2.2 4,-2.1 2,-0.2 -1,-0.2 0.844 101.0 58.6 -58.1 -36.1 -5.0 -19.2 0.9
|
||||
28 28 A A H X S+ 0 0 0 -4,-1.8 4,-1.8 -3,-0.5 58,-0.2 0.921 111.1 41.8 -62.7 -39.8 -3.2 -18.8 4.4
|
||||
29 29 A R H < S+ 0 0 144 -4,-1.4 5,-0.2 2,-0.2 -1,-0.2 0.862 109.6 56.9 -82.0 -28.0 -6.3 -17.0 5.8
|
||||
30 30 A A H X S+ 0 0 37 -4,-2.0 4,-0.7 1,-0.2 -1,-0.2 0.845 106.8 51.4 -63.2 -36.8 -8.8 -19.3 4.1
|
||||
31 31 A I H < S+ 0 0 0 -4,-2.1 3,-0.2 -5,-0.2 5,-0.2 0.940 103.1 64.0 -67.5 -47.6 -7.1 -22.3 5.9
|
||||
32 32 A S T < S+ 0 0 7 -4,-1.8 2,-1.7 1,-0.2 3,-0.2 -0.364 106.9 33.3 -58.5 157.1 -7.4 -20.4 9.3
|
||||
33 33 A D T > S+ 0 0 77 4,-0.1 4,-0.8 -2,-0.0 2,-0.3 0.135 108.3 71.8 68.6 -34.5 -11.1 -20.1 9.9
|
||||
34 34 A Y T < S+ 0 0 26 -2,-1.7 2,-2.5 -4,-0.7 5,-0.3 -0.515 99.3 44.0 -69.8-179.5 -11.4 -23.4 8.2
|
||||
35 35 A L T 4 S+ 0 0 35 3,-2.1 4,-0.2 -2,-0.3 -3,-0.1 -0.074 115.2 55.0 72.3 -49.9 -9.8 -25.1 11.3
|
||||
36 36 A A T 4 S+ 0 0 66 -2,-2.5 -1,-0.2 -5,-0.2 -2,-0.2 0.694 117.8 29.8 -78.2 -31.7 -12.3 -22.7 13.1
|
||||
37 37 A E S < S+ 0 0 138 1,-0.9 2,-0.3 -4,-0.8 -3,-0.1 0.835 130.3 14.7 -90.9 -92.1 -15.4 -24.0 11.2
|
||||
38 38 A A + 0 0 45 1,-0.2 -3,-2.1 -5,-0.1 -1,-0.9 -0.785 60.3 144.4 -97.1 160.6 -14.7 -27.6 10.4
|
||||
39 39 A A + 0 0 39 -2,-0.3 -1,-0.2 -5,-0.3 3,-0.1 0.110 5.7 161.7 99.1-174.9 -12.5 -29.3 11.8
|
||||
40 40 A D + 0 0 152 1,-0.2 2,-0.5 -2,-0.1 3,-0.1 0.816 40.4 140.6 77.3 35.7 -13.3 -33.0 12.4
|
||||
41 41 A Y - 0 0 35 1,-0.1 -1,-0.2 -6,-0.1 -38,-0.2 -0.944 49.7-160.5-112.0 126.2 -9.6 -33.9 12.7
|
||||
42 42 A H S S+ 0 0 128 -40,-1.5 2,-0.3 -2,-0.5 -39,-0.2 0.902 88.0 9.1 -59.3 -39.2 -8.1 -36.3 15.2
|
||||
43 43 A H E -b 3 0A 40 -41,-1.3 -39,-2.2 -3,-0.1 2,-0.4 -0.994 60.8-156.3-144.4 149.9 -4.7 -34.4 14.5
|
||||
44 44 A V E +b 4 0A 13 45,-0.3 47,-3.4 -2,-0.3 48,-0.7 -0.993 22.0 165.5-127.5 124.2 -3.3 -31.3 12.6
|
||||
45 45 A V E -bc 5 92A 0 -41,-2.2 -39,-2.5 -2,-0.4 2,-0.3 -0.862 17.8-151.0-131.1 161.3 0.4 -31.1 11.4
|
||||
46 46 A A E -bc 6 93A 0 46,-2.2 48,-2.6 -2,-0.3 2,-0.3 -0.951 4.4-147.0-131.5 158.7 2.3 -28.8 9.0
|
||||
47 47 A T E -bc 7 94A 0 -41,-2.3 -39,-0.8 -2,-0.3 -38,-0.4 -0.858 12.2-165.5-116.7 158.5 5.4 -29.2 6.8
|
||||
48 48 A K E - c 0 95A 19 46,-2.3 48,-1.7 -2,-0.3 2,-0.3 -0.982 23.9-129.0-143.1 133.0 8.0 -26.6 6.0
|
||||
49 49 A D E + c 0 96A 7 -2,-0.3 2,-0.3 46,-0.2 48,-0.2 -0.581 38.0 178.5 -65.2 134.3 10.7 -26.4 3.3
|
||||
50 50 A F - 0 0 56 46,-2.4 2,-0.5 -2,-0.3 23,-0.2 -0.753 8.4-169.0-154.9 98.3 13.9 -25.5 5.4
|
||||
51 51 A H B +i 73 0C 1 21,-2.8 23,-2.5 -2,-0.3 47,-0.2 -0.820 24.4 158.0-105.4 126.1 17.3 -25.2 3.8
|
||||
52 52 A I S S- 0 0 82 46,-1.9 47,-0.2 -2,-0.5 -1,-0.2 0.772 90.4 -16.8-102.3 -58.0 20.7 -24.9 5.6
|
||||
53 53 A D + 0 0 103 45,-3.1 45,-0.1 44,-0.2 44,-0.0 -0.486 67.0 162.0-151.5 90.8 22.8 -26.0 2.8
|
||||
54 54 A P > - 0 0 8 0, 0.0 3,-1.8 0, 0.0 47,-0.3 0.152 31.4-156.4 -91.8 14.0 21.1 -27.8 -0.1
|
||||
55 55 A G G > S+ 0 0 36 1,-0.3 3,-1.9 2,-0.2 45,-0.1 -0.198 73.8 16.3 58.5-128.2 23.9 -27.3 -2.7
|
||||
56 56 A D G 3 S+ 0 0 148 1,-0.3 -1,-0.3 11,-0.1 12,-0.2 0.591 106.6 84.3 -60.0 -18.9 22.7 -27.5 -6.4
|
||||
57 57 A H G < S+ 0 0 16 -3,-1.8 11,-1.5 10,-0.2 2,-0.4 0.725 93.1 52.2 -52.4 -28.0 19.0 -27.0 -5.4
|
||||
58 58 A F B < S+j 68 0D 43 -3,-1.9 2,-0.3 9,-0.2 3,-0.0 -0.901 71.6 172.3-110.6 147.2 19.8 -23.3 -5.4
|
||||
59 59 A S - 0 0 38 9,-2.0 -3,-0.0 -2,-0.4 8,-0.0 -0.991 42.2-138.3-152.7 150.8 21.4 -21.4 -8.3
|
||||
60 60 A G S S+ 0 0 80 -2,-0.3 -1,-0.1 1,-0.2 7,-0.0 0.672 114.2 40.1 -74.4 -20.0 22.2 -17.9 -9.5
|
||||
61 61 A T S S- 0 0 130 7,-0.1 -1,-0.2 -3,-0.0 6,-0.0 -0.462 93.4-171.6-123.5 57.5 20.9 -19.0 -13.0
|
||||
62 62 A P - 0 0 34 0, 0.0 6,-0.1 0, 0.0 -5,-0.0 -0.141 25.8-161.1 -67.7 146.7 17.8 -21.1 -11.9
|
||||
63 63 A D - 0 0 84 4,-0.1 5,-0.2 5,-0.0 0, 0.0 0.406 31.4-130.7 -99.6 2.5 15.8 -23.2 -14.3
|
||||
64 64 A Y S S+ 0 0 135 3,-1.6 4,-0.1 1,-0.1 0, 0.0 0.721 97.1 69.9 49.0 26.0 12.6 -23.6 -12.1
|
||||
65 65 A S S S- 0 0 97 2,-0.5 -1,-0.1 0, 0.0 3,-0.1 0.538 122.9 -7.7-128.1 -50.7 12.7 -27.3 -12.7
|
||||
66 66 A S S S+ 0 0 71 1,-0.2 2,-0.3 -10,-0.0 35,-0.1 0.468 131.5 50.7-121.9 -8.1 15.8 -28.7 -10.8
|
||||
67 67 A S - 0 0 15 -6,-0.0 -3,-1.6 -8,-0.0 -2,-0.5 -0.970 66.0-177.6-133.9 143.5 17.4 -25.4 -9.6
|
||||
68 68 A W B -j 58 0D 45 -11,-1.5 -9,-2.0 -2,-0.3 3,-0.1 -0.924 35.4 -95.1-134.5 156.5 15.7 -22.5 -7.8
|
||||
69 69 A P - 0 0 23 0, 0.0 -9,-0.1 0, 0.0 -51,-0.0 -0.488 69.5 -76.5 -67.8 154.0 16.5 -19.0 -6.4
|
||||
70 70 A P + 0 0 74 0, 0.0 2,-0.3 0, 0.0 3,-0.1 -0.252 68.6 163.7 -48.3 136.1 17.3 -19.3 -2.7
|
||||
71 71 A H + 0 0 5 1,-0.1 -20,-0.2 -3,-0.1 8,-0.1 -0.982 49.2 17.7-154.7 153.1 14.2 -19.8 -0.6
|
||||
72 72 A C S S- 0 0 0 -2,-0.3 -21,-2.8 1,-0.2 2,-0.4 0.852 75.3-179.8 51.3 43.8 13.1 -20.9 2.9
|
||||
73 73 A V B > -i 51 0C 46 -23,-0.2 3,-2.0 1,-0.1 6,-0.4 -0.644 36.1-102.2 -77.9 127.3 16.7 -20.3 4.2
|
||||
74 74 A S T 3 S+ 0 0 42 -23,-2.5 -1,-0.1 -2,-0.4 -24,-0.1 -0.257 107.4 21.6 -52.9 126.1 17.2 -21.2 7.9
|
||||
75 75 A G T 3 S+ 0 0 86 1,-0.3 -1,-0.3 2,-0.0 -23,-0.0 0.480 104.0 101.4 91.0 3.6 17.2 -17.9 10.0
|
||||
76 76 A T S X S- 0 0 56 -3,-2.0 3,-1.0 1,-0.1 4,-0.3 -0.814 81.1-115.3-111.7 161.0 15.4 -15.8 7.4
|
||||
77 77 A P G > S+ 0 0 94 0, 0.0 3,-1.5 0, 0.0 -66,-0.4 0.813 106.7 72.0 -61.6 -32.6 11.7 -14.6 7.3
|
||||
78 78 A G G 3 S+ 0 0 8 1,-0.3 -66,-2.0 -67,-0.2 -65,-0.3 0.804 94.1 55.9 -50.4 -31.2 11.1 -16.6 4.0
|
||||
79 79 A A G < S+ 0 0 1 -3,-1.0 -1,-0.3 -6,-0.4 3,-0.1 0.701 91.5 91.4 -82.7 -16.1 11.2 -19.8 6.0
|
||||
80 80 A D S < S- 0 0 86 -3,-1.5 -69,-2.4 -4,-0.3 -68,-0.2 -0.333 87.5 -86.6 -77.4 157.7 8.4 -18.8 8.5
|
||||
81 81 A F B -H 10 0B 40 -71,-0.3 -71,-0.3 1,-0.1 -1,-0.1 -0.292 53.2 -97.4 -50.4 137.0 4.6 -19.4 8.4
|
||||
82 82 A H - 0 0 33 -73,-1.9 -1,-0.1 1,-0.1 -73,-0.1 -0.379 37.0-119.7 -56.0 137.1 2.7 -16.8 6.5
|
||||
83 83 A P S S+ 0 0 111 0, 0.0 -1,-0.1 0, 0.0 -2,-0.1 0.755 103.8 67.4 -63.5 -22.1 1.4 -14.4 9.3
|
||||
84 84 A S S S+ 0 0 33 2,-0.1 -55,-0.2 -59,-0.1 -58,-0.1 -0.036 78.6 85.7 -65.5-171.1 -2.3 -15.1 8.4
|
||||
85 85 A L S S- 0 0 9 -57,-0.2 2,-0.1 1,-0.0 -56,-0.1 0.916 81.5-123.7 77.3 80.3 -2.6 -18.7 9.3
|
||||
86 86 A D - 0 0 97 -58,-0.2 3,-0.1 1,-0.2 -2,-0.1 -0.463 20.6-166.7 -55.3 129.6 -3.5 -19.4 13.0
|
||||
87 87 A T > + 0 0 86 -2,-0.1 3,-1.9 1,-0.1 -1,-0.2 0.520 63.4 98.3-101.5 -16.4 -0.8 -21.7 14.3
|
||||
88 88 A S T 3 S+ 0 0 124 1,-0.2 -1,-0.1 3,-0.0 -2,-0.0 0.800 82.8 47.7 -32.6 -47.6 -2.8 -22.6 17.5
|
||||
89 89 A A T 3 S+ 0 0 45 -3,-0.1 2,-0.6 -46,-0.0 -45,-0.3 0.557 82.7 107.9 -84.0 -6.8 -4.1 -25.9 16.1
|
||||
90 90 A I < - 0 0 24 -3,-1.9 -45,-0.2 1,-0.2 3,-0.1 -0.605 42.3-176.2 -89.1 114.8 -0.8 -27.3 14.8
|
||||
91 91 A E + 0 0 102 -47,-3.4 2,-0.3 -2,-0.6 -46,-0.2 0.813 67.0 11.2 -81.1 -34.7 0.5 -30.2 16.9
|
||||
92 92 A A E -c 45 0A 11 -48,-0.7 -46,-2.2 18,-0.0 2,-0.5 -0.995 59.4-143.7-147.9 145.1 3.8 -30.8 15.1
|
||||
93 93 A V E -c 46 0A 27 -2,-0.3 2,-0.5 -48,-0.2 18,-0.3 -0.921 12.9-156.0-105.2 125.9 6.0 -29.2 12.4
|
||||
94 94 A F E -c 47 0A 0 -48,-2.6 -46,-2.3 -2,-0.5 2,-0.4 -0.905 9.6-161.1-104.3 115.6 7.9 -31.5 9.8
|
||||
95 95 A Y E +cD 48 109A 75 14,-2.9 14,-2.3 -2,-0.5 2,-0.3 -0.770 12.8 175.8 -98.6 137.1 11.1 -29.8 8.3
|
||||
96 96 A K E +c 49 0A 8 -48,-1.7 -46,-2.4 -2,-0.4 6,-0.2 -0.969 51.5 41.1-136.2 155.8 12.6 -31.2 5.0
|
||||
97 97 A G + 0 0 7 -2,-0.3 3,-0.5 10,-0.2 -45,-0.4 0.573 55.6 144.3 89.8 16.6 15.5 -30.1 2.8
|
||||
98 98 A A S S+ 0 0 33 1,-0.2 -45,-3.1 -47,-0.2 -46,-1.9 0.925 92.1 5.4 -52.1 -48.6 18.2 -29.1 5.4
|
||||
99 99 A Y S S+ 0 0 161 -47,-0.2 2,-0.3 -48,-0.1 -1,-0.2 0.189 134.3 40.0-118.7 18.3 21.1 -30.4 3.2
|
||||
100 100 A T S S- 0 0 81 -3,-0.5 2,-0.3 -45,-0.1 -45,-0.1 -0.994 88.4 -88.1-161.7 150.7 19.2 -31.4 0.1
|
||||
101 101 A G - 0 0 12 -47,-0.3 2,-0.4 -2,-0.3 -4,-0.2 -0.578 48.7-152.4 -61.2 129.0 16.4 -30.4 -2.4
|
||||
102 102 A A + 0 0 10 -2,-0.3 3,-0.1 -6,-0.2 36,-0.1 -0.905 23.6 178.8-113.7 142.4 13.1 -31.9 -1.0
|
||||
103 103 A Y + 0 0 138 -2,-0.4 2,-0.3 1,-0.2 38,-0.2 0.713 62.8 38.2-107.8 -24.4 10.0 -32.9 -3.0
|
||||
104 104 A S > - 0 0 1 37,-0.2 3,-2.2 1,-0.1 4,-0.2 -0.967 67.8-126.4-138.9 141.5 7.5 -34.3 -0.5
|
||||
105 105 A G G > S+ 0 0 0 33,-0.5 3,-2.3 -2,-0.3 11,-0.3 0.780 106.7 73.9 -53.1 -23.7 6.2 -33.7 3.0
|
||||
106 106 A F G 3 S+ 0 0 29 1,-0.3 11,-0.3 10,-0.1 -1,-0.3 0.636 86.8 62.0 -75.7 -5.8 7.0 -37.4 3.6
|
||||
107 107 A E G < S+ 0 0 88 -3,-2.2 -1,-0.3 8,-0.1 -10,-0.2 0.551 85.8 108.8 -85.0 -8.1 10.7 -36.4 3.7
|
||||
108 108 A G < - 0 0 0 -3,-2.3 8,-1.9 -4,-0.2 2,-0.3 -0.388 49.9-163.2 -78.5 153.3 10.1 -34.2 6.7
|
||||
109 109 A V B -DE 95 115A 66 -14,-2.3 -14,-2.9 6,-0.2 5,-0.2 -0.964 15.2-131.1-125.7 142.4 11.2 -34.8 10.2
|
||||
110 110 A D - 0 0 17 4,-2.4 -16,-0.1 -2,-0.3 -18,-0.0 -0.236 45.0 -80.2 -85.4 179.4 10.0 -33.1 13.4
|
||||
111 111 A E S S+ 0 0 167 -18,-0.3 2,-0.1 2,-0.1 -17,-0.1 0.695 120.9 47.3 -60.0 -23.6 12.3 -31.6 16.1
|
||||
112 112 A N S S- 0 0 109 2,-0.1 0, 0.0 0, 0.0 0, 0.0 -0.243 125.3 -72.3 -88.3-162.8 13.0 -35.0 17.8
|
||||
113 113 A G S S+ 0 0 68 -2,-0.1 -2,-0.1 -4,-0.1 0, 0.0 0.670 86.0 138.1 -60.2 -5.0 14.0 -37.7 15.4
|
||||
114 114 A T - 0 0 29 -5,-0.2 -4,-2.4 1,-0.1 -2,-0.1 -0.329 40.6-154.5 -87.8 119.0 10.6 -38.3 13.9
|
||||
115 115 A P B > -E 109 0A 50 0, 0.0 4,-2.2 0, 0.0 3,-0.4 -0.365 33.9-111.2 -67.0 144.3 9.6 -38.8 10.2
|
||||
116 116 A L H > S+ 0 0 0 -8,-1.9 4,-2.1 -11,-0.3 5,-0.2 0.827 114.3 49.0 -44.3 -49.6 6.0 -37.8 9.2
|
||||
117 117 A L H > S+ 0 0 50 -11,-0.3 4,-2.5 2,-0.2 -1,-0.2 0.913 111.3 51.7 -67.0 -38.0 4.6 -41.4 8.5
|
||||
118 118 A N H > S+ 0 0 80 -3,-0.4 4,-1.9 2,-0.2 5,-0.2 0.969 108.7 49.7 -58.3 -53.4 6.0 -42.6 11.9
|
||||
119 119 A W H < S+ 0 0 14 -4,-2.2 4,-0.3 1,-0.2 -1,-0.2 0.890 113.6 47.9 -52.9 -42.4 4.4 -39.8 13.8
|
||||
120 120 A L H ><>S+ 0 0 0 -4,-2.1 5,-2.6 1,-0.2 3,-0.9 0.881 111.7 46.2 -68.5 -45.5 1.1 -40.5 12.1
|
||||
121 121 A R H ><5S+ 0 0 125 -4,-2.5 3,-1.4 1,-0.2 -1,-0.2 0.761 103.4 63.2 -69.4 -26.2 1.0 -44.3 12.6
|
||||
122 122 A Q T 3<5S+ 0 0 132 -4,-1.9 -1,-0.2 1,-0.3 -2,-0.2 0.583 106.4 46.6 -71.8 -13.0 2.0 -44.0 16.2
|
||||
123 123 A R T < 5S- 0 0 98 -3,-0.9 -1,-0.3 -4,-0.3 -2,-0.2 0.141 121.0-111.1-112.3 12.6 -1.3 -42.1 16.7
|
||||
124 124 A G T < 5 + 0 0 36 -3,-1.4 -3,-0.2 1,-0.2 2,-0.2 0.653 58.7 164.9 62.1 18.1 -3.2 -44.7 14.7
|
||||
125 125 A V < + 0 0 0 -5,-2.6 -1,-0.2 -6,-0.2 -123,-0.2 -0.456 10.6 162.7 -67.6 134.5 -3.8 -42.3 11.8
|
||||
126 126 A D + 0 0 66 -125,-2.3 26,-2.0 1,-0.3 2,-0.3 0.365 58.3 47.0-136.3 -6.9 -5.0 -44.1 8.7
|
||||
127 127 A E E -af 2 152A 55 -126,-1.7 -124,-2.2 24,-0.2 2,-0.3 -0.991 62.5-167.4-143.2 139.9 -6.4 -41.3 6.6
|
||||
128 128 A V E -af 3 153A 0 24,-3.1 26,-2.5 -2,-0.3 2,-0.4 -0.946 14.8-154.6-127.2 148.0 -5.2 -37.8 5.6
|
||||
129 129 A D E -af 4 154A 0 -126,-2.1 -124,-2.8 -2,-0.3 2,-0.4 -0.952 21.4-153.8-109.1 138.4 -6.5 -34.6 4.0
|
||||
130 130 A V E +af 5 155A 0 24,-2.2 26,-1.8 -2,-0.4 2,-0.3 -0.942 20.6 161.8-119.0 132.5 -3.8 -32.4 2.3
|
||||
131 131 A V E +a 6 0A 1 -126,-2.1 -124,-3.0 -2,-0.4 2,-0.3 -0.891 29.4 60.8-136.9 168.7 -4.0 -28.6 1.8
|
||||
132 132 A G E +af 7 160A 0 27,-2.2 29,-1.4 -2,-0.3 2,-0.3 -0.862 69.1 12.3 123.6-147.1 -1.4 -25.8 1.0
|
||||
133 133 A I E S+ f 0 161A 3 -126,-2.4 29,-0.1 -2,-0.3 -2,-0.1 -0.940 100.9 13.1-130.5 140.1 1.2 -24.8 -1.7
|
||||
134 134 A A > > - 0 0 10 27,-0.7 3,-2.7 -2,-0.3 5,-1.7 0.595 49.9-167.2-105.1 145.3 1.7 -25.6 -4.5
|
||||
135 135 A T T 3 5S+ 0 0 3 26,-1.4 5,-0.5 -3,-0.3 27,-0.2 0.819 96.4 44.5 -44.0 -42.3 -1.0 -27.8 -6.0
|
||||
136 136 A D T 3 5S+ 0 0 6 25,-0.2 -1,-0.3 3,-0.1 26,-0.1 0.408 126.0 23.8 -90.1 -1.7 1.2 -28.8 -8.9
|
||||
137 137 A H T <>5S+ 0 0 84 -3,-2.7 4,-2.5 3,-0.0 5,-0.2 0.324 127.9 8.1-127.0 -97.4 4.4 -29.4 -6.8
|
||||
138 138 A C H >5S+ 0 0 12 1,-0.2 4,-2.4 2,-0.2 -33,-0.5 0.781 126.0 54.1 -73.0 -28.5 4.6 -30.3 -3.2
|
||||
139 139 A V H ><S+ 0 0 0 -5,-1.7 4,-2.8 2,-0.2 -1,-0.2 0.952 111.9 45.3 -63.2 -49.5 0.9 -30.9 -2.6
|
||||
140 140 A R H > S+ 0 0 58 -6,-0.7 4,-2.6 -5,-0.5 5,-0.2 0.940 115.5 47.0 -56.8 -53.1 0.7 -33.3 -5.5
|
||||
141 141 A Q H X S+ 0 0 41 -4,-2.5 4,-1.9 -38,-0.2 -2,-0.2 0.910 113.4 48.1 -61.7 -36.3 3.9 -35.1 -4.3
|
||||
142 142 A T H X S+ 0 0 0 -4,-2.4 4,-1.9 1,-0.2 -1,-0.2 0.921 111.6 50.4 -73.5 -44.0 2.7 -35.3 -0.7
|
||||
143 143 A A H X S+ 0 0 0 -4,-2.8 4,-1.9 2,-0.2 -2,-0.2 0.924 112.2 45.7 -57.1 -50.4 -0.7 -36.7 -1.7
|
||||
144 144 A E H X S+ 0 0 19 -4,-2.6 4,-2.3 -5,-0.2 -2,-0.2 0.849 111.5 52.3 -69.5 -28.2 0.7 -39.4 -3.9
|
||||
145 145 A D H X S+ 0 0 33 -4,-1.9 4,-2.1 -5,-0.2 -1,-0.2 0.864 104.0 57.5 -68.4 -35.9 3.2 -40.4 -1.2
|
||||
146 146 A A H <>S+ 0 0 0 -4,-1.9 5,-2.4 2,-0.2 -2,-0.2 0.956 111.2 43.4 -54.8 -45.5 0.4 -40.7 1.3
|
||||
147 147 A V H ><5S+ 0 0 37 -4,-1.9 3,-1.7 1,-0.2 -2,-0.2 0.900 110.1 53.7 -70.0 -42.9 -1.2 -43.2 -1.0
|
||||
148 148 A R H 3<5S+ 0 0 156 -4,-2.3 -1,-0.2 1,-0.3 -2,-0.2 0.892 107.7 54.1 -54.7 -40.2 2.0 -45.1 -1.8
|
||||
149 149 A N T 3<5S- 0 0 71 -4,-2.1 -1,-0.3 -5,-0.1 -2,-0.2 0.148 126.3 -99.6 -84.2 13.7 2.5 -45.5 2.0
|
||||
150 150 A G T < 5S+ 0 0 63 -3,-1.7 2,-0.3 1,-0.2 -3,-0.2 0.584 75.9 135.6 85.7 9.8 -1.0 -47.0 2.5
|
||||
151 151 A L < - 0 0 10 -5,-2.4 2,-0.4 -6,-0.2 -1,-0.2 -0.735 59.7-117.8 -94.0 145.0 -3.0 -44.0 3.7
|
||||
152 152 A A E -f 127 0A 49 -26,-2.0 -24,-3.1 -2,-0.3 2,-0.4 -0.646 46.8-159.3 -73.4 126.0 -6.5 -43.1 2.5
|
||||
153 153 A T E +f 128 0A 12 -2,-0.4 28,-2.3 26,-0.3 2,-0.4 -0.959 25.1 175.8-133.0 125.9 -6.0 -39.6 1.0
|
||||
154 154 A R E -fg 129 181A 60 -26,-2.5 -24,-2.2 -2,-0.4 2,-0.4 -0.952 15.9-152.8-116.4 144.7 -7.9 -36.5 0.0
|
||||
155 155 A V E -fg 130 182A 0 26,-2.3 28,-2.3 -2,-0.4 2,-1.2 -0.975 14.4-144.2-102.9 129.4 -6.9 -33.2 -1.4
|
||||
156 156 A L E > - g 0 183A 0 -26,-1.8 3,-2.7 -2,-0.4 28,-0.2 -0.780 19.5-167.0 -91.0 87.9 -9.2 -30.3 -0.5
|
||||
157 157 A V E > S+ 0 0A 45 26,-1.7 3,-1.2 -2,-1.2 -1,-0.2 0.773 80.4 59.7 -58.7 -32.3 -8.7 -28.5 -3.9
|
||||
158 158 A D E 3 S+ 0 0A 76 25,-0.4 -1,-0.3 1,-0.3 26,-0.1 0.677 103.1 55.3 -68.7 -13.5 -10.3 -25.2 -2.8
|
||||
159 159 A L E < S+ 0 0A 6 -3,-2.7 -27,-2.2 -29,-0.2 2,-0.3 -0.157 95.7 80.5-115.5 33.3 -7.6 -25.0 -0.1
|
||||
160 160 A T E < -f 132 0A 22 -3,-1.2 2,-0.4 -29,-0.2 -27,-0.2 -0.859 63.0-149.0-126.2 161.3 -4.6 -25.3 -2.4
|
||||
161 161 A A E -f 133 0A 11 -29,-1.4 -26,-1.4 -2,-0.3 -27,-0.7 -0.984 15.8-161.3-134.1 121.1 -2.8 -22.7 -4.6
|
||||
162 162 A G - 0 0 26 -2,-0.4 3,-0.1 -28,-0.3 6,-0.0 -0.765 21.2-128.3-107.8 161.1 -1.2 -23.8 -7.9
|
||||
163 163 A V S S+ 0 0 87 -2,-0.3 2,-0.3 1,-0.2 -141,-0.2 0.908 86.0 5.2 -77.3 -44.3 1.4 -22.0 -10.0
|
||||
164 164 A S > - 0 0 46 1,-0.1 4,-1.6 -143,-0.0 -1,-0.2 -0.955 65.2-123.4-142.9 153.3 -0.2 -22.1 -13.3
|
||||
165 165 A A H > S+ 0 0 81 -2,-0.3 4,-2.1 2,-0.2 5,-0.1 0.884 109.2 48.9 -61.4 -43.4 -3.5 -23.3 -14.7
|
||||
166 166 A D H > S+ 0 0 111 1,-0.2 4,-1.9 2,-0.2 -1,-0.1 0.934 114.8 42.4 -67.4 -50.6 -2.0 -25.8 -17.2
|
||||
167 167 A T H > S+ 0 0 57 2,-0.2 4,-2.0 1,-0.2 -1,-0.2 0.792 111.5 56.9 -62.7 -32.0 0.3 -27.5 -14.8
|
||||
168 168 A T H X S+ 0 0 24 -4,-1.6 4,-3.0 2,-0.2 -2,-0.2 0.935 107.1 48.8 -66.6 -40.8 -2.4 -27.6 -12.1
|
||||
169 169 A V H X S+ 0 0 80 -4,-2.1 4,-2.7 2,-0.2 -2,-0.2 0.934 111.9 48.5 -57.0 -56.3 -4.7 -29.5 -14.5
|
||||
170 170 A A H X S+ 0 0 43 -4,-1.9 4,-1.9 1,-0.2 -1,-0.2 0.890 113.4 49.0 -52.9 -39.7 -1.9 -32.0 -15.3
|
||||
171 171 A A H X S+ 0 0 5 -4,-2.0 4,-2.1 2,-0.2 -2,-0.2 0.945 109.0 49.9 -70.4 -47.3 -1.3 -32.4 -11.6
|
||||
172 172 A L H X S+ 0 0 51 -4,-3.0 4,-2.1 1,-0.2 -2,-0.2 0.957 112.1 50.1 -57.6 -47.1 -5.0 -32.9 -10.7
|
||||
173 173 A E H X S+ 0 0 114 -4,-2.7 4,-2.0 1,-0.2 -1,-0.2 0.898 109.5 49.4 -57.3 -40.9 -5.3 -35.6 -13.4
|
||||
174 174 A E H X S+ 0 0 104 -4,-1.9 4,-1.2 2,-0.2 -1,-0.2 0.842 107.7 56.7 -69.7 -28.4 -2.1 -37.5 -12.2
|
||||
175 175 A M H <>S+ 0 0 0 -4,-2.1 5,-2.3 2,-0.2 3,-0.4 0.956 107.4 46.4 -67.3 -49.0 -3.5 -37.4 -8.6
|
||||
176 176 A R H ><5S+ 0 0 188 -4,-2.1 3,-2.1 3,-0.2 -2,-0.2 0.932 109.8 54.2 -54.8 -46.4 -6.7 -39.2 -9.7
|
||||
177 177 A T H 3<5S+ 0 0 126 -4,-2.0 -1,-0.2 1,-0.3 -2,-0.2 0.746 109.3 48.6 -62.9 -23.7 -4.6 -41.8 -11.7
|
||||
178 178 A A T 3<5S- 0 0 32 -4,-1.2 -1,-0.3 -3,-0.4 -2,-0.2 0.421 126.4-108.8 -89.4 -2.1 -2.7 -42.4 -8.5
|
||||
179 179 A S T < 5 + 0 0 87 -3,-2.1 2,-0.3 1,-0.3 -26,-0.3 0.652 64.6 149.3 81.5 19.2 -6.1 -42.8 -6.6
|
||||
180 180 A V < - 0 0 6 -5,-2.3 2,-0.5 -6,-0.1 -1,-0.3 -0.671 46.2-124.6 -73.5 140.7 -6.2 -39.6 -4.5
|
||||
181 181 A E E -g 154 0A 107 -28,-2.3 -26,-2.3 -2,-0.3 2,-0.6 -0.771 19.5-156.4 -91.7 121.9 -9.7 -38.2 -3.9
|
||||
182 182 A L E +g 155 0A 50 -2,-0.5 2,-0.3 -28,-0.2 -26,-0.2 -0.900 30.3 145.8-106.7 110.9 -10.2 -34.5 -5.0
|
||||
183 183 A V E -g 156 0A 34 -28,-2.3 -26,-1.7 -2,-0.6 -25,-0.4 -0.836 49.0-103.3-134.4 166.8 -13.1 -32.7 -3.1
|
||||
184 184 A C 0 0 94 -2,-0.3 -28,-0.0 -28,-0.2 -29,-0.0 -0.779 360.0 360.0 -92.4 154.1 -14.0 -29.3 -1.8
|
||||
185 185 A S 0 0 68 -2,-0.3 -1,-0.1 -29,-0.0 -147,-0.0 -0.354 360.0 360.0 -73.8 360.0 -13.6 -28.8 2.0
|
|
@ -8,7 +8,7 @@ setwd("~/git/LSHTM_analysis/scripts")
|
|||
getwd()
|
||||
|
||||
# load libraries
|
||||
#source("~/git/LSHTM_analysis/scripts/Header_TT.R")
|
||||
#source("Header_TT.R")
|
||||
require("getopt", quietly = TRUE) # cmd parse arguments
|
||||
|
||||
# load functions
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,135 +0,0 @@
|
|||
# count numbers for ML
|
||||
|
||||
source("~/git/LSHTM_analysis/config/alr.R")
|
||||
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
|
||||
#############################
|
||||
# GET the actual merged dfs
|
||||
#############################
|
||||
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_ml_dfs.R")
|
||||
|
||||
#############################
|
||||
# Output files: merged data
|
||||
#############################
|
||||
outfile_merged_df3 = paste0(outdir, '/', tolower(gene), '_merged_df3.csv')
|
||||
#outfile_merged_df2 = paste0(outdir, '/', tolower(gene), '_merged_df2.csv')
|
||||
|
||||
################################################
|
||||
# Add active site indication
|
||||
###############################################
|
||||
merged_df2$active_site = as.integer(merged_df2$position %in% active_aa_pos)
|
||||
merged_df3$active_site = as.integer(merged_df3$position %in% active_aa_pos)
|
||||
|
||||
# check
|
||||
cols_sel = c('mutationinformation', 'mutation_info_labels'
|
||||
#, 'dm_om_numeric'
|
||||
, 'dst', 'dst_mode')
|
||||
|
||||
check_mdf2 = merged_df2[, cols_sel]
|
||||
check_mdf2T = table(check_mdf2$mutationinformation, check_mdf2$dst_mode)
|
||||
ft_mdf2 = as.data.frame.matrix(check_mdf2T)
|
||||
|
||||
#==================
|
||||
# CHECK: dst mode
|
||||
#===================
|
||||
dst_check = all((ft_mdf2[,1]==0)==(ft_mdf2[,2]!=0)); dst_check
|
||||
|
||||
#=======================
|
||||
# CHECK: dst mode labels
|
||||
#=======================
|
||||
#table(merged_df2$mutation_info_labels_orig)
|
||||
#table(merged_df2$mutation_info_labels_v1)
|
||||
table(merged_df2$mutation_info_labels)
|
||||
|
||||
dst_check1 = table(merged_df2$dst_mode)[1] == table(merged_df2$mutation_info_labels)[2]
|
||||
dst_check2 = table(merged_df2$dst_mode)[2] == table(merged_df2$mutation_info_labels)[1]
|
||||
|
||||
# All three checks must hold. Testing dst_check1 == dst_check2 would also pass
# when both are FALSE, so require each one to be TRUE. isTRUE() turns an NA
# result into FALSE so the if/else below reaches the explicit FAIL branch.
check12 = isTRUE(all(dst_check, dst_check1, dst_check2))
|
||||
|
||||
if (check12) {
|
||||
cat('\nPASS: dst mode labels verified. merged_df3 CAN be trusted! ')
|
||||
}else{
|
||||
stop('FAIL: Something is wrong with the dst_mode column. Quitting!')
|
||||
}
|
||||
|
||||
table(is.na(merged_df3$dst))
|
||||
|
||||
#==========================
|
||||
# CHECK: active site labels
|
||||
#==========================
|
||||
table(merged_df2$active_site)
|
||||
table(merged_df3$active_site)
|
||||
aa_check1 = all( table(merged_df2$active_site) == table(as.integer(merged_df2$position %in% active_aa_pos)) )
|
||||
aa_check2 = all( table(merged_df3$active_site) == table(as.integer(merged_df3$position %in% active_aa_pos)) )
|
||||
|
||||
if ( all(aa_check1 && aa_check2) ){
|
||||
cat('\nActive site indications successfully applied to merged_dfs for gene:', tolower(gene))
|
||||
}
|
||||
|
||||
gene
|
||||
gene_match
|
||||
|
||||
nrow(merged_df3)
|
||||
|
||||
##############################################
|
||||
write.csv(merged_df3, outfile_merged_df3)
|
||||
#write.csv(merged_df2, outfile_merged_df2)
|
||||
cat(paste("\nmerged df3 filename:", outfile_merged_df3
|
||||
#, "\nmerged df2 filename:", outfile_merged_df2)
|
||||
))
|
||||
|
||||
#%%###################################################################
|
||||
|
||||
###################################################
|
||||
###################################################
|
||||
###################################################
|
||||
|
||||
# source("~/git/LSHTM_analysis/config/alr.R")
|
||||
# source("~/git/LSHTM_analysis/config/embb.R")
|
||||
# source("~/git/LSHTM_analysis/config/gid.R")
|
||||
# source("~/git/LSHTM_analysis/config/katg.R")
|
||||
# source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
# source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
# #
|
||||
df3_filename = paste0("~/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv")
|
||||
df3 = read.csv(df3_filename)
|
||||
# #
|
||||
# mutationinformation
|
||||
length(unique((df3$mutationinformation)))
|
||||
# #
|
||||
# # #dm _om
|
||||
# table(df3$mutation_info)
|
||||
# #table(df3$mutation_info_orig)
|
||||
# #table(df3$mutation_info_labels_orig)
|
||||
#
|
||||
# # used in plots and analyses
|
||||
# table(df3$mutation_info_labels) # different, and matches dst_mode
|
||||
# table(df3$dst_mode)
|
||||
#
|
||||
# # test_set
|
||||
# na_count <-sapply(df3, function(y) sum(length(which(is.na(y)))))
|
||||
# na_count[drug]
|
||||
# #
|
||||
# # # training set
|
||||
# table(df3[drug])
|
||||
# #
|
||||
# # # drtype: MDR and XDR
|
||||
# # #table(df3$drtype) orig i.e. incorrect ones!
|
||||
# # table(df3$drtype_mode_labels)
|
||||
#
|
||||
#
|
||||
# df3_complete = df3
|
||||
# table(df3_complete$dst_mode)
|
||||
# comp_lin_all = df3_complete[df3_complete$lineage_labels%in%c("L1", "L2", "L3", "L4"),]
|
||||
# table(comp_lin_all$lineage); sum(table(comp_lin_all$lineage))
|
||||
#
|
||||
# df3_actual = df3[!is.na(df3$dst), ]
|
||||
# table(df3_actual$dst_mode)
|
||||
# comp_lin_actual = df3_actual[df3_actual$lineage_labels%in%c("L1", "L2", "L3", "L4"),]
|
||||
# table(comp_lin_actual$lineage); sum(table(comp_lin_actual$lineage))
|
||||
#
|
|
@ -1,260 +0,0 @@
|
|||
# count numbers for ML
|
||||
|
||||
source("~/git/LSHTM_analysis/config/alr.R")
|
||||
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
|
||||
#############################
|
||||
# GET the actual merged dfs
|
||||
#############################
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
|
||||
#############################
|
||||
# Output files: merged data
|
||||
#############################
|
||||
outfile_merged_df3 = paste0(outdir, '/', tolower(gene), '_merged_df3.csv')
|
||||
#outfile_merged_df2 = paste0(outdir, '/', tolower(gene), '_merged_df2.csv')
|
||||
|
||||
################################################
|
||||
# Add active site indication
|
||||
###############################################
|
||||
merged_df2$active_site = as.integer(merged_df2$position %in% active_aa_pos)
|
||||
#merged_df2_comp$active_site = as.integer(merged_df2_comp$position %in% active_aa_pos)
|
||||
|
||||
merged_df3$active_site = as.integer(merged_df3$position %in% active_aa_pos)
|
||||
#merged_df3_comp$active_site = as.integer(merged_df3_comp$position %in% active_aa_pos)
|
||||
|
||||
# check
|
||||
cols_sel = c('mutationinformation', 'mutation_info_labels'
|
||||
#, 'dm_om_numeric'
|
||||
, 'dst', 'dst_mode')
|
||||
|
||||
check_mdf2 = merged_df2[, cols_sel]
|
||||
check_mdf2T = table(check_mdf2$mutationinformation, check_mdf2$dst_mode)
|
||||
ft_mdf2 = as.data.frame.matrix(check_mdf2T)
|
||||
|
||||
#==================
|
||||
# CHECK: dst mode
|
||||
#===================
|
||||
dst_check = all((ft_mdf2[,1]==0)==(ft_mdf2[,2]!=0)); dst_check
|
||||
|
||||
#=======================
|
||||
# CHECK: dst mode labels
|
||||
#=======================
|
||||
table(merged_df2$mutation_info_labels_orig)
|
||||
table(merged_df2$mutation_info_labels_v1)
|
||||
table(merged_df2$mutation_info_labels)
|
||||
|
||||
# Cross-check the dst_mode counts against the mutation_info_labels counts.
# The code compares level 1 of one table with level 2 of the other, and
# vice versa; both comparisons must hold for the labels to be trusted.
dst_mode_counts = table(merged_df2$dst_mode)
mut_label_counts = table(merged_df2$mutation_info_labels)

# isTRUE(as.vector(...)) collapses a missing level (NA) to FALSE so the
# if() below fails cleanly through stop() rather than erroring on NA.
dst_check1 = isTRUE(as.vector(dst_mode_counts[1] == mut_label_counts[2]))
dst_check2 = isTRUE(as.vector(dst_mode_counts[2] == mut_label_counts[1]))

# All three checks must pass. Testing dst_check1 == dst_check2 would also
# succeed when BOTH cross-checks are FALSE, so each one is required here.
check12 = isTRUE(dst_check) && dst_check1 && dst_check2

if (check12) {
  cat('\nPASS: dst mode labels verified. merged_df3 CAN be trusted! ')
} else {
  stop('FAIL: Something is wrong with the dst_mode column. Quitting!')
}
|
||||
|
||||
table(is.na(merged_df3$dst))
|
||||
|
||||
#==========================
|
||||
# CHECK: active site labels
|
||||
#==========================
|
||||
table(merged_df2$active_site)
|
||||
table(merged_df3$active_site)
|
||||
aa_check1 = all( table(merged_df2$active_site) == table(as.integer(merged_df2$position %in% active_aa_pos)) )
|
||||
aa_check2 = all( table(merged_df3$active_site) == table(as.integer(merged_df3$position %in% active_aa_pos)) )
|
||||
|
||||
if ( all(aa_check1 && aa_check2) ){
|
||||
cat('\nActive site indications successfully applied to merged_dfs for gene:', tolower(gene))
|
||||
}
|
||||
|
||||
gene
|
||||
gene_match
|
||||
|
||||
nrow(merged_df3)
|
||||
###########################################
|
||||
#========================
|
||||
# CHECK: drtype: revised labels [Merged_df2]
|
||||
#=========================
|
||||
table(merged_df2$drtype) #orig
|
||||
table(merged_df2$drtype_mode)
|
||||
# mapping 2.1: numeric
|
||||
# drtype_map = {'XDR': 5
|
||||
# , 'Pre-XDR': 4
|
||||
# , 'MDR': 3
|
||||
# , 'Pre-MDR': 2
|
||||
# , 'Other': 1
|
||||
# , 'Sensitive': 0}
|
||||
|
||||
# create a labels col that is mapped based on drtype_mode
|
||||
merged_df2$drtype_mode_labels = merged_df2$drtype_mode
|
||||
merged_df2$drtype_mode_labels = as.factor(merged_df2$drtype_mode)
|
||||
levels(merged_df2$drtype_mode_labels)
|
||||
levels(merged_df2$drtype_mode_labels) <- c('Sensitive', 'Other'
|
||||
, 'Pre-MDR', 'MDR'
|
||||
, 'Pre-XDR', 'XDR')
|
||||
levels(merged_df2$drtype_mode_labels)
|
||||
# check
|
||||
a1 = all(table(merged_df2$drtype_mode) == table(merged_df2$drtype_mode_labels))
|
||||
b1 = sum(table(merged_df2$drtype_mode_labels)) == nrow(merged_df2)
|
||||
|
||||
if (all(a1 && b1)){
|
||||
cat("\nPASS: added drtype mode labels to merged_df2")
|
||||
}else{
|
||||
stop("FAIL: could not add drtype mode labels to merged_df2")
|
||||
##quit()
|
||||
}
|
||||
#################################################
|
||||
|
||||
#=======================
|
||||
# CHECK: drtype: revised labels [merged_df3]
|
||||
#=======================
|
||||
table(merged_df3$drtype) #orig
|
||||
table(merged_df3$drtype_mode)
|
||||
# mapping 2.1: numeric
|
||||
# drtype_map = {'XDR': 5
|
||||
# , 'Pre-XDR': 4
|
||||
# , 'MDR': 3
|
||||
# , 'Pre-MDR': 2
|
||||
# , 'Other': 1
|
||||
# , 'Sensitive': 0}
|
||||
|
||||
# create a labels col that is mapped based on drtype_mode
|
||||
merged_df3$drtype_mode_labels = merged_df3$drtype_mode
|
||||
merged_df3$drtype_mode_labels = as.factor(merged_df3$drtype_mode)
|
||||
levels(merged_df3$drtype_mode_labels)
|
||||
levels(merged_df3$drtype_mode_labels) <- c('Sensitive', 'Other'
|
||||
, 'Pre-MDR', 'MDR'
|
||||
, 'Pre-XDR', 'XDR')
|
||||
levels(merged_df3$drtype_mode_labels)
|
||||
a2 = all(table(merged_df3$drtype_mode) == table(merged_df3$drtype_mode_labels))
|
||||
b2 = sum(table(merged_df3$drtype_mode_labels)) == nrow(merged_df3)
|
||||
# check
|
||||
if (all(a2 && b2)){
|
||||
cat("\nPASS: added drtype mode labels to merged_df3")
|
||||
}else{
|
||||
stop("FAIL: could not add drtype mode labels to merged_df3")
|
||||
##quit()
|
||||
}
|
||||
#===============
|
||||
# CHECK: lineage
|
||||
#===============
|
||||
# TRUE when two columns have the same per-level counts, position by position.
# Returns a single logical (also FALSE when the number of levels differs),
# so the result is safe to use with the scalar operator `&&`.
same_level_counts = function(x, y) {
  identical(as.vector(table(x)), as.vector(table(y)))
}

# l1/l2: lineage vs lineage_labels counts agree in merged_df3 / merged_df2
l1 = same_level_counts(merged_df3$lineage, merged_df3$lineage_labels)
l2 = same_level_counts(merged_df2$lineage, merged_df2$lineage_labels)
# l3/l4: every row carries a (non-NA) lineage label
l3 = sum(table(merged_df2$lineage_labels)) == nrow(merged_df2)
l4 = sum(table(merged_df3$lineage_labels)) == nrow(merged_df3)

if (l1 && l2 && l3 && l4) {
  cat("\nPASS: lineage and lineage labels are identical!")
} else {
  stop("FAIL: could not verify lineage labels")
  ##quit()
}
|
||||
|
||||
###############################################
|
||||
# #=============
|
||||
# # mutation_info: revised labels
|
||||
# #==============
|
||||
# table(merged_df3$mutation_info)
|
||||
# sum(table(merged_df3$mutation_info))
|
||||
# table(merged_df3$mutation_info_orig)
|
||||
##############################################
|
||||
|
||||
# #=============
|
||||
# # <drug>, dst_mode: revised labels
|
||||
# #==============
|
||||
# table(merged_df3$dst) # orig
|
||||
# sum(table(merged_df3$dst))
|
||||
#
|
||||
# table(merged_df3$dst_mode)
|
||||
# #table(merged_df3[dr_muts_col])
|
||||
# sum(table(merged_df3$drtype_mode))
|
||||
|
||||
##############################################
|
||||
if ( all( check12 && aa_check1 && aa_check2 && a1 && b1 && a2 && b2 && l1 && l2 && l3 && l4) ){
|
||||
cat("\nWriting merged_dfs for:"
|
||||
, "\nDrug:", drug
|
||||
, "\nGene:", gene)
|
||||
|
||||
write.csv(merged_df3, outfile_merged_df3)
|
||||
#write.csv(merged_df2, outfile_merged_df2)
|
||||
|
||||
cat(paste("\nmerged df3 filename:", outfile_merged_df3
|
||||
#, "\nmerged df2 filename:", outfile_merged_df2)
|
||||
))
|
||||
|
||||
} else{
|
||||
stop("FAIL: Not able to write merged dfs. Please check numbers!")
|
||||
#quit()
|
||||
}
|
||||
|
||||
#%%###################################################################
|
||||
# check merged_df3
|
||||
check_mdf3 = merged_df3[, cols_sel]
|
||||
|
||||
check_mdf3T = table(check_mdf3$mutationinformation, check_mdf3$dst_mode)
|
||||
ft_mdf3 = as.data.frame.matrix(check_mdf3T)
|
||||
|
||||
#==================
|
||||
# CHECK: dst mode
|
||||
#===================
|
||||
dst_check_mdf3 = all((ft_mdf3[,1]==0)==(ft_mdf3[,2]!=0)); dst_check_mdf3
|
||||
|
||||
sel = c("mutationinformation", "dst", "dst_mode")
|
||||
|
||||
a = merged_df3[, sel]
|
||||
str(a)
|
||||
|
||||
|
||||
###################################################
|
||||
###################################################
|
||||
###################################################
|
||||
|
||||
source("~/git/LSHTM_analysis/config/alr.R")
|
||||
source("~/git/LSHTM_analysis/config/embb.R")
|
||||
source("~/git/LSHTM_analysis/config/gid.R")
|
||||
source("~/git/LSHTM_analysis/config/katg.R")
|
||||
source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
#
|
||||
df3_filename = paste0("~/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv")
|
||||
df3 = read.csv(df3_filename)
|
||||
#
|
||||
# mutationinformation
|
||||
length(unique((df3$mutationinformation)))
|
||||
#
|
||||
# #dm _om
|
||||
table(df3$mutation_info)
|
||||
table(df3$mutation_info_orig)
|
||||
table(df3$mutation_info_labels_orig)
|
||||
|
||||
# used in plots and analyses
|
||||
table(df3$mutation_info_labels) # different, and matches dst_mode
|
||||
table(df3$dst_mode)
|
||||
|
||||
# test_set
|
||||
na_count <-sapply(df3, function(y) sum(length(which(is.na(y)))))
|
||||
na_count[drug]
|
||||
#
|
||||
# # training set
|
||||
table(df3[drug])
|
||||
#
|
||||
# # drtype: MDR and XDR
|
||||
# #table(df3$drtype) orig i.e. incorrect ones!
|
||||
# table(df3$drtype_mode_labels)
|
||||
|
||||
|
||||
df3_complete = df3
|
||||
table(df3_complete$dst_mode)
|
||||
comp_lin_all = df3_complete[df3_complete$lineage_labels%in%c("L1", "L2", "L3", "L4"),]
|
||||
table(comp_lin_all$lineage); sum(table(comp_lin_all$lineage))
|
||||
|
||||
df3_actual = df3[!is.na(df3$dst), ]
|
||||
table(df3_actual$dst_mode)
|
||||
comp_lin_actual = df3_actual[df3_actual$lineage_labels%in%c("L1", "L2", "L3", "L4"),]
|
||||
table(comp_lin_actual$lineage); sum(table(comp_lin_actual$lineage))
|
File diff suppressed because it is too large
Load diff
|
@ -75,14 +75,15 @@ args = arg_parser.parse_args()
|
|||
drug = args.drug
|
||||
gene = args.gene
|
||||
|
||||
#drug = 'pyrazinamide'
|
||||
#gene = 'pncA'
|
||||
|
||||
gene_match = gene + '_p.'
|
||||
print('mut pattern for gene', gene, ':', gene_match)
|
||||
|
||||
nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}'
|
||||
print('nsSNP for gene', gene, ':', nssnp_match)
|
||||
|
||||
nssnp_match2 = re.compile(nssnp_match)
|
||||
|
||||
wt_regex = gene_match.lower()+'([A-Za-z]{3})'
|
||||
print('wt regex:', wt_regex)
|
||||
|
||||
|
@ -218,21 +219,20 @@ meta_gene_epi = meta_gene_multi.loc[(meta_gene_multi['dr_mult_snp_count']>1) | (
|
|||
|
||||
#%% TEST
|
||||
# formatting, replace !nssnp_match with nothing
|
||||
#foo1 = 'pncA_p.Thr47Pro;pncA_p.Thr61Pro;rpsA_c.XX'
|
||||
#foo2 = 'pncA_Chromosome:g.2288693_2289280del; WT; pncA_p.Thr61Ala'
|
||||
foo1 = 'pncA_p.Thr47Pro;pncA_p.Thr61Pro;rpsA_c.XX'
|
||||
foo2 = 'pncA_Chromosome:g.2288693_2289280del; WT; pncA_p.Thr61Ala'
|
||||
|
||||
|
||||
#foo1_s = foo1.split(';')
|
||||
#foo1_s
|
||||
#nssnp_match2 = re.compile('(pncA_p.[A-Za-z]{3}[0-9]+[A-Za-z]{3})')
|
||||
#arse=list(filter(nssnp_match2.match, foo1_s))
|
||||
#arse
|
||||
|
||||
#foo1_s2 = ';'.join(arse)
|
||||
#foo1_s2
|
||||
foo1_s = foo1.split(';')
|
||||
foo1_s
|
||||
nssnp_match2 = re.compile('(pncA_p.[A-Za-z]{3}[0-9]+[A-Za-z]{3})')
|
||||
arse=list(filter(nssnp_match2.match, foo1_s))
|
||||
arse
|
||||
|
||||
foo1_s2 = ';'.join(arse)
|
||||
foo1_s2
|
||||
#%%
|
||||
#nssnp_match2 = re.compile('(pncA_p.[A-Za-z]{3}[0-9]+[A-Za-z]{3})')
|
||||
nssnp_match2 = re.compile('(pncA_p.[A-Za-z]{3}[0-9]+[A-Za-z]{3})')
|
||||
|
||||
# dr_muts_col
|
||||
dr_clean_col = dr_muts_col + '_clean'
|
||||
|
@ -248,7 +248,6 @@ for i, v in enumerate(meta_gene_epi[dr_muts_col]):
|
|||
dr2_s = v.split(';')
|
||||
print(dr2_s)
|
||||
dr2_sf = list(filter(nssnp_match2.match, dr2_s))
|
||||
#dr2_sf = list(filter(nssnp_match.match, dr2_s))
|
||||
print(dr2_sf)
|
||||
dr2_sf2 = ';'.join(dr2_sf)
|
||||
meta_gene_epi[dr_clean_col].iloc[i] = dr2_sf2
|
||||
|
@ -263,13 +262,13 @@ meta_gene_epi[other_clean_col] = ''
|
|||
|
||||
for i, v in enumerate(meta_gene_epi[other_muts_col]):
|
||||
#print(i, v)
|
||||
#print('======================================================')
|
||||
#print(i)
|
||||
#print(v)
|
||||
print('======================================================')
|
||||
print(i)
|
||||
print(v)
|
||||
other2_s = v.split(';')
|
||||
#print(other2_s)
|
||||
print(other2_s)
|
||||
other2_sf = list(filter(nssnp_match2.match, other2_s))
|
||||
#print(other2_sf)
|
||||
print(other2_sf)
|
||||
other2_sf2 = ';'.join(other2_sf)
|
||||
meta_gene_epi[other_clean_col].iloc[i] = other2_sf2
|
||||
|
||||
|
@ -282,8 +281,7 @@ meta_gene_epi_f = meta_gene_epi[['id', 'sample'
|
|||
, 'dr_mult_snp_count'
|
||||
, other_muts_col, other_clean_col
|
||||
, 'other_mult_snp_count']]
|
||||
#print(meta_gene_epi_f.columns)
|
||||
print(meta_gene_epi_f)
|
||||
meta_gene_epi_f.columns
|
||||
|
||||
cols_to_output = ['id', 'sample'
|
||||
, dr_clean_col
|
||||
|
@ -295,6 +293,7 @@ cols_to_output = ['id', 'sample'
|
|||
meta_gene_epi_f2 = meta_gene_epi_f[cols_to_output]
|
||||
|
||||
|
||||
|
||||
#%%
|
||||
# formatting, replace !nssnp_match with nothing
|
||||
#nssnp_neg_match = '(?!pncA_p.[A-Za-z]{3}[0-9]+[A-Za-z]{3})'
|
||||
|
|
|
@ -1,257 +0,0 @@
|
|||
use strict;
|
||||
use warnings;
|
||||
|
||||
sub trim;
|
||||
sub distance;
|
||||
sub res_cod1_to_res_cod3;
|
||||
sub res_cod3_to_res_cod1;
|
||||
|
||||
# ____________________________________________________________________________________________________________________
|
||||
# Input parameters
|
||||
my $pdb = $ARGV[0];
|
||||
my $mutation = $ARGV[1];
|
||||
my $wt_chain = $ARGV[2];
|
||||
|
||||
if(scalar(@ARGV) != 3){
|
||||
print "___________________________________________________________________________________
|
||||
SINTAX:
|
||||
perl dist_mutation_to_na.pl <pdb> <mutation> <chain>
|
||||
___________________________________________________________________________________\n";
|
||||
exit;
|
||||
}
|
||||
|
||||
# ____________________________________________________________________________________________________________________
|
||||
|
||||
my $wild_res = substr($mutation, 0, 1);
|
||||
my $wild_res_pos = substr($mutation, 1, length($mutation)-2);
|
||||
my $mutated_res = substr($mutation, length($mutation)-1, 1);
|
||||
|
||||
open(PDB,"<$pdb") or die "$!Erro ao abrir: $pdb\n";
|
||||
my @pdb = <PDB>;
|
||||
close PDB;
|
||||
|
||||
|
||||
my $k = 0;
|
||||
my @coord_x; my @coord_y;
|
||||
my @coord_z; my @res_num;
|
||||
my @res_name; my @min_dist;
|
||||
my @chain;
|
||||
|
||||
# ==================================================================================================
|
||||
foreach my $line (@pdb){
|
||||
if($line =~ /^ATOM|^HETATM/){
|
||||
if( trim(substr($line,17,3)) eq "DA" or
|
||||
trim(substr($line,17,3)) eq "DG" or
|
||||
trim(substr($line,17,3)) eq "DC" or
|
||||
trim(substr($line,17,3)) eq "DT" or
|
||||
|
||||
trim(substr($line,17,3)) eq "A" or
|
||||
trim(substr($line,17,3)) eq "G" or
|
||||
trim(substr($line,17,3)) eq "C" or
|
||||
trim(substr($line,17,3)) eq "U"
|
||||
){
|
||||
my $res_cod = res_cod3_to_res_cod1(trim(substr($line,17,3)));
|
||||
my $res_ind = trim(substr($line,22,4));
|
||||
my $x = trim(substr($line,30,8));
|
||||
my $y = trim(substr($line,38,8));
|
||||
my $z = trim(substr($line,46,8));
|
||||
|
||||
$coord_x[$k] = $x;
|
||||
$coord_y[$k] = $y;
|
||||
$coord_z[$k] = $z;
|
||||
|
||||
$res_num[$k] = $res_ind;
|
||||
$res_name[$k] = $res_cod;
|
||||
|
||||
$chain[$k] = substr($line,21,1);
|
||||
|
||||
$k++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
my $k2 = 0;
|
||||
my @coord_x2; my @coord_y2;
|
||||
my @coord_z2; my @res_num2;
|
||||
my @res_name2; my @min_dist2;
|
||||
my @chain2;
|
||||
|
||||
foreach my $line (@pdb){
|
||||
if(trim(substr($line,0,6)) eq "ATOM"){
|
||||
my $res_cod = res_cod3_to_res_cod1(trim(substr($line,17,3)));
|
||||
my $res_ind = trim(substr($line,22,4));
|
||||
my $x = trim(substr($line,30,8));
|
||||
my $y = trim(substr($line,38,8));
|
||||
my $z = trim(substr($line,46,8));
|
||||
my $curr_chain = substr($line,21,1);
|
||||
|
||||
if($wild_res_pos == $res_ind and $wt_chain eq $curr_chain){
|
||||
|
||||
$coord_x2[$k2] = $x;
|
||||
$coord_y2[$k2] = $y;
|
||||
$coord_z2[$k2] = $z;
|
||||
|
||||
$res_num2[$k2] = $res_ind;
|
||||
$res_name2[$k2] = $res_cod;
|
||||
|
||||
$chain2[$k2] = substr($line,21,1);
|
||||
|
||||
$k2++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#print "$k2\t$k\n";
|
||||
#print "Calculating distances\n";
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
my $min_dist = 999;
|
||||
for(my $i=0; $i<$k2; $i++){
|
||||
for(my $j=0; $j<$k; $j++){
|
||||
|
||||
my $dist = distance($coord_x2[$i],$coord_y2[$i],$coord_z2[$i],$coord_x[$j],$coord_y[$j],$coord_z[$j]);
|
||||
|
||||
my $res_ind1 = $res_num2[$i];
|
||||
my $res_ind2 = $res_num[$j];
|
||||
|
||||
if($min_dist > $dist){
|
||||
$min_dist = $dist;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printf '%.3f'."\n", $min_dist;
|
||||
exit;
|
||||
# ____________________________________________________________________________________________________________________
|
||||
sub trim{
    # Return a copy of the argument with leading and trailing whitespace removed.
    my ($text) = @_;
    for ($text) {
        s/^\s+//;   # strip leading whitespace
        s/\s+$//;   # strip trailing whitespace
    }
    return $text;
}
|
||||
|
||||
sub distance{
    # Euclidean distance between the points (x1,y1,z1) and (x2,y2,z2),
    # passed as six scalars in that order.
    my @first  = @_[0 .. 2];
    my @second = @_[3 .. 5];

    my $sum_of_squares = 0;
    for my $axis (0 .. 2) {
        $sum_of_squares += ($first[$axis] - $second[$axis]) ** 2;
    }
    return sqrt($sum_of_squares);
}
|
||||
|
||||
sub res_cod3_to_res_cod1{
    # Convert an upper-case three-letter amino-acid code to its one-letter
    # code. Any other input (e.g. nucleotide names) yields "ERRO".
    my ($three_letter) = @_;

    # The table is built inside the sub on purpose: the subs are defined after
    # the script's top-level `exit`, so a file-scoped initialiser placed here
    # would never be executed.
    my %one_letter_for = (
        ALA => "A", VAL => "V", LEU => "L", GLY => "G", SER => "S",
        TRP => "W", THR => "T", GLN => "Q", GLU => "E", CYS => "C",
        ARG => "R", PRO => "P", ASP => "D", PHE => "F", ILE => "I",
        HIS => "H", ASN => "N", MET => "M", TYR => "Y", LYS => "K",
    );

    return "ERRO" unless exists($one_letter_for{$three_letter});
    return $one_letter_for{$three_letter};
}
|
||||
|
||||
#----------------------------------------------------------------------------------------
|
||||
# Takes a one-letter residue code and returns the three-letter equivalent.
# Returns "ERRO" for any code that is not one of the 20 standard amino acids.
#
# Defined without a prototype so that it agrees with the forward declaration
# `sub res_cod1_to_res_cod3;` at the top of the script (a `($)` prototype here
# triggers Perl's "Prototype mismatch" warning) and with res_cod3_to_res_cod1.
sub res_cod1_to_res_cod3{
    my ($one_letter) = @_;

    # Built inside the sub: the subs are defined after the script's top-level
    # `exit`, so a file-scoped initialiser placed here would never run.
    my %three_letter_for = (
        A => "ALA", V => "VAL", L => "LEU", G => "GLY", S => "SER",
        W => "TRP", T => "THR", Q => "GLN", E => "GLU", C => "CYS",
        R => "ARG", P => "PRO", D => "ASP", F => "PHE", I => "ILE",
        H => "HIS", N => "ASN", M => "MET", Y => "TYR", K => "LYS",
    );

    return "ERRO" unless exists($three_letter_for{$one_letter});
    return $three_letter_for{$one_letter};
}
|
||||
#----------------------------------------------------------------------------------------
|
|
@ -1,100 +0,0 @@
|
|||
########################################
|
||||
# Lineage barplot
|
||||
# Lineage and SAV count barplot
|
||||
# Lineage Diversity barplot
|
||||
########################################
|
||||
|
||||
# Stacked barplot of counts per lineage, with a value label on each segment.
#
# lf_data            long-format data frame to plot
# all_lineages       if FALSE, keep only rows whose x_categ value is in use_lineages
# x_categ, y_count   column names (strings) for the x axis and the bar heights
# bar_fill_categ     column name (string) used for the bar fill
# display_label_col  column name (string) whose values are drawn as labels
# y_log10            if TRUE, put the y axis on a log10 scale
# y_scale_percent    if TRUE, format the y axis labels as percentages
# y_label            y axis title; only the first element is used
# The remaining arguments are text sizes, label placement, colours and legend
# settings that are passed straight through to the ggplot2 layers below.
# Returns the ggplot object.
lin_count_bp <- function( lf_data = lin_lf
                          , all_lineages = F
                          , x_categ = "sel_lineages"
                          , y_count = "p_count"
                          , use_lineages = c("L1", "L2", "L3", "L4")
                          , bar_fill_categ = "count_categ"
                          , display_label_col = "p_count"
                          , bar_stat_stype = "identity"
                          , x_lab_angle = 90
                          , d_lab_size = 2.3
                          , d_lab_hjust = 0.5
                          , d_lab_vjust = 0.5
                          , d_lab_col = "black"
                          , my_xats = 8 # x axis text size
                          , my_yats = 8 # y axis text size
                          , my_xals = 10 # x axis label size
                          , my_yals = 10 # y axis label size
                          , my_lls = 10 # legend label size
                          , bar_col_labels = c("Mutations", "Total Samples")
                          , bar_col_values = c("grey50", "gray75")
                          , bar_leg_name = ""
                          , leg_location = "top"
                          , y_log10 = FALSE
                          , y_scale_percent = FALSE
                          , y_label = c("Count", "SAV diversity")
                          , ...
) {
  # The default lists two candidate titles, but an axis takes exactly one.
  y_label = y_label[1]

  if (!all_lineages) {
    lf_data = lf_data[lf_data[[x_categ]] %in% use_lineages, ]
  }

  # Column names arrive as strings, hence eval(parse()) inside aes()
  g = ggplot(lf_data
             , aes( x = factor( eval(parse(text = x_categ)), ordered = T )
                    , y = eval(parse(text = y_count))
                    , fill = eval(parse(text = bar_fill_categ)) ) )

  OutPlot = g +
    geom_bar( stat = bar_stat_stype
              , position = position_stack(reverse = TRUE) ) +
    theme(axis.text.x = element_text(size = my_xats
                                     , angle = x_lab_angle)
          , axis.text.y = element_text(size = my_yats
                                       , angle = 90
                                       , hjust = 1
                                       , vjust = 0)
          , axis.title.x = element_text(size = my_xals
                                        , colour = "black")
          , axis.title.y = element_text(size = my_yals
                                        , colour = "black")
          , legend.position = leg_location
          , legend.text = element_text(size = my_lls)
          , legend.key.size = unit(my_lls, 'pt')) +
    # labels use the same reversed stacking as the bars so they line up
    geom_label(aes(label = eval(parse(text = display_label_col)))
               , size = d_lab_size
               , hjust = d_lab_hjust
               , vjust = d_lab_vjust
               , colour = d_lab_col
               , show.legend = FALSE
               , position = position_stack(reverse = T)) +
    scale_fill_manual(values = bar_col_values
                      , name = bar_leg_name
                      , labels = bar_col_labels) +
    labs(title = ""
         , x = ""
         , y = y_label
         , colour = "black")

  if (y_log10) {
    # scales:: prefix so this works even when 'scales' is not attached
    OutPlot = OutPlot +
      scale_y_continuous(trans = "log10"
                         , labels = scales::trans_format("log10", scales::math_format(10^.x) ) )
  }

  if (y_scale_percent) {
    # titles were already set by labs() above; only the scale changes here
    OutPlot = OutPlot +
      scale_y_continuous(labels = scales::percent_format(accuracy = 1))
  }

  return(OutPlot)
}
|
|
@ -1,119 +0,0 @@
|
|||
########################################
|
||||
# Lineage barplot
|
||||
# Lineage and SAV count barplot
|
||||
# Lineage Diversity barplot
|
||||
########################################
|
||||
|
||||
# Barplot of SAV diversity per lineage, with a value label on each bar.
#
# lf_data            data frame to plot
# all_lineages       if FALSE, keep only rows whose x_categ value is in use_lineages
# x_categ, y_count   column names (strings) for the x axis and the bar heights
# display_label_col  column name (string) whose values are drawn as labels
# y_log10            if TRUE, put the y axis on a log10 scale
# y_scale_percent    if TRUE, format the y axis labels as percentages
# bp_plot_title, subtitle_text, title_colour, subtitle_colour, sts
#                    plot title/subtitle text, colours and subtitle size
# The remaining arguments are text sizes, label placement, colours and legend
# settings that are passed straight through to the ggplot2 layers below.
# Returns the ggplot object.
lin_count_bp_diversity <- function( lf_data = lin_wf
                                    , all_lineages = F
                                    , x_categ = "sel_lineages"
                                    , y_count = "snp_diversity"
                                    , use_lineages = c("L1", "L2", "L3", "L4")
                                    , display_label_col = "snp_diversity_f"
                                    , bar_stat_stype = "identity"
                                    , x_lab_angle = 90
                                    , d_lab_size = 2.3
                                    , d_lab_hjust = 0.5
                                    , d_lab_vjust = 0.5
                                    , d_lab_col = "black"
                                    , my_xats = 8 # x axis text size
                                    , my_yats = 8 # y axis text size
                                    , my_xals = 10 # x axis label size
                                    , my_yals = 10 # y axis label size
                                    , my_lls = 10 # legend label size
                                    , bar_col_labels = "" #c("Mutations", "Total Samples")
                                    , bar_col_values = c("gray50", "gray75")
                                    , bar_leg_name = ""
                                    , leg_location = "top"
                                    , y_log10 = FALSE
                                    , y_scale_percent = FALSE
                                    , y_label = c("SAV diversity")
                                    , bp_plot_title = ""
                                    , title_colour = "chocolate4"
                                    , subtitle_text = NULL
                                    , sts = 20
                                    , subtitle_colour = "#350E20FF" #brown
                                    , ...) {
  if (!all_lineages) {
    lf_data = lf_data[lf_data[[x_categ]] %in% use_lineages, ]
  }

  # Column names arrive as strings, hence eval(parse()) inside aes()
  g = ggplot(lf_data
             , aes( x = factor( eval(parse(text = x_categ)), ordered = T )
                    , y = eval(parse(text = y_count)) ) )

  OutPlot = g +
    geom_bar( stat = bar_stat_stype
              , position = position_stack(reverse = TRUE) ) +
    theme(axis.text.x = element_text(size = my_xats
                                     , angle = x_lab_angle)
          , axis.text.y = element_text(size = my_yats
                                       , angle = 90
                                       , hjust = 1
                                       , vjust = 0)
          , axis.title.x = element_text(size = my_xals
                                        , colour = "black")
          , axis.title.y = element_text(size = my_yals
                                        , colour = "black")
          , legend.position = leg_location
          , legend.text = element_text(size = my_lls)
          , legend.key.size = unit(my_lls, 'pt')
          , plot.title = element_text(size = my_lls
                                      , colour = title_colour
                                      , hjust = 0.5)
          , plot.subtitle = element_text(size = sts
                                         , hjust = 0.5
                                         , colour = subtitle_colour)) +
    # labels use the same reversed stacking as the bars so they line up
    geom_label(aes(label = eval(parse(text = display_label_col)))
               , size = d_lab_size
               , hjust = d_lab_hjust
               , vjust = d_lab_vjust
               , colour = d_lab_col
               , show.legend = FALSE
               , position = position_stack(reverse = T)) +
    scale_fill_manual(values = bar_col_values
                      , name = bar_leg_name
                      , labels = bar_col_labels) +
    labs(title = bp_plot_title
         , subtitle = subtitle_text
         , x = ""
         , y = y_label
         , colour = "black")

  if (y_log10) {
    # scales:: prefix so this works even when 'scales' is not attached
    OutPlot = OutPlot +
      scale_y_continuous(trans = "log10"
                         , labels = scales::trans_format("log10", scales::math_format(10^.x) ) )
  }

  if (y_scale_percent) {
    # titles were already set by labs() above; only the scale changes here
    OutPlot = OutPlot +
      scale_y_continuous(labels = scales::percent_format(accuracy = 1))
  }

  return(OutPlot)
}
|
|
@ -2,9 +2,8 @@
|
|||
# 1b: Define function: coloured barplot by subgroup
|
||||
# LINK: https://stackoverflow.com/questions/49818271/stacked-barplot-with-colour-gradients-for-each-bar
|
||||
#########################################################
|
||||
#source("~/git/LSHTM_analysis/scripts/functions/generate_distance_colour_map.R")
|
||||
|
||||
ColourPalleteMulti = function(df, group, subgroup){
|
||||
ColourPalleteMulti <- function(df, group, subgroup){
|
||||
|
||||
# Find how many colour categories to create and the number of colours in each
|
||||
categories <- aggregate(as.formula(paste(subgroup, group, sep="~" ))
|
||||
|
@ -25,134 +24,4 @@ ColourPalleteMulti = function(df, group, subgroup){
|
|||
, category.end[i]))(categories[i,2])}))
|
||||
return(colours)
|
||||
}
|
||||
#########################################################################
|
||||
|
||||
########################
|
||||
# Generate bp with
|
||||
# colour palette derived
|
||||
# from the data using
|
||||
# above function
|
||||
#########################
|
||||
|
||||
bp_stability_hmap <- function(plot_df = merged_df3
|
||||
, xvar_colname = "position"
|
||||
, yvar_colname = 'avg_stability_scaled' # Only here so that you can do function(df)
|
||||
#, bar_col_colname = "group"
|
||||
, stability_colname = "avg_stability_scaled" # Only here so that you can do function(df)
|
||||
, stability_outcome_colname = "avg_stability_outcome" # Only here so that you can do function(df)
|
||||
, p_title = "DUMMY TITLE", # Only here so that you can do function(df)
|
||||
my_xaxls = 6, # x-axis label size
|
||||
my_yaxls = 6, # y-axis label size
|
||||
my_xaxts = 9, # x-axis text size
|
||||
my_yaxts = 10, # y-axis text size
|
||||
my_pts = 10 # plot-title size
|
||||
, my_xlab = "Position"
|
||||
, my_ylab = ""
|
||||
|
||||
# Custom 2: x-axis: geom tiles ~ lig distance
|
||||
#, A_xvar_lig = T
|
||||
, lig_dist_colname = LigDist_colname # from globals
|
||||
, tpos0 = 0 # 0 is a magic number that does my sensible default
|
||||
, tW0 = 1
|
||||
, tH0 = 0.2,
|
||||
y_max_override = 1, # an override for tidily plotting multiple different-ranged plots together
|
||||
reorder_position = FALSE, # enable to reorder according to plot_df$pos_count
|
||||
...
|
||||
|
||||
|
||||
|
||||
|
||||
)
|
||||
{
|
||||
# Custom 2: x-axis geom tiles ~ lig distance
|
||||
|
||||
# order the df by position and ensure it is a factor
|
||||
plot_df = plot_df[order(plot_df[[xvar_colname]]), ]
|
||||
plot_df[[xvar_colname]] = factor(plot_df[[xvar_colname]])
|
||||
|
||||
#cat("\nSneak peak:\n")
|
||||
head(data.frame( plot_df[[xvar_colname]], plot_df[[stability_colname]] ) )
|
||||
|
||||
# stability values isolated to help with generating column called: 'group'
|
||||
my_grp = plot_df[[stability_colname]]
|
||||
# cat( "\nLength of SAVs:", length(my_grp)
|
||||
# , "\nLength of unique values for SAVs:", length(unique(my_grp)) )
|
||||
#
|
||||
# Add col: 'group'
|
||||
plot_df$group = paste0(plot_df[[stability_outcome_colname]], "_", my_grp, sep = "")
|
||||
plot_df=plot_df %>% dplyr::add_count(position)
|
||||
plot_df$pos_count=plot_df$n
|
||||
plot_df$n=NULL
|
||||
|
||||
# define a "max Y" in case the user didn't supply one
|
||||
if(reorder_position) {
|
||||
y_max = max(plot_df$pos_count)
|
||||
}
|
||||
else{
|
||||
y_max = 1 # boring default
|
||||
}
|
||||
y_axis_limit = round_any(y_max, y_max_override, ceiling)
|
||||
|
||||
# Call the function to create the palette based on the group defined above
|
||||
#subcols_ps
|
||||
subcols_bp_hmap = ColourPalleteMulti(plot_df, stability_outcome_colname, stability_colname)
|
||||
|
||||
cat("\nNo. of sub colours generated:", length(subcols_bp_hmap))
|
||||
anno_bar=position_annotation(plot_df,
|
||||
reorder_position=reorder_position,
|
||||
...
|
||||
)
|
||||
|
||||
subcols_plot = ggplot(plot_df) +
|
||||
scale_fill_manual( values = subcols_bp_hmap
|
||||
, guide = "none") +
|
||||
# scale_x_discrete("Position", labels=factor(plot_df$position)) +
|
||||
scale_y_continuous(limits=c(0,y_axis_limit)) +
|
||||
theme(
|
||||
panel.grid = element_line(color="lightgrey", size=0.125)
|
||||
, axis.text.x = element_text(size = my_xaxls
|
||||
, angle = 90
|
||||
, hjust = 1
|
||||
, vjust = 0.4)
|
||||
, axis.text.y = element_text(size = my_yaxls
|
||||
, angle = 0
|
||||
, hjust = 1
|
||||
, vjust = 0)
|
||||
, axis.title.x = element_blank()
|
||||
, axis.ticks = element_blank()
|
||||
#, axis.title.x = element_text(size = my_xaxts)
|
||||
, axis.title.y = element_text(size = my_yaxts )
|
||||
, plot.title = element_text(size = my_pts
|
||||
, hjust = 0.5)
|
||||
# , panel.grid = element_blank()
|
||||
, panel.background = element_rect(fill = "transparent", colour=NA)
|
||||
) +
|
||||
labs(title = p_title
|
||||
, x = my_xlab
|
||||
, y = my_ylab) +
|
||||
if(reorder_position) {
|
||||
geom_bar(aes(x=reorder(position,-pos_count), fill = group),
|
||||
colour = "grey",
|
||||
size=0.125
|
||||
)
|
||||
|
||||
}else{
|
||||
geom_bar(aes(x=position, fill = group),
|
||||
colour = "grey",
|
||||
size=0.125
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
# Generate the subcols barplot
|
||||
cowplot::plot_grid(
|
||||
subcols_plot,
|
||||
NULL,
|
||||
anno_bar,
|
||||
ncol = 1,
|
||||
align = "v",
|
||||
rel_heights = c(6,-0.1,1)
|
||||
)
|
||||
|
||||
}
|
||||
# bp_stability_hmap(small_df3)
|
||||
#########################################################
|
|
@ -6,7 +6,7 @@
|
|||
###########################################################
|
||||
# load libraries and functions
|
||||
|
||||
#source("~/git/LSHTM_analysis/scripts/Header_TT.R")
|
||||
#source("Header_TT.R")
|
||||
|
||||
#==========================================================
|
||||
# combining_dfs_plotting():
|
||||
|
@ -21,7 +21,7 @@
|
|||
# 1) large combined df including NAs for AF, OR,etc
|
||||
# Dim: same no. of rows as gene associated meta_data_with_AFandOR
|
||||
# 2) small combined df including NAs for AF, OR, etc.
|
||||
# Dim: same as mcsm data or foldX
|
||||
# Dim: same as mcsm data
|
||||
# 3) large combined df excluding NAs
|
||||
# Dim: dim(#1) - na_count_df2
|
||||
# 4) small combined df excluding NAs
|
||||
|
@ -31,20 +31,10 @@
|
|||
# 6) LIGAND small combined df excluding NAs
|
||||
# Dim: dim()
|
||||
#==========================================================
|
||||
#lig_dist_colname = 'ligand_distance' or global var LigDist_colname
|
||||
#lig_dist_cutoff = 10 or global var LigDist_cutoff
|
||||
geneL_normal = c("pnca")
|
||||
geneL_na = c("gid", "rpob")
|
||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
|
||||
|
||||
|
||||
combining_dfs_plotting <- function( my_df_u
|
||||
, gene_metadata
|
||||
#, gene # ADDED
|
||||
, lig_dist_colname = ''
|
||||
, lig_dist_cutoff = ''
|
||||
, plotting = TRUE){
|
||||
, lig_dist_colname = 'ligand_distance'
|
||||
, lig_dist_cutoff = 10){
|
||||
|
||||
# counting NAs in AF, OR cols
|
||||
# or_mychisq
|
||||
|
@ -60,20 +50,20 @@ combining_dfs_plotting <- function( my_df_u
|
|||
, "\nNA in pvalue: ", sum(is.na(my_df_u$pval_fisher))
|
||||
, "\nNA in AF:", sum(is.na(my_df_u$af)))
|
||||
}
|
||||
#
|
||||
# # or kin
|
||||
# if (identical(sum(is.na(my_df_u$or_kin))
|
||||
# , sum(is.na(my_df_u$pwald_kin))
|
||||
# , sum(is.na(my_df_u$af_kin)))){
|
||||
# cat("\nPASS: NA count match for OR, pvalue and AF\n from Kinship matrix calculations")
|
||||
# na_count = sum(is.na(my_df_u$af_kin))
|
||||
# cat("\nNo. of NAs: ", sum(is.na(my_df_u$or_kin)))
|
||||
# } else{
|
||||
# cat("\nFAIL: NA count mismatch"
|
||||
# , "\nNA in OR: ", sum(is.na(my_df_u$or_kin))
|
||||
# , "\nNA in pvalue: ", sum(is.na(my_df_u$pwald_kin))
|
||||
# , "\nNA in AF:", sum(is.na(my_df_u$af_kin)))
|
||||
# }
|
||||
|
||||
# or kin
|
||||
if (identical(sum(is.na(my_df_u$or_kin))
|
||||
, sum(is.na(my_df_u$pwald_kin))
|
||||
, sum(is.na(my_df_u$af_kin)))){
|
||||
cat("\nPASS: NA count match for OR, pvalue and AF\n from Kinship matrix calculations")
|
||||
na_count = sum(is.na(my_df_u$af_kin))
|
||||
cat("\nNo. of NAs: ", sum(is.na(my_df_u$or_kin)))
|
||||
} else{
|
||||
cat("\nFAIL: NA count mismatch"
|
||||
, "\nNA in OR: ", sum(is.na(my_df_u$or_kin))
|
||||
, "\nNA in pvalue: ", sum(is.na(my_df_u$pwald_kin))
|
||||
, "\nNA in AF:", sum(is.na(my_df_u$af_kin)))
|
||||
}
|
||||
|
||||
str(gene_metadata)
|
||||
|
||||
|
@ -105,7 +95,7 @@ combining_dfs_plotting <- function( my_df_u
|
|||
# merging_cols = merging_cols[[1]]
|
||||
merging_cols = 'mutationinformation'
|
||||
|
||||
cat("\nLinking column being used:", merging_cols)
|
||||
cat("\nLinking column being used: mutationinformation")
|
||||
|
||||
# important checks!
|
||||
table(nchar(my_df_u$mutationinformation))
|
||||
|
@ -118,7 +108,6 @@ combining_dfs_plotting <- function( my_df_u
|
|||
, y = my_df_u
|
||||
, by = merging_cols
|
||||
, all.y = T)
|
||||
#, all.x = T)
|
||||
|
||||
cat("\nDim of merged_df2: ", dim(merged_df2))
|
||||
|
||||
|
@ -146,17 +135,6 @@ combining_dfs_plotting <- function( my_df_u
|
|||
|
||||
head(merged_df2$position)
|
||||
|
||||
merged_muts_u = unique(merged_df2$mutationinformation)
|
||||
meta_muts_u = unique(gene_metadata$mutationinformation)
|
||||
# find the index where it differs
|
||||
cat("\nLength of unique mcsm_muts:", length(merged_muts_u)
|
||||
, "\nLength of unique meta muts:",length(meta_muts_u) )
|
||||
|
||||
meta_muts_all = gene_metadata$mutationinformation
|
||||
merged_muts = merged_df2$mutationinformation
|
||||
discrepancy_uniq = unique(meta_muts_u[! meta_muts_u %in% merged_muts_u])
|
||||
discrepancy = meta_muts_all[! meta_muts_all %in% merged_muts]
|
||||
|
||||
# sanity check
|
||||
cat("\nChecking nrows in merged_df2")
|
||||
if(nrow(gene_metadata) == nrow(merged_df2)){
|
||||
|
@ -164,57 +142,17 @@ combining_dfs_plotting <- function( my_df_u
|
|||
,"\nExpected no. of rows: ",nrow(gene_metadata)
|
||||
,"\nGot no. of rows: ", nrow(merged_df2))
|
||||
} else{
|
||||
cat("\nWARNING: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
|
||||
cat("\nFAIL: nrow(merged_df2)!= nrow(gene associated gene_metadata)"
|
||||
, "\nExpected no. of rows after merge: ", nrow(gene_metadata)
|
||||
, "\nGot no. of rows: ", nrow(merged_df2)
|
||||
, "\nFinding discrepancy")
|
||||
merged_muts_u = unique(merged_df2$mutationinformation)
|
||||
meta_muts_u = unique(gene_metadata$mutationinformation)
|
||||
# find the index where it differs
|
||||
cat("\nLength of unique mcsm_muts:", length(merged_muts_u)
|
||||
, "\nLength of unique meta muts:",length(meta_muts_u)
|
||||
, "\nLength of unique muts in meta muts NOT in mcsm muts:", length(discrepancy_uniq)
|
||||
, "These correspond to:", discrepancy, "entries"
|
||||
, "\nThese problematic muts are:\n"
|
||||
, discrepancy_uniq)
|
||||
#quit()
|
||||
cat("\nChecking again...")
|
||||
expected_nrows_df2 = nrow(gene_metadata) - length(discrepancy)
|
||||
if (nrow(merged_df2) == expected_nrows_df2){
|
||||
cat("\nPASS: nrow(merged_df2) is as expected after accounting for discrepancy"
|
||||
,"\nExpected no. of rows: ", expected_nrows_df2
|
||||
,"\nGot no. of rows: ", nrow(merged_df2))
|
||||
}else{ cat("\nFAIL: nrow(merged_df2) is NOT as expected even after accounting for discrepancy"
|
||||
, "\nExpected no. of rows after merge: ", expected_nrows_df2
|
||||
, "\nGot no. of rows: ", nrow(merged_df2)
|
||||
, "\nQuitting!")
|
||||
unique(meta_muts_u[! meta_muts_u %in% merged_muts_u])
|
||||
quit()
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Quick formatting: ordering df and pretty labels
|
||||
|
||||
#------------------------------
|
||||
# sorting by column: position
|
||||
#------------------------------
|
||||
merged_df2 = merged_df2[order(merged_df2$position), ]
|
||||
|
||||
#-----------------------
|
||||
# mutation_info_labels
|
||||
#-----------------------
|
||||
#merged_df2$mutation_info_labels = ifelse(merged_df2$mutation_info == dr_muts_col
|
||||
# , "DM", "OM")
|
||||
#merged_df2$mutation_info_labels = factor(merged_df2$mutation_info_labels)
|
||||
#-----------------------
|
||||
# lineage labels
|
||||
#-----------------------
|
||||
merged_df2$lineage_labels = merged_df2$lineage
|
||||
#merged_df2$lineage_labels = as.factor(merged_df2$lineage_labels)
|
||||
#merged_df2$lineage_labels = factor(merged_df2$lineage_labels)
|
||||
table(merged_df2$mutation_info_labels_orig) # original
|
||||
table(merged_df2$mutation_info_labels_v1) # intermediate
|
||||
table(merged_df2$mutation_info_labels) # revised, corresponding to dst_mode
|
||||
|
||||
#=================================================================
|
||||
# Merge 2: merged_df3
|
||||
# dfs with NAs in ORs
|
||||
|
@ -224,122 +162,17 @@ combining_dfs_plotting <- function( my_df_u
|
|||
# but this should be good for the numerical corr plots
|
||||
#==================================================================
|
||||
# remove duplicated mutations
|
||||
# cat("\nMerging dfs without NAs: small df (removing muts with no AF|OR associated)"
|
||||
# ,"\nCannot trust lineage info from this"
|
||||
# ,"\nlinking col: mutationinforamtion"
|
||||
# ,"\nfilename: merged_df3")
|
||||
#
|
||||
# merged_df3 = merged_df2[!duplicated(merged_df2$mutationinformation),]
|
||||
#
|
||||
#
|
||||
cat("\nMerging dfs without NAs: small df (removing muts with no AF|OR associated)"
|
||||
,"\nCannot trust lineage info from this"
|
||||
,"\nlinking col: mutationinforamtion"
|
||||
,"\nfilename: merged_df3")
|
||||
|
||||
# head(merged_df3$position); tail(merged_df3$position) # should be sorted
|
||||
#
|
||||
# # sanity check
|
||||
# cat("\nChecking nrows in merged_df3")
|
||||
#
|
||||
# if( nrow(my_df_u) == nrow(merged_df3) ){
|
||||
# cat("\nPASS: No. of rows match with my_df"
|
||||
# ,"\nExpected no. of rows: ", nrow(my_df_u)
|
||||
# ,"\nGot no. of rows: ", nrow(merged_df3))
|
||||
# } else {
|
||||
# cat("\nFAIL: No. of rows mismatch"
|
||||
# , "\nNo. of rows my_df: ", nrow(my_df_u)
|
||||
# , "\nNo. of rows merged_df3: ", nrow(merged_df3))
|
||||
# quit()
|
||||
# }
|
||||
#
|
||||
# counting NAs in AF, OR cols in merged_df3
|
||||
# this is because mcsm has no AF, OR cols,
|
||||
# so you cannot count NAs
|
||||
# if (identical(sum(is.na(merged_df3$or_kin))
|
||||
# , sum(is.na(merged_df3$pwald_kin))
|
||||
# , sum(is.na(merged_df3$af_kin)))){
|
||||
# cat("\nPASS: NA count match for OR, pvalue and AF\n")
|
||||
# na_count_df3 = sum(is.na(merged_df3$af_kin))
|
||||
# cat("\nNo. of NAs: ", sum(is.na(merged_df3$or_kin)))
|
||||
# } else{
|
||||
# cat("\nFAIL: NA count mismatch"
|
||||
# , "\nNA in OR: ", sum(is.na(merged_df3$or_kin))
|
||||
# , "\nNA in pvalue: ", sum(is.na(merged_df3$pwald_kin))
|
||||
# , "\nNA in AF:", sum(is.na(merged_df3$af_kin)))
|
||||
# }
|
||||
#
|
||||
# ===================================
|
||||
# Revised way to generate merged_df3
|
||||
# ===================================
|
||||
#%% Getting merged_df3: VERY important and careful subsetting merging
|
||||
# dst mode column as carefully curated dst based on knowledge based approach.
|
||||
# so now we want to get the
|
||||
na_muts = merged_df2[is.na(merged_df2$dst),]
|
||||
no_na_muts = merged_df2[!is.na(merged_df2$dst),]
|
||||
|
||||
muts_na_U = na_muts[!duplicated(na_muts[c('mutationinformation')]), ]
|
||||
muts_no_na_U = no_na_muts[!duplicated(no_na_muts[c('mutationinformation')]), ]
|
||||
|
||||
# get muts from no_na that are NOT present in muts with na from dplyr
|
||||
dst_muts = dplyr::anti_join(muts_no_na_U, muts_na_U, by = 'mutationinformation')
|
||||
#dst_muts = anti_join(muts_no_na_U, muts_na_U, by = 'mutationinformation')
|
||||
|
||||
# ALL good muts are NOT in na muts unique i.e dst muts should NOT exist in na_muts
|
||||
if (all(dst_muts$mutationinformation%in%muts_na_U$mutationinformation) == FALSE){
|
||||
cat("\nPASS: checked length for dst tested muts"
|
||||
, "\nNo. of dst testetd muts:", nrow(dst_muts))
|
||||
}else{
|
||||
stop("Dst muts are not correctly identified")
|
||||
}
|
||||
|
||||
if ( class(dst_muts) != "data.frame" ){
|
||||
dst_muts = as.data.frame(dst_muts)
|
||||
} else{
|
||||
cat("\ndst_muts is a df")
|
||||
}
|
||||
|
||||
# ALL bad muts are in na muts unique
|
||||
bad_muts = dplyr::semi_join(muts_no_na_U, muts_na_U, by = "mutationinformation")
|
||||
#bad_muts = semi_join(muts_no_na_U, muts_na_U, by = "mutationinformation")
|
||||
|
||||
|
||||
if (all(bad_muts$mutationinformation%in%muts_na_U$mutationinformation) == TRUE){
|
||||
cat("\nPASS: checked length of NOT-dst tested muts"
|
||||
, "\nNo. of NOT dst-tested_muts:", nrow(bad_muts))
|
||||
}else{
|
||||
stop("Non-dst muts are not correctly identified")
|
||||
}
|
||||
|
||||
if ( class(bad_muts) != "data.frame" ){
|
||||
bad_muts = as.data.frame(bad_muts)
|
||||
} else{
|
||||
cat("\nbad_muts is a df")
|
||||
}
|
||||
|
||||
cat("\nNo. of muts with dst:", nrow(dst_muts)
|
||||
, "\nNo. of muts without dst:", nrow(muts_na_U) - nrow(dst_muts) )
|
||||
|
||||
# now merge
|
||||
if ( all(colnames(muts_na_U) == colnames(dst_muts)) ){
|
||||
cat("\nPASS: rowbind to get merged_df3")
|
||||
merged_df3 = dplyr::bind_rows(muts_na_U, dst_muts)
|
||||
#merged_df3 = bind_rows(muts_na_U, dst_muts)
|
||||
|
||||
} else{
|
||||
stop("Quitting: merged_df3 could not be generated")
|
||||
}
|
||||
|
||||
if ( nrow(merged_df3) == length(unique(merged_df2$mutationinformation)) ){
|
||||
cat("\nPASS: merged_df3 sucessfully generated..."
|
||||
, "\nnrow merged_df3:", nrow(merged_df3)
|
||||
, "\nncol merged_df3:", ncol(merged_df3))
|
||||
}else{
|
||||
stop("Cannot generate merged_df3")
|
||||
}
|
||||
##################################################################
|
||||
merged_df3 = merged_df2[!duplicated(merged_df2$mutationinformation),]
|
||||
head(merged_df3$position); tail(merged_df3$position) # should be sorted
|
||||
|
||||
# sanity check
|
||||
cat("\nChecking nrows in merged_df3")
|
||||
|
||||
if( nrow(my_df_u) == nrow(merged_df3) ){
|
||||
if(nrow(my_df_u) == nrow(merged_df3)){
|
||||
cat("\nPASS: No. of rows match with my_df"
|
||||
,"\nExpected no. of rows: ", nrow(my_df_u)
|
||||
,"\nGot no. of rows: ", nrow(merged_df3))
|
||||
|
@ -349,392 +182,166 @@ combining_dfs_plotting <- function( my_df_u
|
|||
, "\nNo. of rows merged_df3: ", nrow(merged_df3))
|
||||
quit()
|
||||
}
|
||||
#=========================================
|
||||
# NEW: add consurf outcome
|
||||
#=========================================
|
||||
consurf_colOld = "consurf_colour_rev"
|
||||
consurf_colNew = "consurf_outcome"
|
||||
merged_df3[[consurf_colNew]] = merged_df3[[consurf_colOld]]
|
||||
merged_df3[[consurf_colNew]] = as.factor(merged_df3[[consurf_colNew]])
|
||||
merged_df3[[consurf_colNew]]
|
||||
#levels(merged_df3$consurf_outcome) = c("nsd", 1, 2, 3, 4, 5, 6, 7, 8, 9)
|
||||
|
||||
merged_df2[[consurf_colNew]] = merged_df2[[consurf_colOld]]
|
||||
merged_df2[[consurf_colNew]] = as.factor(merged_df2[[consurf_colNew]])
|
||||
merged_df2[[consurf_colNew]]
|
||||
# counting NAs in AF, OR cols in merged_df3
|
||||
# this is because mcsm has no AF, OR cols,
|
||||
# so you cannot count NAs
|
||||
if (identical(sum(is.na(merged_df3$or_kin))
|
||||
, sum(is.na(merged_df3$pwald_kin))
|
||||
, sum(is.na(merged_df3$af_kin)))){
|
||||
cat("\nPASS: NA count match for OR, pvalue and AF\n")
|
||||
na_count_df3 = sum(is.na(merged_df3$af_kin))
|
||||
cat("\nNo. of NAs: ", sum(is.na(merged_df3$or_kin)))
|
||||
} else{
|
||||
cat("\nFAIL: NA count mismatch"
|
||||
, "\nNA in OR: ", sum(is.na(merged_df3$or_kin))
|
||||
, "\nNA in pvalue: ", sum(is.na(merged_df3$pwald_kin))
|
||||
, "\nNA in AF:", sum(is.na(merged_df3$af_kin)))
|
||||
}
|
||||
|
||||
#=========================================
|
||||
# NEW: fixed case for SNAP2 labels
|
||||
#=========================================
|
||||
snap2_colname = "snap2_outcome"
|
||||
merged_df3[[snap2_colname]] <- str_replace(merged_df3[[snap2_colname]], "effect", "Effect")
|
||||
merged_df3[[snap2_colname]] <- str_replace(merged_df3[[snap2_colname]], "neutral", "Neutral")
|
||||
#===================================================
|
||||
# Merge3: merged_df2_comp
|
||||
# same as merge 1 but excluding NAs from ORs, etc.
|
||||
#====================================================
|
||||
cat("\nMerging dfs without any NAs: big df (1-many relationship b/w id & mut)"
|
||||
,"\nfilename: merged_df2_comp")
|
||||
|
||||
merged_df2[[snap2_colname]] <- str_replace(merged_df2[[snap2_colname]], "effect", "Effect")
|
||||
merged_df2[[snap2_colname]] <- str_replace(merged_df2[[snap2_colname]], "neutral", "Neutral")
|
||||
na_count_df2 = sum(is.na(merged_df2$af))
|
||||
merged_df2_comp = merged_df2[!is.na(merged_df2$af),]
|
||||
|
||||
#---------------------------------------------
|
||||
# NEW: add columns that are needed to generate
|
||||
# plots with revised colnames and strings
|
||||
#----------------------------------------------
|
||||
merged_df3$sensitivity = ifelse(merged_df3$dst_mode == 1, "R", "S")
|
||||
merged_df3$mutation_info_labels = ifelse(merged_df3$mutation_info_labels == "DM", "R", "S")
|
||||
|
||||
merged_df2$sensitivity = ifelse(merged_df2$dst_mode == 1, "R", "S")
|
||||
merged_df2$mutation_info_labels = ifelse(merged_df2$mutation_info_labels == "DM", "R", "S")
|
||||
|
||||
# for epistasis: fill na where dst: No equivalent in merged_df3
|
||||
merged_df2$dst2 = ifelse(is.na(merged_df2$dst), merged_df2$dst_mode, merged_df2$dst)
|
||||
|
||||
check1 = all(merged_df3$mutation_info_labels == merged_df3$sensitivity)
|
||||
check2 = all(merged_df2$mutation_info_labels == merged_df2$sensitivity)
|
||||
|
||||
if(check1 && check2){
|
||||
cat("PASS: merged_df3 and merged_df2 have mutation info labels as R and S"
|
||||
, "\nIt also has sensitivity column"
|
||||
, "\nThese are identical")
|
||||
# sanity check: no +-1 gymnastics
|
||||
cat("\nChecking nrows in merged_df2_comp")
|
||||
if(nrow(merged_df2_comp) == (nrow(merged_df2) - na_count_df2)){
|
||||
cat("\nPASS: No. of rows match"
|
||||
,"\nDim of merged_df2_comp: "
|
||||
,"\nExpected no. of rows: ", nrow(merged_df2) - na_count_df2
|
||||
, "\nNo. of rows: ", nrow(merged_df2_comp)
|
||||
, "\nNo. of cols: ", ncol(merged_df2_comp))
|
||||
}else{
|
||||
stop("Abort: merged_df3 or merged_df2 can't be created because of lable mismatch")
|
||||
cat("\nFAIL: No. of rows mismatch"
|
||||
,"\nExpected no. of rows: ", nrow(merged_df2) - na_count_df2
|
||||
,"\nGot no. of rows: ", nrow(merged_df2_comp))
|
||||
}
|
||||
|
||||
##########################################################################
|
||||
# MERGED_df2: average cols #
|
||||
# Average stability + lig-affinity columns #
|
||||
##########################################################################
|
||||
#======================================================
|
||||
# Merge4: merged_df3_comp
|
||||
# same as merge 2 but excluding NAs from ORs, etc or
|
||||
# remove duplicate mutation information
|
||||
#=======================================================
|
||||
na_count_df3 = sum(is.na(merged_df3$af))
|
||||
#merged_df3_comp = merged_df3_comp[!duplicated(merged_df3_comp$mutationinformation),] # a way
|
||||
|
||||
#=====================================
|
||||
# merged_df2: Stability values: average
|
||||
#====================================
|
||||
#------------------------------
|
||||
# foldx sign reverse
|
||||
# for consistency with other tools
|
||||
#----------------------------------
|
||||
head(merged_df2$ddg_foldx)
|
||||
merged_df3_comp = merged_df3[!is.na(merged_df3$af),] # another way
|
||||
cat("\nChecking nrows in merged_df3_comp")
|
||||
|
||||
# foldx values: reverse signs
|
||||
#merged_df2['ddg_foldxC'] = abs(merged_df2$ddg_foldx)
|
||||
#head(merged_df2[, c("ddg_foldx", "ddg_foldxC")])
|
||||
|
||||
# foldx scaled: reverse signs fs
|
||||
merged_df2['foldx_scaled_signC'] = abs(merged_df2$foldx_scaled)
|
||||
head(merged_df2[, c("foldx_scaled", "foldx_scaled_signC")])
|
||||
|
||||
# find which stability cols to average: should contain revised foldx
|
||||
scaled_cols_stab = c("duet_scaled"
|
||||
, "deepddg_scaled"
|
||||
, "ddg_dynamut2_scaled"
|
||||
, "foldx_scaled_signC" # needed to get avg stability
|
||||
)
|
||||
|
||||
#-----------------------------------------------
|
||||
# merged_df2: ADD col: average across predictors: stability
|
||||
#-----------------------------------------------
|
||||
if (all((scaled_cols_stab%in%colnames(merged_df2)))){
|
||||
cat("\nPASS: finding stability cols to average")
|
||||
cols2avg_stab = scaled_cols_stab
|
||||
cat("\nAveraging", length(cols2avg_stab), "stability columns:"
|
||||
, "\nThese are:", cols2avg_stab)
|
||||
|
||||
merged_df2['avg_stability'] = rowMeans(merged_df2[, cols2avg_stab])
|
||||
if(nrow(merged_df3_comp) == (nrow(merged_df3) - na_count_df3)){
|
||||
cat("\nPASS: No. of rows match"
|
||||
,"\nDim of merged_df3_comp: "
|
||||
,"\nExpected no. of rows: ", nrow(merged_df3) - na_count_df3
|
||||
, "\nNo. of rows: ", nrow(merged_df3_comp)
|
||||
, "\nNo. of cols: ", ncol(merged_df3_comp))
|
||||
}else{
|
||||
stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.")
|
||||
cat("\nFAIL: No. of rows mismatch"
|
||||
,"\nExpected no. of rows: ", nrow(merged_df3) - na_count_df3
|
||||
,"\nGot no. of rows: ", nrow(merged_df3_comp))
|
||||
}
|
||||
|
||||
head(merged_df2[, c("mutationinformation"
|
||||
, "position"
|
||||
, "foldx_scaled"
|
||||
, scaled_cols_stab
|
||||
, "avg_stability")])
|
||||
#--------------------------------------
|
||||
# merged_df2: ADD col: average stability outcome
|
||||
#--------------------------------------
|
||||
merged_df2["avg_stability_outcome"] = ifelse(merged_df2["avg_stability"] < 0, "Destabilising", "Stabilising")
|
||||
# alternate way of deriving merged_df3_comp
|
||||
foo = merged_df3[!is.na(merged_df3$af),]
|
||||
bar = merged_df3_comp[!duplicated(merged_df3_comp$mutationinformation),]
|
||||
# compare dfs: foo and merged_df3_com
|
||||
all.equal(foo, bar)
|
||||
#summary(comparedf(foo, bar))
|
||||
cat("\n------------------------"
|
||||
, "\nSummary of created dfs:"
|
||||
, "\n------------------------"
|
||||
, "\n1) Dim of merged_df2: " , nrow(merged_df2), "," , ncol(merged_df2)
|
||||
, "\n2) Dim of merged_df2_comp: " , nrow(merged_df2_comp), "," , ncol(merged_df2_comp)
|
||||
, "\n3) Dim of merged_df3: " , nrow(merged_df3), "," , ncol(merged_df3)
|
||||
, "\n4) Dim of merged_df3_comp: " , nrow(merged_df3_comp), "," , ncol(merged_df3_comp))
|
||||
|
||||
head(merged_df2[, c("mutationinformation"
|
||||
, "position"
|
||||
, "avg_stability"
|
||||
, "avg_stability_outcome")])
|
||||
#####################################################################
|
||||
# Combining: LIG
|
||||
#####################################################################
|
||||
|
||||
table(merged_df2["avg_stability_outcome"] )
|
||||
#============
|
||||
# Merges 5-8
|
||||
#============
|
||||
cat("\n=========================================="
|
||||
, "\nStarting filtering for mcsm ligand df"
|
||||
, "\n===========================================")
|
||||
|
||||
#--------------------------------------
|
||||
# merged_df2: ADD col: average stability scaled
|
||||
#--------------------------------------
|
||||
merged_df2["avg_stability_scaled"] = lapply(merged_df2["avg_stability"]
|
||||
, function(x) {
|
||||
scales::rescale_mid(x
|
||||
, to = c(-1,1)
|
||||
, from = c( min(merged_df2["avg_stability"])
|
||||
, max(merged_df2["avg_stability"]))
|
||||
, mid = 0)
|
||||
})
|
||||
if (lig_dist_colname%in%names(my_df_u)){
|
||||
cat("\nFiltering column: ", lig_dist_colname
|
||||
, "\nCut off criteria: ", lig_dist_cutoff, "Angstroms")
|
||||
df_lig = my_df_u[my_df_u[[lig_dist_colname]] < lig_dist_cutoff,]
|
||||
|
||||
if ( all(table(merged_df2["avg_stability"]<0) == table(merged_df2["avg_stability_scaled"]<0)) ){
|
||||
cat("\nPASS: Avergae stability column successfully averaged, scaled and categorised")
|
||||
#merged_df2_lig = merged_df2[merged_df2$ligand_distance<lig_dist_cutoff,]
|
||||
merged_df2_lig = merged_df2[merged_df2[[lig_dist_colname]] < lig_dist_cutoff,]
|
||||
dim(merged_df2_lig)
|
||||
|
||||
merged_df2_comp_lig = merged_df2_comp[merged_df2_comp[[lig_dist_colname]] < lig_dist_cutoff,]
|
||||
|
||||
merged_df3_lig = merged_df3[merged_df3[[lig_dist_colname]] < lig_dist_cutoff,]
|
||||
merged_df3_comp_lig = merged_df3_comp[merged_df3_comp[[lig_dist_colname]] < lig_dist_cutoff,]
|
||||
|
||||
cat("\n------------------------"
|
||||
, "\nSummary of created ligand dfs:"
|
||||
, "\n------------------------"
|
||||
, "\n1) Dim of merged_df2_lig: " , nrow(merged_df2_lig), "," , ncol(merged_df2_lig)
|
||||
, "\n2) Dim of merged_df2_comp_lig: " , nrow(merged_df2_comp_lig), "," , ncol(merged_df2_comp_lig)
|
||||
, "\n3) Dim of merged_df3_lig: " , nrow(merged_df3_lig), "," , ncol(merged_df3_lig)
|
||||
, "\n4) Dim of merged_df3_comp_lig: " , nrow(merged_df3_comp_lig), "," , ncol(merged_df3_comp_lig))
|
||||
} else {
|
||||
cat("\nFiltering column: ", lig_dist_colname, " not found\n")
|
||||
}
|
||||
#quit()
|
||||
|
||||
# sanity check
|
||||
if (nrow(merged_df3_lig) == nrow(df_lig)){
|
||||
print("\nPASS: verified merged_df3_lig")
|
||||
}else{
|
||||
cat("\nAbort:Avergae stability column could not be processed")
|
||||
cat(paste0("\nFAIL: nrow mismatch for merged_df3_lig"
|
||||
, "\nExpected:", nrow(df_lig)
|
||||
, "\nGot:", nrow(merged_df3_lig)))
|
||||
}
|
||||
|
||||
head(merged_df2["avg_stability_scaled"])
|
||||
#==============================================================
|
||||
|
||||
##########################################################################################
|
||||
#=====================================
|
||||
# merged_df2: Affinity values: average
|
||||
#======================================
|
||||
############################################
|
||||
# OPTIONAL: write output files in one go
|
||||
############################################
|
||||
#outvars = c(#"merged_df2",
|
||||
#"merged_df2_comp",
|
||||
#"merged_df2_lig",
|
||||
#"merged_df2_comp_lig",
|
||||
|
||||
common_scaled_cols_affinity = c("affinity_scaled"
|
||||
, "mmcsm_lig_scaled")
|
||||
#"meregd_df3_comp"
|
||||
#"merged_df3_comp_lig",
|
||||
#"merged_df3",
|
||||
#"merged_df3_lig")
|
||||
|
||||
#------------------------------------------------------
|
||||
# merged_df2: ADD col: ensemble average across predictors: affinity
|
||||
#------------------------------------------------------
|
||||
if (all((common_scaled_cols_affinity%in%colnames(merged_df2)))){
|
||||
cat("\nPASS: finding affinity cols to average")
|
||||
cols2avg_aff = common_scaled_cols_affinity
|
||||
merged_df2['avg_lig_affinity'] = rowMeans(merged_df2[, cols2avg_aff])
|
||||
}else{
|
||||
stop("\nAbort: cols to average not found.")
|
||||
}
|
||||
#cat("Writing output files: "
|
||||
#, "\nPath:", outdir)
|
||||
|
||||
head(merged_df2[, c("mutationinformation"
|
||||
, "position"
|
||||
, cols2avg_aff
|
||||
, "avg_lig_affinity")])
|
||||
#for (i in outvars){
|
||||
#out_filename = paste0(i, ".csv")
|
||||
#outfile = paste0(outdir, "/", out_filename)
|
||||
#cat("Writing output file:"
|
||||
# ,"\nFilename: ", out_filename,"\n")
|
||||
#write.csv(get(i), outfile, row.names = FALSE)
|
||||
#cat("Finished writing: ", outfile
|
||||
# , "\nNo. of rows: ", nrow(get(i))
|
||||
# , "\nNo. of cols: ", ncol(get(i)), "\n")
|
||||
#}
|
||||
|
||||
table(merged_df2$affinity_scaled<0 )
|
||||
table(merged_df2$mmcsm_lig_scaled<0 )
|
||||
|
||||
#--------------------------------------
|
||||
# merged_df2: ADD col: average affinity outcome
|
||||
#--------------------------------------
|
||||
merged_df2["avg_lig_affinity_outcome"] = ifelse(merged_df2["avg_lig_affinity"] < 0, "Destabilising", "Stabilising")
|
||||
|
||||
head(merged_df2[, c("mutationinformation"
|
||||
, "position"
|
||||
, "avg_lig_affinity"
|
||||
, "avg_lig_affinity_outcome")])
|
||||
|
||||
table(merged_df2["avg_lig_affinity_outcome"] )
|
||||
|
||||
min( merged_df2['avg_lig_affinity']); max( merged_df2['avg_lig_affinity'])
|
||||
|
||||
#--------------------------------------
|
||||
# merged_df2: ADD col: average affinity scaled
|
||||
#--------------------------------------
|
||||
merged_df2["avg_lig_affinity_scaled"] = lapply(merged_df2["avg_lig_affinity"]
|
||||
, function(x) {
|
||||
scales::rescale_mid(x
|
||||
, to = c(-1,1)
|
||||
, from = c( min(merged_df2["avg_lig_affinity"])
|
||||
, max(merged_df2["avg_lig_affinity"]))
|
||||
, mid = 0)
|
||||
})
|
||||
|
||||
if ( all(table(merged_df2["avg_lig_affinity"]<0) == table(merged_df2["avg_lig_affinity_scaled"]<0)) ){
|
||||
cat("\nPASS: Avergae affinity column successfully averaged, scaled and categorised")
|
||||
|
||||
}else{
|
||||
cat("\nAbort:Avergae affinity column could not be processed")
|
||||
}
|
||||
|
||||
min( merged_df2['avg_lig_affinity_scaled']); max( merged_df2['avg_lig_affinity_scaled'])
|
||||
|
||||
######################################################################################
|
||||
|
||||
##########################################################################
|
||||
# MERGED_d3: average cols #
|
||||
# Average stability + lig-affinity columns #
|
||||
##########################################################################
|
||||
|
||||
#==========================================
|
||||
# merged_df3: Stability values: average
|
||||
#==========================================
|
||||
#-------------------
|
||||
# foldx sign reverse
|
||||
# for consistency with other tools
|
||||
#-------------------
|
||||
head(merged_df3$ddg_foldx)
|
||||
|
||||
# foldx values: reverse signs
|
||||
#merged_df3['ddg_foldxC'] = abs(merged_df3$ddg_foldx)
|
||||
#head(merged_df3[, c("ddg_foldx", "ddg_foldxC")])
|
||||
|
||||
# foldx scaled: reverse signs fs
|
||||
merged_df3['foldx_scaled_signC'] = abs(merged_df3$foldx_scaled)
|
||||
head(merged_df3[, c("foldx_scaled", "foldx_scaled_signC")])
|
||||
|
||||
# find which stability cols to average: should contain revised foldx
|
||||
scaled_cols_stab = c("duet_scaled"
|
||||
, "deepddg_scaled"
|
||||
, "ddg_dynamut2_scaled"
|
||||
#, "foldx_scaled"
|
||||
, "foldx_scaled_signC" # needed to get avg stability
|
||||
)
|
||||
|
||||
#--------------------------------------------------------
|
||||
# merged_df3: ADD col: ensemble average across predictors: stability
|
||||
#---------------------------------------------------------
|
||||
if (all((scaled_cols_stab%in%colnames(merged_df3)))){
|
||||
cat("\nPASS: finding stability cols to average")
|
||||
cols2avg_stab = scaled_cols_stab
|
||||
cat("\nAveraging", length(cols2avg_stab), "stability columns:"
|
||||
, "\nThese are:", cols2avg_stab)
|
||||
|
||||
merged_df3['avg_stability'] = rowMeans(merged_df3[, cols2avg_stab])
|
||||
}else{
|
||||
stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.")
|
||||
}
|
||||
|
||||
head(merged_df3[, c("mutationinformation"
|
||||
, "position"
|
||||
, "foldx_scaled"
|
||||
, scaled_cols_stab
|
||||
, "avg_stability")])
|
||||
#--------------------------------------
|
||||
# merged_df3: ADD col: average stability outcome
|
||||
#--------------------------------------
|
||||
merged_df3["avg_stability_outcome"] = ifelse(merged_df3["avg_stability"] < 0, "Destabilising", "Stabilising")
|
||||
|
||||
head(merged_df3[, c("mutationinformation"
|
||||
, "position"
|
||||
, "avg_stability"
|
||||
, "avg_stability_outcome")])
|
||||
|
||||
table(merged_df3["avg_stability_outcome"] )
|
||||
|
||||
#--------------------------------------
|
||||
# merged_df3: ADD col: average stability scaled
|
||||
#--------------------------------------
|
||||
merged_df3["avg_stability_scaled"] = lapply(merged_df3["avg_stability"]
|
||||
, function(x) {
|
||||
scales::rescale_mid(x
|
||||
, to = c(-1,1)
|
||||
, from = c( min(merged_df3["avg_stability"])
|
||||
, max(merged_df3["avg_stability"]))
|
||||
, mid = 0)
|
||||
})
|
||||
|
||||
if ( all(table(merged_df3["avg_stability"]<0) == table(merged_df3["avg_stability_scaled"]<0)) ){
|
||||
cat("\nPASS: Avergae stability column successfully averaged, scaled and categorised")
|
||||
|
||||
}else{
|
||||
cat("\nAbort:Avergae stability column could not be processed")
|
||||
}
|
||||
|
||||
head(merged_df3["avg_stability_scaled"])
|
||||
|
||||
##########################################################################################
|
||||
#=====================================
|
||||
# merged_df3: Affinity values: average
|
||||
#======================================
|
||||
|
||||
common_scaled_cols_affinity = c("affinity_scaled"
|
||||
, "mmcsm_lig_scaled")
|
||||
|
||||
#------------------------------------------------------
|
||||
# merged_df3: ADD col: ensemble average across predictors: affinity
|
||||
#------------------------------------------------------
|
||||
if (all((common_scaled_cols_affinity%in%colnames(merged_df3)))){
|
||||
cat("\nPASS: finding affinity cols to average")
|
||||
cols2avg_aff = common_scaled_cols_affinity
|
||||
merged_df3['avg_lig_affinity'] = rowMeans(merged_df3[, cols2avg_aff])
|
||||
}else{
|
||||
stop("\nAbort: cols to average not found.")
|
||||
}
|
||||
|
||||
head(merged_df3[, c("mutationinformation"
|
||||
, "position"
|
||||
, cols2avg_aff
|
||||
, "avg_lig_affinity")])
|
||||
|
||||
table(merged_df3$affinity_scaled<0 )
|
||||
table(merged_df3$mmcsm_lig_scaled<0 )
|
||||
|
||||
#--------------------------------------
|
||||
# merged_df3: ADD col: average affinity outcome
|
||||
#--------------------------------------
|
||||
merged_df3["avg_lig_affinity_outcome"] = ifelse(merged_df3["avg_lig_affinity"] < 0, "Destabilising", "Stabilising")
|
||||
|
||||
head(merged_df3[, c("mutationinformation"
|
||||
, "position"
|
||||
, "avg_lig_affinity"
|
||||
, "avg_lig_affinity_outcome")])
|
||||
|
||||
table(merged_df3["avg_lig_affinity_outcome"] )
|
||||
|
||||
min( merged_df3['avg_lig_affinity']); max( merged_df3['avg_lig_affinity'])
|
||||
|
||||
#--------------------------------------
|
||||
# merged_df3: ADD col: average affinity scaled
|
||||
#--------------------------------------
|
||||
merged_df3["avg_lig_affinity_scaled"] = lapply(merged_df3["avg_lig_affinity"]
|
||||
, function(x) {
|
||||
scales::rescale_mid(x
|
||||
, to = c(-1,1)
|
||||
, from = c( min(merged_df3["avg_lig_affinity"])
|
||||
, max(merged_df3["avg_lig_affinity"]))
|
||||
, mid = 0)
|
||||
})
|
||||
|
||||
if ( all(table(merged_df3["avg_lig_affinity"]<0) == table(merged_df3["avg_lig_affinity_scaled"]<0)) ){
|
||||
cat("\nPASS: Avergae affinity column successfully averaged, scaled and categorised")
|
||||
|
||||
}else{
|
||||
cat("\nAbort:Avergae affinity column could not be processed")
|
||||
}
|
||||
|
||||
min( merged_df3['avg_lig_affinity_scaled']); max( merged_df3['avg_lig_affinity_scaled'])
|
||||
|
||||
###################################################################
|
||||
#--------------------------------------------
|
||||
# merged_df3: Rectify pos_count column
|
||||
# Rename existing pos_count colum to reflect
|
||||
# that it is correct according to merged_df2
|
||||
#--------------------------------------------
|
||||
|
||||
nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE
|
||||
colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all"
|
||||
head(merged_df3$pos_count)
|
||||
head(merged_df3$df2_pos_count_all)
|
||||
|
||||
# DROP pos_count column
|
||||
# merged_df3$pos_count <-NULL
|
||||
merged_df3 = merged_df3[, !colnames(merged_df3)%in%c("pos_count")]
|
||||
head(merged_df3$pos_count)
|
||||
|
||||
merged_df3 = merged_df3 %>%
|
||||
dplyr::add_count(position)
|
||||
class(merged_df3)
|
||||
merged_df3 = as.data.frame(merged_df3)
|
||||
class(merged_df3)
|
||||
nc_change = which(colnames(merged_df3) == "n")
|
||||
colnames(merged_df3)[nc_change] <- "pos_count"
|
||||
class(merged_df3)
|
||||
|
||||
####################################################################
|
||||
#-------------------------------------------------
|
||||
# merged_df2: Rename existing pos_count
|
||||
# column to df2_pos_count_all like in above df
|
||||
#-------------------------------------------------
|
||||
nc_pc_CHANGE_df2 = which(colnames(merged_df2)== "pos_count"); nc_pc_CHANGE_df2
|
||||
colnames(merged_df2)[nc_pc_CHANGE_df2] = "df2_pos_count_all"
|
||||
head(merged_df2$pos_count)
|
||||
head(merged_df2$df2_pos_count_all)
|
||||
|
||||
####################################################################
|
||||
# ADD: distance to Nucleic acid column for na genes
|
||||
# already done in plotting_data
|
||||
####################################################################
|
||||
# Choose few columns to return as plot_df
|
||||
if (plotting){
|
||||
merged_df3 = merged_df3[, colnames(merged_df3)%in%c(plotting_cols, "pos_count", "df2_pos_count_all")]
|
||||
merged_df2 = merged_df2[, colnames(merged_df2)%in%c(plotting_cols, "df2_pos_count_all")]
|
||||
}
|
||||
####################################################################
|
||||
return(list( merged_df2
|
||||
, merged_df3
|
||||
))
|
||||
, merged_df2_comp
|
||||
, merged_df3_comp
|
||||
, merged_df2_lig
|
||||
, merged_df3_lig
|
||||
, merged_df2_comp_lig
|
||||
, merged_df3_comp_lig))
|
||||
|
||||
cat("\nEnd of combining_dfs_plotting.R script")
|
||||
}
|
|
@ -1,584 +0,0 @@
|
|||
#!/usr/bin/env Rscript
|
||||
|
||||
#########################################################
|
||||
# TASK: function for wide plot
|
||||
#with consurf score and error bars
|
||||
#position numbers coloured by
|
||||
# - ligand distance
|
||||
# - active site residues
|
||||
#########################################################
|
||||
|
||||
#==========================================================
|
||||
# wideP():
|
||||
# input args
|
||||
#==========================================================
|
||||
#==========================================================
# OLD_wideP_consurf(): wide per-position plot of a score column
# (default: consurf score) with optional error bars and optional
# annotation rows (tiles) drawn below the plotting area.
#
# Args:
#   plotdf               data frame holding all columns named below
#   xvar_colname         column plotted on the x-axis (as a factor)
#   yvar_colname         column plotted on the y-axis
#   yvar_colourN_colname column whose factor levels colour the points
#   plot_error_bars      add geom_errorbar() using the two EB columns
#   upper_EB_colname, lower_EB_colname
#                        columns giving the error bar limits
#   plot_type            "bar" for geom_bar(), anything else for points
#   point_colours        manual colour values for the points; when not
#                        supplied a 30-step seagreen -> sienna3 ramp is used
#   p_size               point size
#   leg_title1           title of legend 1 (score colours)
#   leg_labels           currently not used in the function body
#   panel_col, panel_col_fill, x_axls, y_axls, x_axts, y_axts, default_xtc,
#   ptitle, xlab, ylab, pts, t_margin, r_margin, b_margin, l_margin,
#   unit_margin          theme/label settings passed to theme()/labs()
#   xtext_colour_aa      colour x-axis tick text by membership in
#                        xtext_colour_aa1 / xtext_colour_aa2 using
#                        xtext_colours[1] / [2], otherwise [3]
#   A_xvar_lig           add a tile row coloured by rounded ligand distance
#                        (lig_dist_colname, lig_dist_colours, tpos0, tW0, tH0)
#                        and a second legend titled leg_title2
#   A_xvar_aa            add tile rows for active site / drug / ligand
#                        positions (aa_pos_*, *_colour*, tpos1..3, tW, tH)
#   default_gt_clr       tile colour for positions in none of the sets
#   debug                print progress messages for the site-tile section
#
# tpos0..tpos3: 0 is a magic number meaning "use the default position
# below the minimum y value".
#
# Returns: a cowplot::plot_grid() object: plot (legend removed) + legend(s).
#
# NOTE(review): relies on names defined outside this function:
# LigDist_colname, consurf_palette2, get_legend(), and the ggplot2 functions.
#==========================================================
OLD_wideP_consurf <- function(plotdf
                              , xvar_colname = "position"
                              , yvar_colname = "consurf_score"
                              , yvar_colourN_colname = "consurf_colour_rev" # num from 0-1
                              , plot_error_bars = T
                              , upper_EB_colname = "consurf_ci_upper"
                              , lower_EB_colname = "consurf_ci_lower"

                              , plot_type = "point" # default is point
                              , point_colours
                              , p_size = 2
                              , leg_title1 = ""
                              # FIX: was `"0": "Insufficient Data"` etc., i.e. the
                              # sequence operator on strings, which errors when forced
                              , leg_labels = c("0" = "Insufficient Data"
                                               , "1" = "Variable"
                                               , "2", "3", "4", "5", "6", "7", "8"
                                               , "9" = "Conserved")
                              , panel_col = "black"
                              , panel_col_fill = "black"

                              # axes title and label sizes
                              , x_axls = 12 # x-axis label size
                              , y_axls = 15 # y-axis label size
                              , x_axts = 12 # x-axis text size
                              , y_axts = 12 # y-axis text size
                              , default_xtc = "black" # x-axis text colour
                              , ptitle = ""
                              , xlab = ""
                              , ylab = ""
                              , pts = 20

                              # plot margins
                              , t_margin = 0.5
                              , r_margin = 0.5
                              , b_margin = 1
                              , l_margin = 1
                              , unit_margin = "cm"

                              # Custom 1: x-axis: text colour
                              , xtext_colour_aa = F
                              , xtext_colour_aa1 = active_aa_pos
                              , xtext_colour_aa2 = aa_pos_drug
                              , xtext_colours = c("purple", "brown", "black")

                              # Custom 2: x-axis: geom tiles ~ lig distance
                              , A_xvar_lig = T
                              , leg_title2 = "Ligand Distance"
                              , lig_dist_colname = LigDist_colname # from globals
                              , lig_dist_colours = c("green", "yellow", "orange", "red")
                              , tpos0 = 0 # 0 is a magic number that does my sensible default
                              , tW0 = 1
                              , tH0 = 0.3

                              # Custom 3: x-axis: geom tiles ~ active sites and ligand
                              , A_xvar_aa = F
                              , aa_pos_drug = NULL
                              , drug_aa_colour = "purple"
                              , tW = 1
                              , tH = 0.2
                              , active_aa_pos = NULL
                              , active_aa_colour = "brown"

                              , aa_pos_lig1 = NULL
                              , aa_colour_lig1 = "blue"
                              , tpos1 = 0

                              , aa_pos_lig2 = NULL
                              , aa_colour_lig2 = "cyan"
                              , tpos2 = 0

                              , aa_pos_lig3 = NULL
                              , aa_colour_lig3 = "cornflowerblue"
                              , tpos3 = 0

                              , default_gt_clr = "white"
                              , debug=FALSE
){

  # Default point palette when the caller supplies none
  if (missing(point_colours)){
    point_colours = colorRampPalette(c("seagreen", "sienna3"))(30)
  }

  ###############################
  # Custom 1: x-axis text colour
  ###############################
  if (xtext_colour_aa){
    # one colour per x-axis level, in factor-level order
    positionF <- levels(as.factor(plotdf[[xvar_colname]]))
    aa_pos_colours = ifelse(positionF%in%xtext_colour_aa1, xtext_colours[1]
                            , ifelse(positionF%in%xtext_colour_aa2
                                     , xtext_colours[2]
                                     , xtext_colours[3]))
  }else{
    aa_pos_colours = default_xtc
  }

  ################################################
  # Custom 2: x-axis geom tiles ~ lig distance
  # Build data with colours ~ rounded ligand distance
  ################################################
  if (A_xvar_lig){
    cat("\nAnnotating x-axis ~", lig_dist_colname, "requested...")

    # round column values: these are the values to colour by
    lig_dist_rounded = round(plotdf[[lig_dist_colname]], digits = 0)
    plotdf['lig_distR'] = lig_dist_rounded

    # ligand distance range: used later for the legend2 gradient
    lig_min = min(lig_dist_rounded, na.rm = T)
    lig_max = max(lig_dist_rounded, na.rm = T)
    lig_mean = round(mean(lig_dist_rounded, na.rm = T))

    # colour key: one colour per distinct rounded distance.
    # sort() removes NA, so the key never contains an NA distance.
    ligD_valsR = sort(unique(lig_dist_rounded))
    n_colours = length(ligD_valsR)
    lig_cols = colorRampPalette(lig_dist_colours)(n_colours)

    cat("\nStarting: mapping b/w"
        , lig_dist_colname
        , "and colours")
    ligDcolKey <- data.frame(ligD_colours = lig_cols
                             , lig_distR = ligD_valsR)
    cat("\nSuccessful: Mapping b/w", lig_dist_colname, "and colours")

    # merge colour key with plotdf.
    # NOTE(review): merge() defaults to an inner join, so rows whose
    # distance is NA have no match in the key and are dropped here.
    plotdf = merge(plotdf, ligDcolKey, by = 'lig_distR')
  }

  ################################################
  # Custom 3: x-axis geom tiles ~ active sites
  # Build data with colours ~ site membership.
  # Each column starts from the previous one, so later
  # ligands override earlier colours at shared positions.
  ################################################
  if (A_xvar_aa){
    cat("\nAnnotation for xvar requested. Building colours for annotation...")

    aa_colour_colname  = "bg_all"
    aa_colour_colname1 = "col_bg1"
    aa_colour_colname2 = "col_bg2"
    aa_colour_colname3 = "col_bg3"

    # column colour 0: Active site + drug binding sites (drug takes priority)
    plotdf[[aa_colour_colname]] = ifelse(plotdf[[xvar_colname]]%in%aa_pos_drug
                                         , drug_aa_colour
                                         , ifelse(plotdf[[xvar_colname]]%in%active_aa_pos
                                                  , active_aa_colour, default_gt_clr ))
    cat("\nColumn created 'bg_all':", length(plotdf[[aa_colour_colname]]))

    # column colour 1: Ligand 1 + drug binding sites
    cat("\nAssigning colours to drug binding and ligand-1 binding residues")
    plotdf[[aa_colour_colname1]] = ifelse(plotdf[[xvar_colname]]%in%aa_pos_lig1
                                          , aa_colour_lig1, plotdf[[aa_colour_colname]])

    # column colour 2: Ligand 2
    plotdf[[aa_colour_colname2]] = ifelse(plotdf[[xvar_colname]]%in%aa_pos_lig2
                                          , aa_colour_lig2, plotdf[[aa_colour_colname1]])

    # column colour 3: Ligand 3
    plotdf[[aa_colour_colname3]] = ifelse(plotdf[[xvar_colname]]%in%aa_pos_lig3
                                          , aa_colour_lig3, plotdf[[aa_colour_colname2]])
  }

  ###################
  # start plot
  ###################

  # x and y axis range
  # FIX: na.rm so a missing score does not turn the y-limits into NA
  my_xlim = length(unique(plotdf[[xvar_colname]]))
  ymin = min(plotdf[[yvar_colname]], na.rm = T)
  ymax = max(plotdf[[yvar_colname]], na.rm = T)

  # Resolve the tile-row positions (0 == "use default below ymin").
  # FIX: previously done only when A_xvar_lig was TRUE, so site tiles
  # (A_xvar_aa) were drawn at y = 0 whenever A_xvar_lig was FALSE.
  if (tpos0 == 0){
    tpos0 = ymin-0.5
  }
  if (tpos1 == 0){
    tpos1 = ymin-0.65
  }
  if (tpos2 == 0){
    tpos2 = ymin-0.75
  }
  if (tpos3 == 0){
    tpos3 = ymin-0.85
  }

  g = ggplot(plotdf, aes_string(x = sprintf("factor(%s)", xvar_colname)
                                , y = yvar_colname
                                , colour = sprintf("factor(%s)", yvar_colourN_colname)
  ))

  # bar plot is the special case; everything else is drawn as points
  if (plot_type == "bar"){
    g0 = g +
      geom_bar(stat = "identity")
  }else{
    # clip = "off" lets the annotation tiles below ymin remain visible
    g0 = g +
      coord_cartesian(xlim = c(1, my_xlim)
                      , ylim = c(ymin, ymax)
                      , clip = "off") +
      geom_point(size = p_size) +
      scale_colour_manual(values = point_colours)
  }

  if (plot_error_bars){
    # .data pronoun instead of eval(parse(text = ...)) to look up columns by name
    g0 = g0 +
      geom_errorbar(aes(ymin = .data[[lower_EB_colname]]
                        , ymax = .data[[upper_EB_colname]]
      ))
  }

  #---------------------
  # add axis formatting
  #---------------------
  # NOTE(review): the colour-guide title below is a placeholder; this legend
  # is removed from the returned plot (legend.position = "none" further down).
  g1 = g0 + theme( axis.text.x = element_text(size = x_axts
                                              , angle = 90
                                              , hjust = 1
                                              , vjust = 0.4
                                              , face = "bold"
                                              , colour = aa_pos_colours)
                   , axis.text.y = element_text(size = y_axts
                                                , angle = 0
                                                , hjust = 1
                                                , vjust = 0)
                   , axis.title.x = element_text(size = x_axls)
                   , axis.title.y = element_text(size = y_axls )
                   , panel.background = element_rect(fill = panel_col_fill, color = panel_col)
                   , panel.grid.major = element_line(color = "black")
                   , panel.grid.minor = element_line(color = "black")
                   , plot.title = element_text(size = pts
                                               , hjust = 0.5)
                   , plot.margin = margin(t = t_margin
                                          , r = r_margin
                                          , b = b_margin
                                          , l = l_margin
                                          , unit = unit_margin))+
    guides(colour = guide_legend(title = "ConsurfXXXX")) +
    labs(title = ptitle
         , x = xlab
         , y = ylab)

  #------------------
  # Extract legend1: built from a throw-away bar plot filled by the
  # colour column, using the consurf_palette2 global
  #------------------
  g1_leg = ggplot(plotdf, aes_string(x = sprintf("factor(%s)"
                                                 , xvar_colname) ))
  g1_leg = g1_leg + geom_bar()
  g1_leg = g1_leg + geom_bar(aes_string(fill = sprintf("factor(%s)"
                                                       , yvar_colourN_colname)))
  g1_leg = g1_leg + scale_fill_manual(values = consurf_palette2 , name = leg_title1)
  legend1 = get_legend(g1_leg)

  #============================================
  # x-axis: geom_tiles ~ ligand distance
  #============================================
  if (A_xvar_lig){
    cat("\nColouring x-axis aa based on", lig_dist_colname
        , "\nNo. of colours:", n_colours)

    out = g1 + geom_tile(aes(y = tpos0
                             , width = tW0
                             , height = tH0)
                         , fill = plotdf$ligD_colours
                         , colour = plotdf$ligD_colours
                         , linetype = "blank")

    #------------------
    # Extract legend2: gradient legend for the ligand distance
    #------------------
    leg_breaks = seq(lig_min, lig_max, len = 5)
    leg_breaks_lab = round(leg_breaks, digits = 0)
    g2_leg = ggplot(plotdf, aes_string(x = sprintf("factor(%s)", xvar_colname)
                                       , y = yvar_colname)
    ) +
      geom_tile(aes(fill = .data[[lig_dist_colname]])
                , colour = "white") +
      scale_fill_gradient2(midpoint = lig_mean
                           , low = "green"
                           , mid = "yellow"
                           , high = "red"
                           , breaks = leg_breaks
                           , limits = c(lig_min, lig_max)
                           , labels = leg_breaks_lab
                           , name = leg_title2)

    legend2 = get_legend(g2_leg)

  }else{
    out = g1
  }

  #==============================================
  # x-axis: geom_tiles ~ active sites and others
  #==============================================
  if (A_xvar_aa){

    # Add one row of site tiles at height `ypos`, coloured by `colour_col`
    add_site_tiles = function(p, ypos, colour_col){
      force(ypos)
      p + geom_tile(aes(y = ypos
                        , width = tW
                        , height = tH)
                    , fill = plotdf[[colour_col]]
                    , colour = plotdf[[colour_col]]
                    , linetype = "solid")
    }

    has_sites = !is.null(active_aa_pos) && !is.null(aa_pos_drug)
    has_lig1 = !is.null(aa_pos_lig1)
    has_lig2 = !is.null(aa_pos_lig2)
    has_lig3 = !is.null(aa_pos_lig3)

    # The four cases below are mutually exclusive; any other combination
    # (e.g. ligand 2 without ligand 1) adds no tiles, as before.
    if (has_sites && has_lig1 && has_lig2 && has_lig3){
      #---------------------
      # Add2plot: 3 ligands
      #---------------------
      if (debug){
        cat("\n\nAnnotating xvar with active, drug binding, and Lig 1&2&3 sites")
        cat("\nCreating column colours, column name:", aa_colour_colname3)
        cat("\nDoing Plot with 3 ligands")
      }
      out = add_site_tiles(out, tpos3, aa_colour_colname3)
      out = add_site_tiles(out, tpos2, aa_colour_colname2)
      out = add_site_tiles(out, tpos1, aa_colour_colname1)
      if (debug){
        cat("\nDone Plot with 3 ligands")
      }

    }else if (has_sites && has_lig1 && has_lig2 && !has_lig3){
      #---------------------
      # Add2plot: 2 ligands
      #---------------------
      if (debug){
        cat("\n\nAnnotating xvar with active, drug binding, and Lig 1&2 sites")
        cat("\nCreating column colours, column name:", aa_colour_colname2)
        cat("\nDoing Plot with 2 ligands")
      }
      out = add_site_tiles(out, tpos2, aa_colour_colname2)
      out = add_site_tiles(out, tpos1, aa_colour_colname1)
      if (debug){
        cat("\nDone Plot with 2 ligands")
      }

    }else if (has_sites && has_lig1 && !has_lig2 && !has_lig3){
      #---------------------
      # Add2plot: 1 ligand
      #---------------------
      if (debug){
        cat("\n\nAnnotating xvar with active, drug binding, and Lig 1 sites")
        cat("\nCreating column colours, column name:", aa_colour_colname1)
        cat("\nDoing Plot with 1 ligands")
      }
      out = add_site_tiles(out, tpos1, aa_colour_colname1)
      # FIX: this message was printed even when debug was FALSE
      if (debug){
        cat("\nDone Plot with 1 ligand")
      }

    }else if (has_sites && !has_lig1 && !has_lig2 && !has_lig3){
      #-----------------------------------
      # Add2plot: NO ligands
      # Just drug and active site
      #-----------------------------------
      if (debug){
        cat("\n\nAnnotating xvar with active and drug binding sites")
        cat("\nCreating column colours, column name:", aa_colour_colname)
        cat("\nDoing Plot with 0 ligands: active and drug site only")
      }
      out = add_site_tiles(out, tpos3, aa_colour_colname)
      if (debug){
        cat("\nDone Plot with: Active and Drug sites")
      }
    }

  }else{
    cat("\nNo annotation for additional ligands on xvar requested")
  }

  #==============================================
  # Assemble: plot without its own legend + extracted legend(s)
  #==============================================
  if (A_xvar_lig){
    legs = cowplot::plot_grid(legend1
                              , legend2
                              , ncol = 1
                              , align = "hv"
                              , rel_heights = c(2/4,3/4))

    out2 = cowplot::plot_grid( out + theme(legend.position = "none")
                               , legs
                               , ncol = 2
                               , align = "hv"
                               , rel_widths = c(9/10, 0.4/10)
    )
  }else{
    out2 = cowplot::plot_grid( out + theme(legend.position = "none")
                               , legend1
                               , ncol = 2
                               , align = "hv"
                               , rel_widths = c(9/10, 0.5/10)
    )
  }

  return(out2)
}
|
||||
|
||||
#############################################################
|
||||
# end of function
|
||||
#############################################################
|
|
@ -1,132 +0,0 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: Script to format data for Correlation plots:
|
||||
# corr_data_extract()
|
||||
|
||||
##################################################################
|
||||
# LigDist_colname #from globals: plotting_globals.R
|
||||
# ppi2Dist_colname #from globals: plotting_globals.R
|
||||
# naDist_colname #from globals: plotting_globals.R
|
||||
|
||||
# corr_data_extract(): extract the columns used for correlation plots and
# rename them to their display names.
#
# Args:
#   df                   data frame with the source columns
#   gene                 gene name (case-insensitive); selects the default
#                        column set: pnca | gid | rpob | alr | embb | katg
#   drug                 name of the drug column in df
#   colnames_to_extract  [optional] columns to extract instead of the defaults
#   colnames_display_key [optional] display names for colnames_to_extract
#                        (assumed to be a character vector in the same
#                        order -- TODO confirm against callers)
#   extract_scaled_cols  also extract every column of df whose name
#                        contains "scaled" (kept under its own name)
#
# The defaults are used unless BOTH optional column arguments are supplied.
# The default column set reads LigDist_colname (and, depending on the gene,
# ppi2Dist_colname / naDist_colname) from globals: plotting_globals.R
#
# Returns: data frame of the extracted columns, named by display name.
corr_data_extract <- function(df
                              , gene
                              , drug
                              , colnames_to_extract
                              , colnames_display_key
                              , extract_scaled_cols = F){

  # log10maf: can't see raw maf otherwise
  if ("maf" %in% colnames(df)){
    df$maf2 = log10(df$maf)
  }

  if ( missing(colnames_to_extract) || missing(colnames_display_key) ){

    cat("\n=========================================="
        , "\nCORR PLOTS data: ALL params"
        , "\n=========================================")

    cat("\nExtracting default columns for"
        , "\nGene name:", gene
        , "\nDrug name:", drug)

    geneL_normal = c("pnca")
    geneL_na     = c("gid", "rpob")
    geneL_ppi2   = c("alr", "embb", "katg", "rpob")

    common_colnames = c(drug, "dst_mode"
                        , "duet_stability_change" , "ddg_foldx" , "deepddg" , "ddg_dynamut2"
                        , "asa" , "rsa" , "kd_values" , "rd_values"
                        # previously maf
                        , "maf2" , "log10_or_mychisq" , "neglog_pval_fisher"
                        , LigDist_colname
                        , "consurf_score" , "snap2_score" , "provean_score"
                        , "ligand_affinity_change", "mmcsm_lig"
    )

    display_common_colnames = c( drug, "dst_mode"
                                 , "mCSM-DUET" , "FoldX" , "DeepDDG", "Dynamut2"
                                 , "ASA" , "RSA" , "KD" , "RD"
                                 # previously MAF
                                 , "Log10(MAF)" , "Log10(OR)" , "-Log10(P)"
                                 , "Lig-Dist"
                                 , "ConSurf" , "SNAP2" , "PROVEAN"
                                 , "mCSM-lig", "mmCSM-lig"
    )

    gene_lc = tolower(gene)
    uses_na   = gene_lc %in% geneL_na
    uses_ppi2 = gene_lc %in% geneL_ppi2

    # Fail early with a clear message instead of an obscure
    # "argument is missing" error further down
    if (!(gene_lc %in% geneL_normal || uses_na || uses_ppi2)){
      stop("Abort: no default columns defined for gene: ", gene)
    }

    colnames_to_extract = common_colnames
    display_colnames    = display_common_colnames

    # rpob exists in both na and ppi2, so it gets both sets (NA first)
    if (uses_na){
      colnames_to_extract = c(colnames_to_extract, "mcsm_na_affinity", naDist_colname)
      display_colnames    = c(display_colnames, "mCSM-NA", "NA-Dist")
    }
    if (uses_ppi2){
      colnames_to_extract = c(colnames_to_extract, "mcsm_ppi2_affinity", ppi2Dist_colname)
      display_colnames    = c(display_colnames, "mCSM-PPI2", "PPI-Dist")
    }

  }else{
    # FIX: user-supplied columns were previously ignored and the function
    # returned NULL; extract them and apply the supplied display names
    if (length(colnames_to_extract) != length(colnames_display_key)){
      stop("Abort: Length mismatch: b/w colnames_to_extract and colnames_display_key")
    }
    display_colnames = colnames_display_key
  }

  # [optional] arg: extract_scaled_cols
  if (extract_scaled_cols){
    cat("\nExtracting scaled columns as well...\n")
    # FIX: look for scaled columns in df (was the global merged_df3), and
    # extend the display names too so both vectors stay the same length
    all_scaled_cols = colnames(df)[grep(".*scaled", colnames(df))]
    all_scaled_cols = setdiff(all_scaled_cols, colnames_to_extract)
    colnames_to_extract = c(colnames_to_extract, all_scaled_cols)
    display_colnames    = c(display_colnames, all_scaled_cols)
  }

  corr_df = df[, colnames_to_extract, drop = FALSE]
  colnames(corr_df) = display_colnames

  cat("\nExtracted ncols:", ncol(corr_df)
      ,"\nRenaming successful")

  cat("\nSneak peak...")
  print(head(corr_df))

  return(corr_df)
}
|
|
@ -1,58 +0,0 @@
|
|||
# dashboard_ggpairs(): pairwise correlation matrix plot (GGally::ggpairs)
# for all columns of plot_df except the last one, coloured by
# sensitive ("S", dst_mode == 0) vs resistant ("R") mutations.
#
# Args:
#   plot_df       data frame to plot; must contain a dst_mode column
#   plot_title    title of the plot
#   tt_args_size  text size of the overall correlation label
#   gp_args_size  text size of the per-group correlation labels
#   method        correlation method passed to the 'cor' panel:
#                 "spearman", "kendall", or anything else (labelled "P")
#
# Returns: the ggpairs plot with red/blue manual colour and fill scales.
dashboard_ggpairs=function(
    plot_df, plot_title
    , tt_args_size = 2.5
    , gp_args_size = 2.5
    , method = "spearman"
){
  # Symbol shown in front of the correlation value.
  # FIX: the former `if` + `if/else` pair overwrote the spearman
  # symbol with "P"; a single switch keeps the three cases exclusive.
  title = switch(method
                 , "spearman" = "ρ"
                 , "kendall"  = "τ"
                 , "P")

  ggpairs(
    plot_df,
    columns = 1:(ncol(plot_df)-1),
    upper = list(
      continuous = wrap(
        'cor', # ggally_cor()
        # FIX: was hard-coded to "spearman", ignoring the method argument
        method = method,
        use = "pairwise.complete.obs",
        title = title,
        digits = 2,
        justify_labels = "centre",
        title_args = list(size = tt_args_size, colour = "black"),
        group_args = list(size = gp_args_size)
      )
    ),
    lower = list(
      continuous = wrap("points",
                        alpha = 0.7,
                        size = 0.125),
      combo = wrap("dot",
                   alpha = 0.7,
                   size = 0.125)
    ),
    mapping = aes(
      colour = factor(
        ifelse(
          dst_mode==0,
          "S",
          "R"
        )
      ),
      alpha = 0.5
    ),
    title = plot_title
  ) +
    scale_colour_manual(values = c("red", "blue")) +
    scale_fill_manual(values = c("red", "blue"))
}
|
||||
|
|
@ -1,825 +0,0 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: Script to format data for dm om plots:
|
||||
# generating WF and LF data for each of the parameters:
|
||||
# duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
|
||||
# Called by get_plotting_dfs.R
|
||||
|
||||
##################################################################
|
||||
# from plotting_globals.R
|
||||
# DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname
|
||||
#gene
|
||||
|
||||
dm_om_wf_lf_data <- function(df
|
||||
, gene # from globals
|
||||
, colnames_to_extract
|
||||
#, LigDist_colname # from globals used
|
||||
#, ppi2Dist_colname #from globals used
|
||||
#, naDist_colname #from globals used
|
||||
, snp_colname = "mutationinformation"
|
||||
, aa_pos_colname = "position"
|
||||
, mut_colname = "mutation"
|
||||
, mut_info_colname = "dst_mode"
|
||||
, mut_info_label_colname = "mutation_info_labels"
|
||||
, categ_cols_to_factor){
|
||||
|
||||
df = as.data.frame(df)
|
||||
df$maf2 = log10(df$maf) # can't see otherwise
|
||||
sum(is.na(df$maf2))
|
||||
|
||||
# Initialise the required dfs based on gene name
|
||||
#geneL_normal = c("pnca")
|
||||
#geneL_na = c("gid", "rpob")
|
||||
#geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
|
||||
#ADDED: IMPORTANT for rpob to be in both to make sure all data is returned
|
||||
geneL_normal = c("pnca")
|
||||
geneL_both = c("rpob")
|
||||
geneL_ppi2 = c("alr", "embb", "katg")
|
||||
geneL_na = c("gid")
|
||||
|
||||
# common_dfs
|
||||
common_dfsL = list(
|
||||
wf_duet = data.frame()
|
||||
, lf_duet = data.frame()
|
||||
, wf_mcsm_lig = data.frame()
|
||||
, lf_mcsm_lig = data.frame()
|
||||
, wf_mmcsm_lig2 = data.frame() # NEW
|
||||
, lf_mmcsm_lig2 = data.frame() # NEW
|
||||
, wf_foldx = data.frame()
|
||||
, lf_foldx = data.frame()
|
||||
, wf_deepddg = data.frame()
|
||||
, lf_deepddg = data.frame()
|
||||
, wf_dynamut2 = data.frame()
|
||||
, lf_dynamut2 = data.frame()
|
||||
, wf_consurf = data.frame()
|
||||
, lf_consurf = data.frame()
|
||||
, wf_snap2 = data.frame()
|
||||
, lf_snap2 = data.frame()
|
||||
, wf_dist_gen = data.frame() # NEW
|
||||
, lf_dist_gen = data.frame() # NEW
|
||||
)
|
||||
|
||||
# additional dfs
|
||||
if (tolower(gene)%in%geneL_normal){
|
||||
wf_lf_dataL = common_dfsL
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
additional_dfL = list(
|
||||
wf_mcsm_ppi2 = data.frame()
|
||||
, lf_mcsm_ppi2 = data.frame()
|
||||
)
|
||||
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
additional_dfL = list(
|
||||
wf_mcsm_na = data.frame()
|
||||
, lf_mcsm_na = data.frame()
|
||||
)
|
||||
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_both){
|
||||
additional_dfL = list(
|
||||
wf_mcsm_ppi2 = data.frame(),
|
||||
lf_mcsm_ppi2 = data.frame(),
|
||||
wf_mcsm_na = data.frame(),
|
||||
lf_mcsm_na = data.frame()
|
||||
)
|
||||
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
||||
}
|
||||
|
||||
cat("\nInitializing an empty list of length:"
|
||||
, length(wf_lf_dataL))
|
||||
|
||||
#=======================================================================
|
||||
# display names
|
||||
stability_suffix <- paste0(delta_symbol, delta_symbol, "G")
|
||||
|
||||
duet_dn = paste0("mCSM-DUET ", stability_suffix); duet_dn
|
||||
foldx_dn = paste0("FoldX ", stability_suffix); foldx_dn
|
||||
deepddg_dn = paste0("Deepddg " , stability_suffix); deepddg_dn
|
||||
dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn
|
||||
|
||||
consurf_dn = "ConSurf"
|
||||
snap2_dn = "SNAP2"
|
||||
provean_dn = "PROVEAN"
|
||||
|
||||
or_dn = "Log10(OR)"
|
||||
pval_dn = "-Log10(P)"
|
||||
maf2_dn = "Log10(MAF)"
|
||||
|
||||
asa_dn = "ASA"
|
||||
rsa_dn = "RSA"
|
||||
rd_dn = "RD"
|
||||
kd_dn = "KD"
|
||||
|
||||
lig_dist_dn = paste0("Lig Dist(", angstroms_symbol, ")"); lig_dist_dn
|
||||
mcsm_lig_dn = paste0("mCSM-lig"); mcsm_lig_dn
|
||||
mmcsm_lig_dn2 = paste0("mmCSM-lig"); mmcsm_lig_dn2
|
||||
|
||||
na_dist_dn = paste0("Dist to NA (", angstroms_symbol, ")"); na_dist_dn
|
||||
mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn
|
||||
|
||||
ppi2_dist_dn = paste0("PPI Dist(", angstroms_symbol, ")"); ppi2_dist_dn
|
||||
mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn
|
||||
|
||||
#=======================================================================
|
||||
if(missing(categ_cols_to_factor)){
|
||||
categ_cols_to_factor = grep( "_outcome|_info", colnames(df) )
|
||||
}else{
|
||||
categ_cols_to_factor = categ_cols_to_factor
|
||||
}
|
||||
#fact_cols = colnames(comb_df_s)[grepl( "_outcome|_info", colnames(comb_df_s) )]
|
||||
fact_cols = colnames(df)[categ_cols_to_factor]
|
||||
|
||||
if (any(lapply(df[, fact_cols], class) == "character")){
|
||||
cat("\nChanging", length(categ_cols_to_factor), "cols to factor")
|
||||
df[, fact_cols] <- lapply(df[, fact_cols], as.factor)
|
||||
if (all(lapply(df[, fact_cols], class) == "factor")){
|
||||
cat("\nSuccessful: cols changed to factor")
|
||||
}
|
||||
}else{
|
||||
cat("\nRequested cols aready factors")
|
||||
}
|
||||
|
||||
cat("\ncols changed to factor are:\n", colnames(df)[categ_cols_to_factor] )
|
||||
|
||||
#=======================================================================
|
||||
if (missing(colnames_to_extract)){
|
||||
# NOTE: these vars are from globals
|
||||
#LigDist_colname, ppi2Dist_colname, naDist_colname
|
||||
|
||||
common_colnames = c(snp_colname
|
||||
, mut_colname , "dst_mode" , mut_info_label_colname
|
||||
, aa_pos_colname
|
||||
|
||||
, "duet_stability_change" , "duet_scaled" , "duet_outcome"
|
||||
, "ddg_foldx" , "foldx_scaled" , "foldx_outcome"
|
||||
, "deepddg" , "deepddg_scaled" , "deepddg_outcome"
|
||||
, "ddg_dynamut2" , "ddg_dynamut2_scaled" , "ddg_dynamut2_outcome"
|
||||
|
||||
, "consurf_score" , "consurf_scaled" , "consurf_outcome" , "consurf_colour_rev"
|
||||
, "snap2_score" , "snap2_scaled" , "snap2_outcome"
|
||||
, "provean_score" , "provean_scaled" , "provean_outcome"
|
||||
|
||||
, "log10_or_mychisq" , "neglog_pval_fisher" , "maf2"
|
||||
, "asa" , "rsa" , "rd_values" , "kd_values"
|
||||
|
||||
, "mmcsm_lig" , "mmcsm_lig_scaled" , "mmcsm_lig_outcome"
|
||||
, "ligand_affinity_change", "affinity_scaled" , "ligand_outcome" , LigDist_colname
|
||||
)
|
||||
|
||||
display_common_colnames = c(snp_colname
|
||||
, mut_colname
|
||||
, "dst_mode" , mut_info_label_colname
|
||||
, aa_pos_colname
|
||||
|
||||
, "duet_stability_change" , duet_dn , "duet_outcome"
|
||||
, "ddg_foldx" , foldx_dn , "foldx_outcome"
|
||||
, "deepddg" , deepddg_dn , "deepddg_outcome"
|
||||
, "ddg_dynamut2" , dynamut2_dn , "ddg_dynamut2_outcome"
|
||||
, consurf_dn , "consurf_scaled" , "consurf_outcome" , "consurf_colour_rev"
|
||||
, snap2_dn , "snap2_scaled" , "snap2_outcome"
|
||||
, provean_dn , "provean_scaled" , "provean_outcome"
|
||||
|
||||
, or_dn , pval_dn , maf2_dn
|
||||
, asa_dn , rsa_dn , rd_dn , kd_dn
|
||||
|
||||
, "mmcsm_lig" , mmcsm_lig_dn2 , "mmcsm_lig_outcome"
|
||||
, "ligand_affinity_change", mcsm_lig_dn , "ligand_outcome" , lig_dist_dn
|
||||
)
|
||||
|
||||
if (length(common_colnames) == length(display_common_colnames)){
|
||||
cat("\nLength match: Proceeding to extracting end cols")
|
||||
}else{
|
||||
stop("Abort: Length mismatch: b/w ncols to extract and disply name")
|
||||
}
|
||||
|
||||
# ordering is important!
|
||||
# static_cols_end = c(lig_dist_dn
|
||||
# , "ASA"
|
||||
# , "RSA"
|
||||
# , "RD"
|
||||
# , "KD"
|
||||
# , "Log10(MAF)"
|
||||
# #, "Log10(OR)"
|
||||
# #, "-Log(P)"
|
||||
# )
|
||||
static_cols_end_common = c(lig_dist_dn, "Log10(MAF)"); static_cols_end_common
|
||||
|
||||
if (tolower(gene)%in%geneL_normal){
|
||||
colnames_to_extract = c(common_colnames)
|
||||
display_colnames = c(display_common_colnames)
|
||||
comb_df_sl = df[, colnames_to_extract]
|
||||
|
||||
# Rename cols: display names
|
||||
colnames(comb_df_sl) = display_colnames
|
||||
#colnames(comb_df)[colnames(comb_df)%in%colnames_to_extract] <- display_colnames
|
||||
|
||||
static_cols_end = static_cols_end_common
|
||||
cat("\nend colnames for gene:", static_cols_end)
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
colnames_to_extract = c(common_colnames, "mcsm_ppi2_affinity" ,"mcsm_ppi2_scaled" , "mcsm_ppi2_outcome" , ppi2Dist_colname)
|
||||
display_colnames = c(display_common_colnames,"mcsm_ppi2_affinity", mcsm_ppi2_dn , "mcsm_ppi2_outcome" , ppi2_dist_dn )
|
||||
comb_df_sl = df[, colnames_to_extract]
|
||||
|
||||
# Rename cols: display names
|
||||
colnames(comb_df_sl) = display_colnames
|
||||
# Affinity filtered data: mcsm-ppi2 --> ppi2Dist_colname
|
||||
comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2_dist_dn]]<DistCutOff,]
|
||||
|
||||
# ordering is important!
|
||||
static_cols_end = c(ppi2_dist_dn, static_cols_end_common)
|
||||
cat("\nend colnames for gene:", static_cols_end)
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
colnames_to_extract = c(common_colnames ,"mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome" , naDist_colname)
|
||||
display_colnames = c(display_common_colnames , "mcsm_na_affinity" , mcsm_na_dn , "mcsm_na_outcome" , na_dist_dn)
|
||||
comb_df_sl = df[, colnames_to_extract]
|
||||
|
||||
# Rename cols: display names
|
||||
colnames(comb_df_sl) = display_colnames
|
||||
# Affinity filtered data: mcsm-na --> naDist_colname
|
||||
comb_df_sl_na = comb_df_sl[comb_df_sl[[na_dist_dn]]<DistCutOff,]
|
||||
|
||||
# ordering is important!
|
||||
static_cols_end = c(na_dist_dn, static_cols_end_common)
|
||||
cat("\nend colnames for gene:", static_cols_end)
|
||||
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_both){
|
||||
colnames_to_extract = c(
|
||||
common_colnames,
|
||||
"mcsm_ppi2_affinity" ,
|
||||
"mcsm_ppi2_scaled" ,
|
||||
"mcsm_ppi2_outcome" ,
|
||||
ppi2Dist_colname,
|
||||
"mcsm_na_affinity" ,
|
||||
"mcsm_na_scaled" ,
|
||||
"mcsm_na_outcome" ,
|
||||
naDist_colname
|
||||
)
|
||||
display_colnames = c(
|
||||
display_common_colnames,
|
||||
"mcsm_ppi2_affinity",
|
||||
mcsm_ppi2_dn,
|
||||
"mcsm_ppi2_outcome",
|
||||
ppi2_dist_dn,
|
||||
"mcsm_na_affinity",
|
||||
mcsm_na_dn,
|
||||
"mcsm_na_outcome",
|
||||
na_dist_dn
|
||||
)
|
||||
comb_df_sl = df[, colnames_to_extract]
|
||||
colnames(comb_df_sl) = display_colnames
|
||||
comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2_dist_dn]]<DistCutOff,]
|
||||
comb_df_sl_na = comb_df_sl[comb_df_sl[[na_dist_dn]]<DistCutOff,]
|
||||
static_cols_end = c(ppi2_dist_dn, na_dist_dn, static_cols_end_common)
|
||||
|
||||
}
|
||||
|
||||
|
||||
# Affinity filtered data: mcsm-lig: COMMON for all genes, mcsm-lig --> LigDist_colname
|
||||
comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dist_dn]]<DistCutOff,]
|
||||
|
||||
}
|
||||
|
||||
#======================
|
||||
# Selecting dfs
|
||||
# with appropriate cols
|
||||
#=======================
|
||||
static_cols_start = c(snp_colname
|
||||
, aa_pos_colname
|
||||
, mut_colname
|
||||
, mut_info_label_colname)
|
||||
|
||||
# static_cols_end
|
||||
cat("\nEnd colnames for gene:", static_cols_end)
|
||||
|
||||
#########################################################################
|
||||
#==============
|
||||
# Distance and genomics
|
||||
#==============
|
||||
# WF data: dist + genomics
|
||||
cols_to_select_dist_gen = c(static_cols_start, c("duet_outcome", duet_dn), static_cols_end)
|
||||
wf_dist_gen = comb_df_sl[, cols_to_select_dist_gen]; head(wf_dist_gen)
|
||||
|
||||
#pivot_cols_ps = cols_to_select_ps[1:5]; pivot_cols_ps
|
||||
pivot_cols_dist_gen = cols_to_select_dist_gen[1: (length(static_cols_start) + 1)]; pivot_cols_dist_gen
|
||||
expected_rows_lf = nrow(wf_dist_gen) * (length(wf_dist_gen) - length(pivot_cols_dist_gen))
|
||||
expected_rows_lf
|
||||
|
||||
# LF dist and genomics
|
||||
lf_dist_gen = tidyr::gather(wf_dist_gen
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(duet_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_dist_gen) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for Distance and Genomics")
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for Distance and Genomics")
|
||||
quit()
|
||||
}
|
||||
|
||||
# DROP duet cols
|
||||
drop_cols = c(duet_dn, "duet_outcome"); drop_cols
|
||||
table(lf_dist_gen$param_type)
|
||||
lf_dist_gen = lf_dist_gen[!lf_dist_gen$param_type%in%drop_cols,]
|
||||
lf_dist_gen$param_type = factor(lf_dist_gen$param_type)
|
||||
table(lf_dist_gen$param_type)
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_dist_gen$outcome_colname = mut_info_colname
|
||||
lf_dist_gen$outcome = lf_dist_gen[[mut_info_label_colname]]
|
||||
head(lf_dist_gen)
|
||||
|
||||
wf_dist_gen = subset(wf_dist_gen, select = !(names(wf_dist_gen) %in% drop_cols))
|
||||
|
||||
colnames(wf_dist_gen)
|
||||
colnames(lf_dist_gen)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_dist_gen']] = wf_dist_gen
|
||||
wf_lf_dataL[['lf_dist_gen']] = lf_dist_gen
|
||||
##########################################################
|
||||
|
||||
#==============
|
||||
# DUET
|
||||
#==============
|
||||
# WF data: duet
|
||||
cols_to_select_duet = c(static_cols_start, c("duet_outcome", duet_dn), static_cols_end)
|
||||
wf_duet = comb_df_sl[, cols_to_select_duet]
|
||||
|
||||
#pivot_cols_ps = cols_to_select_ps[1:5]; pivot_cols_ps
|
||||
pivot_cols_duet = cols_to_select_duet[1: (length(static_cols_start) + 1)]; pivot_cols_duet
|
||||
expected_rows_lf = nrow(wf_duet) * (length(wf_duet) - length(pivot_cols_duet))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: duet
|
||||
lf_duet = tidyr::gather(wf_duet
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(duet_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_duet) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", duet_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
table(lf_duet$param_type)
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_duet$outcome_colname = "duet_outcome"
|
||||
lf_duet$outcome = lf_duet$duet_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_duet = lf_duet[!lf_duet$param_type%in%c(static_cols_end),]
|
||||
lf_duet$param_type = factor(lf_duet$param_type)
|
||||
table(lf_duet$param_type); colnames(lf_duet)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_duet']] = wf_duet
|
||||
wf_lf_dataL[['lf_duet']] = lf_duet
|
||||
|
||||
############################################################################
|
||||
#==============
|
||||
# FoldX
|
||||
#==============
|
||||
# WF data: Foldx
|
||||
cols_to_select_foldx= c(static_cols_start, c("foldx_outcome", foldx_dn), static_cols_end)
|
||||
wf_foldx = comb_df_sl[, cols_to_select_foldx]
|
||||
|
||||
pivot_cols_foldx = cols_to_select_foldx[1: (length(static_cols_start) + 1)]; pivot_cols_foldx
|
||||
expected_rows_lf = nrow(wf_foldx) * (length(wf_foldx) - length(pivot_cols_foldx))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: Foldx
|
||||
lf_foldx = gather(wf_foldx
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(foldx_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_foldx) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", foldx_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW column
|
||||
lf_foldx$outcome_colname = "foldx_outcome"
|
||||
lf_foldx$outcome = lf_foldx$foldx_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_foldx = lf_foldx[!lf_foldx$param_type%in%c(static_cols_end),]
|
||||
lf_foldx$param_type = factor(lf_foldx$param_type)
|
||||
table(lf_foldx$param_type); colnames(lf_foldx)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_foldx']] = wf_foldx
|
||||
wf_lf_dataL[['lf_foldx']] = lf_foldx
|
||||
|
||||
############################################################################
|
||||
#==============
|
||||
# Deepddg
|
||||
#==============
|
||||
# WF data: deepddg
|
||||
cols_to_select_deepddg = c(static_cols_start, c("deepddg_outcome", deepddg_dn), static_cols_end)
|
||||
wf_deepddg = comb_df_sl[, cols_to_select_deepddg]
|
||||
|
||||
pivot_cols_deepddg = cols_to_select_deepddg[1: (length(static_cols_start) + 1)]; pivot_cols_deepddg
|
||||
expected_rows_lf = nrow(wf_deepddg) * (length(wf_deepddg) - length(pivot_cols_deepddg))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: Deepddg
|
||||
lf_deepddg = gather(wf_deepddg
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(deepddg_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_deepddg) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", deepddg_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_deepddg$outcome_colname = "deepddg_outcome"
|
||||
lf_deepddg$outcome = lf_deepddg$deepddg_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_deepddg = lf_deepddg[!lf_deepddg$param_type%in%c(static_cols_end),]
|
||||
lf_deepddg$param_type = factor(lf_deepddg$param_type)
|
||||
table(lf_deepddg$param_type); colnames(lf_deepddg)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_deepddg']] = wf_deepddg
|
||||
wf_lf_dataL[['lf_deepddg']] = lf_deepddg
|
||||
############################################################################
|
||||
#==============
|
||||
# Dynamut2: LF
|
||||
#==============
|
||||
# WF data: dynamut2
|
||||
cols_to_select_dynamut2 = c(static_cols_start, c("ddg_dynamut2_outcome", dynamut2_dn), static_cols_end)
|
||||
wf_dynamut2 = comb_df_sl[, cols_to_select_dynamut2]
|
||||
|
||||
pivot_cols_dynamut2 = cols_to_select_dynamut2[1: (length(static_cols_start) + 1)]; pivot_cols_dynamut2
|
||||
expected_rows_lf = nrow(wf_dynamut2) * (length(wf_dynamut2) - length(pivot_cols_dynamut2))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: dynamut2
|
||||
lf_dynamut2 = gather(wf_dynamut2
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(dynamut2_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_dynamut2) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", dynamut2_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_dynamut2$outcome_colname = "ddg_dynamut2_outcome"
|
||||
lf_dynamut2$outcome = lf_dynamut2$ddg_dynamut2_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_dynamut2 = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_cols_end),]
|
||||
lf_dynamut2$param_type = factor(lf_dynamut2$param_type)
|
||||
table(lf_dynamut2$param_type); colnames(lf_dynamut2)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_dynamut2']] = wf_dynamut2
|
||||
wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2
|
||||
|
||||
######################################################################################
|
||||
#==================
|
||||
# Consurf: LF
|
||||
#https://consurf.tau.ac.il/overview.php
|
||||
# consurf_score:
|
||||
# <0 (below average): slowly evolving i.e CONSERVED
|
||||
# >0 (above average): rapidly evolving, i.e VARIABLE
|
||||
#table(df$consurf_colour_rev)
|
||||
# TODO
|
||||
#1--> "most_variable", 2--> "", 3-->"", 4-->""
|
||||
#5-->"", 6-->"", 7-->"", 8-->"", 9-->"most_conserved"
|
||||
#====================
|
||||
# WF data: consurf
|
||||
cols_to_select_consurf = c(static_cols_start, c("consurf_outcome", consurf_dn), static_cols_end)
|
||||
wf_consurf = comb_df_sl[, cols_to_select_consurf]
|
||||
|
||||
pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start) + 1)]; pivot_cols_consurf
|
||||
expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf))
|
||||
expected_rows_lf
|
||||
|
||||
# when outcome didn't exist
|
||||
#cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end)
|
||||
#wf_consurf = comb_df_sl[, cols_to_select_consurf]
|
||||
#
|
||||
# pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start))]; pivot_cols_consurf
|
||||
# expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf))
|
||||
# expected_rows_lf
|
||||
|
||||
# LF data: consurf
|
||||
lf_consurf = gather(wf_consurf
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(consurf_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_consurf) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", consurf_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_consurf$outcome_colname = "consurf_outcome"
|
||||
lf_consurf$outcome = lf_consurf$consurf_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_consurf = lf_consurf[!lf_consurf$param_type%in%c(static_cols_end),]
|
||||
lf_consurf$param_type = factor(lf_consurf$param_type)
|
||||
table(lf_consurf$param_type); colnames(lf_consurf)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_consurf']] = wf_consurf
|
||||
wf_lf_dataL[['lf_consurf']] = lf_consurf
|
||||
###########################################################################
|
||||
#==============
|
||||
# SNAP2: LF
|
||||
#==============
|
||||
# WF data: snap2
|
||||
cols_to_select_snap2 = c(static_cols_start, c("snap2_outcome", snap2_dn), static_cols_end)
|
||||
wf_snap2 = comb_df_sl[, cols_to_select_snap2]
|
||||
|
||||
pivot_cols_snap2 = cols_to_select_snap2[1: (length(static_cols_start) + 1)]; pivot_cols_snap2
|
||||
expected_rows_lf = nrow(wf_snap2) * (length(wf_snap2) - length(pivot_cols_snap2))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: snap2
|
||||
lf_snap2 = gather(wf_snap2
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(snap2_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_snap2) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", snap2_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_snap2$outcome_colname = "snap2_outcome"
|
||||
lf_snap2$outcome = lf_snap2$snap2_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_snap2 = lf_snap2[!lf_snap2$param_type%in%c(static_cols_end),]
|
||||
lf_snap2$param_type = factor(lf_snap2$param_type)
|
||||
table(lf_snap2$param_type); colnames(lf_snap2)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_snap2']] = wf_snap2
|
||||
wf_lf_dataL[['lf_snap2']] = lf_snap2
|
||||
|
||||
#==============
|
||||
# Provean2: LF
|
||||
#==============
|
||||
# WF data: provean
|
||||
cols_to_select_provean = c(static_cols_start, c("provean_outcome", provean_dn), static_cols_end)
|
||||
wf_provean = comb_df_sl[, cols_to_select_provean]
|
||||
|
||||
pivot_cols_provean = cols_to_select_provean[1: (length(static_cols_start) + 1)]; pivot_cols_provean
|
||||
expected_rows_lf = nrow(wf_provean) * (length(wf_provean) - length(pivot_cols_provean))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: provean
|
||||
lf_provean = gather(wf_provean
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(provean_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_provean) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", provean_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_provean$outcome_colname = "provean_outcome"
|
||||
lf_provean$outcome = lf_provean$provean_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_provean = lf_provean[!lf_provean$param_type%in%c(static_cols_end),]
|
||||
lf_provean$param_type = factor(lf_provean$param_type)
|
||||
table(lf_provean$param_type); colnames(lf_provean)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_provean']] = wf_provean
|
||||
wf_lf_dataL[['lf_provean']] = lf_provean
|
||||
|
||||
|
||||
###########################################################################
|
||||
# AFFINITY cols
|
||||
###########################################################################
|
||||
#=========================
|
||||
# mCSM-lig:
|
||||
# data filtered by cut off
|
||||
#=========================
|
||||
#---------------------
|
||||
# mCSM-lig: WF and lF
|
||||
#----------------------
|
||||
# WF data: mcsm_lig
|
||||
cols_to_select_mcsm_lig = c(static_cols_start, c("ligand_outcome", mcsm_lig_dn), static_cols_end)
|
||||
wf_mcsm_lig = comb_df_sl_lig[, cols_to_select_mcsm_lig] # filtered df
|
||||
|
||||
pivot_cols_mcsm_lig = cols_to_select_mcsm_lig[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_lig
|
||||
expected_rows_lf = nrow(wf_mcsm_lig) * (length(wf_mcsm_lig) - length(pivot_cols_mcsm_lig))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: mcsm_lig
|
||||
lf_mcsm_lig = gather(wf_mcsm_lig
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(mcsm_lig_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_mcsm_lig) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", mcsm_lig_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for mcsm_lig")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_mcsm_lig$outcome_colname = "ligand_outcome"
|
||||
lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_mcsm_lig = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_cols_end),]
|
||||
lf_mcsm_lig$param_type = factor(lf_mcsm_lig$param_type)
|
||||
table(lf_mcsm_lig$param_type); colnames(lf_mcsm_lig)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig
|
||||
wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig
|
||||
|
||||
#=========================
|
||||
# mmCSM-lig2:
|
||||
# data filtered by cut off
|
||||
#=========================
|
||||
#---------------------
|
||||
# mmCSM-lig2: WF and lF
|
||||
#----------------------
|
||||
# WF data: mmcsm_lig2
|
||||
cols_to_select_mmcsm_lig2 = c(static_cols_start, c("mmcsm_lig_outcome", mmcsm_lig_dn2), static_cols_end)
|
||||
wf_mmcsm_lig2 = comb_df_sl_lig[, cols_to_select_mmcsm_lig2] # filtered df
|
||||
|
||||
pivot_cols_mmcsm_lig2 = cols_to_select_mmcsm_lig2[1: (length(static_cols_start) + 1)]; pivot_cols_mmcsm_lig2
|
||||
expected_rows_lf = nrow(wf_mmcsm_lig2) * (length(wf_mmcsm_lig2) - length(pivot_cols_mmcsm_lig2))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: mmcsm_lig2
|
||||
lf_mmcsm_lig2 = gather(wf_mmcsm_lig2
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(mmcsm_lig_dn2):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_mmcsm_lig2) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", mmcsm_lig_dn2)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for mmcsm_lig2")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_mmcsm_lig2$outcome_colname = "mmcsm_lig_outcome"
|
||||
lf_mmcsm_lig2$outcome = lf_mmcsm_lig2$mmcsm_lig_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_mmcsm_lig2 = lf_mmcsm_lig2[!lf_mmcsm_lig2$param_type%in%c(static_cols_end),]
|
||||
lf_mmcsm_lig2$param_type = factor(lf_mmcsm_lig2$param_type)
|
||||
table(lf_mmcsm_lig2$param_type); colnames(lf_mmcsm_lig2)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mmcsm_lig2']] = wf_mmcsm_lig2
|
||||
wf_lf_dataL[['lf_mmcsm_lig2']] = lf_mmcsm_lig2
|
||||
|
||||
#=========================
|
||||
# mcsm-ppi2 affinity
|
||||
# data filtered by cut off
|
||||
#========================
|
||||
if (tolower(gene)%in%geneL_ppi2 || tolower(gene)%in%geneL_both){
|
||||
#-----------------
|
||||
# mCSM-PPI2: WF and lF
|
||||
#-----------------
|
||||
# WF data: mcsm-ppi2
|
||||
cols_to_select_mcsm_ppi2 = c(static_cols_start, c("mcsm_ppi2_outcome", mcsm_ppi2_dn), static_cols_end)
|
||||
#wf_mcsm_ppi2 = comb_df_sl[, cols_to_select_mcsm_ppi2]
|
||||
wf_mcsm_ppi2 = comb_df_sl_ppi2[, cols_to_select_mcsm_ppi2]
|
||||
|
||||
pivot_cols_mcsm_ppi2 = cols_to_select_mcsm_ppi2[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_ppi2
|
||||
expected_rows_lf = nrow(wf_mcsm_ppi2) * (length(wf_mcsm_ppi2) - length(pivot_cols_mcsm_ppi2))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: mcsm-ppi2
|
||||
lf_mcsm_ppi2 = gather(wf_mcsm_ppi2
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(mcsm_ppi2_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_mcsm_ppi2) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", mcsm_ppi2_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_mcsm_ppi2$outcome_colname = "mcsm_ppi2_outcome"
|
||||
lf_mcsm_ppi2$outcome = lf_mcsm_ppi2$mcsm_ppi2_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_mcsm_ppi2 = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_cols_end),]
|
||||
lf_mcsm_ppi2$param_type = factor(lf_mcsm_ppi2$param_type)
|
||||
table(lf_mcsm_ppi2$param_type); colnames(lf_mcsm_ppi2)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mcsm_ppi2']] = wf_mcsm_ppi2
|
||||
wf_lf_dataL[['lf_mcsm_ppi2']] = lf_mcsm_ppi2
|
||||
|
||||
}
|
||||
|
||||
#====================
|
||||
# mcsm-NA affinity
|
||||
# data filtered by cut off
|
||||
#====================
|
||||
if (tolower(gene)%in%geneL_na|| tolower(gene)%in%geneL_both){
|
||||
#---------------
|
||||
# mCSM-NA: WF and lF
|
||||
#-----------------
|
||||
# WF data: mcsm-na
|
||||
cols_to_select_mcsm_na = c(static_cols_start, c("mcsm_na_outcome", mcsm_na_dn), static_cols_end)
|
||||
#wf_mcsm_na = comb_df_sl[, cols_to_select_mcsm_na]
|
||||
wf_mcsm_na = comb_df_sl_na[, cols_to_select_mcsm_na]
|
||||
|
||||
pivot_cols_mcsm_na = cols_to_select_mcsm_na[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_na
|
||||
expected_rows_lf = nrow(wf_mcsm_na) * (length(wf_mcsm_na) - length(pivot_cols_mcsm_na))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: mcsm-na
|
||||
lf_mcsm_na = gather(wf_mcsm_na
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(mcsm_na_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_mcsm_na) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", mcsm_na_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_mcsm_na$outcome_colname = "mcsm_na_outcome"
|
||||
lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_mcsm_na = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_cols_end),]
|
||||
lf_mcsm_na$param_type = factor(lf_mcsm_na$param_type)
|
||||
table(lf_mcsm_na$param_type); colnames(lf_mcsm_na)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na
|
||||
wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na
|
||||
|
||||
}
|
||||
|
||||
return(wf_lf_dataL)
|
||||
}
|
||||
############################################################################
|
|
@ -1,142 +0,0 @@
|
|||
source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
|
||||
#####################################################################################
|
||||
# DataED_PFM():
|
||||
# Input:
|
||||
# Data:
|
||||
# msaSeq_mut: MSA chr vector for muts
|
||||
# msaSeq_wt [Optional]: MSA chr vector for wt
|
||||
|
||||
# Others params:
|
||||
# ED_score = c("log", "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
|
||||
# bg_prob: background probability, default is equal i.e NULL
|
||||
|
||||
# Returns data for ED plot from MSA
|
||||
# Mut matrix:
|
||||
# PFM matrix
|
||||
# PFM matrix scaled
|
||||
# ED matrix
|
||||
# Wt matrix [optional]
|
||||
# For my case, I always use it as it helps see what is at the wild-type already!
|
||||
|
||||
# TODO: SHINY
|
||||
# drop down: ED score type (in the actual plot function!)
|
||||
# drop down/enter field : bg probability (in the actual plot function!)
|
||||
# Make it hover over position and then get the corresponding data table!
|
||||
#####################################################################################
|
||||
|
||||
DataED_PFM <- function(msaSeq_mut
                       , msaSeq_wt
                       , ED_score = c("log")
                       , bg_prob = NULL)
{
  # Build the data needed for an enrichment/depletion (ED) logo plot from an MSA.
  #
  # Args:
  #   msaSeq_mut: MSA of mutant sequences, passed to Biostrings::consensusMatrix().
  #   msaSeq_wt : [optional] MSA of wild-type sequences, processed the same way.
  #   ED_score  : scoring scheme forwarded to get_logo_heights() as `score`.
  #   bg_prob   : background probabilities forwarded to get_logo_heights() as `bg`.
  #
  # Returns:
  #   A named list with "pfm_mutM", "pfm_mut_scaledM" and "combED_mutM"; when
  #   msaSeq_wt is supplied, "pfm_wtM", "pfm_wt_scaledM" and "combED_wtM" are
  #   appended in that order.

  # Arguments handed to dash() alongside the count matrix. These are the
  # values the original code assembled via modifyList() with no overrides.
  dash_control <- list(concentration = NULL, mode = NULL,
                       optmethod = "mixEM", sample_weights = NULL, verbose = FALSE,
                       bf = TRUE, pi_init = NULL, squarem_control = list(),
                       dash_control = list(), reportcov = FALSE)

  # One pass of the PFM -> scaled PFM -> ED matrix pipeline for a single MSA.
  # Returns an unnamed list: (pfm, scaled pfm, combined ED matrix).
  build_ed_matrices <- function(msaSeq, announce_logolas = FALSE) {

    pfmM <- Biostrings::consensusMatrix(msaSeq)
    n_pos <- ncol(pfmM)
    # seq_len() rather than 1:n so that a zero-column matrix does not yield c(1, 0)
    colnames(pfmM) <- seq_len(n_pos)

    pfm_scaledM <- do.call(dash, append(list(comp_data = pfmM),
                                        dash_control))$posmean

    logo_h <- get_logo_heights(pfm_scaledM
                               , bg = bg_prob
                               , score = ED_score)

    if (announce_logolas) {
      cat("\nGetting logo_heights from Logolas package...")
    }

    # Scale each column of the normalised matrices by its per-position weight;
    # the double transpose makes the vector recycle along columns.
    pos_M  <- logo_h[['table_mat_pos_norm']]
    pos_S  <- logo_h[['pos_ic']]
    pos_ED <- t(pos_S * t(pos_M))

    neg_M  <- logo_h[['table_mat_neg_norm']] * (-1)
    neg_S  <- logo_h[['neg_ic']]
    neg_ED <- t(neg_S * t(neg_M))

    # Both weight vectors must have one entry per alignment column. The
    # original test only checked that length(pos_S) was non-zero.
    if (length(pos_S) == n_pos && length(neg_S) == n_pos) {
      cat("\nPASS: pfm calculated successfully including scaled matrix"
          , "\nDim of pfm matrix:", dim(pfmM)[1], dim(pfmM)[2])
    } else {
      warning("Length mismatch between logo heights and pfm columns:"
              , " pos_ic = ", length(pos_S)
              , ", neg_ic = ", length(neg_S)
              , ", pfm columns = ", n_pos
              , call. = FALSE)
    }

    list(pfmM, pfm_scaledM, pos_ED + neg_ED)
  }

  ############################################
  # Data processing for logo plot for SAVS
  ###########################################

  #--------------------------
  # Getting PFM: mutant MSA
  #--------------------------
  cat("\nLength of MSA", length(msaSeq_mut))

  EDmutDataL <- build_ed_matrices(msaSeq_mut, announce_logolas = TRUE)
  names(EDmutDataL) <- c("pfm_mutM", "pfm_mut_scaledM", "combED_mutM")

  if (missing(msaSeq_wt)) {
    cat("\nReturning output for Mut data only"
        , "\nLength of Mut data:", length(EDmutDataL))

    return(EDmutDataL)
  }

  #---------------------
  # Getting PFM: WT
  #---------------------
  cat("\nLength of WT seq", length(msaSeq_wt))

  EDwtDataL <- build_ed_matrices(msaSeq_wt)
  names(EDwtDataL) <- c("pfm_wtM", "pfm_wt_scaledM", "combED_wtM")

  # Combine two lists
  EDallDataL <- append(EDmutDataL, EDwtDataL)

  cat("\nReturning output for Mut + WT"
      , "\nLength of all data:", length(EDallDataL))

  return(EDallDataL)
}
|
||||
|
|
@ -1,77 +0,0 @@
|
|||
# takes a dataframe and returns it with two extra columns: the rounded ligand distance (lig_distR) and its colour (ligD_colours)
|
||||
library('viridis')
|
||||
|
||||
# Annotate a data frame with one colour per (rounded) ligand distance.
#
# Arguments:
#   plot_df          : data frame containing a numeric ligand-distance column
#   xvar_colname     : name of the position column; accepted for interface
#                      compatibility, it is not used by this function
#   lig_dist_colname : name of the numeric ligand-distance column
#   debug            : if TRUE, progress messages are written with cat()
#
# Returns:
#   plot_df merged with a colour key, i.e. with two extra columns:
#     lig_distR    : ligand distance rounded to the nearest integer
#     ligD_colours : reversed magma() colour assigned to that distance
#   NOTE: merge() sorts the result on lig_distR and drops rows whose ligand
#   distance is NA, because NA has no entry in the colour key.
generate_distance_colour_map = function(plot_df,
                                        xvar_colname = "position",
                                        lig_dist_colname = "ligand_distance",
                                        debug = TRUE
)
{
  if (!lig_dist_colname %in% names(plot_df)) {
    stop("Column '", lig_dist_colname, "' not found in plot_df")
  }

  if (debug) {
    cat("\nAnnotating x-axis ~", lig_dist_colname, "requested...")
  }

  # Round once and reuse: this is both the merge key and the set of values
  # that need a colour.
  lig_distR = round(plot_df[[lig_dist_colname]], digits = 0)
  plot_df[['lig_distR']] = lig_distR

  # sort() drops NA, so the key only contains real distances
  ligD_valsR = sort(unique(lig_distR))
  n_colours = length(ligD_valsR)
  lig_cols = magma(n_colours, direction = -1)

  if (debug) {
    cat("\nStarting: mapping b/w"
        , lig_dist_colname
        , "and colours")
  }

  # one row per distinct rounded distance
  ligDcolKey <- data.frame(ligD_colours = lig_cols
                           , lig_distR = ligD_valsR)

  if (debug) {
    cat("\nSuccessful: Mapping b/w", lig_dist_colname, "and colours")
  }

  # merge colour key with plot_df
  plot_df = merge(plot_df, ligDcolKey, by = 'lig_distR')

  return(plot_df)
}
|
||||
|
||||
# Build the legend for the ligand-distance "heat bar".
#
# A throw-away tile plot is drawn with a three-colour (reversed magma)
# diverging fill scale centred on the mean rounded ligand distance, and only
# its legend is returned.
#
# Arguments:
#   plot_df          : data frame with the position and ligand-distance columns
#   xvar_colname     : name of the column used (as a factor) on the x-axis
#   lig_dist_colname : name of the numeric ligand-distance column
#   legend_title     : title shown above the legend
#
# Returns:
#   the value of get_legend() for the temporary plot
generate_distance_legend = function(plot_df,
                                    xvar_colname = 'position',
                                    lig_dist_colname = "ligand_distance",
                                    legend_title = "Ligand\nDistance"
)
{
  # scale limits / midpoint are taken from the rounded distances
  lig_distR = round(plot_df[[lig_dist_colname]])
  lig_min = min(lig_distR, na.rm = TRUE)
  lig_max = max(lig_distR, na.rm = TRUE)
  lig_mean = round(mean(lig_distR, na.rm = TRUE))

  # five evenly spaced breaks, labelled with whole numbers
  legend_breaks = seq(lig_min, lig_max, length.out = 5)
  legend_labels = round(legend_breaks, digits = 0)

  # low / mid / high colours of the gradient
  heat_cols = magma(3, direction = -1)

  legend_plot = ggplot(plot_df, aes(x = factor(.data[[xvar_colname]]), y = 0)) +
    geom_tile(aes(fill = .data[[lig_dist_colname]])
              , colour = "white") +
    scale_fill_gradient2(midpoint = lig_mean
                         , low = heat_cols[1]
                         , mid = heat_cols[2]
                         , high = heat_cols[3]
                         , breaks = legend_breaks
                         , limits = c(lig_min, lig_max)
                         , labels = legend_labels
                         , name = legend_title)

  get_legend(legend_plot)
}
|
|
@ -1,143 +0,0 @@
|
|||
#############################
|
||||
# Barplots: ggplot
|
||||
# stats +/-
|
||||
# violin +/-
|
||||
# barplot +/
|
||||
# beeswarm
|
||||
#############################
|
||||
|
||||
# Faceted violin plot with quasirandom (beeswarm-like) dots, an optional
# boxplot overlay and optional pairwise statistics.
#
# Arguments:
#   lf_df            : long-format data frame (default: the global lf_duet)
#   p_title          : plot title
#   colour_categ     : name of the column used to colour the dots
#   x_grp, y_var     : names of the x (group) and y (value) columns
#   facet_var        : name of the column to facet on
#   n_facet_row      : number of facet rows
#   y_scales         : `scales` argument of facet_wrap()
#   colour_bp_strip  : facet strip background colour
#   dot_size, dot_transparency : size and alpha of the dots
#   violin_quantiles : quantiles drawn on the violins (can be NULL)
#   my_ats, my_als, my_fls, my_pts : text sizes for axis text/titles, legend
#                      title, facet labels and plot title respectively
#   make_boxplot     : if TRUE, overlay a white boxplot on each violin
#   bp_width         : "auto" (0.5 / number of x groups) or a number; only the
#                      first element is used, so the default choice vector
#                      c("auto", 0.5) means "auto"
#   add_stats        : if TRUE, add stat_compare_means() for stat_grp_comp
#   stat_grp_comp    : the two x groups to compare
#   stat_method, my_paired : passed to stat_compare_means()
#   stat_label       : label type; a single value is used as given, and for a
#                      vector the second element is used (so the default
#                      yields "p.signif")
#
# Uses the global `consurf_colours` for the manual colour scale.
#
# Returns: a ggplot object
lf_bp <- function(lf_df = lf_duet
                  , p_title = ""
                  , colour_categ = "outcome"
                  , x_grp = "mutation_info_labels"
                  , y_var = "param_value"
                  , facet_var = "param_type"
                  , n_facet_row = 1
                  , y_scales = "free_y"
                  , colour_bp_strip = "khaki2"
                  , dot_size = 3
                  , dot_transparency = 0.3
                  , violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL
                  , my_ats = 11 # axis text size
                  , my_als = 10 # axis label size
                  , my_fls = 10 # facet label size
                  , my_pts = 11 # plot title size
                  , make_boxplot = FALSE
                  , bp_width = c("auto", 0.5)
                  , add_stats = TRUE
                  , stat_grp_comp = c("R", "S")
                  , stat_method = "wilcox.test"
                  , my_paired = FALSE
                  , stat_label = c("p.format", "p.signif")
) {

  fwv = as.formula(paste0("~", facet_var))

  OutPlot <- ggplot(lf_df, aes(x = .data[[x_grp]], y = .data[[y_var]])) +
    facet_wrap(fwv
               , nrow = n_facet_row
               , scales = y_scales) +
    geom_violin(trim = TRUE
                , scale = "width"
                , draw_quantiles = violin_quantiles)

  if (make_boxplot){
    # bp_width may arrive as the default choice vector; testing a length-2
    # vector in if() is an error on recent R, so reduce it to one value first
    bp_width = bp_width[1]

    if (identical(as.character(bp_width), "auto")){
      bp_width = 0.5/length(unique(lf_df[[x_grp]]))
      cat("\nAutomatically calculated boxplot width, using bp_width:\n", bp_width, "\n")
    }else{
      bp_width = suppressWarnings(as.numeric(bp_width))
      if (is.na(bp_width)){
        stop("bp_width must be \"auto\" or a number")
      }
      cat("\nBoxplot width value provided, using:", bp_width, "\n")
    }

    OutPlot = OutPlot + geom_boxplot(fill = "white"
                                     , outlier.colour = NA
                                     , width = bp_width)
  }

  # ggbeeswarm dots (better than geom_point), coloured by colour_categ
  OutPlot = OutPlot +
    geom_quasirandom(mapping = aes(colour = factor(.data[[colour_categ]]))
                     , size = dot_size
                     , alpha = dot_transparency
                     , show.legend = FALSE
                     , cex = 0.8) +
    ggplot2::scale_color_manual(values = consurf_colours)

  # Add formatting to graph
  OutPlot = OutPlot + theme(axis.text.x = element_text(size = my_ats)
                            , axis.text.y = element_text(size = my_ats
                                                         , angle = 0
                                                         , hjust = 1
                                                         , vjust = 0)
                            , axis.title.x = element_text(size = my_ats)
                            , axis.title.y = element_text(size = my_ats)
                            , plot.title = element_text(size = my_pts
                                                        , hjust = 0.5
                                                        , colour = "black"
                                                        , face = "bold")
                            , strip.background = element_rect(fill = colour_bp_strip)
                            , strip.text.x = element_text(size = my_fls
                                                          , colour = "black")
                            , legend.title = element_text(color = "black"
                                                          , size = my_als)
                            , legend.text = element_text(size = my_ats)
                            , legend.direction = "vertical") +
    labs(title = p_title
         , x = ""
         , y = "")

  if (add_stats){
    my_comparisonsL <- list( stat_grp_comp )

    # keep the historical default ("p.signif", the 2nd element) while letting
    # callers pass a single label type
    stat_label_use = if (length(stat_label) > 1) stat_label[2] else stat_label

    OutPlot = OutPlot + stat_compare_means(comparisons = my_comparisonsL
                                           , method = stat_method
                                           , paired = my_paired
                                           , label = stat_label_use)
  }

  return(OutPlot)
}
|
|
@ -1,133 +0,0 @@
|
|||
#############################
|
||||
# Barplots: ggplot
|
||||
# stats +/-
|
||||
# violin +/-
|
||||
# barplot +/
|
||||
# beeswarm
|
||||
#############################
|
||||
|
||||
# Faceted violin plot with quasirandom dots and optional pairwise statistics.
#
# Arguments:
#   lf_df            : long-format data frame
#   p_title          : plot title
#   colour_categ     : name of the column used to colour the dots
#   dot_colours      : dot colours used when colour_categ has at most two levels
#   x_grp, y_var     : names of the x (group) and y (value) columns
#   facet_var        : name of the column to facet on
#   n_facet_row      : number of facet rows
#   y_scales         : `scales` argument of facet_wrap()
#   colour_bp_strip  : facet strip background colour
#   dot_size, dot_transparency : size and alpha of the dots
#   violin_quantiles : quantiles drawn on the violins (can be NULL)
#   line_thickness   : violin outline thickness
#   my_ats, my_als, my_fls, my_pts : text sizes for axis text/titles, legend
#                      title, facet labels and plot title respectively
#   make_boxplot, bp_width : accepted but not used here (no boxplot layer is
#                      drawn by this function)
#   add_stats        : if TRUE, add stat_compare_means() for stat_grp_comp
#   stat_grp_comp    : the two x groups to compare
#   stat_method, my_paired : passed to stat_compare_means()
#   stat_label       : label type; a single value is used as given, and for a
#                      vector the second element is used (so the default
#                      yields "p.signif")
#   monochrome       : if TRUE, draw every dot in black
#
# Uses the global `consurf_bp_colours` when colour_categ has more than two
# levels.
#
# Returns: a ggplot object
lf_bp2 <- function(lf_df #lf_duet
                   , p_title = ""
                   , colour_categ = "mutation_info_labels"
                   , dot_colours = c("red", "blue")
                   , x_grp = "mutation_info_labels"
                   , y_var = "param_value"
                   , facet_var = "param_type"
                   , n_facet_row = 1
                   , y_scales = "free_y"
                   , colour_bp_strip = "khaki2"
                   , dot_size = 3
                   , dot_transparency = 0.1 #0.3: lighter
                   , violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL
                   , line_thickness = 0.65
                   , my_ats = 22 # axis text size
                   , my_als = 20 # axis label size
                   , my_fls = 20 # facet label size
                   , my_pts = 22 # plot title size
                   , make_boxplot = FALSE
                   , bp_width = "auto"
                   , add_stats = TRUE
                   , stat_grp_comp = c("R", "S")
                   , stat_method = "wilcox.test"
                   , my_paired = FALSE
                   , stat_label = c("p.format", "p.signif")
                   , monochrome = FALSE
) {

  fwv = as.formula(paste0("~", facet_var))

  colour_col = lf_df[[colour_categ]]
  n_levels = length(levels(colour_col)) # 0 when the column is not a factor

  if (monochrome) {
    # one black entry per category; also count distinct values so that a
    # non-factor column still gets enough (valid) colours
    n_black = max(n_levels, length(unique(colour_col)))
    lf_bp_colours = rep(rgb(0,0,0), n_black)
  } else if (n_levels > 2) {
    # Only use the longer colour palette if there are many outcomes
    lf_bp_colours = consurf_bp_colours
  } else {
    lf_bp_colours = dot_colours
  }

  # y-limits follow the plotted variable (y_var)
  y_vals = lf_df[[y_var]]
  ymax_abs = max(abs(y_vals))

  OutPlot = ggplot(lf_df, aes(x = .data[[x_grp]], y = .data[[y_var]])) +
    # extend the y axis so there's always room for the stats
    ylim(min(y_vals), max(y_vals)+ymax_abs/4) +

    facet_wrap(fwv
               , nrow = n_facet_row
               , scales = y_scales) +

    ggplot2::scale_color_manual(values = lf_bp_colours) +

    geom_violin(trim = TRUE
                , size = line_thickness
                , scale = "width"
                , colour = "black"
                , draw_quantiles = violin_quantiles) +

    # Add formatting to graph
    theme(axis.text.x = element_text(size = my_ats)
          , axis.text.y = element_text(size = my_ats
                                       , angle = 0
                                       , hjust = 1
                                       , vjust = 0)
          , axis.title.x = element_text(size = my_ats)
          , axis.title.y = element_text(size = my_ats)
          , plot.title = element_text(size = my_pts
                                      , hjust = 0.5
                                      , colour = "black"
                                      , face = "bold")
          , strip.background = element_rect(fill = colour_bp_strip)
          , strip.text.x = element_text(size = my_fls
                                        , colour = "black")
          , legend.title = element_text(color = "black"
                                        , size = my_als)
          , legend.text = element_text(size = my_ats)
          , legend.direction = "vertical"
    ) +

    labs(title = p_title
         , x = ""
         , y = "")

  if (add_stats) {
    my_comparisonsL <- list( stat_grp_comp )

    # keep the historical default ("p.signif", the 2nd element) while letting
    # callers pass a single label type
    stat_label_use = if (length(stat_label) > 1) stat_label[2] else stat_label

    OutPlot = OutPlot + stat_compare_means(comparisons = my_comparisonsL
                                           , method = stat_method
                                           , paired = my_paired
                                           , label = stat_label_use
                                           , size = 5)
  }

  # dots go on top of the violins (and of the stats brackets, when drawn)
  OutPlot = OutPlot +
    geom_quasirandom(mapping = aes(colour = factor(.data[[colour_categ]]))
                     , size = dot_size
                     , alpha = dot_transparency
                     , show.legend = FALSE
                     , cex = 0.8)

  return(OutPlot)
}
|
||||
|
||||
#lf_bp2(lf_consurf)
|
|
@ -1,23 +0,0 @@
|
|||
library(ggpubr)
|
||||
###################################################################
|
||||
|
||||
# Run compare_means() on long-format data, one comparison set per value of
# `lf_col_statvars`.
#
# Arguments:
#   lf_data         : long-format data frame
#   lf_stat_value   : name of the numeric column being compared (formula LHS)
#   lf_stat_group   : name of the grouping column (formula RHS); it is
#                     converted to a factor before testing
#   lf_col_statvars : name of the column passed as `group.by`
#   my_paired       : passed as `paired`
#   stat_adj        : passed as `p.adjust.method`
#
# Returns: the result of compare_means()
lf_unpaired_stats <- function(lf_data
                              , lf_stat_value = "param_value"
                              , lf_stat_group = "mutation_info_labels"
                              , lf_col_statvars = "param_type"
                              , my_paired = FALSE
                              , stat_adj = "none"){

  # the grouping variable has to be a factor
  group_values = lf_data[[lf_stat_group]]
  lf_data[[lf_stat_group]] = as.factor(group_values)

  # value ~ group
  formula_text = paste(lf_stat_value, lf_stat_group, sep = "~")
  stat_formula = as.formula(formula_text)

  stats_result = compare_means(stat_formula
                               , data = lf_data
                               , group.by = lf_col_statvars
                               , paired = my_paired
                               , p.adjust.method = stat_adj)

  return(stats_result)
}
|
|
@ -1,77 +0,0 @@
|
|||
###############################
|
||||
# TASK: function to plot lineage
|
||||
# dist plots with or without facet
|
||||
# think about color palette
|
||||
# for stability
|
||||
##############################
|
||||
|
||||
#n_colours = length(unique(lin_dist_plot$duet_scaled))
|
||||
#my_palette <- colorRampPalette(c(mcsm_red2, mcsm_red1, mcsm_mid, mcsm_blue1, mcsm_blue2))(n = n_colours+1)
|
||||
|
||||
|
||||
# Ridge-line density plot of a numeric variable per lineage, optionally
# faceted.
#
# Arguments:
#   plotdf           : data frame to plot
#   x_axis           : name of the numeric column on the x-axis
#   y_axis           : name of the lineage column on the y-axis
#   x_lab            : x-axis title
#   all_lineages     : if FALSE, keep only rows whose y_axis value is in
#                      use_lineages
#   use_lineages     : lineages to keep when all_lineages is FALSE
#   with_facet       : if TRUE, facet the plot by facet_wrap_var
#   facet_wrap_var   : name of the column to facet on (required when
#                      with_facet is TRUE)
#   fill_categ       : name of the column used for the ridge fill
#   fill_categ_cols  : fill colours
#   label_categories : legend labels for the fill categories
#   my_ats           : axis text and axis title size
#   my_als           : axis label size (NOTE: not used by this function)
#   my_leg_ts, my_leg_title, my_strip_ts : legend text, legend title and
#                      facet strip text sizes
#   leg_pos          : legend position for the un-faceted plot
#   leg_pos_wf       : legend position when faceting; if a character vector
#                      of choices is given, the first element is used
#   leg_dir_wf       : legend direction when faceting; the first element is
#                      used
#   leg_label        : legend title
#   alpha            : ridge transparency
#
# Returns: a ggplot object
lineage_distP <- function(plotdf
                          , x_axis = "duet_scaled"
                          , y_axis = "lineage_labels"
                          , x_lab = "DUET"
                          , all_lineages = F
                          , use_lineages = c("L1", "L2", "L3", "L4")
                          , with_facet = F
                          , facet_wrap_var = ""
                          , fill_categ = "mutation_info_labels"
                          , fill_categ_cols = c("#E69F00", "#999999")
                          , label_categories = c("R", "S")
                          , my_ats = 15 # 15 axis text size
                          , my_als = 20 # 20 axis label size
                          , my_leg_ts = 16 #16
                          , my_leg_title = 16 #16
                          , my_strip_ts = 20 #20
                          , leg_pos = c(0.8, 0.9)
                          , leg_pos_wf = c("top", "left", "bottom", "right")
                          , leg_dir_wf = c("horizontal", "vertical")
                          , leg_label = "Mutation Group"
                          , alpha = 0.7)

{

  if(!all_lineages){
    plotdf = plotdf[plotdf[[y_axis]]%in%use_lineages,]
  }

  ridge_plot = ggplot(plotdf, aes(x = .data[[x_axis]]
                                  , y = .data[[y_axis]])) +

    geom_density_ridges(aes(fill = .data[[fill_categ]])
                        , scale = 3
                        , size = 0.3
                        , alpha = alpha) +
    scale_x_continuous(expand = c(0.01, 0.01)) +
    scale_fill_manual(values = fill_categ_cols
                      , labels = label_categories) +
    theme(axis.text.x = element_text(size = my_ats
                                     , angle = 90
                                     , hjust = 1
                                     , vjust = 0.4)
          , axis.text.y = element_text(size = my_ats)
          , axis.title.x = element_text(size = my_ats)
          , axis.title.y = element_blank()
          , strip.text = element_text(size = my_strip_ts)
          , legend.text = element_text(size = my_leg_ts)
          , legend.key.size = unit(my_leg_ts, 'pt')
          , legend.title = element_text(size = my_leg_title)
          , legend.position = leg_pos) +
    labs(x = x_lab
         , fill = leg_label)

  if (with_facet){

    if (!nzchar(facet_wrap_var)){
      stop("with_facet = TRUE requires facet_wrap_var to name a column")
    }

    # the defaults are vectors of allowed choices: use the first one
    if (is.character(leg_pos_wf)){
      leg_pos_wf = leg_pos_wf[1]
    }
    leg_dir_wf = leg_dir_wf[1]

    fwv = as.formula(paste0("~", facet_wrap_var))

    # The facet and the theme override must each be added to the plot itself;
    # adding them to one another first is an error.
    ridge_plot = ridge_plot +
      facet_wrap(fwv) +
      theme(legend.position = leg_pos_wf
            , legend.direction = leg_dir_wf)
  }

  return(ridge_plot)
}
|
|
@ -1,209 +0,0 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: Script to format data for lineage plots
|
||||
# Called by get_plotting_plot_dfs.R
|
||||
|
||||
# lineage_plot_data()
|
||||
# INPUT:
|
||||
# plot_df : merged_df2 (data with 1:many relationship b/w snp and lineage)
|
||||
# NOTE*: DO NOT use merged_df3 as it loses the 1:many relationship)
|
||||
# lineage_column_name : Column name that contains lineage info
|
||||
# remove_empty_lineage : where lineage info is missing, whether to omit those or not
|
||||
# lineage_label_col_name: Column containing pre-formatted lineage labels.
|
||||
# For my case, this is called "lineage_labels"
|
||||
# This column has short labels like L1, L2, L3, etc.
|
||||
# if this is left empty, then the lineage_column_name will be used
|
||||
# id_colname : sample-id column. Used to calculate SAV count
|
||||
# snp_colname : SAV column. Used to calculate SAV diversity
|
||||
|
||||
# RETURNS: List
|
||||
# WF and LF data for lineage-wise snp count and snp diversity
|
||||
|
||||
# TO DO: SHINY
|
||||
#1) remove empty positions
|
||||
#2) select lineages to display?
|
||||
#########################################################
|
||||
|
||||
# Build wide-format (WF) and long-format (LF) lineage summaries.
#
# Arguments:
#   plot_df                : data frame with one row per sample/SAV pair
#   lineage_column_name    : name of the column holding the lineage
#   remove_empty_lineage   : if TRUE, rows whose lineage is the empty string
#                            are removed before summarising
#   lineage_label_col_name : name of the column holding lineage labels; if "",
#                            lineage_column_name is used for labels as well
#   id_colname             : sample-id column, used to count unique samples
#   snp_colname            : SAV column, used to count unique SAVs
#
# Returns:
#   list with
#     lin_wf : one row per lineage label with columns sel_lineages,
#              num_snps_u (unique SAVs), total_samples (unique sample ids),
#              snp_diversity (num_snps_u / total_samples) and snp_diversity_f
#              (the same as a rounded percentage string)
#     lin_lf : lin_wf gathered over num_snps_u and total_samples into the
#              columns count_categ / p_count
lineage_plot_data <- function(plot_df
                              , lineage_column_name = "lineage"
                              , remove_empty_lineage = T
                              , lineage_label_col_name = "lineage_labels"
                              , id_colname = "id"
                              , snp_colname = "mutationinformation"){

  # Initialise output list
  lineage_dataL = list(
    lin_wf = data.frame()
    , lin_lf = data.frame())

  #------------------------
  # Check the columns we need
  #------------------------
  lin_labels = if (lineage_label_col_name == "") lineage_column_name else lineage_label_col_name

  needed_cols = unique(c(lineage_column_name, lin_labels, id_colname, snp_colname))
  absent_cols = setdiff(needed_cols, names(plot_df))
  if (length(absent_cols) > 0){
    stop("Column(s) not found in plot_df: ", paste(absent_cols, collapse = ", "))
  }

  #------------------------
  # Check lineage counts
  # Including missing
  #------------------------
  if (remove_empty_lineage){
    lineage_vals = plot_df[[lineage_column_name]]
    is_empty_lin = !is.na(lineage_vals) & lineage_vals == ""
    miss_ll = sum(is_empty_lin)

    cat("\nNo. of samples with missing lineage classification:", miss_ll)

    # only subset when there is something to remove
    if (miss_ll > 0){
      cat("\nRemoving these...")
      plot_df = plot_df[!is_empty_lin, , drop = FALSE]
    }
  }
  plot_df = droplevels(plot_df)

  #------------------------
  # Lineage labels column
  #------------------------
  if (lineage_label_col_name == ""){
    cat("\nLineage label column missing..."
        , "\nUsing the column:" , lineage_column_name, "as labels as well")
  }else{
    cat("\nLineage label column present"
        , "\nUsing it, column name:", lin_labels)
  }

  if ( !is.factor(plot_df[[lin_labels]]) ){
    plot_df[[lin_labels]] = as.factor(plot_df[[lin_labels]])
    cat("\nWARNING: Lineage label not a factor. Correcting.")
  }else{
    cat("\nLineage label column already factor")
  }

  # This is how lineage labels will appear
  cat("\nLineage labels will appear as below\n")
  print( table(plot_df[[lin_labels]]) )
  cat("\n")
  cat(paste0("Class of ", lin_labels, ": ", class(plot_df[[lin_labels]])) )
  cat("\n")
  print(paste0("No. of levels: ", nlevels(plot_df[[lin_labels]])) )

  #==========================================
  # WF data: lineages with
  # snp count
  # total_samples
  # snp diversity (perc)
  #==========================================
  cat("\nCreating WF Lineage data...")

  sel_lineages = levels(plot_df[[lin_labels]])
  lin_wf = data.frame(sel_lineages)

  # number of unique values of `colname` among the rows of one lineage;
  # which() skips rows whose label is NA
  count_unique_in_lineage = function(lin, colname){
    lin_rows = which(plot_df[[lin_labels]] == lin)
    length(unique(plot_df[[colname]][lin_rows]))
  }

  total_snps_u = vapply(sel_lineages, count_unique_in_lineage, integer(1)
                        , colname = snp_colname, USE.NAMES = FALSE)
  # subset the ids to the lineage first, then take unique
  total_samples = vapply(sel_lineages, count_unique_in_lineage, integer(1)
                         , colname = id_colname, USE.NAMES = FALSE)

  # Add these counts as columns; they must stay adjacent and in this order
  # because the gather() call below selects num_snps_u:total_samples
  lin_wf$num_snps_u = total_snps_u
  lin_wf$total_samples = total_samples

  #----------------------
  # Add SAV diversity
  #----------------------
  lin_wf$snp_diversity = lin_wf$num_snps_u/lin_wf$total_samples

  #----------------------
  # Add some formatting
  #----------------------
  # SAV diversity as a rounded percentage string
  lin_wf$snp_diversity_f = round( (lin_wf$snp_diversity * 100), digits = 0)
  lin_wf$snp_diversity_f = paste0(lin_wf$snp_diversity_f, "%")

  lineage_dataL[['lin_wf']] = lin_wf

  cat("\nCOMPLETED: Successfully created WF lineage data")

  #=================================
  # LF data: lineages with
  # snp count
  # total_samples
  # snp diversity (perc)
  #=================================
  cat("\nCreating LF Lineage data...")

  # columns that are kept as-is (not gathered)
  pivot_cols = c("sel_lineages", "snp_diversity", "snp_diversity_f")
  pivot_cols_n = length(pivot_cols)

  expected_rows = nrow(lin_wf) * ( ncol(lin_wf) - pivot_cols_n )

  lin_lf <- tidyr::gather(lin_wf
                          , count_categ
                          , p_count
                          , num_snps_u:total_samples
                          , factor_key = TRUE)

  # quick checks
  if ( nrow(lin_lf) == expected_rows ){
    cat("\nPASS: Lineage LF data created"
        , "\nnrow: ", nrow(lin_lf)
        , "\nncol: ", ncol(lin_lf))
  } else {
    cat("\nFAIL: numbers mismatch"
        , "\nExpected nrow: ", expected_rows)
  }

  lineage_dataL[['lin_lf']] = lin_lf

  cat("\nCOMPLETED: Successfully created LF lineage data")
  return(lineage_dataL)
}
|
|
@ -1,511 +0,0 @@
|
|||
#####################################################################################
|
||||
# LogoPlotMSA():
|
||||
# Input:
|
||||
# Data:
|
||||
# msaSeq_mut: MSA chr vector for muts
|
||||
# msaSeq_wt: MSA chr vector for wt
|
||||
|
||||
# Logo type params:
|
||||
# logo_type = c("EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw")
|
||||
# EDLogo: calculated from the Logolas package based on PFM matrix (scaled).
|
||||
#The required content from the package is sourced locally within 'my_logolas.R'
|
||||
# bits_pfm: Information Content based on PFM scaled matrix (my_logolas.R)
|
||||
# probability_pfm: Probability based on PFM scaled matrix (my_logolas.R)
|
||||
# bits_raw: Information Content based on Raw MSA (ggseqlogo)
|
||||
# probability_raw: Probability based on Raw MSA (ggseqlogo)
|
||||
|
||||
# EDScore_type = c("log", "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
|
||||
# bg_prob: background probability, default is equal i.e NULL.
|
||||
# This is passed to the internal call to DataED_PFM(), which takes these args. It is exposed here for
|
||||
# completeness and allow nuanced plot control
|
||||
|
||||
# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
|
||||
# --> if clustalx and taylor, set variable to black bg + white font
|
||||
# --> if chemistry and hydrophobicity, then grey bg + black font
|
||||
|
||||
# ...other params
|
||||
|
||||
# Returns: Logo plots from MSA both mutant and wt (for comparability)
|
||||
# For my case, I always use it as it helps see what is at the wild-type already!
|
||||
|
||||
# TODO: SHINY
|
||||
# drop down: logo_type
|
||||
# drop down: ED score type
|
||||
# drop down/enter field : bg probability (in the actual plot function!)
|
||||
# drop down: my_logo_col
|
||||
# Make it hover over position and then get the corresponding data table!
|
||||
###################################################################################
|
||||
|
||||
|
||||
###########################################
|
||||
#LogoPlotMSA <- function(msaSeq_mut # chr vector
|
||||
# , msaSeq_wt # chr vector
|
||||
LogoPlotMSA <- function(# unified_msa # <- not needed any more because we have 'target' now
|
||||
target = 'embb'
|
||||
, logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw")
|
||||
, EDScore_type = c("log") # see if this relevant, or source function should have it!
|
||||
, bg_prob = NULL
|
||||
, my_logo_col = "chemistry"
|
||||
, plot_positions
|
||||
, y_breaks
|
||||
, x_lab_mut = ""
|
||||
, y_lab_mut
|
||||
, x_ats = 10 # text size
|
||||
, x_tangle = 90 # text angle
|
||||
, x_axis_offset = 0 # dist b/w y-axis and plot start
|
||||
, x_axis_offset_filtered = 0
|
||||
, y_axis_offset = 0
|
||||
, y_axis_increment = 1
|
||||
, y_ats = 10
|
||||
, y_tangle = 0
|
||||
, x_tts = 10 # title size
|
||||
, y_tts = 10
|
||||
, leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
|
||||
, leg_dir = "horizontal" #can be vertical or horizontal
|
||||
, leg_ts = 14 # leg text size
|
||||
, leg_tts = 14 # leg title size
|
||||
, aa_pos_drug = aa_pos_drug
|
||||
, active_aa_pos = active_aa_pos
|
||||
, aa_pos_lig1 = aa_pos_lig1
|
||||
, aa_pos_lig2 = aa_pos_lig2
|
||||
, aa_pos_lig3 = aa_pos_lig3
|
||||
, ...
|
||||
)
|
||||
|
||||
{
|
||||
# FIXME: Hack!
|
||||
# msaSeq_mut=unified_msa[[1]]
|
||||
# msaSeq_wt=unified_msa[[2]]
|
||||
|
||||
unified_msa = get(paste0(target, "_unified_msa"))
|
||||
|
||||
msaSeq_mut=unified_msa[['msa_seq']]
|
||||
msaSeq_wt=unified_msa[['wt_seq']]
|
||||
|
||||
# Get PFM matrix for mut and wt MSA provided
|
||||
data_ed = DataED_PFM(msaSeq_mut
|
||||
, msaSeq_wt
|
||||
, ED_score = EDScore_type)
|
||||
names(data_ed)
|
||||
#"pfm_mutM" "pfm_mut_scaledM" "combED_mutM" "pfm_wtM" "pfm_wt_scaledM" "combED_wtM"
|
||||
|
||||
#merged_df3 for current target (unfortunatly i can't think of an easy way to get this from unified_msa)
|
||||
contig_df=data.frame(position=1:max(nchar(unified_msa$wt_seq)))
|
||||
plot_df = get(paste0(target, "_merged_df3"))
|
||||
|
||||
# generate the tile columns
|
||||
#plot_df=cbind(embb_merged_df3)
|
||||
plot_df$col_aa = ifelse(plot_df[["position"]]%in%active_aa_pos,
|
||||
"transparent", "transparent")
|
||||
plot_df$bg_all = plot_df$col_aa
|
||||
plot_df$bg_all = ifelse(plot_df[["position"]]%in%aa_pos_drug,
|
||||
"drug", plot_df$bg_all)
|
||||
plot_df$col_bg1 = plot_df$bg_all
|
||||
plot_df$col_bg1 = ifelse(plot_df[["position"]]%in%aa_pos_lig1,
|
||||
"lig1", plot_df$col_bg1)
|
||||
plot_df$col_bg2 = plot_df$col_bg1
|
||||
plot_df$col_bg2 = ifelse(plot_df[["position"]]%in%aa_pos_lig2,
|
||||
"lig2", plot_df$col_bg2)
|
||||
plot_df$col_bg3 = plot_df$col_bg2
|
||||
plot_df$col_bg3 = ifelse(plot_df[["position"]]%in%aa_pos_lig3
|
||||
, "lig3", plot_df$col_bg3)
|
||||
|
||||
plot_df = generate_distance_colour_map(plot_df, debug=FALSE)
|
||||
|
||||
# copy only the tile columns to the contiguous DF
|
||||
|
||||
contig_df$ligand_distance = plot_df$ligand_distance[match(contig_df$position, plot_df$position)]
|
||||
contig_df_map = generate_distance_colour_map(contig_df, debug=TRUE)
|
||||
contig_df$ligD_colours = contig_df_map$ligD_colours[match(contig_df$position, contig_df_map$position)]
|
||||
|
||||
#contig_df$ligD_colours = plot_df$ligD_colours[match(contig_df$position, plot_df$position)]
|
||||
contig_df$bg_all = plot_df$bg_all[match(contig_df$position, plot_df$position)]
|
||||
contig_df$col_bg1 = plot_df$col_bg1[match(contig_df$position, plot_df$position)]
|
||||
contig_df$col_bg2 = plot_df$col_bg2[match(contig_df$position, plot_df$position)]
|
||||
contig_df$col_bg3 = plot_df$col_bg3[match(contig_df$position, plot_df$position)]
|
||||
contig_df=replace_na(
|
||||
contig_df,
|
||||
list(
|
||||
ligD_colours='transparent',
|
||||
bg_all = 'transparent',
|
||||
col_bg1 = 'transparent',
|
||||
col_bg2 = 'transparent',
|
||||
col_bg3 = 'transparent'
|
||||
)
|
||||
)
|
||||
|
||||
if (logo_type == "EDLogo"){
|
||||
msa_method = "custom"
|
||||
y_label = "Enrichment Score"
|
||||
data_logo_mut = data_ed[['combED_mutM']]
|
||||
data_logo_wt = data_ed[['combED_wtM']]
|
||||
|
||||
msa_pos = as.numeric(colnames(data_logo_mut))
|
||||
wt_pos = as.numeric(colnames(data_logo_wt))
|
||||
|
||||
# Construct Y-axis for MSA mut plot:
|
||||
cat("\nCalculating y-axis for MSA mut plot")
|
||||
|
||||
if ( missing(y_breaks) ){
|
||||
# Y-axis: Calculating
|
||||
cat("\n----------------------------------------"
|
||||
, "\nY-axis being generated from data"
|
||||
, "\n-----------------------------------------")
|
||||
ylim_low <- floor(min(data_logo_mut)); ylim_low
|
||||
if( ylim_low == 0){
|
||||
ylim_low = ylim_low
|
||||
cat("\nY-axis lower limit:", ylim_low)
|
||||
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
|
||||
|
||||
ylim_up <- ceiling(max(data_logo_mut)) + 5; ylim_up
|
||||
cat("\nY-axis upper limit:", ylim_up)
|
||||
y_rup = seq(0, ylim_up, by = 2); y_rup
|
||||
}else{
|
||||
ylim_low = ylim_low + (-0.5)
|
||||
cat("\nY-axis lower limit is <0:", ylim_low)
|
||||
y_rlow = seq(0, ylim_low, length.out = 3); y_rlow
|
||||
|
||||
ylim_up <- ceiling(max(data_logo_mut)) + 3; ylim_up
|
||||
cat("\nY-axis upper limit:", ylim_up)
|
||||
y_rup = seq(0, ylim_up, by = 3); y_rup
|
||||
}
|
||||
#ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale
|
||||
ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale
|
||||
cat("\nY-axis generated: see below\n"
|
||||
, ylim_scale)
|
||||
}else{
|
||||
# Y-axis: User provided
|
||||
cat("\n--------------------------------"
|
||||
, "\nUsing y-axis:: User provided"
|
||||
,"\n---------------------------------")
|
||||
ylim_scale = sort(y_breaks)
|
||||
ylim_low = min(ylim_scale); ylim_low
|
||||
ylim_up = max(ylim_scale); ylim_up
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (logo_type == "bits_pfm"){
|
||||
msa_method = "bits"
|
||||
y_label = "Bits (PFM)"
|
||||
data_logo_mut = data_ed[['pfm_mut_scaledM']]
|
||||
data_logo_wt = data_ed[['pfm_wtM']]
|
||||
|
||||
msa_pos = as.numeric(colnames(data_logo_mut))
|
||||
wt_pos = as.numeric(colnames(data_logo_wt))
|
||||
}
|
||||
|
||||
if (logo_type == "probability_pfm"){
|
||||
msa_method = "probability"
|
||||
y_label = "Probability (PFM)"
|
||||
data_logo_mut = data_ed[['pfm_mut_scaledM']]
|
||||
data_logo_wt = data_ed[['pfm_wtM']]
|
||||
|
||||
msa_pos = as.numeric(colnames(data_logo_mut))
|
||||
wt_pos = as.numeric(colnames(data_logo_wt))
|
||||
}
|
||||
|
||||
if (logo_type == "bits_raw"){
|
||||
msa_method = "bits"
|
||||
y_label = "Bits"
|
||||
|
||||
data_logo_mut = msaSeq_mut
|
||||
msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,"")))
|
||||
msa_interimDF = data.frame(msa_interim)
|
||||
msa_pos = as.numeric(rownames(msa_interimDF))
|
||||
|
||||
data_logo_wt = msaSeq_wt
|
||||
wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,"")))
|
||||
wt_interimDF = data.frame(wt_interim)
|
||||
wt_pos = as.numeric(rownames(wt_interimDF))
|
||||
|
||||
}
|
||||
|
||||
if (logo_type == "probability_raw"){
|
||||
msa_method = "probability"
|
||||
y_label = "Probability"
|
||||
|
||||
data_logo_mut = msaSeq_mut
|
||||
msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,"")))
|
||||
msa_interimDF = data.frame(msa_interim)
|
||||
msa_pos = as.numeric(rownames(msa_interimDF))
|
||||
|
||||
data_logo_wt = msaSeq_wt
|
||||
wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,"")))
|
||||
wt_interimDF = data.frame(wt_interim)
|
||||
wt_pos = as.numeric(rownames(wt_interimDF))
|
||||
}
|
||||
|
||||
#################################################################################
|
||||
# param: plot_position
|
||||
#################################################################################
|
||||
|
||||
if(missing(plot_positions)){
|
||||
|
||||
#================================
|
||||
# NO filtering of positions
|
||||
#================================
|
||||
#---------
|
||||
# MSA mut
|
||||
#---------
|
||||
cat("\n==========================================="
|
||||
, "\nGenerated PFM mut: No filtering"
|
||||
, "\n===========================================")
|
||||
|
||||
plot_mut_edM = data_logo_mut
|
||||
|
||||
#---------
|
||||
# MSA WT
|
||||
#---------
|
||||
cat("\n==========================================="
|
||||
, "\nGenerated PFM WT: No filtering"
|
||||
, "\n===========================================")
|
||||
|
||||
plot_wt_edM = data_logo_wt
|
||||
|
||||
}else{
|
||||
|
||||
#================================
|
||||
# Filtering of positions
|
||||
#================================
|
||||
cat("\n==========================================="
|
||||
, "\nGenerating PFM MSA: filtered positions"
|
||||
, "\n==========================================="
|
||||
, "\nUser specified plotting positions for MSA:"
|
||||
, "\nThese are:\n", plot_positions
|
||||
, "\nSorting plot positions...")
|
||||
|
||||
plot_positions = sort(plot_positions)
|
||||
|
||||
cat("\nPlotting positions sorted:\n"
|
||||
, plot_positions)
|
||||
|
||||
if ( all(plot_positions%in%msa_pos) && all(plot_positions%in%wt_pos) ){
|
||||
cat("\nAll positions within range"
|
||||
, "\nFiltering positions as specified..."
|
||||
, "\nNo. of positions in plot:", length(plot_positions))
|
||||
i_extract = plot_positions
|
||||
|
||||
#-----------------
|
||||
# PFM: mut + wt
|
||||
#------------------
|
||||
if (logo_type%in%c("EDLogo", "bits_pfm", "probability_pfm")){
|
||||
|
||||
plot_mut_edM = data_logo_mut[, i_extract]
|
||||
plot_wt_edM = data_logo_wt[, i_extract]
|
||||
|
||||
}
|
||||
if (logo_type%in%c("bits_raw", "probability_raw")){
|
||||
|
||||
#--------
|
||||
# Mut
|
||||
#--------
|
||||
dfP1 = msa_interimDF[i_extract,]
|
||||
dfP1 = data.frame(t(dfP1))
|
||||
names(dfP1) = i_extract
|
||||
cols_to_paste = names(dfP1)
|
||||
dfP1['chosen_seq'] = apply(dfP1[, cols_to_paste]
|
||||
, 1
|
||||
, paste, sep = ''
|
||||
, collapse = "")
|
||||
plot_mut_edM = dfP1$chosen_seq
|
||||
|
||||
#--------
|
||||
# WT
|
||||
#--------
|
||||
dfP2 = wt_interimDF[i_extract,]
|
||||
dfP2 = data.frame(t(dfP2))
|
||||
names(dfP2) = i_extract
|
||||
cols_to_paste2 = names(dfP2)
|
||||
dfP2['chosen_seq'] = apply( dfP2[, cols_to_paste2]
|
||||
, 1
|
||||
, paste, sep = ''
|
||||
, collapse = "")
|
||||
|
||||
plot_wt_edM = dfP2$chosen_seq
|
||||
|
||||
}
|
||||
|
||||
}else{
|
||||
cat("\nNo. of positions selected:", length(plot_positions))
|
||||
i_ofr = plot_positions[!plot_positions%in%msa_pos]
|
||||
cat("\n1 or more plot_positions out of range..."
|
||||
, "\nThese are:\n", i_ofr
|
||||
, "\nQuitting! Resubmit with correct plot_positions")
|
||||
quit()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
######################################
|
||||
# Generating plots for muts and wt
|
||||
#####################################
|
||||
if (my_logo_col %in% c('clustalx','taylor')) {
|
||||
cat("\nSelected colour scheme:", my_logo_col
|
||||
, "\nUsing black theme\n")
|
||||
|
||||
theme_bgc = "black"
|
||||
xfont_bgc = "white"
|
||||
yfont_bgc = "white"
|
||||
xtt_col = "white"
|
||||
ytt_col = "white"
|
||||
|
||||
}
|
||||
|
||||
if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
|
||||
cat("\nstart of MSA"
|
||||
, '\nSelected colour scheme:', my_logo_col
|
||||
, "\nUsing grey theme")
|
||||
|
||||
theme_bgc = "white"
|
||||
xfont_bgc = "black"
|
||||
yfont_bgc = "black"
|
||||
xtt_col = "black"
|
||||
ytt_col = "black"
|
||||
|
||||
}
|
||||
|
||||
#####################################
|
||||
# Generating logo plots for SAVs
|
||||
#####################################
|
||||
PlotlogolasL <- list()
|
||||
|
||||
#-------------------
|
||||
# Mutant logo plot
|
||||
#-------------------
|
||||
p0 = ggplot() + geom_logo(plot_mut_edM
|
||||
, method = msa_method
|
||||
, col_scheme = my_logo_col
|
||||
, seq_type = 'auto') +
|
||||
|
||||
theme(legend.position = leg_pos
|
||||
, legend.direction = leg_dir
|
||||
#, legend.title = element_blank()
|
||||
, legend.title = element_text(size = leg_tts
|
||||
, colour = ytt_col)
|
||||
, legend.text = element_text(size = leg_ts)
|
||||
|
||||
, axis.text.x = element_text(size = x_ats
|
||||
, angle = x_tangle
|
||||
, hjust = 1
|
||||
, vjust = 0.4
|
||||
, colour = xfont_bgc)
|
||||
#, axis.text.y = element_blank()
|
||||
, axis.ticks=element_blank()
|
||||
, axis.text.y = element_text(size = y_ats
|
||||
, angle = y_tangle
|
||||
, hjust = 1
|
||||
, vjust = -1.0
|
||||
, colour = yfont_bgc)
|
||||
, axis.title.x = element_text(size = x_tts
|
||||
, colour = xtt_col)
|
||||
, axis.title.y = element_text(size = y_tts
|
||||
, colour = ytt_col)
|
||||
, panel.grid=element_blank()
|
||||
, plot.background = element_rect(fill = theme_bgc, colour=NA)
|
||||
, panel.background = element_rect(fill = "transparent", colour=NA)
|
||||
|
||||
) +
|
||||
labs(y=y_label) +
|
||||
xlab(x_lab_mut)
|
||||
|
||||
if (missing(plot_positions)){
|
||||
ed_mut_logo_P = p0 +
|
||||
scale_y_continuous(
|
||||
expand = c(0,0),
|
||||
breaks = seq(
|
||||
0,
|
||||
(y_lim),
|
||||
by = y_axis_increment
|
||||
)
|
||||
) +
|
||||
scale_x_discrete(breaks = msa_pos
|
||||
, expand = c(x_axis_offset, 0)
|
||||
, labels = msa_pos
|
||||
, limits = factor(msa_pos))
|
||||
|
||||
}else{
|
||||
ed_mut_logo_P = p0 +
|
||||
scale_y_continuous(
|
||||
expand = c(0,0)#,
|
||||
# breaks = seq(
|
||||
# 0,
|
||||
# (y_lim),
|
||||
# by = y_axis_increment
|
||||
#)
|
||||
) +
|
||||
# scale_x_continuous(expand = c(0,0)) #+
|
||||
|
||||
scale_x_discrete(breaks = i_extract
|
||||
, expand = c(x_axis_offset_filtered, 0)
|
||||
, labels = i_extract
|
||||
, limits = factor(i_extract))
|
||||
}
|
||||
|
||||
cat('\nDone: MSA plot for mutations')
|
||||
#### Wild-type MSA: gene_fasta file ####
|
||||
p1 = ggplot() + geom_logo(plot_wt_edM
|
||||
#, facet = "grid"
|
||||
, method = msa_method
|
||||
, col_scheme = my_logo_col
|
||||
, seq_type = 'aa') +
|
||||
|
||||
theme(legend.position = "none"
|
||||
, legend.direction = leg_dir
|
||||
, legend.title = element_text(size = leg_tts
|
||||
, colour = ytt_col)
|
||||
, legend.text = element_text(size = leg_ts)
|
||||
, axis.text.x = element_blank()
|
||||
, axis.ticks=element_blank()
|
||||
, axis.text.y = element_blank()
|
||||
|
||||
, axis.title.x = element_text(size = x_tts
|
||||
, colour = xtt_col)
|
||||
, axis.title.y = element_text(size = y_tts
|
||||
, colour = ytt_col)
|
||||
|
||||
, panel.grid=element_blank()
|
||||
, plot.background = element_rect(fill = theme_bgc, colour=NA)
|
||||
, panel.background = element_rect(fill = "transparent", colour=NA)
|
||||
, plot.margin = margin(r=0,l=0, unit="pt")
|
||||
|
||||
) +
|
||||
scale_y_discrete(expand = c(0,0)) +
|
||||
ylab("") + xlab("")
|
||||
|
||||
if (missing(plot_positions)){
|
||||
|
||||
# No y-axis needed
|
||||
ed_wt_logo_P = p1# +
|
||||
} else {
|
||||
|
||||
ed_wt_logo_P = p1 +
|
||||
scale_x_discrete(expand = c(0, 0),
|
||||
breaks = i_extract,
|
||||
#labels = i_extract,
|
||||
limits = factor(i_extract)
|
||||
)
|
||||
|
||||
#plot_df=plot_df[plot_df$position %in% plot_positions,]
|
||||
contig_df=contig_df[contig_df$position %in% plot_positions,]
|
||||
anno_bar = position_annotation(
|
||||
contig_df,
|
||||
aa_pos_drug=aa_pos_drug,
|
||||
active_aa_pos=active_aa_pos,
|
||||
aa_pos_lig1=aa_pos_lig1,
|
||||
aa_pos_lig2=aa_pos_lig2,
|
||||
aa_pos_lig3=aa_pos_lig3,
|
||||
generate_colours = FALSE
|
||||
)
|
||||
|
||||
}
|
||||
cowplot::plot_grid(ed_mut_logo_P
|
||||
, ed_wt_logo_P
|
||||
, anno_bar
|
||||
, ncol = 1
|
||||
, align = "v"
|
||||
#, axis='lr'
|
||||
, rel_heights = c(3/4, 1/4,1/10))
|
||||
|
||||
}
|
||||
#LogoPlotMSA(unified_msa)
|
|
@ -1,224 +0,0 @@
|
|||
# Input:
|
||||
# Data:
|
||||
# plot_df: merged_df3 containing the OR column to use as y-axis or any other relevant column
|
||||
|
||||
# x_axis_colname = "position"
|
||||
# y_axis_colname = "or_mychisq"
|
||||
# symbol_colname = "mutant_type"
|
||||
# y_axis_log = F
|
||||
# log_value = log10
|
||||
# if used, y-axis label has "Log" appended to it
|
||||
|
||||
# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
|
||||
# --> if clustalx and taylor, set variable to black bg + white font
|
||||
# --> if chemistry and hydrophobicity, then grey bg + black font
|
||||
|
||||
# rm_empty_y = F
|
||||
# option to remove empty positions, i.e. positions with no associated y-value
|
||||
|
||||
# y_axis_log = F
|
||||
# option to use log scale
|
||||
# FIXME Minor bug: if used with rm_empty_y, sometimes the labels are too small to render(!?)
|
||||
# so positions appear empty despite having y-vals
|
||||
|
||||
# ...other params
|
||||
|
||||
# Returns: Logo plot from combined data containing specific y-value such as OR, etc by position.
|
||||
|
||||
# TODO: SHINY
|
||||
# select/drop down option to remove empty positions
|
||||
# select/drop down option for colour
|
||||
# select/drop down option for log scale
|
||||
# include WT
|
||||
|
||||
# Make it hover over position and then get the corresponding data table!
|
||||
########################a###########################################################
|
||||
|
||||
|
||||
#==================
|
||||
# logo data: OR
|
||||
#==================
|
||||
LogoPlotCustomH <- function(plot_df
                            , x_axis_colname = "position"
                            , y_axis_colname = "or_mychisq"
                            , symbol_colname = "mutant_type"
                            , my_logo_col = "chemistry"
                            , rm_empty_y = F
                            , y_axis_log = F
                            , log_value = log10
                            , y_axis_increment = 50
                            , x_lab = "Position"
                            , y_lab = "Odds Ratio"
                            , x_ats = 6 # text size
                            , x_tangle = 90 # text angle
                            , y_ats = 11
                            , y_tangle = 0
                            , x_tts = 10 # title size
                            , y_tts = 11
                            , leg_pos = "none" # can be top, left, right and bottom or c(0.8, 0.9)
                            , leg_dir = "horizontal" #can be vertical or horizontal
                            , leg_ts = 7 # leg text size
                            , leg_tts = 8 # leg title size
                            , tpos0 = 0 # 0 is a magic number that does my sensible default
                            , tW0 = 1
                            , tH0 = 0.3,
                            ...
)
{
  # Logo plot whose letter heights come from a numeric column (e.g. odds ratio),
  # stacked above the bar returned by position_annotation().
  #
  # Args:
  #   plot_df          data frame holding the three columns named below
  #   x_axis_colname   column with the positions (become the logo columns)
  #   y_axis_colname   column with the letter heights
  #   symbol_colname   column with the symbols (become the logo rows)
  #   my_logo_col      ggseqlogo colour scheme; 'clustalx'/'taylor' select the
  #                    black theme, anything else the white theme
  #   rm_empty_y       drop rows whose y value is NA before plotting
  #   y_axis_log       plot the column "log10_<y_axis_colname>"; if plot_df has no
  #                    such column it is computed as log_value(<y_axis_colname>)
  #   y_axis_increment spacing of the y-axis breaks
  #   ...              forwarded to position_annotation()
  #   leg_ts, tpos0, tW0, tH0 are accepted but not used in this function.
  #
  # Returns: the cowplot grid (logo on top, annotation bar below).

  required_cols = c(x_axis_colname, y_axis_colname, symbol_colname)
  missing_cols = setdiff(required_cols, colnames(plot_df))
  if (length(missing_cols) > 0){
    stop("LogoPlotCustomH: column(s) not found in plot_df: "
         , paste(missing_cols, collapse = ", "))
  }

  if (rm_empty_y){
    cat(paste0("Original Rows: ",nrow(plot_df)))
    # [[ ]] yields a plain vector, so the row filter works for any data frame flavour
    plot_df = plot_df[!is.na(plot_df[[y_axis_colname]]),]
    cat(paste0("Plotting Rows after removing NAs: ",nrow(plot_df)))
  }

  #-----------------------------------
  # logo data: OR or LogOR (long form)
  #-----------------------------------
  if (y_axis_log){
    value_colname = paste0("log10_", y_axis_colname)
    if (value_colname %in% colnames(plot_df)){
      # a precomputed log column takes precedence
      y_values = plot_df[[value_colname]]
    } else {
      y_values = log_value(plot_df[[y_axis_colname]])
    }
  } else {
    value_colname = y_axis_colname
    y_values = plot_df[[y_axis_colname]]
  }

  logo_df = data.frame(plot_df[[x_axis_colname]]
                       , plot_df[[symbol_colname]]
                       , y_values
                       , stringsAsFactors = FALSE)
  names(logo_df) = c(x_axis_colname, symbol_colname, value_colname)

  #--------------------------------------------------
  # wide form: one row per symbol, one col per position
  #--------------------------------------------------
  # !! injects the column names held in the variables, so a column that happens
  # to be called "x_axis_colname" cannot be picked up by mistake
  logo_df_plot = logo_df %>%
    spread(key = !!x_axis_colname, value = !!value_colname, fill = 0.0)

  # use the caller-supplied symbol column, not a hard-coded "mutant_type"
  rownames(logo_df_plot) = as.character(logo_df_plot[[symbol_colname]])
  logo_df_plot[[symbol_colname]] = NULL
  logo_dfP_wf = as.matrix(logo_df_plot)

  # tallest stack decides the upper y limit, rounded up to the next increment
  y_max = max(colSums(logo_dfP_wf))
  cat("\nRemoving y scale incremenet:", y_axis_increment)
  y_lim = round_any(y_max, y_axis_increment, f = ceiling)

  position_or = as.numeric(colnames(logo_dfP_wf))

  ######################################
  # Theme colours for the chosen scheme
  #####################################
  if (my_logo_col %in% c('clustalx','taylor')) {
    cat("\nSelected colour scheme:", my_logo_col
        , "\nUsing black theme\n")

    theme_bgc = "black"
    xfont_bgc = "white"
    yfont_bgc = "white"
    xtt_col   = "white"
    ytt_col   = "white"
  } else {
    # also the fallback for any other scheme, so the theme variables always exist
    if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
      cat('\nSelected colour scheme:', my_logo_col
          , "\nUsing grey theme")
    }

    theme_bgc = "white"
    xfont_bgc = "black"
    yfont_bgc = "black"
    xtt_col   = "black"
    ytt_col   = "black"
  }

  ######################################
  # Generating plots with given y_axis
  #####################################
  logo_plot = ggplot() +
    geom_logo(logo_dfP_wf
              , method = "custom"
              , col_scheme = my_logo_col
              , seq_type = "aa") +
    theme(axis.ticks = element_blank()
          , axis.title.x = element_blank()
          # x labels are kept so positions can be verified visually
          , axis.text.x = element_text(size = x_ats
                                       , angle = x_tangle
                                       , colour = xfont_bgc
                                       , vjust = 0.4
                                       , margin = margin(t=0,r=0,b=0,l=0, unit="mm"))
          , axis.text.y = element_text(size = y_ats
                                       , angle = y_tangle
                                       , colour = yfont_bgc)
          , axis.title.y = element_text(size = y_tts
                                        , colour = ytt_col)
          , legend.title = element_text(size = leg_tts
                                        , colour = ytt_col)
          , legend.text = element_blank()
          , legend.position = leg_pos
          , legend.direction = leg_dir
          , plot.margin = margin(b=0)
          , panel.grid = element_blank()
          , plot.background = element_rect(fill = theme_bgc, colour=NA)
          , panel.background = element_rect(fill = "transparent", colour=NA)
    ) +
    # logo columns sit at 1..n; label them with the real positions
    scale_x_discrete(x_lab
                     , labels = position_or
                     , limits = factor(seq_along(position_or))) +
    scale_y_continuous(y_lab
                       , breaks = seq(0, y_lim, by = y_axis_increment)
                       , limits = c(0, y_lim))

  cowplot::plot_grid(
    logo_plot,
    position_annotation(plot_df,
                        bg = theme_bgc,
                        ...
    ),
    ncol=1, align='v', rel_heights = c(6,1)
  )
}
|
||||
#LogoPlotCustomH(small_df3)
|
|
@ -1,323 +0,0 @@
|
|||
########################a###########################################################
|
||||
# Input:
|
||||
# Data
|
||||
# plot_df: merged_df3 containing the position, mutant-type and wild-type columns named below
|
||||
|
||||
# x_axis_colname = "position"
|
||||
# symbol_mut_colname = "mutant_type"
|
||||
# symbol_wt_colname = "wild_type"
|
||||
# omit_snp_count = c(0, 1, 2...) can be used to filter positions with specified snp count
|
||||
|
||||
# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
|
||||
# --> if clustalx and taylor, set variable to black bg + white font
|
||||
# --> if chemistry and hydrophobicity, then grey bg + black font
|
||||
|
||||
# ...other params
|
||||
|
||||
# Returns: Logo plot from combined data containing all SAVs per position.
|
||||
# Helps to see the overview of SAV diversity
|
||||
|
||||
# TODO: SHINY
|
||||
# select/drop down: omit_snp_count
|
||||
# select/drop down: my_logo_col
|
||||
# should include WT??
|
||||
|
||||
# Make it hover over position and then get the corresponding data table!
|
||||
####################################################################################
|
||||
|
||||
#==================
|
||||
# logo data: SAV counts
|
||||
#==================
|
||||
# NOTE: my_logo_col
|
||||
|
||||
LogoPlotSnps <- function(plot_df
                         , x_axis_colname = "position"
                         , symbol_mut_colname = "mutant_type"
                         , symbol_wt_colname = "wild_type"
                         , omit_snp_count = c(0) # can be 1, 2, etc.
                         , my_logo_col = "chemistry"
                         , x_lab = "Position"
                         , y_lab = "SAV Count"
                         , x_ats = 6 # text size
                         , x_tangle = 90 # text angle
                         , y_ats = 10
                         , y_tangle = 0
                         , x_tts = 10 # title size
                         , y_tts = 10
                         , leg_pos = "none" # can be top, left, right and bottom or c(0.8, 0.9)
                         , leg_dir = "horizontal" #can be vertical or horizontal
                         , leg_ts = 10 # leg text size
                         , leg_tts = 8 # leg title size
                         , tpos0 = 0 # 0 is a magic number that does my sensible default
                         , tW0 = 1
                         , tH0 = 0.2
                         , debug=FALSE,
                         ...
)
{
  # Logo plot of SAV counts per position (mutant residues on top, wild-type
  # residue below), stacked above the bar returned by position_annotation().
  #
  # Args:
  #   plot_df             data frame with the three columns named below
  #   x_axis_colname      column with the positions
  #   symbol_mut_colname  column with the mutant residue
  #   symbol_wt_colname   column with the wild-type residue
  #   omit_snp_count      positions whose number of rows equals any of these
  #                       values are dropped; 0 means no filtering. Non-numeric
  #                       input has its digit runs extracted.
  #   my_logo_col         ggseqlogo colour scheme; 'clustalx'/'taylor' select the
  #                       black theme, anything else the white theme
  #   debug               print extra bookkeeping
  #   ...                 forwarded to position_annotation()
  #   y_ats, y_tangle, x_tts, tpos0, tW0, tH0 are accepted but not used here.
  #
  # Returns: the cowplot grid (mutant logo, WT logo, annotation bar).

  required_cols = c(x_axis_colname, symbol_mut_colname, symbol_wt_colname)
  missing_cols = setdiff(required_cols, colnames(plot_df))
  if (length(missing_cols) > 0){
    stop("LogoPlotSnps: column(s) not found in plot_df: "
         , paste(missing_cols, collapse = ", "))
  }

  # work on a copy: setDT() and := below modify their target by reference
  mutable_df = cbind(plot_df)
  setDT(mutable_df)

  # accept e.g. "1,2" or "c(1, 2)" by pulling out the digit runs
  if (!is.numeric(omit_snp_count)){
    omit_txt = as.character(omit_snp_count)
    omit_snp_count = as.numeric(unlist(regmatches(omit_txt, gregexpr("[0-9]+", omit_txt))))
  }

  ############################################
  # Data processing for logo plot for SAVS
  ############################################
  # number of rows (SAVs) observed at each position, attached to every row
  mutable_df[, mut_pos_occurrence := .N, by = c(x_axis_colname)]

  occurrence_tab = table(mutable_df$mut_pos_occurrence)

  if (debug) {
    print(table(mutable_df[[x_axis_colname]]))
  }

  # Subset Data as specified by user
  cat("\nDisplaying SAV position frequency:\n")
  print(occurrence_tab)

  no_filtering = (length(omit_snp_count) == 1) && isTRUE(omit_snp_count == 0)

  if (no_filtering){
    my_data_snp = mutable_df
  } else {
    my_data_snp = subset(mutable_df, !(mut_pos_occurrence %in% omit_snp_count))
  }

  if (nrow(my_data_snp) == 0){
    stop("LogoPlotSnps: no rows left to plot (check plot_df and omit_snp_count)")
  }

  if (debug) {
    u = unique(my_data_snp[[x_axis_colname]])
    max_mult_mut = max(table(my_data_snp[[x_axis_colname]]))
    n_total = sum(occurrence_tab)
    # table entries are named by occurrence value, so look them up by name
    omitted_names = intersect(names(occurrence_tab), as.character(omit_snp_count))
    n_omitted = sum(occurrence_tab[omitted_names])

    if (no_filtering){
      cat("\nNo filtering requested:"
          , "\nTotal no. of SAVs:", n_total
          , "\nTotal no. of SAVs omitted:", n_omitted
          , "\nDim of data:", dim(my_data_snp)
          , "\nNo. of positions:", length(u)
          , "\nMax no. of muts at any position:", max_mult_mut)
    } else {
      exp_nrows = n_total - n_omitted
      got_rows = nrow(my_data_snp)
      if (got_rows == exp_nrows) {
        cat("\nPass: Position with the stated SAV frequency filtered:", omit_snp_count
            , "\nTotal no. of SAVs:", n_total
            , "\nTotal no. of SAVs omitted:", n_omitted
            , "\nDim of subsetted data:", dim(my_data_snp)
            , "\nNo. of positions:", length(u)
            , "\nMax no. of muts at any position:", max_mult_mut)
      } else {
        cat("\nFAIL:Position with the stated SAV frequency COULD NOT be filtered..."
            , "\nExpected:",exp_nrows
            , "\nGot:", got_rows )
      }
    }
  }

  #--------------------------------------
  # matrix for mutant type
  # frequency of mutant type by position
  #---------------------------------------
  # unclass() turns the 2-d contingency table into a plain matrix
  tab_mt = unclass(table(my_data_snp[[symbol_mut_colname]], my_data_snp[[x_axis_colname]]))
  if (!is.matrix(tab_mt)){
    tab_mt = as.matrix(tab_mt, rownames = T)
  }
  if (debug && is.matrix(tab_mt)) {
    cat("\nPASS: Mutant matrix successfully created...")
  }

  #-------------------------------------
  # matrix for wild type
  # frequency of wild type by position
  #-------------------------------------
  # Important: remove wt duplicates so each position/residue pair counts once
  wt = unique(as.data.frame(my_data_snp)[, c(x_axis_colname, symbol_wt_colname)])
  tab_wt = unclass(table(wt[[symbol_wt_colname]], wt[[x_axis_colname]])) # should all be 1

  if (debug) {
    if ( identical(colnames(tab_mt), colnames(tab_wt) ) && identical(ncol(tab_mt), ncol(tab_wt)) ){
      cat("\nPASS: Wild type matrix successfully created"
          , "\nDim of wt matrix:", dim(tab_wt)
          , "\nDim of mutant matrix:", dim(tab_mt)
          , "\n")
    }
  }

  ######################################
  # Theme colours for the chosen scheme
  #####################################
  if (my_logo_col %in% c('clustalx','taylor')) {
    cat("\nSelected colour scheme:", my_logo_col
        , "\nUsing black theme\n")

    theme_bgc = "black"
    xfont_bgc = "white"
    xtt_col   = "white"
    ytt_col   = "white"
  } else {
    # also the fallback for any other scheme, so the theme variables always exist
    if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
      cat('\nSelected colour scheme:', my_logo_col
          , "\nUsing grey theme")
    }

    theme_bgc = "white"
    xfont_bgc = "black"
    xtt_col   = "black"
    ytt_col   = "black"
  }

  position_mt = as.numeric(colnames(tab_mt))
  position_wt = as.numeric(colnames(tab_wt))

  #####################################
  # Generating logo plots for SAVs
  #####################################
  #-------------------
  # Mutant logo plot
  #-------------------
  logo_top = ggplot() +
    geom_logo(tab_mt
              , method = 'custom'
              , col_scheme = my_logo_col
              , seq_type = 'aa') +
    theme_nothing() +
    ylab(y_lab) +
    theme(text = element_text(family="FreeSans")
          , legend.position = leg_pos
          , legend.direction = leg_dir
          , legend.title = element_text(size = leg_tts
                                        , colour = ytt_col)
          , legend.text = element_text(size = leg_ts)
          , axis.text.x = element_text(size = x_ats
                                       , angle = x_tangle
                                       , colour = xfont_bgc)
          , axis.text.y = element_blank()
          , axis.title.x = element_blank()
          , axis.title.y = element_text(size = y_tts
                                        , angle = 90
                                        , colour = ytt_col
                                        , margin = margin(t = 0, r = 0, b = 20, l = 0))
          , plot.background = element_rect(fill = theme_bgc, colour=NA)
    ) +
    # logo columns sit at 1..n; label them with the real positions
    scale_x_discrete(x_lab
                     , labels = position_mt
                     , limits = factor(seq_along(position_mt)))

  #------------------
  # Wild logo plot
  #------------------
  logo_bottom = ggplot() +
    geom_logo(tab_wt
              , method = 'custom'
              , col_scheme = my_logo_col
              , seq_type = 'aa') +
    theme_nothing() +
    # same x scale construction as the mutant logo, built from the WT positions
    scale_x_discrete(x_lab
                     , labels = position_wt
                     , limits = factor(seq_along(position_wt))) +
    theme(text = element_text(family="FreeSans")
          , legend.position = "none"
          , axis.title.x = element_blank()
          , axis.title.y = element_text(size = y_tts
                                        , angle = 90
                                        , colour = ytt_col
                                        , margin = margin(t = 0, r = 0, b = 20, l = 0))
          , plot.background = element_rect(fill = theme_bgc, colour=NA)
    ) +
    labs(x=NULL, y="WT")

  # NOTE(review): the annotation bar receives the unfiltered plot_df, while the
  # logos use the omit_snp_count-filtered data; confirm the two stay aligned
  # when positions are filtered out.
  anno_bar = position_annotation(plot_df,
                                 bg = theme_bgc,
                                 ...
  )

  # top logo, bottom logo, position annotation
  cowplot::plot_grid(
    logo_top, logo_bottom, anno_bar,
    ncol=1,
    align = "v",
    rel_heights = c(7, 1,1)
  )
}
|
||||
|
||||
#LogoPlotSnps(small_df3)
|
File diff suppressed because it is too large
Load diff
|
@ -1,46 +1,30 @@
|
|||
my_corr_pairs <- function (corr_data_all
|
||||
, corr_cols = colnames(corr_data_all)
|
||||
, corr_method = "spearman" # other options: "pearson" or "kendall"
|
||||
, colour_categ_col = "mutation_info_labels"
|
||||
, categ_colour = c("#E69F00", "#999999")
|
||||
, density_show = F
|
||||
, hist_col = "coral4"
|
||||
, dot_size = 1.6
|
||||
, ats = 1.5
|
||||
, corr_lab_size = 3
|
||||
, corr_value_size = 1)
|
||||
{
|
||||
my_corr_pairs <- function (corr_data){
|
||||
|
||||
corr_data_df = corr_data_all[corr_cols]
|
||||
my_bg = categ_colour[as.factor(corr_data_all[[colour_categ_col]])] # converted to factor
|
||||
|
||||
OutPlot_corr = pairs.panels(corr_data_df
|
||||
, method = corr_method
|
||||
, hist.col = hist_col
|
||||
, density = density_show
|
||||
, ellipses = F
|
||||
, smooth = F
|
||||
OutPlot_corr = pairs.panels(corr_data
|
||||
, method = "spearman" # correlation method
|
||||
, hist.col = "grey" ##00AFBB
|
||||
, density = TRUE # show density plots
|
||||
, ellipses = F # show correlation ellipses
|
||||
, stars = T
|
||||
, rug = F
|
||||
, breaks = "Sturges"
|
||||
, show.points = T
|
||||
#, bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_data$duet_outcome))] # foldx colours are reveresed
|
||||
, bg = my_bg
|
||||
, pch = 21
|
||||
#, bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_ps$duet_outcome))] # foldx colours are reveresed
|
||||
#, pch = 21 # for bg
|
||||
, jitter = T
|
||||
, alpha = 1
|
||||
, cex = dot_size
|
||||
, cex.axis = ats
|
||||
, cex.labels = corr_lab_size
|
||||
, cex.cor = corr_value_size
|
||||
)
|
||||
, cex = 1.8
|
||||
, cex.axis = 2
|
||||
, cex.labels = 3.5
|
||||
, cex.cor = 1
|
||||
, smooth = F)
|
||||
return(OutPlot_corr)
|
||||
#return (my_bg)
|
||||
|
||||
}
|
||||
|
||||
######################################################################
|
||||
my_pp = function (x, smooth = TRUE, scale = FALSE, density = TRUE, ellipses = TRUE,
|
||||
digits = 2, method = "spearman", pch = 20, lm = FALSE, cor = TRUE,
|
||||
digits = 2, method = "pearson", pch = 20, lm = FALSE, cor = TRUE,
|
||||
jiggle = FALSE, factor = 2, hist.col = "cyan", show.points = TRUE,
|
||||
rug = TRUE, breaks = "Sturges", cex.cor = 1, wt = NULL, smoother = FALSE,
|
||||
stars = FALSE, ci = FALSE, alpha = 0.05, ...)
|
||||
|
|
|
@ -5,49 +5,91 @@
|
|||
# load libraries and functions
|
||||
library(data.table)
|
||||
library(dplyr)
|
||||
|
||||
# ADDED: New
|
||||
# geneL_normal = c("pnca")
|
||||
# geneL_na = c("gid", "rpob")
|
||||
# geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
|
||||
#========================================================
|
||||
# plotting_data(): formatting data for plots
|
||||
# input args:
|
||||
## input csv file
|
||||
## lig cut off dist, default = 10 Ang
|
||||
## input csv file
|
||||
## lig cut off dist, default = 10 Ang
|
||||
# output: list of 4 dfs that need to be unpacked by the caller
|
||||
## my_df
|
||||
## my_df_u
|
||||
## my_df_u_lig
|
||||
## dup_muts
|
||||
## my_df
|
||||
## my_df_u
|
||||
## my_df_u_lig
|
||||
## dup_muts
|
||||
#========================================================
|
||||
#lig_dist_colname = 'ligand_distance' or global var LigDist_colname
|
||||
#lig_dist_cutoff = 10 or global var LigDist_cutoff
|
||||
plotting_data <- function(df, lig_dist_colname = 'ligand_distance', lig_dist_cutoff = 10) {
|
||||
my_df = data.frame()
|
||||
my_df_u = data.frame()
|
||||
my_df_u_lig = data.frame()
|
||||
dup_muts = data.frame()
|
||||
|
||||
plotting_data <- function(df
|
||||
, gene # ADDED
|
||||
, lig_dist_colname = 'ligand_distance'
|
||||
, lig_dist_cutoff = 10
|
||||
) {
|
||||
my_df = data.frame()
|
||||
my_df_u = data.frame()
|
||||
my_df_u_lig = data.frame()
|
||||
dup_muts = data.frame()
|
||||
#===========================
|
||||
# Read file: struct params
|
||||
#===========================
|
||||
#df = read.csv(infile_params, header = T)
|
||||
|
||||
#===========================
|
||||
# Read file: struct params
|
||||
#===========================
|
||||
#df = read.csv(infile_params, header = T)
|
||||
cat("\nInput dimensions:", dim(df))
|
||||
|
||||
cat("\nInput dimensions:", dim(df))
|
||||
#==================================
|
||||
# add foldx outcome category
|
||||
# and foldx scaled values
|
||||
|
||||
#==================================
|
||||
# extract unique mutation entries
|
||||
#==================================
|
||||
# This ensures these variables are always available
|
||||
# when calling for plots
|
||||
#==================================
|
||||
|
||||
# check for duplicate mutations
|
||||
if ( length(unique(df$mutationinformation)) != length(df$mutationinformation)){
|
||||
#------------------------------
|
||||
# adding foldx scaled values
|
||||
# scale data b/w -1 and 1
|
||||
#------------------------------
|
||||
n = which(colnames(df) == "ddg"); n
|
||||
|
||||
my_min = min(df[,n]); my_min
|
||||
my_max = max(df[,n]); my_max
|
||||
|
||||
df$foldx_scaled = ifelse(df[,n] < 0
|
||||
, df[,n]/abs(my_min)
|
||||
, df[,n]/my_max)
|
||||
# sanity check
|
||||
my_min = min(df$foldx_scaled); my_min
|
||||
my_max = max(df$foldx_scaled); my_max
|
||||
|
||||
if (my_min == -1 && my_max == 1){
|
||||
cat("\nPASS: foldx ddg successfully scaled b/w -1 and 1"
|
||||
, "\nProceeding with assigning foldx outcome category")
|
||||
}else{
|
||||
cat("\nFAIL: could not scale foldx ddg values"
|
||||
, "Aborting!\n")
|
||||
}
|
||||
|
||||
#------------------------------
|
||||
# adding foldx outcome category
|
||||
# ddg<0 = "Stabilising" (-ve)
|
||||
#------------------------------
|
||||
c1 = table(df$ddg < 0)
|
||||
df$foldx_outcome = ifelse(df$ddg < 0, "Stabilising", "Destabilising")
|
||||
c2 = table(df$ddg < 0)
|
||||
|
||||
if ( all(c1 == c2) ){
|
||||
cat("\nPASS: foldx outcome successfully created")
|
||||
}else{
|
||||
cat("\nFAIL: foldx outcome could not be created. Aborting!\n")
|
||||
exit()
|
||||
}
|
||||
|
||||
#------------------------------
|
||||
# renaming foldx column from
|
||||
# "ddg" --> "ddg_foldx"
|
||||
#------------------------------
|
||||
|
||||
# change name to foldx
|
||||
colnames(df)[n] <- "ddg_foldx"
|
||||
|
||||
#==================================
|
||||
# extract unique mutation entries
|
||||
#==================================
|
||||
|
||||
# check for duplicate mutations
|
||||
if ( length(unique(df$mutationinformation)) != length(df$mutationinformation)){
|
||||
cat(paste0("\nCAUTION:", " Duplicate mutations identified"
|
||||
, "\nExtracting these...\n"))
|
||||
#cat(my_df[duplicated(my_df$mutationinformation),])
|
||||
|
@ -57,98 +99,32 @@ plotting_data <- function(df
|
|||
, "\nNo. of unique duplicate mutations:", dup_muts_nu
|
||||
, "\n\nExtracting df with unique mutations only\n"))
|
||||
my_df_u = df[!duplicated(df$mutationinformation),]
|
||||
} else {
|
||||
}else{
|
||||
cat(paste0("\nNo duplicate mutations detected\n"))
|
||||
my_df_u = df
|
||||
}
|
||||
}
|
||||
|
||||
upos = unique(my_df_u$position)
|
||||
cat("\nDim of clean df:"); cat(dim(my_df_u), "\n")
|
||||
cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n")
|
||||
#===============================================
|
||||
# ADD : na distance column for genes with nucleic acid affinity
|
||||
#===============================================
|
||||
# if (tolower(gene)%in%geneL_na){
|
||||
#
|
||||
# distcol_nca_name = read.csv(infilename_nca, header = F)
|
||||
# head(distcol_nca_name)
|
||||
# colnames(distcol_nca_name) <- c("mutationinformation", "nca_distance")
|
||||
# head(distcol_nca_name)
|
||||
# class(distcol_nca_name)
|
||||
#
|
||||
# mcol = colnames(distcol_nca_name)[colnames(distcol_nca_name)%in%colnames(my_df_u)]
|
||||
# mcol
|
||||
# head(my_df_u$mutationinformation)
|
||||
# head(distcol_nca_name$mutationinformation)
|
||||
#
|
||||
# my_df_u = merge(my_df_u, distcol_nca_name,
|
||||
# by = "mutationinformation",
|
||||
# all = T)
|
||||
#
|
||||
# }
|
||||
geneL_na=c("gid","rpob")
|
||||
upos = unique(my_df_u$position)
|
||||
cat("\nDim of clean df:"); cat(dim(my_df_u), "\n")
|
||||
cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n")
|
||||
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
infilename_nca = paste0("~/git/Misc/mcsm_na_dist/"
|
||||
, tolower(gene), "_nca_distances.csv")
|
||||
distcol_nca_name = read.csv(infilename_nca, header = F)
|
||||
#===============================================
|
||||
# extract mutations <10 Angstroms and symbol
|
||||
#===============================================
|
||||
table(my_df_u[[lig_dist_colname]] < lig_dist_cutoff)
|
||||
|
||||
if (tolower(gene)=='rpob'){
|
||||
my_df_u_lig = my_df_u[my_df_u[[lig_dist_colname]] < lig_dist_cutoff,]
|
||||
|
||||
print('WARNING: running special-case handler for rpoB')
|
||||
cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10\u212b of the ligand\n"))
|
||||
|
||||
# create 5uhc equivalent column for mutationinformation
|
||||
my_df_u$X5uhc_mutationinformation = paste0(my_df_u$wild_type,
|
||||
my_df_u$X5uhc_position,
|
||||
my_df_u$mutant_type)
|
||||
# return list of DFs
|
||||
my_df = df
|
||||
#df_names = c("my_df", "my_df_u", "my_df_u_lig", "dup_muts")
|
||||
all_df = list(my_df, my_df_u, my_df_u_lig, dup_muts)
|
||||
#all_df = Map(setNames, all_df, df_names)
|
||||
|
||||
colnames(distcol_nca_name) <- c("X5uhc_mutationinformation", "nca_distance")
|
||||
|
||||
# do stuff here
|
||||
mcol = colnames(distcol_nca_name)[colnames(distcol_nca_name)%in%colnames(my_df_u)]
|
||||
cat(paste0("\nMerging for gene: ", tolower(gene), "\non column: ", mcol))
|
||||
|
||||
head(my_df_u$mutationinformation)
|
||||
head(distcol_nca_name$X5uhc_mutationinformation)
|
||||
|
||||
my_df_u = merge(my_df_u, distcol_nca_name,
|
||||
by = "X5uhc_mutationinformation",
|
||||
all = T)
|
||||
|
||||
} else {
|
||||
head(distcol_nca_name)
|
||||
colnames(distcol_nca_name) <- c("mutationinformation", "nca_distance")
|
||||
head(distcol_nca_name)
|
||||
class(distcol_nca_name)
|
||||
mcol = colnames(distcol_nca_name)[colnames(distcol_nca_name)%in%colnames(my_df_u)]
|
||||
cat(paste0("\nMerging for gene: ", tolower(gene), "\non column: ", mcol))
|
||||
head(my_df_u$mutationinformation)
|
||||
head(distcol_nca_name$mutationinformation)
|
||||
|
||||
my_df_u = merge(my_df_u, distcol_nca_name,
|
||||
by = "mutationinformation",
|
||||
all = T)
|
||||
}
|
||||
}
|
||||
|
||||
#===============================================
|
||||
# extract mutations <10 Angstroms and symbol
|
||||
#===============================================
|
||||
table(my_df_u[[lig_dist_colname]] < lig_dist_cutoff)
|
||||
|
||||
my_df_u_lig = my_df_u[my_df_u[[lig_dist_colname]] < lig_dist_cutoff,]
|
||||
|
||||
cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10\u212b of the ligand\n"))
|
||||
|
||||
# return list of DFs
|
||||
my_df = df
|
||||
#df_names = c("my_df", "my_df_u", "my_df_u_lig", "dup_muts")
|
||||
all_df = list(my_df, my_df_u, my_df_u_lig, dup_muts)
|
||||
#all_df = Map(setNames, all_df, df_names)
|
||||
|
||||
return(all_df)
|
||||
return(all_df)
|
||||
}
|
||||
########################################################################
|
||||
# end of data extraction and cleaning for plots #
|
||||
########################################################################
|
||||
|
||||
|
|
|
@ -23,36 +23,27 @@ import_dirs <- function(drug_name, gene_name) {
|
|||
|
||||
dr_muts_col <<- paste0('dr_mutations_', drug_name)
|
||||
other_muts_col <<- paste0('other_mutations_', drug_name)
|
||||
resistance_col <<- "drtype"
|
||||
gene_match <<- paste0(gene_name,"_p.")
|
||||
|
||||
}
|
||||
|
||||
# Other globals
|
||||
#=====================
|
||||
# Resistance colname
|
||||
#=====================
|
||||
resistance_col <<- "drtype"
|
||||
|
||||
# other globals
|
||||
#===============================
|
||||
# mcsm ligand distance cut off
|
||||
#===============================
|
||||
LigDist_colname <<- "ligand_distance"
|
||||
LigDist_cutoff <<- 10
|
||||
|
||||
DistCutOff <<- 10
|
||||
ppi2Dist_colname <<- "interface_dist"
|
||||
naDist_colname <<- "nca_distance" # added it
|
||||
#mcsm_lig_cutoff <<- 10
|
||||
|
||||
#==================
|
||||
# Angstroms symbol
|
||||
#==================
|
||||
angstroms_symbol <<- "\u212b"
|
||||
#cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))
|
||||
|
||||
#===============
|
||||
# Delta symbol
|
||||
#===============
|
||||
delta_symbol <<- "\u0394"; delta_symbol
|
||||
stability_suffix <- paste0(delta_symbol, delta_symbol, "G Kcal/mol")
|
||||
|
||||
#==========
|
||||
# Colours
|
||||
|
|
|
@ -1,198 +0,0 @@
|
|||
# position_annotation takes a Data Frame (plot_df) and returns the plot object
# produced by cowplot::plot_grid().
#
# This plots position tiles for the (up to) three ligands as well as drug.
#
# Two ggplot objects are built and stacked vertically:
#   heat_bar : one row of tiles (y = 0), one tile per position, filled with
#              plot_df$ligD_colours
#   pos_tiles: four rows of tiles (y = 1..4) coloured by the columns
#              bg_all, col_bg1, col_bg2 and col_bg3
#
# Arguments:
#   plot_df          data frame with a 'position' column. A 'pos_count' column
#                    is also required when reorder_position = TRUE. When
#                    generate_colours = FALSE it must already contain bg_all,
#                    col_bg1, col_bg2, col_bg3 and ligD_colours.
#   bg               fill of the plot and panel background of pos_tiles
#   reorder_position TRUE: x is reorder(position, -pos_count);
#                    FALSE: x is factor(position)
#   generate_colours TRUE: derive the colour-category columns here and call
#                    generate_distance_colour_map() (defined elsewhere)
#   aa_pos_drug, aa_pos_lig1, aa_pos_lig2, aa_pos_lig3
#                    position vectors labelled "drug", "lig1", "lig2", "lig3"
#   active_aa_pos    currently has no visible effect (see col_aa below)
#   drug_colour, lig1_colour, lig2_colour, lig3_colour
#                    colours mapped to the labels above in the manual scales
#   x_label          x-axis title passed to labs() for pos_tiles
#
# Returns: the object returned by cowplot::plot_grid().
position_annotation=function(plot_df,
                             bg="transparent",
                             reorder_position = FALSE, # enable to reorder according to plot_df$pos_count
                             generate_colours = TRUE, #set FALSE if you want to generate all the colour columns elsewhere
                             aa_pos_drug=1:100,
                             active_aa_pos=1:100,
                             aa_pos_lig1=1:100,
                             aa_pos_lig2=1:100,
                             aa_pos_lig3=1:100,
                             drug_colour='green',
                             lig1_colour='slategrey',
                             lig2_colour='navyblue',
                             lig3_colour='purple',
                             x_label=NULL
)
{
  # NOTE(review): the ten locals below are assigned but never referenced
  # again in this function.
  x_ats = 12
  x_tangle = 90
  x_tts = 20
  y_tts = 23
  xtt_col = "black"
  ytt_col = "black"
  leg_dir = "horizontal"
  leg_ts = 15
  leg_tts = 16
  leg_pos = "none"

  # plot_df=plot_df[order(plot_df$ligand_distance),]
  #
  # plot_df$position = factor(plot_df$position)
  #plot_df = generate_distance_colour_map(plot_df, debug=TRUE)
  # plot_df$col_aa = ifelse(plot_df[["position"]]%in%active_aa_pos,
  #                         "brown", "transparent")

  if (generate_colours){
    # Both branches of this ifelse() are "transparent", so col_aa is
    # "transparent" for every row regardless of active_aa_pos (the "brown"
    # variant is the commented-out version above).
    plot_df$col_aa = ifelse(plot_df[["position"]]%in%active_aa_pos,
                            "transparent", "transparent")

    # Each column below starts as a copy of the previous one and is then
    # overwritten with its own label wherever the position matches, so a
    # later label replaces an earlier one within that column.
    plot_df$bg_all = plot_df$col_aa
    plot_df$bg_all = ifelse(plot_df[["position"]]%in%aa_pos_drug,
                            "drug", plot_df$bg_all)

    plot_df$col_bg1 = plot_df$bg_all
    plot_df$col_bg1 = ifelse(plot_df[["position"]]%in%aa_pos_lig1,
                             "lig1", plot_df$col_bg1)

    plot_df$col_bg2 = plot_df$col_bg1
    plot_df$col_bg2 = ifelse(plot_df[["position"]]%in%aa_pos_lig2,
                             "lig2", plot_df$col_bg2)


    plot_df$col_bg3 = plot_df$col_bg2
    plot_df$col_bg3 = ifelse(plot_df[["position"]]%in%aa_pos_lig3
                             , "lig3", plot_df$col_bg3)

    # the call to generate_distance_colour_map should probably be
    # wherever the outer DF is built, and not here.
    # NOTE(review): presumably this helper adds the ligD_colours column used
    # by heat_bar below -- confirm against its definition.
    plot_df = generate_distance_colour_map(plot_df, debug=TRUE)
  }
  # Distance heat bar. x is mapped through factor(position) / reorder() in
  # the geom_tile() call that ends this chain.
  heat_bar = ggplot(plot_df) + # position is handled as a factor in geom_tile() below

    # scale_x_discrete("Position", labels=factor(plot_df$position)) +
    theme_nothing() +
    theme(#axis.text.x = element_text(angle = 90, size = 6),
      title = element_blank()
    ) + # enable for alignment debug
    labs(x = NULL, y = NULL) +

    # if reorder_position is turned on then we need to reorder 'x'
    # according to the pos_count column (creating this column is
    # left as a fun exercise to whoever reads this next)
    # The if/else expression is the right-hand operand of the final '+',
    # so exactly one geom_tile() layer is added to heat_bar.
    if(reorder_position) {
      geom_tile(aes(y=0, x=reorder(position,-pos_count)),
                fill=plot_df$ligD_colours)
    } else {
      geom_tile(aes(y=0, x=factor(position)),
                fill=plot_df$ligD_colours)
    }
  #end of distance-heat-bar
  #NULL,
  # Position tiles. The two branches differ only in how x is mapped:
  # reorder(position, -pos_count) versus factor(position).
  if(reorder_position) {
    pos_tiles = ggplot(plot_df) +
      #scale_x_discrete("Position", labels=factor(plot_df$position)) +
      scale_color_manual(values = c(
        "brown"="brown",
        "drug"=drug_colour,
        "transparent"="transparent",
        "lig1"=lig1_colour,
        "lig2"=lig2_colour,
        "lig3"=lig3_colour
      ),
      #expand=c(0,0)
      ) +
      scale_fill_manual(values = c(
        "brown"="brown",
        "drug"=drug_colour,
        "transparent"="transparent",
        "lig1"=lig1_colour,
        "lig2"=lig2_colour,
        "lig3"=lig3_colour
      ),
      #expand=c(0,0)
      ) +
      theme_nothing() +
      theme(plot.background = element_rect(fill = bg, colour=NA),
            #plot.margin = margin(t=0,b=0),
            panel.background = element_rect(fill = bg, colour=NA),
            legend.position = "none", axis.title.x = element_text(size = 8)
      ) +
      labs(x = x_label, y= NULL) +
      # one row of tiles per colour-category column (y = 1..4)
      geom_tile(aes(y = 1,x=reorder(position,-pos_count), fill = bg_all, colour = bg_all)
      ) +
      geom_tile(aes(y = 2, x=reorder(position,-pos_count), fill = col_bg1, colour = col_bg1)
      ) +
      geom_tile(aes(y = 3, x=reorder(position,-pos_count), fill = col_bg2, colour = col_bg2)
      ) +
      geom_tile(aes(y = 4, x=reorder(position,-pos_count), fill = col_bg3, colour = col_bg3)
      )

  } else {
    pos_tiles = ggplot(plot_df) +
      #scale_x_discrete("Position", labels=factor(plot_df$position)) +
      scale_color_manual(values = c(
        "brown"="brown",
        "drug"=drug_colour,
        "transparent"="transparent",
        "lig1"=lig1_colour,
        "lig2"=lig2_colour,
        "lig3"=lig3_colour
      ),
      #expand=c(0,0)
      ) +
      scale_fill_manual(values = c(
        "brown"="brown",
        "drug"=drug_colour,
        "transparent"="transparent",
        "lig1"=lig1_colour,
        "lig2"=lig2_colour,
        "lig3"=lig3_colour
      ),
      #expand=c(0,0)
      ) +
      theme_nothing() +
      theme(plot.background = element_rect(fill = bg, colour=NA),
            #plot.margin = margin(t=0,b=0),
            panel.background = element_rect(fill = bg, colour=NA),
            legend.position = "none", axis.title.x = element_text(size = 8)
      ) +
      labs(x = x_label, y= NULL) +
      # one row of tiles per colour-category column (y = 1..4)
      geom_tile(aes(y = 1, x=factor(position), fill = bg_all, colour = bg_all)
      ) +
      geom_tile(aes(y = 2, x=factor(position), fill = col_bg1, colour = col_bg1)
      ) +
      geom_tile(aes(y = 3, x=factor(position), fill = col_bg2, colour = col_bg2)
      ) +
      geom_tile(aes(y = 4, x=factor(position), fill = col_bg3, colour = col_bg3)
      )
  }
  # tile thingies end

  # NOTE(review): heat_legend is computed but not used in the grid below.
  heat_legend=get_legend(heat_bar)
  # Stack heat_bar above pos_tiles in a single column. The NULL entry is an
  # empty middle row given a negative relative height (-0.1) -- presumably to
  # pull the two panels closer together; confirm visually.
  out_plot=cowplot::plot_grid(
    heat_bar,
    NULL,
    pos_tiles,
    ncol=1,
    align='v',
    rel_heights = c(1,
                    -0.1,
                    2)
  )

  return(out_plot)
}
|
||||
|
||||
# position_annotation(small_df3,
|
||||
# aa_pos_drug=aa_pos_drug,
|
||||
# active_aa_pos=active_aa_pos,
|
||||
# aa_pos_lig1=aa_pos_lig1,
|
||||
# aa_pos_lig2=aa_pos_lig2,
|
||||
# aa_pos_lig3=aa_pos_lig3
|
||||
# )
|
||||
#
|
||||
# # proof that you can use this function to pass arbitrary lists of numbers :-)
|
||||
# position_annotation(merged_df3,
|
||||
# aa_pos_drug=1:1000,
|
||||
# active_aa_pos=1:1000,
|
||||
# aa_pos_lig1=1:1000,
|
||||
# aa_pos_lig2=1:1000,
|
||||
# aa_pos_lig3=1:1000,
|
||||
# drug_colour = "red",
|
||||
# lig1_colour = "green",
|
||||
# lig2_colour = "blue",
|
||||
# lig3_colour = "skyblue"
|
||||
# )
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env Rscript
|
||||
|
||||
#########################################################
|
||||
# TASK: function for barplot showing no. of sites with SAV
|
||||
# TASK: function for barplot showing no. of sites with nsSNP
|
||||
# count
|
||||
#########################################################
|
||||
# load libraries and functions
|
||||
|
@ -11,7 +11,7 @@ library(dplyr)
|
|||
|
||||
theme_set(theme_grey())
|
||||
#=================================================================
|
||||
# site_snp_count_bp(): barplots for no. of sites and SAV count
|
||||
# site_snp_count_bp(): barplots for no. of sites and nsSNP count
|
||||
# input args
|
||||
## df containing data to plot
|
||||
## df column name containing site/position numbers
|
||||
|
@ -22,67 +22,39 @@ theme_set(theme_grey())
|
|||
# visually might be nicer for it to be inside the plot
|
||||
#=================================================================
|
||||
|
||||
site_snp_count_bp <- function (plotdf,
|
||||
df_colname = "position",
|
||||
site_snp_count_bp <- function (plotdf
|
||||
, df_colname = "position"
|
||||
#, bp_plot_title = ""
|
||||
#, leg_title = "Legend title"
|
||||
leg_text_size = 10,#20
|
||||
axis_text_size = 10,#25
|
||||
axis_label_size = 10,#22
|
||||
subtitle_size = 10,#20
|
||||
geom_ls = 10,
|
||||
xaxis_title = "Number of SAVs",
|
||||
yaxis_title = "Number of Sites",
|
||||
title_colour = "chocolate4",
|
||||
subtitle_text = NULL,
|
||||
subtitle_colour = "pink",
|
||||
...
|
||||
)
|
||||
, leg_text_size = 20
|
||||
, axis_text_size = 25
|
||||
, axis_label_size = 22
|
||||
, xaxis_title = "Number of nsSNPs"
|
||||
, yaxis_title = "Number of Sites"
|
||||
, title_colour = "chocolate4"
|
||||
, subtitle_text = NULL
|
||||
, subtitle_size = 20
|
||||
, subtitle_colour = "pink")
|
||||
{
|
||||
|
||||
if (is.null(plotdf)){
|
||||
return(ggplot() + annotate(x=1,y=1,"text", label="NO DATA")+theme_void())
|
||||
}
|
||||
plotdf = as.data.frame(plotdf)
|
||||
# dim of plotdf
|
||||
cat(paste0("\noriginal df dimensions:"
|
||||
, "\nNo. of rows:", nrow(plotdf)
|
||||
, "\nNo. of cols:", ncol(plotdf)
|
||||
, "\nNow adding column: frequency of mutational positions"))
|
||||
|
||||
#-------------------------------------------
|
||||
# adding column: snpcount for each position
|
||||
#-------------------------------------------
|
||||
#setDT(plotdf)[, position_count_check := .N, by = .(eval(parse(text = df_colname)))]
|
||||
|
||||
# from dplyr
|
||||
plotdf = plotdf %>%
|
||||
dplyr::add_count(eval(parse(text = df_colname)))
|
||||
class(plotdf)
|
||||
plotdf = as.data.frame(plotdf)
|
||||
class(plotdf)
|
||||
nc_change = which(colnames(plotdf) == "n")
|
||||
colnames(plotdf)[nc_change] <- "position_count"
|
||||
class(plotdf)
|
||||
|
||||
# if (all(plotdf$position_count==plotdf$position_count_check) ){
|
||||
# cat("\nPASS: position_count column created")
|
||||
# plotdf = plotdf[, !colnames(plotdf)%in%c("position_count_check")]
|
||||
# }else{
|
||||
# stop("\nAbort: pos count numbes mismatch from dplyr and data.table")
|
||||
# }
|
||||
# adding snpcount for each position
|
||||
setDT(plotdf)[, pos_count := .N, by = .(eval(parse(text = df_colname)))]
|
||||
|
||||
cat("\nCumulative nssnp count\n"
|
||||
, table(plotdf$position_count))
|
||||
, table(plotdf$pos_count))
|
||||
|
||||
# calculating total no. of mutations
|
||||
tot_muts = sum(table(plotdf$position_count))
|
||||
|
||||
tot_muts = sum(table(plotdf$pos_count))
|
||||
|
||||
# sanity check
|
||||
if(tot_muts == nrow(plotdf)){
|
||||
cat("\nPASS: total number of mutations match"
|
||||
, "\nTotal no. of SAVs:", tot_muts)
|
||||
, "\nTotal no. of nsSNPs:", tot_muts)
|
||||
} else{
|
||||
cat("\nWARNING: total no. of muts = ", tot_muts
|
||||
, "\nExpected = ", nrow(plotdf))
|
||||
|
@ -93,26 +65,21 @@ site_snp_count_bp <- function (plotdf,
|
|||
, "\nNo. of rows:", nrow(plotdf)
|
||||
, "\nNo. of cols:", ncol(plotdf)))
|
||||
|
||||
#------------------------------------------------------
|
||||
# creating df: average count of snpcount for each position
|
||||
# created in earlier step
|
||||
#-------------------------------------------------------
|
||||
# use group by on position_count
|
||||
# use group by on pos_count
|
||||
snpsBYpos_df <- plotdf %>%
|
||||
dplyr::group_by(eval(parse(text = df_colname))) %>%
|
||||
dplyr::summarise(snpsBYpos = mean(position_count)) # changed from summarize!
|
||||
group_by(eval(parse(text = df_colname))) %>%
|
||||
summarize(snpsBYpos = mean(pos_count))
|
||||
|
||||
cat("\nnssnp count per position\n"
|
||||
, table(snpsBYpos_df$snpsBYpos)
|
||||
, "\n")
|
||||
cat("\nnssnp count\n"
|
||||
, table(snpsBYpos_df$snpsBYpos))
|
||||
|
||||
# calculating total no. of sites associated with SAVs
|
||||
# calculating total no. of sites associated with nsSNPs
|
||||
tot_sites = sum(table(snpsBYpos_df$snpsBYpos))
|
||||
|
||||
# sanity check
|
||||
if(tot_sites == length(unique(plotdf$position))){
|
||||
cat("\nPASS: total number of mutation sites match"
|
||||
, "\nTotal no. of sites with SAVs:", tot_sites)
|
||||
, "\nTotal no. of sites with nsSNPs:", tot_sites)
|
||||
} else{
|
||||
cat("WARNING: total no. of sites = ", tot_sites
|
||||
, "\nExpected = ", length(unique(plotdf$position)))
|
||||
|
@ -121,8 +88,8 @@ site_snp_count_bp <- function (plotdf,
|
|||
# FIXME: should really be legend title
|
||||
# but at the moment it is being used as the plot title
|
||||
#my_leg_title
|
||||
bp_plot_title = paste0("Total SAVs: ", tot_muts
|
||||
, "\nTotal sites: ", tot_sites)
|
||||
bp_plot_title = paste0("Total nsSNPs: ", tot_muts
|
||||
, ", Total no. of nsSNPs sites: ", tot_sites)
|
||||
|
||||
#-------------
|
||||
# start plot 2
|
||||
|
@ -131,14 +98,13 @@ site_snp_count_bp <- function (plotdf,
|
|||
# not sure whether to use this sorted or directly
|
||||
my_x = sort(unique(snpsBYpos_df$snpsBYpos))
|
||||
|
||||
ggplot(snpsBYpos_df, aes(x = snpsBYpos)) +
|
||||
geom_bar(aes (alpha = 0.5)
|
||||
g = ggplot(snpsBYpos_df, aes(x = snpsBYpos))
|
||||
OutPlot_pos_count = g + geom_bar(aes (alpha = 0.5)
|
||||
, show.legend = FALSE) +
|
||||
scale_x_continuous(breaks = unique(snpsBYpos_df$snpsBYpos)) +
|
||||
geom_label(stat = "count", aes(label = ..count..)
|
||||
, color = "black"
|
||||
, size = geom_ls
|
||||
, position = position_dodge2(width = 1)) +
|
||||
, size = 10) +
|
||||
theme(axis.text.x = element_text(size = axis_text_size
|
||||
, angle = 0)
|
||||
, axis.text.y = element_text(size = axis_text_size
|
||||
|
@ -149,24 +115,18 @@ site_snp_count_bp <- function (plotdf,
|
|||
#, legend.position = c(0.73,0.8)
|
||||
#, legend.text = element_text(size = leg_text_size)
|
||||
#, legend.title = element_text(size = axis_label_size)
|
||||
#, panel.grid.major = element_blank(),
|
||||
#, panel.grid.minor = element_blank(),
|
||||
, panel.grid = element_blank()
|
||||
, plot.title = element_text(size = leg_text_size
|
||||
, colour = title_colour
|
||||
, hjust = 0.5)
|
||||
, colour = title_colour)
|
||||
, plot.subtitle = element_text(size = subtitle_size
|
||||
, hjust = 0.5
|
||||
, colour = subtitle_colour)) +
|
||||
# labs(title = bp_plot_title
|
||||
# , subtitle = subtitle_text
|
||||
# , x = xaxis_title
|
||||
# , y = yaxis_title)
|
||||
|
||||
labs(title = ""
|
||||
, subtitle = bp_plot_title
|
||||
labs(title = bp_plot_title
|
||||
, subtitle = subtitle_text
|
||||
, x = xaxis_title
|
||||
, y = yaxis_title)
|
||||
|
||||
return(OutPlot_pos_count)
|
||||
}
|
||||
|
||||
########################################################################
|
||||
|
|
|
@ -1,104 +0,0 @@
|
|||
#########################################################
|
||||
# 1b: Define function: coloured barplot by subgroup
|
||||
# LINK: https://stackoverflow.com/questions/49818271/stacked-barplot-with-colour-gradients-for-each-bar
|
||||
#########################################################
|
||||
|
||||
# ColourPalleteMulti(): build one colour gradient per group.
#
# Arguments:
#   df       data frame holding both columns named below
#   group    name (string) of the grouping column
#   subgroup name (string) of the column whose distinct values are counted
#            within each group
#
# For every group a gradient is interpolated between a light hue
# (scales::hue_pal(l = 100)) and a dark hue (scales::hue_pal(l = 40)), with as
# many steps as the group has distinct subgroup values.
#
# Returns: a single character vector with the gradients of all groups
# concatenated in the row order of the aggregate() result.
ColourPalleteMulti = function(df, group, subgroup){

  # number of distinct subgroup values per group (second column of the result)
  shade_formula = as.formula(paste(subgroup, group, sep="~" ))
  shades_per_group = aggregate(shade_formula
                               , df
                               , function(x) length(unique(x)))
  n_groups = nrow(shades_per_group)

  # one hue per group: light end and dark end of each gradient
  light_ends = scales::hue_pal(l = 100)(n_groups)
  dark_ends = scales::hue_pal(l = 40)(n_groups)

  # interpolate light -> dark for each group in turn
  gradients = vector("list", n_groups)
  for (g in seq_len(n_groups)){
    make_ramp = colorRampPalette(colors = c(light_ends[g], dark_ends[g]))
    gradients[[g]] = make_ramp(shades_per_group[g, 2])
  }

  return(unlist(gradients))
}
|
||||
#########################################################################
|
||||
|
||||
# bp_stability_hmap(): barplot of mutation counts per position, with each bar
# segment shaded by stability outcome and stability value.
#
# Arguments:
#   plotdf                    data frame to plot (default: merged_df3 from the
#                             calling environment)
#   xvar_colname              column used for the x axis; sorted, then
#                             converted to a factor
#   stability_colname         column with the stability values
#   stability_outcome_colname column with the stability outcome category
#   p_title                   plot title
#   my_xaxls, my_yaxls        sizes applied to axis.text.x / axis.text.y
#   my_xaxts, my_yaxts        sizes applied to axis.title.x / axis.title.y
#   my_pts                    size applied to plot.title
#   my_xlab, my_ylab          axis titles
#
# Returns: the ggplot object.
bp_stability_hmap <- function(plotdf = merged_df3
                              , xvar_colname = "position"
                              #, bar_col_colname = "group"
                              , stability_colname = "duet_scaled"
                              , stability_outcome_colname = "duet_outcome"
                              , p_title = "" # "Protein stability (DUET)"
                              , my_xaxls = 12 # axis.text.x size
                              , my_yaxls = 20 # axis.text.y size
                              , my_xaxts = 18 # axis.title.x size
                              , my_yaxts = 20 # axis.title.y size
                              , my_pts = 20 # plot.title size
                              , my_xlab = "Position"
                              , my_ylab = "No. of nsSNPs"
)
{

  # sort rows by the x variable, then treat that variable as a factor
  row_order = order(plotdf[[xvar_colname]])
  plotdf = plotdf[row_order, ]
  plotdf[[xvar_colname]] = factor(plotdf[[xvar_colname]])

  # preview of x variable vs stability value (result is not used)
  head(data.frame( plotdf[[xvar_colname]], plotdf[[stability_colname]] ) )

  # stability values, used to derive the per-segment 'group' label
  stability_values = plotdf[[stability_colname]]
  n_unique_values = length(unique(stability_values))
  cat( "\nLength of nsSNPs:", length(stability_values)
       , "\nLength of unique values for nsSNPs:", n_unique_values )

  # 'group' = "<outcome>_<stability value>"
  plotdf$group = paste0(plotdf[[stability_outcome_colname]], "_", stability_values)

  cat("\nNo. of unique values in", stability_colname, "no rounding:"
      , n_unique_values)

  # one gradient per outcome, one shade per distinct stability value
  subcols_bp_hmap = ColourPalleteMulti(plotdf, stability_outcome_colname, stability_colname)

  cat("\nNo. of sub colours generated:", length(subcols_bp_hmap))

  #-------------------------------
  # Generate the subcols barplot
  #-------------------------------
  text_theme = theme( axis.text.x = element_text(size = my_xaxls
                                                 , angle = 90
                                                 , hjust = 1
                                                 , vjust = 0.4)
                      , axis.text.y = element_text(size = my_yaxls
                                                   , angle = 0
                                                   , hjust = 1
                                                   , vjust = 0)
                      , axis.title.x = element_text(size = my_xaxts)
                      , axis.title.y = element_text(size = my_yaxts )
                      , plot.title = element_text(size = my_pts
                                                  , hjust = 0.5))

  OutWidePlot = ggplot(plotdf, aes_string(x = xvar_colname)) +
    geom_bar(aes(fill = group)
             , colour = "grey") +
    scale_fill_manual( values = subcols_bp_hmap
                       , guide = "none") +
    text_theme +
    labs(title = p_title
         , x = my_xlab
         , y = my_ylab)

  return(OutWidePlot)
}
|
|
@ -1,603 +0,0 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: Script to format data for dm om plots:
|
||||
# generating WF and LF data for each of the parameters:
|
||||
# duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
|
||||
# Called by get_plotting_dfs.R
|
||||
|
||||
##################################################################
|
||||
# from plotting_globals.R
|
||||
# DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname
|
||||
|
||||
dm_om_wf_lf_data <- function(df
|
||||
, gene # from globals
|
||||
, colnames_to_extract
|
||||
#, ligand_dist_colname = LigDist_colname # from globals
|
||||
#, LigDist_colname # from globals used
|
||||
#, ppi2Dist_colname #from globals used
|
||||
#, naDist_colname #from globals used
|
||||
, dr_muts = dr_muts_col # from globals
|
||||
, other_muts = other_muts_col # from globals
|
||||
, snp_colname = "mutationinformation"
|
||||
, aa_pos_colname = "position" # to sort df by
|
||||
, mut_colname = "mutation"
|
||||
, mut_info_colname = "mutation_info"
|
||||
, mut_info_label_colname = "mutation_info_labels" # if empty, below used
|
||||
#, dr_other_muts_labels = c("DM", "OM") # only used if ^^ = ""
|
||||
, categ_cols_to_factor){
|
||||
|
||||
df = as.data.frame(df)
|
||||
df$maf = log10(df$maf) # can't see otherwise
|
||||
|
||||
# Initialise the required dfs based on gene name
|
||||
geneL_normal = c("pnca")
|
||||
geneL_na = c("gid", "rpob")
|
||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
|
||||
# common_dfs
|
||||
common_dfsL = list(
|
||||
wf_duet = data.frame()
|
||||
, lf_duet = data.frame()
|
||||
, wf_mcsm_lig = data.frame()
|
||||
, lf_mcsm_lig = data.frame()
|
||||
, wf_foldx = data.frame()
|
||||
, lf_foldx = data.frame()
|
||||
, wf_deepddg = data.frame()
|
||||
, lf_deepddg = data.frame()
|
||||
, wf_dynamut2 = data.frame()
|
||||
, lf_dynamut2 = data.frame()
|
||||
, wf_consurf = data.frame()
|
||||
, lf_consurf = data.frame()
|
||||
, wf_snap2 = data.frame()
|
||||
, lf_snap2 = data.frame()
|
||||
)
|
||||
|
||||
# additional dfs
|
||||
if (tolower(gene)%in%geneL_normal){
|
||||
wf_lf_dataL = common_dfsL
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
additional_dfL = list(
|
||||
wf_mcsm_na = data.frame()
|
||||
, lf_mcsm_na = data.frame()
|
||||
)
|
||||
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
additional_dfL = list(
|
||||
wf_mcsm_ppi2 = data.frame()
|
||||
, lf_mcsm_ppi2 = data.frame()
|
||||
)
|
||||
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
||||
}
|
||||
cat("\nInitializing an empty list of length:"
|
||||
, length(wf_lf_dataL))
|
||||
|
||||
#=======================================================================
|
||||
if (missing(colnames_to_extract)){
|
||||
|
||||
colnames_to_extract = c(snp_colname
|
||||
, mut_colname, mut_info_colname, mut_info_label_colname
|
||||
, aa_pos_colname
|
||||
, LigDist_colname # from globals
|
||||
, ppi2Dist_colname # from globals
|
||||
, naDist_colname # from globals
|
||||
, "duet_stability_change" , "duet_scaled" , "duet_outcome"
|
||||
, "ligand_affinity_change", "affinity_scaled" , "ligand_outcome"
|
||||
, "ddg_foldx" , "foldx_scaled" , "foldx_outcome"
|
||||
, "deepddg" , "deepddg_scaled" , "deepddg_outcome"
|
||||
, "asa" , "rsa"
|
||||
, "rd_values" , "kd_values"
|
||||
, "log10_or_mychisq" , "neglog_pval_fisher" , "maf" #"af"
|
||||
, "ddg_dynamut2" , "ddg_dynamut2_scaled", "ddg_dynamut2_outcome"
|
||||
, "mcsm_ppi2_affinity" , "mcsm_ppi2_scaled" , "mcsm_ppi2_outcome"
|
||||
, "consurf_score" , "consurf_scaled" , "consurf_outcome" # exists now
|
||||
, "consurf_colour_rev"
|
||||
, "snap2_score" , "snap2_scaled" , "snap2_outcome"
|
||||
, "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome"
|
||||
, "provean_score" , "provean_scaled" , "provean_outcome")
|
||||
|
||||
}else{
|
||||
colnames_to_extract = c(mut_colname, mut_info_colname, mut_info_label_colname
|
||||
, aa_pos_colname, LigDist_colname
|
||||
, colnames_to_extract)
|
||||
}
|
||||
comb_df = df[, colnames(df)%in%colnames_to_extract]
|
||||
comb_df_s = dplyr::arrange(comb_df, aa_pos_colname)
|
||||
|
||||
#=======================================================================
|
||||
if(missing(categ_cols_to_factor)){
|
||||
categ_cols_to_factor = grep( "_outcome|_info", colnames(comb_df_s) )
|
||||
}else{
|
||||
categ_cols_to_factor = categ_cols_to_factor
|
||||
}
|
||||
#fact_cols = colnames(comb_df_s)[grepl( "_outcome|_info", colnames(comb_df_s) )]
|
||||
fact_cols = colnames(comb_df_s)[categ_cols_to_factor]
|
||||
|
||||
if (any(lapply(comb_df_s[, fact_cols], class) == "character")){
|
||||
cat("\nChanging", length(categ_cols_to_factor), "cols to factor")
|
||||
comb_df_s[, fact_cols] <- lapply(comb_df_s[, fact_cols], as.factor)
|
||||
if (all(lapply(comb_df_s[, fact_cols], class) == "factor")){
|
||||
cat("\nSuccessful: cols changed to factor")
|
||||
}
|
||||
}else{
|
||||
cat("\nRequested cols aready factors")
|
||||
}
|
||||
#=======================================================================
|
||||
table(comb_df_s[[mut_info_colname]])
|
||||
|
||||
# pretty display names i.e. labels to reduce major code duplication later
|
||||
foo_cnames = data.frame(colnames(comb_df_s))
|
||||
names(foo_cnames) <- "old_name"
|
||||
|
||||
stability_suffix <- paste0(delta_symbol, delta_symbol, "G")
|
||||
#flexibility_suffix <- paste0(delta_symbol, delta_symbol, "S")
|
||||
|
||||
#lig_dn = paste0("Ligand distance (", angstroms_symbol, ")"); lig_dn
|
||||
#mcsm_lig_dn = paste0("Ligand affinity (log fold change)"); mcsm_lig_dn
|
||||
|
||||
lig_dn = paste0("Lig Dist(", angstroms_symbol, ")"); lig_dn
|
||||
mcsm_lig_dn = paste0("mCSM-lig\n(Log fold change)"); mcsm_lig_dn
|
||||
|
||||
duet_dn = paste0("DUET ", stability_suffix); duet_dn
|
||||
foldx_dn = paste0("FoldX ", stability_suffix); foldx_dn
|
||||
deepddg_dn = paste0("Deepddg " , stability_suffix); deepddg_dn
|
||||
dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn
|
||||
|
||||
mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn
|
||||
mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn
|
||||
consurf_dn = paste0("ConSurf"); consurf_dn
|
||||
snap2_dn = paste0("SNAP2"); snap2_dn
|
||||
provean_dn = paste0("PROVEAN"); provean_dn
|
||||
|
||||
# change column names: plyr
|
||||
new_colnames = c(asa = "ASA"
|
||||
, rsa = "RSA"
|
||||
, rd_values = "RD"
|
||||
, kd_values = "KD"
|
||||
#, log10_or_mychisq = "Log10(OR)"
|
||||
#, neglog_pval_fisher = "-Log(P)"
|
||||
#, af = "MAF"
|
||||
, maf = "Log10(MAF)"
|
||||
#, ligand_dist_colname= lig_dn # cannot handle variable name 'ligand_dist_colname'
|
||||
, affinity_scaled = mcsm_lig_dn
|
||||
, duet_scaled = duet_dn
|
||||
, foldx_scaled = foldx_dn
|
||||
, deepddg_scaled = deepddg_dn
|
||||
, ddg_dynamut2_scaled = dynamut2_dn
|
||||
, mcsm_na_scaled = mcsm_na_dn
|
||||
, mcsm_ppi2_scaled = mcsm_ppi2_dn
|
||||
#, consurf_scaled = consurf_dn
|
||||
, consurf_score = consurf_dn
|
||||
#, consurf_colour_rev = consurf_dn
|
||||
#, snap2_scaled = snap2_dn
|
||||
, snap2_score = snap2_dn
|
||||
, provean_score = provean_dn)
|
||||
|
||||
|
||||
comb_df_sl1 = plyr::rename(comb_df_s
|
||||
, replace = new_colnames
|
||||
, warn_missing = T
|
||||
, warn_duplicated = T)
|
||||
|
||||
# renaming colname using variable i.e ligand_dist_colname: dplyr
|
||||
#comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname))
|
||||
comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(LigDist_colname)) # NEW
|
||||
names(comb_df_sl)
|
||||
|
||||
#=======================
|
||||
# NEW: Affinity filtered data
|
||||
#========================
|
||||
# mcsm-lig --> LigDist_colname
|
||||
comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dn]]<DistCutOff,]
|
||||
|
||||
# mcsm-ppi2 --> ppi2Dist_colname
|
||||
comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2Dist_colname]]<DistCutOff,]
|
||||
|
||||
# mcsm-na --> naDist_colname
|
||||
comb_df_sl_na = comb_df_sl[comb_df_sl[[naDist_colname]]<DistCutOff,]
|
||||
|
||||
#####################################################################
|
||||
static_cols1 = mut_info_label_colname
|
||||
#######################################################################
|
||||
#======================
|
||||
# Selecting dfs
|
||||
# with appropriate cols
|
||||
#=======================
|
||||
static_cols_start = c(snp_colname
|
||||
, aa_pos_colname
|
||||
, mut_colname
|
||||
, static_cols1)
|
||||
|
||||
# ordering is important!
|
||||
static_cols_end = c(lig_dn
|
||||
, "ASA"
|
||||
, "RSA"
|
||||
, "RD"
|
||||
, "KD"
|
||||
, "Log10(MAF)"
|
||||
#, "Log10(OR)"
|
||||
#, "-Log(P)"
|
||||
)
|
||||
|
||||
#########################################################################
|
||||
#==============
|
||||
# DUET
|
||||
#==============
|
||||
# WF data: duet
|
||||
cols_to_select_duet = c(static_cols_start, c("duet_outcome", duet_dn), static_cols_end)
|
||||
wf_duet = comb_df_sl[, cols_to_select_duet]
|
||||
|
||||
#pivot_cols_ps = cols_to_select_ps[1:5]; pivot_cols_ps
|
||||
pivot_cols_duet = cols_to_select_duet[1: (length(static_cols_start) + 1)]; pivot_cols_duet
|
||||
expected_rows_lf = nrow(wf_duet) * (length(wf_duet) - length(pivot_cols_duet))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: duet
|
||||
lf_duet = tidyr::gather(wf_duet
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(duet_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_duet) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", duet_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_duet$outcome_colname = "duet_outcome"
|
||||
lf_duet$outcome = lf_duet$duet_outcome
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_duet']] = wf_duet
|
||||
wf_lf_dataL[['lf_duet']] = lf_duet
|
||||
|
||||
############################################################################
|
||||
#==============
|
||||
# FoldX
|
||||
#==============
|
||||
# WF data: Foldx
|
||||
cols_to_select_foldx= c(static_cols_start, c("foldx_outcome", foldx_dn), static_cols_end)
|
||||
wf_foldx = comb_df_sl[, cols_to_select_foldx]
|
||||
|
||||
pivot_cols_foldx = cols_to_select_foldx[1: (length(static_cols_start) + 1)]; pivot_cols_foldx
|
||||
expected_rows_lf = nrow(wf_foldx) * (length(wf_foldx) - length(pivot_cols_foldx))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: Foldx
|
||||
lf_foldx = gather(wf_foldx
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(foldx_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_foldx) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", foldx_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW column
|
||||
lf_foldx$outcome_colname = "foldx_outcome"
|
||||
lf_foldx$outcome = lf_foldx$foldx_outcome
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_foldx']] = wf_foldx
|
||||
wf_lf_dataL[['lf_foldx']] = lf_foldx
|
||||
|
||||
############################################################################
|
||||
#==============
|
||||
# Deepddg
|
||||
#==============
|
||||
# WF data: deepddg
|
||||
cols_to_select_deepddg = c(static_cols_start, c("deepddg_outcome", deepddg_dn), static_cols_end)
|
||||
wf_deepddg = comb_df_sl[, cols_to_select_deepddg]
|
||||
|
||||
pivot_cols_deepddg = cols_to_select_deepddg[1: (length(static_cols_start) + 1)]; pivot_cols_deepddg
|
||||
expected_rows_lf = nrow(wf_deepddg) * (length(wf_deepddg) - length(pivot_cols_deepddg))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: Deepddg
|
||||
lf_deepddg = gather(wf_deepddg
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(deepddg_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_deepddg) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", deepddg_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_deepddg$outcome_colname = "deepddg_outcome"
|
||||
lf_deepddg$outcome = lf_deepddg$deepddg_outcome
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_deepddg']] = wf_deepddg
|
||||
wf_lf_dataL[['lf_deepddg']] = lf_deepddg
|
||||
############################################################################
|
||||
#==============
|
||||
# Dynamut2: LF
|
||||
#==============
|
||||
# WF data: dynamut2
|
||||
cols_to_select_dynamut2 = c(static_cols_start, c("ddg_dynamut2_outcome", dynamut2_dn), static_cols_end)
|
||||
wf_dynamut2 = comb_df_sl[, cols_to_select_dynamut2]
|
||||
|
||||
pivot_cols_dynamut2 = cols_to_select_dynamut2[1: (length(static_cols_start) + 1)]; pivot_cols_dynamut2
|
||||
expected_rows_lf = nrow(wf_dynamut2) * (length(wf_dynamut2) - length(pivot_cols_dynamut2))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: dynamut2
|
||||
lf_dynamut2 = gather(wf_dynamut2
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(dynamut2_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_dynamut2) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", dynamut2_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_dynamut2$outcome_colname = "ddg_dynamut2_outcome"
|
||||
lf_dynamut2$outcome = lf_dynamut2$ddg_dynamut2_outcome
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_dynamut2']] = wf_dynamut2
|
||||
wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2
|
||||
|
||||
|
||||
######################################################################################
|
||||
#==================
|
||||
# Consurf: LF
|
||||
#https://consurf.tau.ac.il/overview.php
|
||||
# consurf_score:
|
||||
# <0 (below average): slowly evolving i.e CONSERVED
|
||||
# >0 (above average): rapidly evolving, i.e VARIABLE
|
||||
#table(df$consurf_colour_rev)
|
||||
# TODO
|
||||
#1--> "most_variable", 2--> "", 3-->"", 4-->""
|
||||
#5-->"", 6-->"", 7-->"", 8-->"", 9-->"most_conserved"
|
||||
#====================
|
||||
# WF data: consurf
|
||||
cols_to_select_consurf = c(static_cols_start, c("consurf_outcome", consurf_dn), static_cols_end)
|
||||
wf_consurf = comb_df_sl[, cols_to_select_consurf]
|
||||
|
||||
pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start) + 1)]; pivot_cols_consurf
|
||||
expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf))
|
||||
expected_rows_lf
|
||||
|
||||
# when outcome didn't exist
|
||||
#cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end)
|
||||
#wf_consurf = comb_df_sl[, cols_to_select_consurf]
|
||||
#
|
||||
# pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start))]; pivot_cols_consurf
|
||||
# expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf))
|
||||
# expected_rows_lf
|
||||
|
||||
# LF data: consurf
|
||||
lf_consurf = gather(wf_consurf
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(consurf_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_consurf) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", consurf_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_consurf$outcome_colname = "consurf_outcome"
|
||||
lf_consurf$outcome = lf_consurf$consurf_outcome
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_consurf']] = wf_consurf
|
||||
wf_lf_dataL[['lf_consurf']] = lf_consurf
|
||||
###########################################################################
|
||||
#==============
|
||||
# SNAP2: LF
|
||||
#==============
|
||||
# WF data: snap2
|
||||
cols_to_select_snap2 = c(static_cols_start, c("snap2_outcome", snap2_dn), static_cols_end)
|
||||
wf_snap2 = comb_df_sl[, cols_to_select_snap2]
|
||||
|
||||
pivot_cols_snap2 = cols_to_select_snap2[1: (length(static_cols_start) + 1)]; pivot_cols_snap2
|
||||
expected_rows_lf = nrow(wf_snap2) * (length(wf_snap2) - length(pivot_cols_snap2))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: snap2
|
||||
lf_snap2 = gather(wf_snap2
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(snap2_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_snap2) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", snap2_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_snap2$outcome_colname = "snap2_outcome"
|
||||
lf_snap2$outcome = lf_snap2$snap2_outcome
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_snap2']] = wf_snap2
|
||||
wf_lf_dataL[['lf_snap2']] = lf_snap2
|
||||
|
||||
#==============
|
||||
# Provean2: LF
|
||||
#==============
|
||||
# WF data: provean
|
||||
cols_to_select_provean = c(static_cols_start, c("provean_outcome", provean_dn), static_cols_end)
|
||||
wf_provean = comb_df_sl[, cols_to_select_provean]
|
||||
|
||||
pivot_cols_provean = cols_to_select_provean[1: (length(static_cols_start) + 1)]; pivot_cols_provean
|
||||
expected_rows_lf = nrow(wf_provean) * (length(wf_provean) - length(pivot_cols_provean))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: provean
|
||||
lf_provean = gather(wf_provean
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(provean_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_provean) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", provean_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_provean$outcome_colname = "provean_outcome"
|
||||
lf_provean$outcome = lf_provean$provean_outcome
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_provean']] = wf_provean
|
||||
wf_lf_dataL[['lf_provean']] = lf_provean
|
||||
|
||||
|
||||
###########################################################################
|
||||
# AFFINITY cols
|
||||
###########################################################################
|
||||
#=========================
|
||||
# mCSM-lig:
|
||||
# data filtered by cut off
|
||||
#=========================
|
||||
#---------------------
|
||||
# mCSM-lig: WF and lF
|
||||
#----------------------
|
||||
# WF data: mcsm_lig
|
||||
cols_to_select_mcsm_lig = c(static_cols_start, c("ligand_outcome", mcsm_lig_dn), static_cols_end)
|
||||
wf_mcsm_lig = comb_df_sl_lig[, cols_to_select_mcsm_lig] # filtered df
|
||||
|
||||
pivot_cols_mcsm_lig = cols_to_select_mcsm_lig[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_lig
|
||||
expected_rows_lf = nrow(wf_mcsm_lig) * (length(wf_mcsm_lig) - length(pivot_cols_mcsm_lig))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: mcsm_lig
|
||||
lf_mcsm_lig = gather(wf_mcsm_lig
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(mcsm_lig_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_mcsm_lig) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", mcsm_lig_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for mcsm_lig")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_mcsm_lig$outcome_colname = "ligand_outcome"
|
||||
lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig
|
||||
wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig
|
||||
|
||||
#====================
|
||||
# mcsm-NA affinity
|
||||
# data filtered by cut off
|
||||
#====================
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
#---------------
|
||||
# mCSM-NA: WF and lF
|
||||
#-----------------
|
||||
# WF data: mcsm-na
|
||||
cols_to_select_mcsm_na = c(static_cols_start, c("mcsm_na_outcome", mcsm_na_dn), static_cols_end)
|
||||
#wf_mcsm_na = comb_df_sl[, cols_to_select_mcsm_na]
|
||||
wf_mcsm_na = comb_df_sl_na[, cols_to_select_mcsm_na]
|
||||
|
||||
pivot_cols_mcsm_na = cols_to_select_mcsm_na[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_na
|
||||
expected_rows_lf = nrow(wf_mcsm_na) * (length(wf_mcsm_na) - length(pivot_cols_mcsm_na))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: mcsm-na
|
||||
lf_mcsm_na = gather(wf_mcsm_na
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(mcsm_na_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_mcsm_na) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", mcsm_na_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_mcsm_na$outcome_colname = "mcsm_na_outcome"
|
||||
lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na
|
||||
wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na
|
||||
|
||||
}
|
||||
|
||||
#=========================
|
||||
# mcsm-ppi2 affinity
|
||||
# data filtered by cut off
|
||||
#========================
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
#-----------------
|
||||
# mCSM-PPI2: WF and lF
|
||||
#-----------------
|
||||
# WF data: mcsm-ppi2
|
||||
cols_to_select_mcsm_ppi2 = c(static_cols_start, c("mcsm_ppi2_outcome", mcsm_ppi2_dn), static_cols_end)
|
||||
#wf_mcsm_ppi2 = comb_df_sl[, cols_to_select_mcsm_ppi2]
|
||||
wf_mcsm_ppi2 = comb_df_sl_ppi2[, cols_to_select_mcsm_ppi2]
|
||||
|
||||
pivot_cols_mcsm_ppi2 = cols_to_select_mcsm_ppi2[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_ppi2
|
||||
expected_rows_lf = nrow(wf_mcsm_ppi2) * (length(wf_mcsm_ppi2) - length(pivot_cols_mcsm_ppi2))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: mcsm-ppi2
|
||||
lf_mcsm_ppi2 = gather(wf_mcsm_ppi2
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(mcsm_ppi2_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_mcsm_ppi2) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", mcsm_ppi2_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_mcsm_ppi2$outcome_colname = "mcsm_ppi2_outcome"
|
||||
lf_mcsm_ppi2$outcome = lf_mcsm_ppi2$mcsm_ppi2_outcome
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mcsm_ppi2']] = wf_mcsm_ppi2
|
||||
wf_lf_dataL[['lf_mcsm_ppi2']] = lf_mcsm_ppi2
|
||||
|
||||
}
|
||||
|
||||
return(wf_lf_dataL)
|
||||
}
|
||||
############################################################################
|
|
@ -1,97 +0,0 @@
|
|||
library(ggpubr)
|
||||
###################################################################
|
||||
|
||||
####################################
|
||||
# lf_bp_with_stats():
# Draws a faceted violin + beeswarm plot from long format data and annotates
# it with a group comparison drawn by stat_compare_means() (ggpubr).
#
# Input:
#   lf_df            : long format data frame to plot
#   x_grp            : string, parsed as R code and evaluated against lf_df
#                      for the x-axis (normally a column name)
#   y_var            : string, parsed and evaluated against lf_df for the y-axis
#   facet_var        : string, parsed and evaluated against lf_df to build facets
#   n_facet_row      : number of facet rows (facet_wrap 'nrow')
#   y_scales         : facet_wrap 'scales' argument
#   p_title          : plot title
#   colour_categ     : string, parsed and evaluated against lf_df, then turned
#                      into a factor used to colour the dots.
#                      "" (the default) draws the dots without a colour mapping
#   colour_bp_strip  : fill colour of the facet strip background
#   stat_grp_comp    : the groups compared by stat_compare_means()
#   stat_method      : passed to stat_compare_means(method = )
#   my_paired        : passed to stat_compare_means(paired = )
#   bp_width         : "auto" or a number; only the first element is used.
#                      The value is only reported: no boxplot layer is drawn here
#   dot_size         : size of the beeswarm dots
#   dot_transparency : alpha of the beeswarm dots
#   stat_label       : only the first element is passed on as the stats label
#   my_ats           : axis text, axis title and legend text size
#   my_als           : legend title size
#   my_fls           : facet label size
#   my_pts           : plot title size
#
# Returns: the ggplot object
lf_bp_with_stats <- function(lf_df
                             , x_grp = "mutation_info"
                             , y_var = "param_value"
                             , facet_var = "param_type"
                             , n_facet_row = 1
                             , y_scales = "free_y"
                             , p_title = ""
                             , colour_categ = ""
                             , colour_bp_strip = "khaki2"
                             , stat_grp_comp = c("DM", "OM")
                             , stat_method = "wilcox.test"
                             , my_paired = FALSE
                             , bp_width = c("auto", 0.5)
                             , dot_size = 3
                             , dot_transparency = 0.3
                             , stat_label = c("p.format", "p.signif")
                             , my_ats = 22 # axis text size
                             , my_als = 20 # axis label size
                             , my_fls = 20 # facet label size
                             , my_pts = 22 # plot title size
) {

  # The default is a length-2 vector listing the accepted values. if() needs
  # a single TRUE/FALSE (a longer condition is an error from R 4.2 onwards),
  # so only the first element is considered.
  bp_width = bp_width[1]

  if (isTRUE(bp_width == "auto")){
    bp_width = 0.5/length(unique(lf_df[[x_grp]]))
    cat("\nAutomatically calculated boxplot width, using bp_width:\n", bp_width, "\n")
  }else{
    cat("\nBoxplot width value provided, using:", bp_width, "\n")
  }

  my_comparisonsL <- list( stat_grp_comp )

  # An empty colour_categ would evaluate to a zero-length aesthetic, so the
  # colour mapping is only built when a category was actually requested.
  if (length(colour_categ) == 1 && nzchar(colour_categ)){
    dot_mapping = aes(colour = factor(eval(parse(text = colour_categ))))
  }else{
    dot_mapping = NULL
  }

  bp_statP <- ggplot(lf_df, aes(x = eval(parse(text = x_grp))
                                , y = eval(parse(text = y_var)) )) +

    facet_wrap(~ eval(parse(text = facet_var))
               , nrow = n_facet_row
               , scales = y_scales) +

    geom_violin(trim = T
                , scale = "width"
                , draw_quantiles = c(0.25, 0.5, 0.75)) +

    # ggbeeswarm (better than geom_point); namespace-qualified because this
    # file only attaches ggpubr
    ggbeeswarm::geom_beeswarm(mapping = dot_mapping
                              , priority = "density"
                              , size = dot_size
                              , alpha = dot_transparency
                              , show.legend = FALSE
                              , cex = 0.8) +

    theme(axis.text.x = element_text(size = my_ats)
          , axis.text.y = element_text(size = my_ats
                                       , angle = 0
                                       , hjust = 1
                                       , vjust = 0)
          , axis.title.x = element_text(size = my_ats)
          , axis.title.y = element_text(size = my_ats)
          , plot.title = element_text(size = my_pts
                                      , hjust = 0.5
                                      , colour = "black"
                                      , face = "bold")
          , strip.background = element_rect(fill = colour_bp_strip)
          , strip.text.x = element_text(size = my_fls
                                        , colour = "black")
          , legend.title = element_text(color = "black"
                                        , size = my_als)
          , legend.text = element_text(size = my_ats)
          , legend.direction = "vertical") +

    labs(title = p_title
         , x = ""
         , y = "") +

    stat_compare_means(comparisons = my_comparisonsL
                       , method = stat_method
                       , paired = my_paired
                       , label = stat_label[1])

  return(bp_statP)

}
|
|
@ -1,319 +0,0 @@
|
|||
#####################################################################################
|
||||
# LogoPlotMSA():
|
||||
# Input:
|
||||
# Data:
|
||||
# msaSeq_mut: MSA chr vector for muts
|
||||
# msaSeq_wt: MSA chr vector for wt (marked optional, but the function body uses it unconditionally)
|
||||
|
||||
# Others params:
|
||||
# plot_positions: can choose what positions to plot
|
||||
# msa_method : can be "bits" or "probability"
|
||||
# my_logo_col : can be "chemistry", "hydrophobicity", "taylor" or "clustalx"
|
||||
|
||||
# Returns a combined logo plot: mutant MSA logo stacked above the wild-type logo
|
||||
|
||||
#...
|
||||
|
||||
# TODO: SHINY
|
||||
# drop down: my_logo_col i.e the 4 colour choices
|
||||
# drop down: for DataED_PFM(), ED score options:
|
||||
# c("log", "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
|
||||
# drop down/enter field: for DataED_PFM(), background probability
|
||||
# Make it hover over position and then get the corresponding data table!
|
||||
###################################################################################
|
||||
|
||||
#==================
|
||||
# logo data: OR
|
||||
#==================
|
||||
# Helper: split aligned sequences into a residue table with one row per
# alignment position and one column per sequence.
.logo_msa_residues <- function(seqs){
  data.frame(sapply(seqs, function(x) unlist(strsplit(x, ""))))
}

# Helper: keep only the requested alignment positions of every sequence and
# return one pasted string per sequence. Stops with an error naming the
# offending positions when any requested position is outside the alignment.
.logo_msa_pick_positions <- function(seqs, plot_positions, count_label){
  residuesDF = .logo_msa_residues(seqs)
  valid_pos  = as.numeric(rownames(residuesDF))

  if (!all(plot_positions %in% valid_pos)){
    cat("\nNo. of positions selected:", length(plot_positions))
    pos_ofr = plot_positions[!plot_positions %in% valid_pos]
    cat("\n1 or more plot_positions out of range..."
        , "\nThese are:\n", pos_ofr
        , "\nQuitting! Resubmit with correct plot_positions")
    # stop() rather than quit(): raise an error the caller can handle instead
    # of terminating the whole R session
    stop("plot_positions out of range: ", paste(pos_ofr, collapse = ", ")
         , call. = FALSE)
  }

  cat("\nAll positions within range"
      , "\nProceeding with generating requested position MSA seqs..."
      , count_label, length(plot_positions))

  # drop = FALSE keeps the table shape when a single position or a single
  # sequence is requested
  chosen = as.matrix(residuesDF[plot_positions, , drop = FALSE])
  unname(apply(chosen, 2, paste, collapse = ""))
}

# LogoPlotMSA():
# Builds a logo plot of the mutant MSA and a logo plot of the wild-type
# sequence(s) with ggseqlogo(), and stacks them with cowplot::plot_grid().
#
# Input:
#   msaSeq_mut     : chr vector of aligned mutant sequences
#   msaSeq_wt      : chr vector of wild-type sequence(s); used unconditionally
#   plot_positions : positions to plot; when missing the entire MSA is plotted
#   msa_method     : passed to ggseqlogo(method = ), 'bits' or 'probability'
#   my_logo_col    : colour scheme; 'clustalx'/'taylor' use a black plot
#                    background, 'chemistry'/'hydrophobicity' a grey one.
#                    Any other value is an error
#   x_lab          : x-axis label of the mutant logo plot
#   y_lab          : accepted but not used by this function
#   x_ats, y_ats   : axis text sizes;  x_tangle, y_tangle: axis text angles
#   x_axis_offset  : x-axis expansion used when plot_positions is given
#   x_tts, y_tts   : axis title sizes
#   leg_pos, leg_dir, leg_ts, leg_tts : legend position, direction, text size
#                    and title size (the wild-type plot never shows a legend)
#
# Returns: the combined plot from cowplot::plot_grid()
LogoPlotMSA <- function(msaSeq_mut
                        , msaSeq_wt
                        , plot_positions
                        , msa_method = 'bits' # or probability
                        , my_logo_col = "chemistry"
                        , x_lab = "Wild-type position"
                        , y_lab = ""
                        , x_ats = 13 # text size
                        , x_tangle = 90 # text angle
                        , x_axis_offset = 0.07 # dist b/w y-axis and plot start
                        , y_ats = 13
                        , y_tangle = 0
                        , x_tts = 13 # title size
                        , y_tts = 13
                        , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
                        , leg_dir = "horizontal" #can be vertical or horizontal
                        , leg_ts = 16 # leg text size
                        , leg_tts = 16 # leg title size
)

{

  ############################################
  # Data processing for logo plot for nsSNPS
  ###########################################
  cat("\nLength of MSA", length(msaSeq_mut)
      , "\nlength of WT seq:", length(msaSeq_wt))

  # Recorded once, before plot_positions is reassigned below
  plot_entire_msa = missing(plot_positions)

  if (plot_entire_msa){
    cat("\n======================="
        , "\nPlotting entire MSA"
        , "\n========================")

    msa_seq_plot = msaSeq_mut
    mut_x_pos    = as.numeric(rownames(.logo_msa_residues(msaSeq_mut)))

    wt_seq_plot  = msaSeq_wt
    wt_x_pos     = as.numeric(rownames(.logo_msa_residues(msaSeq_wt)))

  } else {
    cat("\nUser specified plotting positions for MSA:"
        , "\nThese are:\n", plot_positions
        , "\nSorting plot positions...")

    plot_positions = sort(plot_positions)

    cat("\nPlotting positions sorted:\n"
        , plot_positions)

    #-----------
    # MSA: mut
    #-----------
    cat("\n==========================================="
        , "\nGenerating MSA: filtered positions"
        , "\n===========================================")

    msa_seq_plot = .logo_msa_pick_positions(msaSeq_mut
                                            , plot_positions
                                            , "\nNo. of positions in plot:")
    mut_x_pos = plot_positions

    #-----------
    # WT: fasta
    #-----------
    cat("\n========================================="
        , "\nGenerating WT fasta: filtered positions"
        ,"\n===========================================")

    wt_seq_plot = .logo_msa_pick_positions(msaSeq_wt
                                           , plot_positions
                                           , "\nplot positions:")
    wt_x_pos = plot_positions
  }

  ######################################
  # Generating plots for muts and wt
  #####################################
  if (my_logo_col %in% c('clustalx','taylor')) {
    cat("\nSelected colour scheme:", my_logo_col
        , "\nUsing black theme\n")

    theme_bgc = "black"
    xfont_bgc = "white"
    yfont_bgc = "white"
    xtt_col   = "white"
    ytt_col   = "white"

  } else if (my_logo_col %in% c('chemistry', 'hydrophobicity')) {
    cat("\nstart of MSA"
        , '\nSelected colour scheme:', my_logo_col
        , "\nUsing grey theme")

    theme_bgc = "grey"
    xfont_bgc = "black"
    yfont_bgc = "black"
    xtt_col   = "black"
    ytt_col   = "black"

  } else {
    # Without this the theme colours below would simply be undefined
    stop("Unsupported my_logo_col: '", my_logo_col
         , "'. Use one of: chemistry, hydrophobicity, taylor, clustalx"
         , call. = FALSE)
  }

  # Theme settings common to the mutant and wild-type logo plots
  shared_theme = theme(legend.direction = leg_dir
                       , legend.title = element_text(size = leg_tts
                                                     , colour = ytt_col)
                       , legend.text = element_text(size = leg_ts)
                       , axis.text.x = element_text(size = x_ats
                                                    , angle = x_tangle
                                                    , hjust = 1
                                                    , vjust = 0.4
                                                    , colour = xfont_bgc)
                       , axis.title.x = element_text(size = x_tts
                                                     , colour = xtt_col)
                       , axis.title.y = element_text(size = y_tts
                                                     , colour = ytt_col)
                       , plot.background = element_rect(fill = theme_bgc))

  # Axis scales: the entire MSA uses a fixed small x expansion and the default
  # y scale; user-chosen positions use x_axis_offset and a padded y scale
  add_position_scales = function(logoP, x_pos){
    if (plot_entire_msa){
      logoP +
        scale_x_discrete(breaks = x_pos
                         , expand = c(0.02,0)
                         , labels = x_pos
                         , limits = factor(x_pos))
    }else{
      logoP +
        scale_y_continuous(expand = c(0,0.09)) +
        scale_x_discrete(breaks = x_pos
                         , expand = c(x_axis_offset,0)
                         , labels = x_pos
                         , limits = factor(x_pos))
    }
  }

  #####################################
  # Generating logo plots for nsSNPs
  #####################################
  LogoPlotMSAL <- list()

  #-------------------
  # Mutant logo plot
  #-------------------
  p0 = ggseqlogo(msa_seq_plot
                 , facet = "grid"
                 , method = msa_method
                 , col_scheme = my_logo_col
                 , seq_type = 'aa') +
    shared_theme +
    theme(legend.position = leg_pos
          , axis.text.y = element_text(size = y_ats
                                       , angle = y_tangle
                                       , hjust = 1
                                       , vjust = -1.0
                                       , colour = yfont_bgc)) +
    xlab(x_lab)

  msa_mut_logo_P = add_position_scales(p0, mut_x_pos)

  cat('\nDone: MSA plot for mutations')
  LogoPlotMSAL[['msa_mut_logoP']] <- msa_mut_logo_P

  #---------------------------------
  # Wild-type MSA: gene_fasta file
  #---------------------------------
  p1 = ggseqlogo(wt_seq_plot
                 , facet = "grid"
                 , method = msa_method
                 , col_scheme = my_logo_col
                 , seq_type = 'aa') +
    shared_theme +
    theme(legend.position = "none"
          , axis.text.y = element_blank()) +
    ylab("") + xlab("Wild-type position")

  msa_wt_logo_P = add_position_scales(p1, wt_x_pos)

  cat('\nDone: MSA plot for WT')
  LogoPlotMSAL[['msa_wt_logoP']] <- msa_wt_logo_P

  #=========================================
  # Output
  # Combined plot: logo_MSA
  #=========================================
  cat('\nDone: msa_mut_logoP + msa_wt_logoP')

  # colour scheme: https://rdrr.io/cran/ggseqlogo/src/R/col_schemes.r
  LogoMSA_comb = cowplot::plot_grid(LogoPlotMSAL[['msa_mut_logoP']]
                                    , LogoPlotMSAL[['msa_wt_logoP']]
                                    , nrow = 2
                                    , align = "v"
                                    , rel_heights = c(3/4, 1/4))

  return(LogoMSA_comb)

}
|
|
@ -1,83 +0,0 @@
|
|||
setwd("~/git/LSHTM_analysis/scripts/plotting/")
|
||||
|
||||
source("../functions/lf_bp_with_stats.R")
|
||||
source("../functions/lf_bp.R")
|
||||
|
||||
######################
|
||||
# Make plot
|
||||
######################
|
||||
# Note: Data
|
||||
# run other_plots_data.R
|
||||
# to get the long format data to test this function
|
||||
|
||||
lf_bp(lf_df = lf_dynamut2
|
||||
, p_title = "Dynamut2"
|
||||
, colour_categ = "ddg_dynamut2_outcome"
|
||||
, x_grp = "mutation_info"
|
||||
, y_var = "param_value"
|
||||
, facet_var = "param_type"
|
||||
, n_facet_row = 1
|
||||
, y_scales = "free_y"
|
||||
, colour_bp_strip = "khaki2"
|
||||
, dot_size = 3
|
||||
, dot_transparency = 0.3
|
||||
, violin_quantiles = c(0.25, 0.5, 0.75)
|
||||
, my_ats = 22 # axis text size
|
||||
, my_als = 20 # axis label size
|
||||
, my_fls = 20 # facet label size
|
||||
, my_pts = 22 # plot title size
|
||||
, make_boxplot = F
|
||||
, bp_width = "auto"
|
||||
, add_stats = T
|
||||
, stat_grp_comp = c("DM", "OM")
|
||||
, stat_method = "wilcox.test"
|
||||
, my_paired = FALSE
|
||||
, stat_label = c("p.format", "p.signif") )
|
||||
|
||||
# foo = lf_dynamut2 %>%
|
||||
# group_by(mutation_info, param_type) %>%
|
||||
# summarise( Mean = mean(param_value, na.rm = T)
|
||||
# , SD = sd(param_value, na.rm = T)
|
||||
# , Median = median(param_value, na.rm = T)
|
||||
# , IQR = IQR(param_value, na.rm = T) )
|
||||
|
||||
# Quick tests
|
||||
plotdata_sel = subset(lf_dynamut2
|
||||
, lf_dynamut2$param_type == "ASA")
|
||||
|
||||
plot_sum = plotdata_sel %>%
|
||||
group_by(mutation_info, param_type) %>%
|
||||
summarise(n = n()
|
||||
, Mean = mean(param_value, na.rm = T)
|
||||
, SD = sd(param_value, na.rm = T)
|
||||
, Min = min(param_value, na.rm = T)
|
||||
, Q1 = quantile(param_value, na.rm = T, 0.25)
|
||||
, Median = median(param_value, na.rm = T)
|
||||
, Q3 = quantile(param_value, na.rm = T, 0.75)
|
||||
, Max = max(param_value, na.rm = T) ) %>%
|
||||
rename('Mutation Class' = mutation_info
|
||||
, Parameter = param_type)
|
||||
plot_sum = as.data.frame(plot_sum, row.names = NULL)
|
||||
plot_sum
|
||||
|
||||
bar = compare_means(param_value ~ mutation_info
|
||||
, group.by = "param_type"
|
||||
, data = plotdata_sel
|
||||
, paired = FALSE
|
||||
, p.adjust.method = "BH")
|
||||
bar2 = bar[c("param_type"
|
||||
, "group1"
|
||||
, "group2"
|
||||
, "p.format"
|
||||
, "p.signif"
|
||||
, "p.adj")] %>%
|
||||
rename(Parameter = param_type
|
||||
, Group1 = group1
|
||||
, Group2 = group2
|
||||
, "P-value" = p.format
|
||||
, "P-sig" = p.signif
|
||||
, "P-adj" = p.adj)
|
||||
bar2 = data.frame(bar2); bar2
|
||||
|
||||
library(Hmisc)
|
||||
describe(lf_dynamut2)
|
|
@ -15,70 +15,47 @@ theme_set(theme_grey())
|
|||
## ...opt args
|
||||
#==========================================================
|
||||
stability_count_bp <- function(plotdf
|
||||
, df_colname = ""
|
||||
, leg_title = ""
|
||||
, ats = 12#25 # axis text size
|
||||
, als = 11#22 # axis label size
|
||||
, lts = 10#20 # legend text size
|
||||
, ltis = 11#22 # label title size
|
||||
, geom_ls = 10 # geom_label size
|
||||
, yaxis_title = "Number of SAVs"
|
||||
, df_colname
|
||||
, leg_title = "Legend title"
|
||||
, axis_text_size = 25
|
||||
, axis_label_size = 22
|
||||
, leg_text_size = 20
|
||||
, leg_title_size = 22
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, bp_plot_title = ""
|
||||
, label_categories #= c("LEVEL1", "LEVEL2")
|
||||
, label_categories = c("Destabilising", "Stabilising")
|
||||
, title_colour = "chocolate4"
|
||||
, subtitle_text = NULL
|
||||
, sts = 10#20
|
||||
, subtitle_colour = "#350E20FF" #brown
|
||||
, subtitle_size = 20
|
||||
, subtitle_colour = "pink"
|
||||
#, leg_position = c(0.73,0.8) # within plot area
|
||||
, leg_position = "top"
|
||||
, bar_fill_values = c("#F8766D", "#00BFC4")){
|
||||
, leg_position = "top"){
|
||||
|
||||
# convert to factor and get labels
|
||||
plotdf[[df_colname]] = as.factor(plotdf[[df_colname]])
|
||||
label_categories = levels(plotdf[[df_colname]])
|
||||
|
||||
#OutPlot_count = ggplot(plotdf, aes(x = eval(parse(text = df_colname)))) +
|
||||
OutPlot_count = ggplot(plotdf, aes_string(x = df_colname)) +
|
||||
geom_bar(aes(fill = eval(parse(text = df_colname)))
|
||||
, show.legend = TRUE) +
|
||||
OutPlot_count = ggplot(plotdf, aes(x = eval(parse(text = df_colname)))) +
|
||||
geom_bar(aes(fill = eval(parse(text = df_colname))), show.legend = TRUE) +
|
||||
geom_label(stat = "count"
|
||||
, aes(label = ..count..)
|
||||
, color = "black"
|
||||
, show.legend = FALSE
|
||||
, size = geom_ls
|
||||
#, nudge_x = 0
|
||||
#, nudge_y = -1
|
||||
, label.size = 0.25 ) +
|
||||
, size = 10) +
|
||||
theme(axis.text.x = element_blank()
|
||||
, axis.title.x = element_blank()
|
||||
, axis.title.y = element_text(size = als)
|
||||
, axis.text.y = element_text(size = ats)
|
||||
, axis.title.y = element_text(size = axis_label_size)
|
||||
, axis.text.y = element_text(size = axis_text_size)
|
||||
, legend.position = leg_position
|
||||
, legend.text = element_text(size = lts)
|
||||
, legend.title = element_text(size = ltis)
|
||||
#, panel.grid.major = element_blank(),
|
||||
#, panel.grid.minor = element_blank(),
|
||||
, panel.grid = element_blank()
|
||||
, legend.key.size = unit(lts,"pt")
|
||||
, plot.title = element_text(size = als
|
||||
, colour = title_colour
|
||||
, hjust = 0.5)
|
||||
, plot.subtitle = element_text(size = sts
|
||||
, legend.text = element_text(size = leg_text_size)
|
||||
, legend.title = element_text(size = leg_title_size)
|
||||
, plot.title = element_text(size = axis_label_size
|
||||
, colour = title_colour)
|
||||
, plot.subtitle = element_text(size = subtitle_size
|
||||
, hjust = 0.5
|
||||
, colour = subtitle_colour)) +
|
||||
labs(title = bp_plot_title
|
||||
, subtitle = subtitle_text
|
||||
, y = yaxis_title) +
|
||||
|
||||
# scale_fill_discrete(name = leg_title
|
||||
# , labels = label_categories) +
|
||||
|
||||
scale_fill_manual(name = ""
|
||||
# name = leg_title
|
||||
, values = bar_fill_values
|
||||
, labels = label_categories # problem with consurf decreasing level
|
||||
)
|
||||
|
||||
scale_fill_discrete(name = leg_title
|
||||
#, labels = c("Destabilising", "Stabilising")
|
||||
, labels = label_categories)
|
||||
|
||||
return(OutPlot_count)
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ getwd()
|
|||
# that will be used in testing the functions
|
||||
#===========================================
|
||||
source("plotting_data.R")
|
||||
infile = "~/git/Data/streptomycin/output/"
|
||||
infile = "/home/tanu/git/Data/streptomycin/output/"
|
||||
pd_df = plotting_data(infile)
|
||||
my_df = pd_df[[1]]
|
||||
my_df_u = pd_df[[2]]
|
|
@ -4,7 +4,7 @@
|
|||
# Odds Ratio from master data
|
||||
#########################################################
|
||||
# load libraries
|
||||
#source("~/git/LSHTM_analysis/scripts/Header_TT.R")
|
||||
#source("Header_TT.R")
|
||||
require("getopt", quietly = TRUE) # cmd parse arguments
|
||||
|
||||
# working dir and loading libraries
|
|
@ -5,14 +5,15 @@ getwd()
|
|||
#===========================================
|
||||
# load functions, data, dirs, hardcoded vars
|
||||
# that will be used in testing the functions
|
||||
#drug = "streptomycin"
|
||||
#gene = "gid"
|
||||
#source("plotting_data.R")
|
||||
#infile = paste0("~/git/Data/", drug, "/output/", gene, "_comb_stab_struc_params.csv")
|
||||
#infile_df = read.csv(infile)
|
||||
|
||||
|
||||
#===========================================
|
||||
drug = "streptomycin"
|
||||
gene = "gid"
|
||||
|
||||
source("plotting_data.R")
|
||||
|
||||
infile = paste0("~/git/Data/", drug, "/output/", gene, "_comb_stab_struc_params.csv")
|
||||
infile_df = read.csv(infile)
|
||||
|
||||
lig_dist = 5
|
||||
pd_df = plotting_data(infile_df
|
||||
, lig_dist_colname = 'ligand_distance'
|
||||
|
@ -41,8 +42,8 @@ print(paste0("plot filename:", basic_bp_duet))
|
|||
|
||||
# function only
|
||||
stability_count_bp(plotdf = my_df_u
|
||||
, df_colname = "ligand_outcome"
|
||||
, leg_title = "Lig outcome"
|
||||
, df_colname = "duet_outcome"
|
||||
, leg_title = "DUET outcome"
|
||||
, label_categories = c("Destabilising", "Stabilising")
|
||||
, leg_position = "top")
|
||||
|
||||
|
@ -62,7 +63,7 @@ lig_dist = 10
|
|||
stability_count_bp(plotdf = my_df_u_lig
|
||||
, df_colname = "ligand_outcome"
|
||||
, leg_title = "Ligand outcome"
|
||||
, yaxis_title = paste0("Number of SAVs\nLigand dist: <", lig_dist, "\u212b")
|
||||
, yaxis_title = paste0("Number of nsSNPs\nLigand dist: <", lig_dist, "\u212b")
|
||||
#, bp_plot_title = "Sites < 10 Ang of ligand"
|
||||
)
|
||||
|
|
@ -36,16 +36,8 @@ source("combining_dfs_plotting.R")
|
|||
#---------------------
|
||||
# call: import_dirs()
|
||||
#---------------------
|
||||
#gene = 'gid'
|
||||
#drug = 'streptomycin'
|
||||
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||
#source("~/git/LSHTM_analysis/config/alr.R")
|
||||
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||
source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||
|
||||
|
||||
gene = 'gid'
|
||||
drug = 'streptomycin'
|
||||
|
||||
import_dirs(drug_name = drug, gene_name = gene)
|
||||
|
||||
|
@ -67,9 +59,8 @@ mcsm_comb_data = read.csv(infile_params, header = T)
|
|||
# call function: plotting_data()
|
||||
#-------------------------------
|
||||
pd_df = plotting_data(df = mcsm_comb_data
|
||||
, lig_dist_colname = LigDist_colname
|
||||
, lig_dist_cutoff = LigDist_cutoff)
|
||||
|
||||
, ligand_dist_colname = 'ligand_distance'
|
||||
, lig_dist_cutoff = 10
|
||||
my_df_u = pd_df[[2]]
|
||||
|
||||
#======================================
|
||||
|
@ -93,8 +84,8 @@ gene_metadata <- read.csv(infile_metadata
|
|||
#-----------------------------------------
|
||||
all_plot_dfs = combining_dfs_plotting(my_df_u
|
||||
, gene_metadata
|
||||
, lig_dist_colname = LigDist_colname
|
||||
, lig_dist_cutoff = LigDist_cutoff)
|
||||
, lig_dist_colname = 'ligand_distance'
|
||||
, lig_dist_cutoff = 10)
|
||||
|
||||
merged_df2 = all_plot_dfs[[1]]
|
||||
merged_df3 = all_plot_dfs[[2]]
|
|
@ -19,7 +19,7 @@ import_dirs(drug_name = drug, gene_name = gene)
|
|||
#-------------------------------
|
||||
source("plotting_data.R")
|
||||
|
||||
infile_params = "~/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv"
|
||||
infile_params = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv"
|
||||
mcsm_comb_data = read.csv(infile_params, header = T)
|
||||
|
||||
pd_df = plotting_data(df = mcsm_comb_data
|
|
@ -1,63 +0,0 @@
|
|||
############################################################################
|
||||
# merged_df3 = read.csv("~/git/Data/cycloserine/output/alr_all_params.csv"); source("~/git/LSHTM_analysis/config/alr.R")
|
||||
# if ( tolower(gene) == "alr") {
|
||||
# aa_pos_lig1 = NULL
|
||||
# aa_pos_lig2 = NULL
|
||||
# aa_pos_lig3 = NULL
|
||||
# p_title = gene
|
||||
# }
|
||||
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
###########################################################################
|
||||
# merged_df3 = read.csv("~/git/Data/ethambutol/output/embb_all_params.csv"); source("~/git/LSHTM_analysis/config/embb.R")
|
||||
# if ( tolower(gene) == "embb") {
|
||||
# aa_pos_lig1 = aa_pos_ca
|
||||
# aa_pos_lig2 = aa_pos_cdl
|
||||
# aa_pos_lig3 = aa_pos_dsl
|
||||
# p_title = gene
|
||||
# }
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
|
||||
###########################################################################
|
||||
merged_df3 = read.csv("~/git/Data/streptomycin/output/gid_all_params.csv")
|
||||
|
||||
source("~/git/LSHTM_analysis/config/gid.R")
|
||||
if ( tolower(gene) == "gid") {
|
||||
aa_pos_lig1 = aa_pos_rna
|
||||
aa_pos_lig2 = aa_pos_sam
|
||||
aa_pos_lig3 = aa_pos_amp
|
||||
p_title = gene
|
||||
}
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
|
||||
###########################################################################
|
||||
# merged_df3 = read.csv("~/git/Data/isoniazid/output/katg_all_params.csv"); source("~/git/LSHTM_analysis/config/katg.R")
|
||||
# if ( tolower(gene) == "katg") {
|
||||
# aa_pos_lig1 = aa_pos_hem
|
||||
# aa_pos_lig2 = NULL
|
||||
# aa_pos_lig3 = NULL
|
||||
# p_title = gene
|
||||
# }
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
|
||||
###########################################################################
|
||||
# merged_df3 = read.csv("~/git/Data/pyrazinamide/output/pnca_all_params.csv"); source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
# if ( tolower(gene) == "pnca") {
|
||||
# aa_pos_lig1 = aa_pos_fe
|
||||
# aa_pos_lig2 = NULL
|
||||
# aa_pos_lig3 = NULL
|
||||
# p_title = gene
|
||||
# }
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
|
||||
###########################################################################
|
||||
merged_df3 = read.csv("~/git/Data/rifampicin/output/rpob_all_params.csv"); source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
if ( tolower(gene) == "rpob") {
|
||||
aa_pos_lig1 = NULL
|
||||
aa_pos_lig2 = NULL
|
||||
aa_pos_lig3 = NULL
|
||||
p_title = gene
|
||||
}
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
|
||||
#########################################################################
|
|
@ -1,518 +0,0 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: Script to format data for dm om plots:
|
||||
# generating WF and LF data for each of the parameters:
|
||||
# duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
|
||||
# Called by get_plotting_dfs.R
|
||||
|
||||
##################################################################
|
||||
# from plotting_globals.R
|
||||
# DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname
|
||||
|
||||
dm_om_wf_lf_data <- function(df
|
||||
, gene_name = gene # from globals
|
||||
, colnames_to_extract
|
||||
, ligand_dist_colname = LigDist_colname # from globals
|
||||
#, ppi2Dist_colname #from globals used
|
||||
#, naDist_colname #from globals used
|
||||
, dr_muts = dr_muts_col # from globals
|
||||
, other_muts = other_muts_col # from globals
|
||||
, snp_colname = "mutationinformation"
|
||||
, aa_pos_colname = "position" # to sort df by
|
||||
, mut_colname = "mutation"
|
||||
, mut_info_colname = "mutation_info"
|
||||
, mut_info_label_colname = "mutation_info_labels" # if empty, below used
|
||||
#, dr_other_muts_labels = c("DM", "OM") # only used if ^^ = ""
|
||||
, categ_cols_to_factor){
|
||||
|
||||
df = as.data.frame(df)
|
||||
|
||||
# Initialise the required dfs based on gene name
|
||||
geneL_normal = c("pnca")
|
||||
geneL_na = c("gid", "rpob")
|
||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
|
||||
# common_dfs
|
||||
common_dfsL = list(
|
||||
wf_duet = data.frame()
|
||||
, lf_duet = data.frame()
|
||||
, wf_mcsm_lig = data.frame()
|
||||
, lf_mcsm_lig = data.frame()
|
||||
, wf_foldx = data.frame()
|
||||
, lf_foldx = data.frame()
|
||||
, wf_deepddg = data.frame()
|
||||
, lf_deepddg = data.frame()
|
||||
, wf_dynamut2 = data.frame()
|
||||
, lf_dynamut2 = data.frame()
|
||||
, wf_consurf = data.frame()
|
||||
, lf_consurf = data.frame()
|
||||
, wf_snap2 = data.frame()
|
||||
, lf_snap2 = data.frame()
|
||||
)
|
||||
|
||||
# additional dfs
|
||||
if (tolower(gene_name)%in%geneL_normal){
|
||||
wf_lf_dataL = common_dfsL
|
||||
}
|
||||
|
||||
if (tolower(gene_name)%in%geneL_na){
|
||||
additional_dfL = list(
|
||||
wf_mcsm_na = data.frame()
|
||||
, lf_mcsm_na = data.frame()
|
||||
)
|
||||
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
||||
}
|
||||
|
||||
if (tolower(gene_name)%in%geneL_ppi2){
|
||||
additional_dfL = list(
|
||||
wf_mcsm_ppi2 = data.frame()
|
||||
, lf_mcsm_ppi2 = data.frame()
|
||||
)
|
||||
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
||||
}
|
||||
cat("\nInitializing an empty list of length:"
|
||||
, length(wf_lf_dataL))
|
||||
|
||||
#=======================================================================
|
||||
if (missing(colnames_to_extract)){
|
||||
|
||||
colnames_to_extract = c(snp_colname
|
||||
, mut_colname, mut_info_colname, mut_info_label_colname
|
||||
, aa_pos_colname
|
||||
, LigDist_colname # from globals
|
||||
, ppi2Dist_colname # from globals
|
||||
, naDist_colname # from globals
|
||||
, "duet_stability_change" , "duet_scaled" , "duet_outcome"
|
||||
, "ligand_affinity_change", "affinity_scaled" , "ligand_outcome"
|
||||
, "ddg_foldx" , "foldx_scaled" , "foldx_outcome"
|
||||
, "deepddg" , "deepddg_scaled" , "deepddg_outcome"
|
||||
, "asa" , "rsa"
|
||||
, "rd_values" , "kd_values"
|
||||
, "log10_or_mychisq" , "neglog_pval_fisher" , "maf" #"af"
|
||||
, "ddg_dynamut2" , "ddg_dynamut2_scaled", "ddg_dynamut2_outcome"
|
||||
, "mcsm_ppi2_affinity" , "mcsm_ppi2_scaled" , "mcsm_ppi2_outcome"
|
||||
, "consurf_score" , "consurf_scaled" , "consurf_outcome" # exists now
|
||||
, "snap2_score" , "snap2_scaled" , "snap2_outcome"
|
||||
, "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome")
|
||||
}else{
|
||||
colnames_to_extract = c(mut_colname, mut_info_colname, mut_info_label_colname
|
||||
, aa_pos_colname, LigDist_colname
|
||||
, colnames_to_extract)
|
||||
}
|
||||
comb_df = df[, colnames(df)%in%colnames_to_extract]
|
||||
comb_df_s = dplyr::arrange(comb_df, aa_pos_colname)
|
||||
|
||||
#=======================================================================
|
||||
if(missing(categ_cols_to_factor)){
|
||||
categ_cols_to_factor = grep( "_outcome|_info", colnames(comb_df_s) )
|
||||
}else{
|
||||
categ_cols_to_factor = categ_cols_to_factor
|
||||
}
|
||||
#fact_cols = colnames(comb_df_s)[grepl( "_outcome|_info", colnames(comb_df_s) )]
|
||||
fact_cols = colnames(comb_df_s)[categ_cols_to_factor]
|
||||
|
||||
if (any(lapply(comb_df_s[, fact_cols], class) == "character")){
|
||||
cat("\nChanging", length(categ_cols_to_factor), "cols to factor")
|
||||
comb_df_s[, fact_cols] <- lapply(comb_df_s[, fact_cols], as.factor)
|
||||
if (all(lapply(comb_df_s[, fact_cols], class) == "factor")){
|
||||
cat("\nSuccessful: cols changed to factor")
|
||||
}
|
||||
}else{
|
||||
cat("\nRequested cols aready factors")
|
||||
}
|
||||
#=======================================================================
|
||||
table(comb_df_s[[mut_info_colname]])
|
||||
|
||||
# pretty display names i.e. labels to reduce major code duplication later
|
||||
foo_cnames = data.frame(colnames(comb_df_s))
|
||||
names(foo_cnames) <- "old_name"
|
||||
|
||||
stability_suffix <- paste0(delta_symbol, delta_symbol, "G")
|
||||
#flexibility_suffix <- paste0(delta_symbol, delta_symbol, "S")
|
||||
|
||||
#lig_dn = paste0("Ligand distance (", angstroms_symbol, ")"); lig_dn
|
||||
#mcsm_lig_dn = paste0("Ligand affinity (log fold change)"); mcsm_lig_dn
|
||||
|
||||
lig_dn = paste0("Lig Dist(", angstroms_symbol, ")"); lig_dn
|
||||
mcsm_lig_dn = paste0("mCSM-lig"); mcsm_lig_dn
|
||||
|
||||
duet_dn = paste0("DUET ", stability_suffix); duet_dn
|
||||
foldx_dn = paste0("FoldX ", stability_suffix); foldx_dn
|
||||
deepddg_dn = paste0("Deepddg " , stability_suffix); deepddg_dn
|
||||
dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn
|
||||
|
||||
mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn
|
||||
mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn
|
||||
consurf_dn = paste0("Consurf"); consurf_dn
|
||||
snap2_dn = paste0("SNAP2"); snap2_dn
|
||||
|
||||
|
||||
# change column names: plyr
|
||||
new_colnames = c(asa = "ASA"
|
||||
, rsa = "RSA"
|
||||
, rd_values = "RD"
|
||||
, kd_values = "KD"
|
||||
, log10_or_mychisq = "Log10 (OR)"
|
||||
, neglog_pval_fisher = "-Log (P)"
|
||||
#, af = "MAF"
|
||||
, maf = "MAF"
|
||||
#, ligand_dist_colname = lig_dn # cannot handle variable name 'ligand_dist_colname'
|
||||
, affinity_scaled = mcsm_lig_dn
|
||||
, duet_scaled = duet_dn
|
||||
, foldx_scaled = foldx_dn
|
||||
, deepddg_scaled = deepddg_dn
|
||||
, ddg_dynamut2_scaled = dynamut2_dn
|
||||
, mcsm_na_scaled = mcsm_na_dn
|
||||
, mcsm_ppi2_affinity = mcsm_ppi2_dn
|
||||
, consurf_score = consurf_dn
|
||||
, snap2_score = snap2_dn)
|
||||
|
||||
comb_df_sl1 = plyr::rename(comb_df_s
|
||||
, replace = new_colnames
|
||||
, warn_missing = T
|
||||
, warn_duplicated = T)
|
||||
|
||||
# renaming colname using variable i.e ligand_dist_colname: dplyr
|
||||
comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname))
|
||||
names(comb_df_sl)
|
||||
|
||||
#=======================
|
||||
# NEW: Affinity filtered data
|
||||
#========================
|
||||
# mcsm-lig --> LigDist_colname
|
||||
comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dn]]<DistCutOff,]
|
||||
|
||||
# mcsm-ppi2 --> ppi2Dist_colname
|
||||
comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2Dist_colname]]<DistCutOff,]
|
||||
|
||||
# mcsm-na --> naDist_colname
|
||||
comb_df_sl_na = comb_df_sl[comb_df_sl[[naDist_colname]]<DistCutOff,]
|
||||
|
||||
#####################################################################
|
||||
static_cols1 = mut_info_label_colname
|
||||
#######################################################################
|
||||
#======================
|
||||
# Selecting dfs
|
||||
# with appropriate cols
|
||||
#=======================
|
||||
static_cols_start = c(snp_colname
|
||||
, aa_pos_colname
|
||||
, mut_colname
|
||||
, static_cols1)
|
||||
|
||||
# ordering is important!
|
||||
static_cols_end = c(lig_dn
|
||||
, "ASA"
|
||||
, "RSA"
|
||||
, "RD"
|
||||
, "KD"
|
||||
, "MAF"
|
||||
, "Log10 (OR)"
|
||||
#, "-Log (P)"
|
||||
)
|
||||
|
||||
#########################################################################
|
||||
#==============
|
||||
# DUET
|
||||
#==============
|
||||
# WF data: duet
|
||||
cols_to_select_duet = c(static_cols_start, c("duet_outcome", duet_dn), static_cols_end)
|
||||
wf_duet = comb_df_sl[, cols_to_select_duet]
|
||||
|
||||
#pivot_cols_ps = cols_to_select_ps[1:5]; pivot_cols_ps
|
||||
pivot_cols_duet = cols_to_select_duet[1: (length(static_cols_start) + 1)]; pivot_cols_duet
|
||||
expected_rows_lf = nrow(wf_duet) * (length(wf_duet) - length(pivot_cols_duet))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: duet
|
||||
lf_duet = tidyr::gather(wf_duet
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(duet_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_duet) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", duet_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_duet']] = wf_duet
|
||||
wf_lf_dataL[['lf_duet']] = lf_duet
|
||||
|
||||
############################################################################
|
||||
#==============
|
||||
# FoldX
|
||||
#==============
|
||||
# WF data: Foldx
|
||||
cols_to_select_foldx= c(static_cols_start, c("foldx_outcome", foldx_dn), static_cols_end)
|
||||
wf_foldx = comb_df_sl[, cols_to_select_foldx]
|
||||
|
||||
pivot_cols_foldx = cols_to_select_foldx[1: (length(static_cols_start) + 1)]; pivot_cols_foldx
|
||||
expected_rows_lf = nrow(wf_foldx) * (length(wf_foldx) - length(pivot_cols_foldx))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: Foldx
|
||||
lf_foldx = gather(wf_foldx
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(foldx_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_foldx) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", foldx_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_foldx']] = wf_foldx
|
||||
wf_lf_dataL[['lf_foldx']] = lf_foldx
|
||||
|
||||
############################################################################
|
||||
#==============
|
||||
# Deepddg
|
||||
#==============
|
||||
# WF data: deepddg
|
||||
cols_to_select_deepddg = c(static_cols_start, c("deepddg_outcome", deepddg_dn), static_cols_end)
|
||||
wf_deepddg = comb_df_sl[, cols_to_select_deepddg]
|
||||
|
||||
pivot_cols_deepddg = cols_to_select_deepddg[1: (length(static_cols_start) + 1)]; pivot_cols_deepddg
|
||||
expected_rows_lf = nrow(wf_deepddg) * (length(wf_deepddg) - length(pivot_cols_deepddg))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: Deepddg
|
||||
lf_deepddg = gather(wf_deepddg
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(deepddg_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_deepddg) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", deepddg_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_deepddg']] = wf_deepddg
|
||||
wf_lf_dataL[['lf_deepddg']] = lf_deepddg
|
||||
############################################################################
|
||||
#==============
|
||||
# Dynamut2: LF
|
||||
#==============
|
||||
# WF data: dynamut2
|
||||
cols_to_select_dynamut2 = c(static_cols_start, c("ddg_dynamut2_outcome", dynamut2_dn), static_cols_end)
|
||||
wf_dynamut2 = comb_df_sl[, cols_to_select_dynamut2]
|
||||
|
||||
pivot_cols_dynamut2 = cols_to_select_dynamut2[1: (length(static_cols_start) + 1)]; pivot_cols_dynamut2
|
||||
expected_rows_lf = nrow(wf_dynamut2) * (length(wf_dynamut2) - length(pivot_cols_dynamut2))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: dynamut2
|
||||
lf_dynamut2 = gather(wf_dynamut2
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(dynamut2_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_dynamut2) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", dynamut2_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_dynamut2']] = wf_dynamut2
|
||||
wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2
|
||||
|
||||
|
||||
######################################################################################
|
||||
#==================
|
||||
# Consurf: LF
|
||||
#https://consurf.tau.ac.il/overview.php
|
||||
# consurf_score:
|
||||
# <0 (below average): slowly evolving i.e CONSERVED
|
||||
# >0 (above average): rapidly evolving, i.e VARIABLE
|
||||
#table(df$consurf_colour_rev)
|
||||
# TODO
|
||||
#1--> "most_variable", 2--> "", 3-->"", 4-->""
|
||||
#5-->"", 6-->"", 7-->"", 8-->"", 9-->"most_conserved"
|
||||
#====================
|
||||
# FIXME: if you add category column to consurf
|
||||
cols_to_select_consurf = c(static_cols_start, c("consurf_outcome", consurf_dn), static_cols_end)
|
||||
wf_consurf = comb_df_sl[, cols_to_select_consurf]
|
||||
pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start) + 1)]; pivot_cols_consurf
|
||||
|
||||
# WF data: consurf
|
||||
cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end)
|
||||
wf_consurf = comb_df_sl[, cols_to_select_consurf]
|
||||
|
||||
pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start))]; pivot_cols_consurf
|
||||
expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: consurf
|
||||
lf_consurf = gather(wf_consurf
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(consurf_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_consurf) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", consurf_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_consurf']] = wf_consurf
|
||||
wf_lf_dataL[['lf_consurf']] = lf_consurf
|
||||
###########################################################################
|
||||
#==============
|
||||
# SNAP2: LF
|
||||
#==============
|
||||
# WF data: snap2
|
||||
cols_to_select_snap2 = c(static_cols_start, c("snap2_outcome", snap2_dn), static_cols_end)
|
||||
wf_snap2 = comb_df_sl[, cols_to_select_snap2]
|
||||
|
||||
pivot_cols_snap2 = cols_to_select_snap2[1: (length(static_cols_start) + 1)]; pivot_cols_snap2
|
||||
expected_rows_lf = nrow(wf_snap2) * (length(wf_snap2) - length(pivot_cols_snap2))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: snap2
|
||||
lf_snap2 = gather(wf_snap2
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(snap2_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_snap2) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", snap2_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_snap2']] = wf_snap2
|
||||
wf_lf_dataL[['lf_snap2']] = lf_snap2
|
||||
###########################################################################
|
||||
# AFFINITY cols
|
||||
###########################################################################
|
||||
#=========================
|
||||
# mCSM-lig:
|
||||
# data filtered by cut off
|
||||
#=========================
|
||||
#---------------------
|
||||
# mCSM-lig: WF and lF
|
||||
#----------------------
|
||||
# WF data: mcsm_lig
|
||||
cols_to_select_mcsm_lig = c(static_cols_start, c("ligand_outcome", mcsm_lig_dn), static_cols_end)
|
||||
wf_mcsm_lig = comb_df_sl_lig[, cols_to_select_mcsm_lig] # filtered df
|
||||
|
||||
pivot_cols_mcsm_lig = cols_to_select_mcsm_lig[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_lig
|
||||
expected_rows_lf = nrow(wf_mcsm_lig) * (length(wf_mcsm_lig) - length(pivot_cols_mcsm_lig))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: mcsm_lig
|
||||
lf_mcsm_lig = gather(wf_mcsm_lig
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(mcsm_lig_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_mcsm_lig) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", mcsm_lig_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for mcsm_lig")
|
||||
quit()
|
||||
}
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig
|
||||
wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig
|
||||
|
||||
#====================
|
||||
# mcsm-NA affinity
|
||||
# data filtered by cut off
|
||||
#====================
|
||||
if (tolower(gene_name)%in%geneL_na){
|
||||
#---------------
|
||||
# mCSM-NA: WF and lF
|
||||
#-----------------
|
||||
# WF data: mcsm-na
|
||||
cols_to_select_mcsm_na = c(static_cols_start, c("mcsm_na_outcome", mcsm_na_dn), static_cols_end)
|
||||
#wf_mcsm_na = comb_df_sl[, cols_to_select_mcsm_na]
|
||||
wf_mcsm_na = comb_df_sl_na[, cols_to_select_mcsm_na]
|
||||
|
||||
pivot_cols_mcsm_na = cols_to_select_mcsm_na[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_na
|
||||
expected_rows_lf = nrow(wf_mcsm_na) * (length(wf_mcsm_na) - length(pivot_cols_mcsm_na))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: mcsm-na
|
||||
lf_mcsm_na = gather(wf_mcsm_na
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(mcsm_na_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_mcsm_na) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", mcsm_na_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na
|
||||
wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na
|
||||
|
||||
}
|
||||
|
||||
#=========================
|
||||
# mcsm-ppi2 affinity
|
||||
# data filtered by cut off
|
||||
#========================
|
||||
if (tolower(gene_name)%in%geneL_ppi2){
|
||||
#-----------------
|
||||
# mCSM-PPI2: WF and lF
|
||||
#-----------------
|
||||
# WF data: mcsm-ppi2
|
||||
cols_to_select_mcsm_ppi2 = c(static_cols_start, c("mcsm_ppi2_outcome", mcsm_ppi2_dn), static_cols_end)
|
||||
#wf_mcsm_ppi2 = comb_df_sl[, cols_to_select_mcsm_ppi2]
|
||||
wf_mcsm_ppi2 = comb_df_sl_ppi2[, cols_to_select_mcsm_ppi2]
|
||||
|
||||
pivot_cols_mcsm_ppi2 = cols_to_select_mcsm_ppi2[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_ppi2
|
||||
expected_rows_lf = nrow(wf_mcsm_ppi2) * (length(wf_mcsm_ppi2) - length(pivot_cols_mcsm_ppi2))
|
||||
expected_rows_lf
|
||||
|
||||
# LF data: mcsm-ppi2
|
||||
lf_mcsm_ppi2 = gather(wf_mcsm_ppi2
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(mcsm_ppi2_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_mcsm_ppi2) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for ", mcsm_ppi2_dn)
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for duet")
|
||||
quit()
|
||||
}
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mcsm_ppi2']] = wf_mcsm_ppi2
|
||||
wf_lf_dataL[['lf_mcsm_ppi2']] = lf_mcsm_ppi2
|
||||
|
||||
}
|
||||
|
||||
return(wf_lf_dataL)
|
||||
}
|
||||
############################################################################
|
|
@ -1,62 +0,0 @@
|
|||
# Driver script: lineage barplots.
#
# Draws two barplots with lin_count_bp() (sourced from bp_lineage.R):
#   1. sample / mutation counts per lineage, from the long-format data `lin_lf`
#   2. SAV diversity per lineage, from the wide-format data `lin_wf`
#
# `lin_lf` and `lin_wf` are not created here; they are expected to exist after
# sourcing get_plotting_dfs.R.
setwd("~/git/LSHTM_analysis/scripts/plotting")

source("get_plotting_dfs.R")
source("../functions/bp_lineage.R")

#########################################
# Lineage and SAV count: lineage lf data
#########################################

#=========================
# Data: all lineages, or a selected few
#=========================
# By default every level is kept; replace `sel_lineages` with a subset of
# levels to restrict the plot to a few lineages.
sel_lineages = levels(lin_lf$sel_lineages_f)
sel_lineages

keep_lf = lin_lf$sel_lineages_f %in% sel_lineages
lin_lf_plot = lin_lf[keep_lf, ]

# Drop factor levels that no longer occur after subsetting
lin_lf_plot$sel_lineages_f = factor(lin_lf_plot$sel_lineages_f)
levels(lin_lf_plot$sel_lineages_f)

#=========================
# Lineage count plot
#=========================
# FIX: pass the subsetted data (lin_lf_plot). The original passed the
# unfiltered `lin_lf`, so the selection above never affected the plot.
lin_count_bp(lin_lf_plot = lin_lf_plot
             , x_categ = "sel_lineages"
             , y_count = "p_count"
             , bar_fill_categ = "count_categ"
             , display_label_col = "p_count"
             , bar_stat_stype = "identity"
             , x_lab_angle = 90
             , my_xats = 20
             , bar_col_labels = c("Mutations", "Total Samples")
             , bar_col_values = c("grey50", "gray75")
             , y_scale_percent = FALSE # TRUE for diversity
             , y_log10 = FALSE
             , y_label = "Count")

###############################################
# Lineage SAV diversity count: lineage wf data
###############################################

#=========================
# Data: all lineages, or a selected few
#=========================
sel_lineages = levels(lin_wf$sel_lineages_f)
sel_lineages

keep_wf = lin_wf$sel_lineages_f %in% sel_lineages
lin_wf_plot = lin_wf[keep_wf, ]

# Drop factor levels that no longer occur after subsetting
lin_wf_plot$sel_lineages_f = factor(lin_wf_plot$sel_lineages_f)
levels(lin_wf_plot$sel_lineages_f)

#=========================
# Lineage Diversity plot
#=========================
# FIX: pass the subsetted data (lin_wf_plot) rather than the unfiltered `lin_wf`.
# NOTE(review): the data argument is named `lin_wf_plot` here but `lin_lf_plot`
# in the count plot above. The signature of lin_count_bp() is not visible from
# this script -- confirm which name it accepts; unless it takes `...`, one of
# the two calls will fail with an "unused argument" error.
lin_count_bp(lin_wf_plot = lin_wf_plot
             , x_categ = "sel_lineages"
             , y_count = "snp_diversity"
             , display_label_col = "snp_diversity_f"
             , bar_stat_stype = "identity"
             , x_lab_angle = 90
             , my_xats = 20
             , y_scale_percent = TRUE
             , y_label = "SAV diversity")
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue