added hbond residues in config for all genes

This commit is contained in:
Tanushree Tunstall 2022-02-09 15:59:18 +00:00
parent 7a14655ecb
commit 6ffb084546
9 changed files with 686 additions and 110 deletions

View file

@ -1,33 +1,105 @@
gene = "alr"
drug = "cycloserine"
aa_plip = c(66, 70, 112, 196, 237, 252, 254, 255, 295, 314, 343)
aa_ligplus = c(66, 64, 70, 112, 196, 236, 237, 252, 253, 254, 255, 388 )
#active_aa_pos = c(66, 64, 70, 112, 196, 236, 237, 252, 253, 254, 255, 295, 314, 343, 388)
active_aa_pos = sort(unique(c(aa_plip, aa_ligplus)))
#==========
# LIGPLUS
#===========
aa_ligplus_dcs = c(66, 64, 70, 112, 196
, 236, 237, 252, 253
, 254, 255, 388)
#aa_plip = c(66 = "hbond"
#, 70 = "hbond"
#, 112 = "hydrophobic"
#, 196 = "hbond"
#, 237 = "hbond"
#, 252 = "hbond"
#, 254 = "hbond"
#, 255 = "hbond"
#, 295 = "hbond"
#, 314 = "hbond"
#, 343 = "hbond")
aa_ligplus_dcs_hbond = c(255, 254, 237, 66, 196)
aa_ligplus_dcs_other = aa_ligplus_dcs[!aa_ligplus_dcs%in%aa_ligplus_dcs_hbond]
#aa_ligplus = c(66 = "hbond"
#, 64 = "hydrophobic"
#, 70 = "hydrophobic"
#, 112 = "hydrophobic"
#, 196 = "hbond"
#, 236 = "hydrophobic"
#, 237 = "hbond"
#, 252 = "hydrophobic"
#, 253 = "hydrophobic"
#, 254 = "hbond"
#, 255 = "hbond"
#, 388 = "hydrophobic"
#)
c1 = length(aa_ligplus_dcs_other) == length(aa_ligplus_dcs) - length(aa_ligplus_dcs_hbond)
#==========
# PLIP
#===========
aa_plip_dcs = c(66, 70, 112, 196, 237
, 252, 254, 255, 295
, 314, 343)
aa_plip_dcs_hbond = c(66, 70, 196, 237
, 252, 254, 255, 295
, 314, 343)
aa_plip_dcs_other = aa_plip_dcs[!aa_plip_dcs%in%aa_plip_dcs_hbond]
c2 = length(aa_plip_dcs_other) == length(aa_plip_dcs) - length(aa_plip_dcs_hbond)
#==========
# Arpeggio
#===========
aa_arpeg_dcs = c(64, 66, 70, 112, 157, 164
, 194, 196, 200, 236, 237, 252, 253
, 254, 255, 256, 295, 314, 342, 343
, 344, 386, 388)
aa_arpeg_dcs_other = aa_arpeg_dcs[!aa_arpeg_dcs%in%c(aa_ligplus_dcs_other
, aa_plip_dcs_other)]
c3 = length(aa_arpeg_dcs_other) == length(aa_arpeg_dcs) - ( length(aa_ligplus_dcs_other) + length(aa_plip_dcs_other) )
#######################################################################
#===============
# Active site aa
#===============
active_aa_pos = sort(unique(c(aa_ligplus_dcs
, aa_plip_dcs
, aa_arpeg_dcs)))
#=================
# Drug binding aa
#=================
aa_pos_dcs = sort(unique(c(aa_ligplus_dcs
, aa_plip_dcs
, aa_arpeg_dcs)))
aa_pos_drug = aa_pos_dcs
#===============
# Hbond aa
#===============
aa_pos_dcs_hbond = sort(unique(c(aa_ligplus_dcs_hbond
, aa_plip_dcs_hbond)))
#=======================
# Other interactions aa
#=======================
aa_pos_dcs_other = active_aa_pos[!active_aa_pos%in%aa_pos_dcs_hbond]
c3 = length(aa_pos_dcs_other) == length(active_aa_pos) - length(aa_pos_dcs_hbond)
#######################################################################
if ( all(c1, c2, c3) ) {
cat("\nPASS:All active site residues and interctions checked and identified for"
, "\ngene:", gene
, "\ndrug:", drug
, "\n==================================================="
, "\nActive site residues for:", length(active_aa_pos)
, "\n==================================================="
, "\n"
, active_aa_pos
, "\n=================================================="
, "\nDrug binding residues:", length(aa_pos_drug)
, "\n==================================================="
, "\n"
#, aa_pos_dcs
, aa_pos_drug
, "\n==================================================="
, "\nHbond residues:", length(aa_pos_dcs_hbond)
, "\n==================================================="
, "\n"
, aa_pos_dcs_hbond
, "\n=================================================="
, "\nOther interaction residues:", length(aa_pos_dcs_other)
, "\n==================================================="
, "\n"
, aa_pos_dcs_other
, "\n\nNO other co-factors or ligands present\n")
}
#######################################################################

View file

@ -1,7 +1,110 @@
gene = "embB"
drug = "ethambutol"
aa_plip = c(299, 302, 303, 327, 594, 988, 1028 )
aa_ligplus = c(299, 302, 303, 306, 334, 594, 988, 1028)
#active_aa_pos = c(299, 302, 303, 306, 327, 334, 594, 988, 1028 )
active_aa_pos = sort(unique(c(aa_plip, aa_ligplus)))
# interacting chain B
#==========
# LIGPLUS
#===========
aa_ligplus_emb = c(299, 302, 303, 306, 334, 594, 988, 1028)
aa_ligplus_emb_hbond = c(299, 594)
aa_ligplus_ca = c(952, 954, 959)
aa_ligplus_ca_hbond = c(952, 954, 959)
aa_ligplus_cdl = c(460, 665, 568, 601, 572, 579, 580, 583)
aa_ligplus_cdl_hbond = c(601, 568, 665)
aa_ligplus_dsl = c(435, 442, 489, 452, 330, 589, 509, 446, 445, 506, 592, 590, 514, 403, 515)
aa_ligplus_dsl_hbond = c(445, 590, 592, 403)
#==========
# PLIP
#===========
aa_plip_emb = c(299, 302, 303, 327, 594, 988, 1028)
aa_plip_emb_hbond = c(299, 327, 594)
aa_plip_ca = c(952, 954, 959)
aa_plip_cdl = c(456, 572, 579, 583, 568)
#aa_plip_cdl_sb = c(537, 568, 601, 665)
aa_plip_dsl = c(330, 435, 446, 452, 489, 506, 589, 590, 445, 403, 595)
aa_plip_dsl_hbond = c(445, 590)
#aa_plip_dsl_sb = c(403, 595)
#==========
# Arpeggio
#===========
# emb:1402, 1403
aa_arpeg_emb = c(298, 299, 302, 303, 306, 318, 327, 334, 403, 445, 592, 594, 988, 1028)
aa_arpeg_ca = c(847, 853, 854, 952, 954, 955, 956, 959, 960)
aa_arpeg_cdl = c(456, 457, 460, 461, 521, 525, 533, 537, 554, 558, 568
, 569, 572, 573, 575, 576, 579, 580, 582, 583, 586, 601, 605, 616, 658
, 661, 662, 665)
aa_arpeg_dsl = c(299, 322, 329, 330, 403, 435, 438, 439, 442, 445, 446
, 449, 452, 455, 486, 489, 490, 493, 506, 509, 510, 513, 514
, 515, 587, 589, 590, 592, 595)
##############################################################
active_aa_pos = sort(unique(c(aa_ligplus_emb
, aa_plip_emb
, aa_arpeg_emb
, aa_ligplus_ca
, aa_plip_ca
, aa_arpeg_ca
, aa_ligplus_cdl
, aa_plip_cdl
, aa_arpeg_cdl
, aa_ligplus_dsl
, aa_plip_dsl
, aa_arpeg_dsl)))
##############################################################
cat("\nNo. of active site residues for gene"
, gene, ":"
, length(active_aa_pos)
, "\nThese are:\n"
, active_aa_pos)
##############################################################
aa_pos_emb = sort(unique(c( aa_ligplus_emb
, aa_plip_emb
, aa_arpeg_emb)))
aa_pos_drug = aa_pos_emb
aa_pos_emb_hbond = sort(unique(c( aa_ligplus_emb_hbond
, aa_plip_emb_hbond)))
aa_pos_ca = sort(unique(c( aa_ligplus_ca
, aa_plip_ca
, aa_arpeg_ca)))
aa_pos_cdl = sort(unique(c( aa_ligplus_cdl
, aa_plip_cdl
, aa_arpeg_cdl )))
aa_pos_cdl_hbond = sort(unique(c( aa_ligplus_cdl_hbond )))
aa_pos_dsl = sort(unique(c( aa_ligplus_dsl
, aa_plip_dsl
, aa_arpeg_dsl)))
aa_pos_dsl_hbond = sort(unique(c( aa_ligplus_dsl_hbond
, aa_plip_dsl_hbond)))
cat("\n==================================================="
, "\nActive site residues for", gene, "comprise of..."
, "\n==================================================="
, "\nNo. of", drug, "binding residues:" , length(aa_pos_emb), "\n"
, aa_pos_emb
, "\nNo. of co-factor 'Ca' binding residues:", length(aa_pos_ca) , "\n"
, aa_pos_ca
, "\nNo. of ligand 'CDL' binding residues:" , length(aa_pos_cdl), "\n"
, aa_pos_cdl
, "\nNo. of ligand 'DSL' binding residues:" , length(aa_pos_dsl), "\n"
, aa_pos_dsl, "\n"
)
##############################################################

View file

@ -1,27 +1,129 @@
gene = "gid"
drug = "streptomycin"
rna_bind_aa_pos = c(96, 97, 118, 163)
binding_aa_pos = c(48, 51, 137, 200)
aa_plip = c(118, 220, 223)
aa_ligplus = c(118, 220, 223)
active_aa_pos = sort(unique(c(rna_bind_aa_pos
, binding_aa_pos
, aa_plip
, aa_ligplus)))
#rna_site = G518
#rna_bind_aa_pos = c(96, 97, 118, 163)
#binding_aa_pos = c(48, 51, 137, 200)
# SAM: 226
# SRY: 1601
#==========
# LIGPLUS
#===========
aa_ligplus_sry = c(118, 220, 223) # 526 (rna) and 7mg527
aa_ligplus_sry_hbond = c(118, 220, 223)
aa_ligplus_sam = c(148, 137, 138, 139
, 93, 69, 119, 120
, 220, 219, 118, 223)
aa_ligplus_sam_hbond = c(220, 223)
aa_ligplus_amp = c(123, 125, 213, 214)
aa_ligplus_amp_hbond = c(125, 123, 213)
aa_ligplus_rna = c(137, 47, 48, 38, 35, 36, 37, 94, 33, 97, 139, 138, 163, 165, 164, 199)
aa_ligplus_rna_hbond = c(33, 97, 37, 47, 137)
#==========
# PLIP
#===========
aa_plip_sry = c(118, 220, 223)
aa_plip_sry_hbond = c(118, 220, 223)
aa_plip_sam = c(92, 118, 119, 120, 139, 220, 223, 148)
aa_plip_sam_hbond = c(92, 118, 119, 120, 139, 220, 223)
aa_plip_amp = c(123, 125, 213)
aa_plip_amp_hbond = c(123, 125, 213)
aa_plip_rna = c(33, 34, 36, 37, 47, 48, 51, 97, 137, 199)
aa_plip_rna_hbond = c(33, 34, 36, 37, 47, 51, 137, 199)
#==========
# Arpeggio
#===========
aa_arpeg_sry = c(118, 148, 220, 223, 224)
aa_arpeg_sam = c(68, 69, 92, 93, 97, 117
, 118, 119, 120, 136, 137
, 138, 139, 140, 148, 218
, 219, 220, 221, 222, 223)
aa_arpeg_amp = c(123, 125, 213)
##############################################################
#=============
# Active site
#=============
active_aa_pos = sort(unique(c(
#rna_bind_aa_pos
#, binding_aa_pos
aa_ligplus_sry
, aa_ligplus_sam
, aa_ligplus_amp
, aa_ligplus_rna
, aa_plip_sry
, aa_plip_sam
, aa_plip_amp
, aa_plip_rna
, aa_arpeg_sry
, aa_arpeg_sam
, aa_arpeg_amp
)))
##############################################################
cat("\nNo. of active site residues for gene"
, gene, ":"
, length(active_aa_pos)
, "\nThese are:\n"
, active_aa_pos)
##############################################################
aa_pos_sry = sort(unique(c(
aa_ligplus_sry
, aa_plip_sry
, aa_arpeg_sry)))
aa_pos_sry_hbond = sort(unique(c(
aa_ligplus_sry_hbond
, aa_plip_sry_hbond)))
aa_pos_drug = aa_pos_sry
aa_pos_rna = sort(unique(c(
aa_ligplus_rna
, aa_plip_rna)))
aa_pos_rna_hbond = sort(unique(c(
aa_ligplus_rna_hbond
, aa_plip_rna_hbond)))
aa_pos_sam = sort(unique(c(
aa_ligplus_sam
, aa_plip_sam
, aa_arpeg_sam)))
aa_pos_sam_hbond = sort(unique(c(
aa_ligplus_sam_hbond
, aa_plip_sam_hbond)))
aa_pos_amp = sort(unique(c(
aa_ligplus_amp
, aa_plip_amp
, aa_arpeg_amp)))
aa_pos_amp_hbond = sort(unique(c(
aa_ligplus_amp_hbond
, aa_plip_amp_hbond)))
cat("\n==================================================="
, "\nActive site residues for", gene, "comprise of..."
, "\n==================================================="
, "\nRNA binding residues:"
, rna_bind_aa_pos
, "\nBinding site residues:"
, binding_aa_pos)
, "\nNo. of", drug, "binding residues:" , length(aa_pos_sry), "\n"
, aa_pos_sry
, "\nNo. of RNA binding residues:" , length(aa_pos_rna), "\n"
, aa_pos_rna
, "\nNo. of ligand 'SAM' binding residues:", length(aa_pos_sam), "\n"
, aa_pos_sam
, "\nNo. of ligand 'AMP' binding residues:", length(aa_pos_amp), "\n"
, aa_pos_amp, "\n")
##############################################################

View file

@ -1,6 +1,104 @@
gene = "katG"
drug = "isoniazid"
aa_plip = c(104, 229, 230)
aa_ligplus = c(107, 108, 137, 229, 230)
active_aa_pos = sort(unique(c(aa_plip, aa_ligplus)))
#==========
# LIGPLUS
#===========
# hem (1500)
aa_ligplus_inh = c(107, 108, 137, 229, 230)
#aa_ligplus_inh_hbond # none
aa_ligplus_hem = c(94, 276, 315, 274, 270, 381, 273, 104, 314, 275,
100, 101, 321, 103, 269, 107, 266, 230, 380, 275, 314)
aa_ligplus_hem_hbond = c(94, 276, 315, 274, 270, 381)
aa_ligplus_hem_other = aa_ligplus_hem[!aa_ligplus_hem%in%aa_ligplus_hem_hbond]
c1 = length(aa_ligplus_hem_other) == length(aa_ligplus_hem) - length(aa_ligplus_hem_hbond)
#==========
# PLIP
#===========
aa_plip_inh = c(104, 229, 230)
aa_plip_inh_hbond = c(104, 229, 230)
aa_plip_hem = c(104, 107, 248, 252, 265, 275, 321, 412, 274, 276, 315)
aa_plip_hem_hbond = c(274, 276, 315)
#aa_plip_hem_sb = c(104, 276)
#aa_plip_hem_pi = c(107)
aa_plip_hem_other = aa_plip_hem[!aa_plip_hem%in%aa_plip_hem_hbond]
c2 = length(aa_plip_hem_other) == length(aa_plip_hem) - length(aa_plip_hem_hbond)
#==========
# Arpeggio
#===========
aa_arpeg_inh = c(104, 107, 108, 136, 137, 228, 229, 230, 232, 315)
aa_arpeg_inh_hbond = c(104, 137)
aa_arpeg_hem = c(94, 100, 101, 103, 104, 107, 230, 231, 232, 248
, 252, 265, 266, 269, 270, 272, 273, 274, 275, 276, 314, 315
, 317, 321, 378, 380, 408, 412)
#from here
##############################################################
#===============
# Active site aa
#===============
active_aa_pos = sort(unique(c(aa_ligplus_inh
, aa_plip_inh
, aa_arpeg_inh
, aa_ligplus_hem
, aa_plip_hem
, aa_arpeg_hem
)))
cat("\nNo. of active site residues for gene"
, gene, ":"
, length(active_aa_pos)
, "\nThese are:\n"
, active_aa_pos)
#=================
# Drug binding aa
#=================
aa_pos_inh = sort(unique(c( aa_ligplus_inh
, aa_plip_inh
, aa_arpeg_inh)))
aa_pos_drug = aa_pos_inh
#===============
# Hbond aa
#===============
aa_pos_inh_hbond = sort(unique(c( aa_plip_inh_hbond
, aa_arpeg_inh_hbond)))
#=======================
# Other interactions aa
#=======================
#---------------------------------------------
aa_pos_hem = sort(unique(c( aa_ligplus_hem
, aa_plip_hem
, aa_arpeg_hem)))
aa_pos_hem_hbond = sort(unique(c( aa_ligplus_hem_hbond
, aa_plip_hem_hbond
#, aa_arpeg_hem_hbond
)))
cat("\n==================================================="
, "\nActive site residues for", gene, "comprise of..."
, "\n==================================================="
, "\nNo. of", drug, "binding residues:" , length(aa_pos_inh) , "\n"
, aa_pos_inh
, "\nNo. of 'HEM' binding residues:" , length(aa_pos_hem) , "\n"
, aa_pos_hem, "\n")
##############################################################

View file

@ -11,16 +11,26 @@ drug = "pyrazinamide"
#aa_ligplus = c(8, 13 , 49 , 133, 134 , 138, 137)
#active_aa_pos = sort(unique(c(aa_plip, aa_ligplus)))
metal_aa_pos = c(49, 51, 57, 71)
catalytic_aa_pos = c(8, 96, 138)
substrate_aa_pos = c(13, 68, 103, 137)
hbond_aa_pos = c(133, 134, 8, 138)
#aa_pos_substrate = c(13, 68, 103, 137)
aa_pos_pza = c(13, 68, 103, 137)
aa_pos_fe = c(49, 51, 57, 71)
aa_pos_catalytic = c(8, 96, 138)
aa_pos_hbond = c(133, 134, 8, 138)
aa_pos_drug = aa_pos_pza
#==========
# Arpeggio
#===========
# all same except one extra
aa_arpeg = c(102)
##############################################################
active_aa_pos = sort(unique(c(metal_aa_pos
, catalytic_aa_pos
, substrate_aa_pos
, hbond_aa_pos)))
, catalytic_aa_pos
, substrate_aa_pos
, hbond_aa_pos
, aa_arpeg)))
##############################################################
cat("\nNo. of active site residues for gene"
, gene, ":"
, length(active_aa_pos)
@ -30,11 +40,13 @@ cat("\nNo. of active site residues for gene"
cat("\n==================================================="
, "\nActive site residues for", gene, "comprise of..."
, "\n==================================================="
, "\nMetal coordination centre residues:"
, metal_aa_pos
, "\nCatalytic triad residues:"
, catalytic_aa_pos
, "\nSubstrate binding residues:"
, substrate_aa_pos
, "\nH-bonding residues:"
, hbond_aa_pos)
, "\nNo. of", drug, "binding residues:" , length(aa_pos_pza) , "\n"
, aa_pos_pza
, "\nMetal coordination centre residues:" , length(aa_pos_fe) , "\n"
, aa_pos_fe
, "\nCatalytic triad residues:" , length(aa_pos_catalytic) , "\n"
, aa_pos_catalytic
, "\nH-bonding residues:" , length(aa_pos_hbond) , "\n"
, aa_pos_hbond , "\n")
##############################################################

View file

@ -1,6 +1,57 @@
gene = "rpoB"
drug = "rifampicin"
aa_plip = c(429, 432, 491, 487)
aa_plip_5uhc = c(430, 452, 483, 491, 432, 433, 448, 450, 459, 487)
active_aa_pos = sort(unique(c(aa_plip, aa_plip_5uhc)))
#==========
# LIGPLUS
#===========
# Error! No atom records found!
#==========
# PLIP
#===========
aa_plip_rfp = c(429, 432, 491, 487)
aa_plip_rfp_hbond = c(429, 432, 487)
# chainC: equivalent with offset (-6 from 5uhc) accounted
aa_plip_5uhc_rfp = c(430, 452, 483
, 491, 432, 433
, 448, 450, 459, 487)
aa_plip_5uhc_rfp_hbond = c(432, 433, 448, 450, 459, 487)
#==========
# Arpeggio
#===========
# rfp: 1894
aa_arpeg_rfp = c(170, 428, 429, 430, 431, 432
, 433, 435, 445, 448, 450, 452
, 453, 458, 483, 487, 491, 604
, 607, 674)
##############################################################
active_aa_pos = sort(unique(c(aa_plip_rfp
, aa_plip_5uhc_rfp
, aa_arpeg_rfp)))
##############################################################
cat("\nNo. of active site residues for gene"
, gene, ":"
, length(active_aa_pos)
, "\nThese are:\n"
, active_aa_pos)
##############################################################
aa_pos_rfp = sort(unique(c(aa_plip_rfp
, aa_plip_5uhc_rfp
, aa_arpeg_rfp)))
aa_pos_drug = aa_pos_rfp
aa_pos_rfp_hbond = sort(unique(c(aa_plip_rfp_hbond
, aa_plip_5uhc_rfp_hbond)))
cat("\n==================================================="
, "\nActive site residues for", gene, "comprise of..."
, "\n==================================================="
, "\nNo. of", drug, "binding residues:" , length(aa_pos_rfp), "\n"
, aa_pos_rfp
, "\n\nNO other co-factors or ligands present\n")
##############################################################

View file

@ -193,6 +193,29 @@ map(paste0(func_path, source_files), source) # source all your R scripts!
# set plot script dir
plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/"
####################################################
consurf_palette1 = c("0" = "yellow2"
, "1" = "cyan1"
, "2" = "steelblue2"
, "3" = "cadetblue2"
, "4" = "paleturquoise2"
, "5" = "thistle3"
, "6" = "thistle2"
, "7" = "plum2"
, "8" = "maroon"
, "9" = "violetred2")
consurf_palette2 = c("0" = "yellow2"
, "1" = "forestgreen"
, "2" = "seagreen3"
, "3" = "palegreen1"
, "4" = "darkseagreen2"
, "5" = "thistle3"
, "6" = "lightpink1"
, "7" = "orchid3"
, "8" = "orchid4"
, "9" = "darkorchid4")
##################################################
# Function name clashes with plyr and dplyr

View file

@ -213,7 +213,7 @@ g = ggplot(df, aes(factor(position, ordered = T)))
# This is the key to generating the geom_tiles OUTside the plotting area on the x-axis!
OutPlot_aa_PS = g +
coord_cartesian(xlim = c(1, my_xlim)
#, ylim = c(0, 6)
#, ylim = c(0, 6)0.85
, ylim = c(0, max(snp_count))
, clip = "off") +
geom_bar(aes(fill = group), colour = "grey") +

View file

@ -1,30 +1,3 @@
consurf_palette1 = c("0" = "yellow2"
, "1" = "cyan1"
, "2" = "steelblue2"
, "3" = "cadetblue2"
, "4" = "paleturquoise2"
, "5" = "thistle3"
, "6" = "thistle2"
, "7" = "plum2"
, "8" = "maroon"
, "9" = "violetred2")
consurf_palette2 = c("0" = "yellow2"
, "1" = "forestgreen"
, "2" = "seagreen3"
, "3" = "palegreen1"
, "4" = "darkseagreen2"
, "5" = "thistle3"
, "6" = "lightpink1"
, "7" = "orchid3"
, "8" = "orchid4"
, "9" = "darkorchid4")
#myCOL <- colorRampPalette(c("yellow2", "palegreen1", "darkorchid4"))(10)
#plot(1:100, col = myCOL, pch = 19, cex = 2)
#myColors <- scale_color_brewer(palette = "Cyan-Magenta")
consurf_cols = consurf_palette1
consurf_cols = consurf_palette2
#consurf_cols = myCOL
@ -42,17 +15,20 @@ length(unique(merged_df3[[aa_position_colname]]))
positionF <- levels(as.factor(merged_df3[[aa_position_colname]]))
length(positionF)
aa_pos_colours = ifelse(positionF%in%rna_bind_aa_pos, "purple"
, ifelse(positionF%in%binding_aa_pos, "orange", "black" ))
aa_pos_colours = ifelse(positionF%in%aa_pos_sry, "purple"
, ifelse(positionF%in%aa_pos_rna, "orange", "black" ))
bar = merged_df3
bar[['lab_bg']] = ifelse(bar[[aa_position_colname]]%in%rna_bind_aa_pos
aa_colour_colname = "lab_bg"
bar[[aa_colour_colname]] = ifelse(bar[[aa_position_colname]]%in%aa_pos_sry
, "purple"
, ifelse(bar[[aa_position_colname]]%in%binding_aa_pos
, ifelse(bar[[aa_position_colname]]%in%aa_pos_rna
, "orange", "white" ))
head(bar[[aa_position_colname]])
head(bar[['lab_bg']])
head(bar[[aa_colour_colname]])
my_xlim = length(unique(bar$position)); my_xlim
######################################################################
my_xlim = length(unique(bar[[aa_position_colname]])); my_xlim
ymin = min(bar$consurf_score)
ymax = max(bar$consurf_score)
@ -70,15 +46,38 @@ g = ggplot(bar, aes(x = factor(position)
) +
geom_errorbar(aes(ymin = consurf_ci_lower, ymax = consurf_ci_upper))
g0 = g + geom_tile(aes(,-2, width = 0.95, height = -0.2)
, fill = bar$lab_bg
, colour = "white")
g0
# g0 = g + geom_tile(aes(,-2, width = 0.95, height = -0.2)
# #, fill = bar$lab_bg
# , fill = bar[[aa_colour_colname]]
# , colour = "white")
g1 = g0 + theme( axis.text.x = element_text(size = 10
g0 = g +
geom_tile(aes(,-1.7, width = 0.95, height = 0.3)
, fill = bar$ligD_colours
, colour = "black")
# g0 = g +
# geom_tile(aes(,-1.9, width = 0.95, height = 0.2)
# , fill = bar[[aa_colour_colname4]]
# , colour = "white") +
# geom_tile(aes(,-2, width = 0.95, height = 0.3)
# , fill = bar[[aa_colour_colname3]]
# , colour = "white"
# )+
# geom_tile(aes(,-2.1, width = 0.95, height = 0.35)
# , fill = bar[[aa_colour_colname2]]
# , colour = "white"
# )+
# geom_tile(aes(,-2.2, width = 0.95, height = 0.45)
# , fill = bar[[aa_colour_colname1]]
# , colour = "white"
# )
g1 = g + theme( axis.text.x = element_text(size = 10
, angle = 90
, hjust = 1
, vjust = 0.4
, face = "bold"
#, colour = aa_pos_colours
)
, axis.text.y = element_text(size = 12
@ -92,12 +91,128 @@ g1 = g0 + theme( axis.text.x = element_text(size = 10
, panel.grid.minor = element_line(color = "black")) +
guides(colour = guide_legend(title = "Consurf"
, position = "top"
#, direction = "horizontal"
#, direction = "horizontal"
)) +
labs(title = ""
, x = "Wild-type position"
#, x = "Wild-type position"
, x = ""
, y = "Consurf score")
g1
##########################################
# add tile afterwards
g2 = g1 + geom_tile(aes(,-1.7, width = 0.95, height = 0.3)
, fill = bar$ligD_colours
, colour = "black")
g2
g3 = g2 +
geom_tile(aes(,-1.9, width = 0.95, height = 0.3)
, fill = bar[[aa_colour_colname4]]
, colour = "white") +
geom_tile(aes(,-2, width = 0.95, height = 0.3)
, fill = bar[[aa_colour_colname3]]
, colour = "white"
)+
geom_tile(aes(,-2.1, width = 0.95, height = 0.35)
, fill = bar[[aa_colour_colname2]]
, colour = "white"
)+
geom_tile(aes(,-2.2, width = 0.95, height = 0.35)
, fill = bar[[aa_colour_colname1]]
, colour = "white"
)
g3
#==============================================
#Multiple legends in a plot with geom_tile
#==============================================
# https://stackoverflow.com/questions/24822621/multiple-legends-in-a-plot-with-geom-tile
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
legend
}
legend1 = g_legend(g1)
g2 = g1 + geom_tile(aes(,-1.7, width = 0.95, height = 0.3)
, fill = bar$ligD_colours
, colour = "black")
g2
legend2 = g_legend(g2)
grid.arrange(g1+theme(legend.position = 'none')
#, legend1
, legend2
, ncol = 2
, widths=c(4/6, 1/6))
###############################################################
###############################################################
lig_min = min(round(bar[[LigDist_colname]])); lig_min
lig_max = max(round(bar[[LigDist_colname]])); lig_max
lig_mean = round(mean(bar[[LigDist_colname]])); lig_mean
labels = seq(lig_min, lig_max, len = 5); labels
labelsD = round(labels, digits = 0); labelsD
g = ggplot(bar, aes( x = factor(.data[[aa_position_colname]])
, y = .data[[LigDist_colname]]))
g
# yayy
g1 = g + geom_tile(aes(fill = .data[[LigDist_colname]])
, colour = "white") +
#scale_fill_gradient(low = "green", high = "red")
scale_fill_gradient2(midpoint = lig_mean
, low = "green"
, mid = "yellow"
, high = "red"
, breaks = labels
#, n.breaks = 11
#, minor_breaks = c(2, 4, 6, 8, 10)
, limits = c(lig_min, lig_max)
, labels = labelsD
, name = "Ligand Distance")
g1
main_leg = g_legend(g2)
main_leg
ligD_leg = g_legend(g1)
grid.arrange(ligD_leg
, ncol = 2
, widths=c(4/6, 1/6))
grid.arrange(g2+theme(legend.position = 'none')
, main_leg
, ligD_leg
, ncol = 3
, widths=c(9/10, 0.5/10, 0.5/10))
##########################
g3 = g2 + theme(legend.position = 'none')
g3
g4 = g3 + geom_tile(aes(fill = .data[['consurf_score']])
, colour = "white")
g4 + geom_tile(scale_fill_gradient(consurf_palette2)
midpoint = lig_mean
, low = "green"
, mid = "yellow"
, high = "red"
, breaks = labels
#, n.breaks = 11
#, minor_breaks = c(2, 4, 6, 8, 10)
, limits = c(lig_min, lig_max)
, labels = labelsD
, name = "Ligand Distance")
g1