diff --git a/config/alr.R b/config/alr.R index 38ca6cc..75a5d7b 100644 --- a/config/alr.R +++ b/config/alr.R @@ -1,33 +1,105 @@ gene = "alr" drug = "cycloserine" -aa_plip = c(66, 70, 112, 196, 237, 252, 254, 255, 295, 314, 343) -aa_ligplus = c(66, 64, 70, 112, 196, 236, 237, 252, 253, 254, 255, 388 ) -#active_aa_pos = c(66, 64, 70, 112, 196, 236, 237, 252, 253, 254, 255, 295, 314, 343, 388) -active_aa_pos = sort(unique(c(aa_plip, aa_ligplus))) +#========== +# LIGPLUS +#=========== +aa_ligplus_dcs = c(66, 64, 70, 112, 196 + , 236, 237, 252, 253 + , 254, 255, 388) -#aa_plip = c(66 = "hbond" -#, 70 = "hbond" -#, 112 = "hydrophobic" -#, 196 = "hbond" -#, 237 = "hbond" -#, 252 = "hbond" -#, 254 = "hbond" -#, 255 = "hbond" -#, 295 = "hbond" -#, 314 = "hbond" -#, 343 = "hbond") +aa_ligplus_dcs_hbond = c(255, 254, 237, 66, 196) +aa_ligplus_dcs_other = aa_ligplus_dcs[!aa_ligplus_dcs%in%aa_ligplus_dcs_hbond] -#aa_ligplus = c(66 = "hbond" -#, 64 = "hydrophobic" -#, 70 = "hydrophobic" -#, 112 = "hydrophobic" -#, 196 = "hbond" -#, 236 = "hydrophobic" -#, 237 = "hbond" -#, 252 = "hydrophobic" -#, 253 = "hydrophobic" -#, 254 = "hbond" -#, 255 = "hbond" -#, 388 = "hydrophobic" -#) +c1 = length(aa_ligplus_dcs_other) == length(aa_ligplus_dcs) - length(aa_ligplus_dcs_hbond) + +#========== +# PLIP +#=========== +aa_plip_dcs = c(66, 70, 112, 196, 237 + , 252, 254, 255, 295 + , 314, 343) +aa_plip_dcs_hbond = c(66, 70, 196, 237 + , 252, 254, 255, 295 + , 314, 343) + +aa_plip_dcs_other = aa_plip_dcs[!aa_plip_dcs%in%aa_plip_dcs_hbond] + +c2 = length(aa_plip_dcs_other) == length(aa_plip_dcs) - length(aa_plip_dcs_hbond) + +#========== +# Arpeggio +#=========== +aa_arpeg_dcs = c(64, 66, 70, 112, 157, 164 + , 194, 196, 200, 236, 237, 252, 253 + , 254, 255, 256, 295, 314, 342, 343 + , 344, 386, 388) + +aa_arpeg_dcs_other = aa_arpeg_dcs[!aa_arpeg_dcs%in%c(aa_ligplus_dcs_other + , aa_plip_dcs_other)] + +c3 = length(aa_arpeg_dcs_other) == length(aa_arpeg_dcs) - ( length(aa_ligplus_dcs_other) + length(aa_plip_dcs_other) ) + +####################################################################### + +#=============== +# Active site aa +#=============== +active_aa_pos = sort(unique(c(aa_ligplus_dcs + , aa_plip_dcs + , aa_arpeg_dcs))) +#================= +# Drug binding aa +#================= +aa_pos_dcs = sort(unique(c(aa_ligplus_dcs + , aa_plip_dcs + , aa_arpeg_dcs))) +aa_pos_drug = aa_pos_dcs + +#=============== +# Hbond aa +#=============== +aa_pos_dcs_hbond = sort(unique(c(aa_ligplus_dcs_hbond + , aa_plip_dcs_hbond))) + +#======================= +# Other interactions aa +#======================= +aa_pos_dcs_other = active_aa_pos[!active_aa_pos%in%aa_pos_dcs_hbond] + +c3 = length(aa_pos_dcs_other) == length(active_aa_pos) - length(aa_pos_dcs_hbond) + +####################################################################### +if ( all(c1, c2, c3) ) { + + cat("\nPASS:All active site residues and interctions checked and identified for" + , "\ngene:", gene + , "\ndrug:", drug + , "\n===================================================" + , "\nActive site residues for:", length(active_aa_pos) + , "\n===================================================" + , "\n" + , active_aa_pos + + , "\n==================================================" + , "\nDrug binding residues:", length(aa_pos_drug) + , "\n===================================================" + , "\n" + #, aa_pos_dcs + , aa_pos_drug + + , "\n===================================================" + , "\nHbond residues:", length(aa_pos_dcs_hbond) + , "\n===================================================" + , "\n" + , aa_pos_dcs_hbond + + , "\n==================================================" + , "\nOther interaction residues:", length(aa_pos_dcs_other) + , "\n===================================================" + , "\n" + , aa_pos_dcs_other + , "\n\nNO other co-factors or ligands present\n") + +} +####################################################################### diff --git a/config/embb.R b/config/embb.R index 0009583..c8db852 100644 --- a/config/embb.R +++ b/config/embb.R @@ -1,7 +1,110 @@ gene = "embB" drug = "ethambutol" -aa_plip = c(299, 302, 303, 327, 594, 988, 1028 ) -aa_ligplus = c(299, 302, 303, 306, 334, 594, 988, 1028) -#active_aa_pos = c(299, 302, 303, 306, 327, 334, 594, 988, 1028 ) -active_aa_pos = sort(unique(c(aa_plip, aa_ligplus))) +# interacting chain B +#========== +# LIGPLUS +#=========== +aa_ligplus_emb = c(299, 302, 303, 306, 334, 594, 988, 1028) +aa_ligplus_emb_hbond = c(299, 594) + +aa_ligplus_ca = c(952, 954, 959) +aa_ligplus_ca_hbond = c(952, 954, 959) + +aa_ligplus_cdl = c(460, 665, 568, 601, 572, 579, 580, 583) +aa_ligplus_cdl_hbond = c(601, 568, 665) + +aa_ligplus_dsl = c(435, 442, 489, 452, 330, 589, 509, 446, 445, 506, 592, 590, 514, 403, 515) +aa_ligplus_dsl_hbond = c(445, 590, 592, 403) + +#========== +# PLIP +#=========== +aa_plip_emb = c(299, 302, 303, 327, 594, 988, 1028) +aa_plip_emb_hbond = c(299, 327, 594) + +aa_plip_ca = c(952, 954, 959) + +aa_plip_cdl = c(456, 572, 579, 583, 568) +#aa_plip_cdl_sb = c(537, 568, 601, 665) + +aa_plip_dsl = c(330, 435, 446, 452, 489, 506, 589, 590, 445, 403, 595) +aa_plip_dsl_hbond = c(445, 590) +#aa_plip_dsl_sb = c(403, 595) + +#========== +# Arpeggio +#=========== +# emb:1402, 1403 +aa_arpeg_emb = c(298, 299, 302, 303, 306, 318, 327, 334, 403, 445, 592, 594, 988, 1028) +aa_arpeg_ca = c(847, 853, 854, 952, 954, 955, 956, 959, 960) +aa_arpeg_cdl = c(456, 457, 460, 461, 521, 525, 533, 537, 554, 558, 568 + , 569, 572, 573, 575, 576, 579, 580, 582, 583, 586, 601, 605, 616, 658 + , 661, 662, 665) +aa_arpeg_dsl = c(299, 322, 329, 330, 403, 435, 438, 439, 442, 445, 446 + , 449, 452, 455, 486, 489, 490, 493, 506, 509, 510, 513, 514 + , 515, 587, 589, 590, 592, 595) + +############################################################## +active_aa_pos = sort(unique(c(aa_ligplus_emb + , aa_plip_emb + , aa_arpeg_emb + + , aa_ligplus_ca + , aa_plip_ca + , aa_arpeg_ca + + , aa_ligplus_cdl + , aa_plip_cdl + , aa_arpeg_cdl + + , aa_ligplus_dsl + , aa_plip_dsl + , aa_arpeg_dsl))) +############################################################## +cat("\nNo. of active site residues for gene" + , gene, ":" + , length(active_aa_pos) + , "\nThese are:\n" + , active_aa_pos) + +############################################################## +aa_pos_emb = sort(unique(c( aa_ligplus_emb + , aa_plip_emb + , aa_arpeg_emb))) +aa_pos_drug = aa_pos_emb + +aa_pos_emb_hbond = sort(unique(c( aa_ligplus_emb_hbond + , aa_plip_emb_hbond))) + +aa_pos_ca = sort(unique(c( aa_ligplus_ca + , aa_plip_ca + , aa_arpeg_ca))) + +aa_pos_cdl = sort(unique(c( aa_ligplus_cdl + , aa_plip_cdl + , aa_arpeg_cdl ))) + +aa_pos_cdl_hbond = sort(unique(c( aa_ligplus_cdl_hbond ))) + +aa_pos_dsl = sort(unique(c( aa_ligplus_dsl + , aa_plip_dsl + , aa_arpeg_dsl))) + +aa_pos_dsl_hbond = sort(unique(c( aa_ligplus_dsl_hbond + , aa_plip_dsl_hbond))) + + +cat("\n===================================================" + , "\nActive site residues for", gene, "comprise of..." + , "\n===================================================" + , "\nNo. of", drug, "binding residues:" , length(aa_pos_emb), "\n" + , aa_pos_emb + , "\nNo. of co-factor 'Ca' binding residues:", length(aa_pos_ca) , "\n" + , aa_pos_ca + , "\nNo. of ligand 'CDL' binding residues:" , length(aa_pos_cdl), "\n" + , aa_pos_cdl + , "\nNo. of ligand 'DSL' binding residues:" , length(aa_pos_dsl), "\n" + , aa_pos_dsl, "\n" +) +############################################################## diff --git a/config/gid.R b/config/gid.R index a2f9981..bb645e4 100644 --- a/config/gid.R +++ b/config/gid.R @@ -1,27 +1,129 @@ gene = "gid" drug = "streptomycin" -rna_bind_aa_pos = c(96, 97, 118, 163) -binding_aa_pos = c(48, 51, 137, 200) -aa_plip = c(118, 220, 223) -aa_ligplus = c(118, 220, 223) - -active_aa_pos = sort(unique(c(rna_bind_aa_pos -, binding_aa_pos -, aa_plip -, aa_ligplus))) #rna_site = G518 +#rna_bind_aa_pos = c(96, 97, 118, 163) +#binding_aa_pos = c(48, 51, 137, 200) +# SAM: 226 +# SRY: 1601 +#========== +# LIGPLUS +#=========== +aa_ligplus_sry = c(118, 220, 223) # 526 (rna) and 7mg527 +aa_ligplus_sry_hbond = c(118, 220, 223) + +aa_ligplus_sam = c(148, 137, 138, 139 + , 93, 69, 119, 120 + , 220, 219, 118, 223) +aa_ligplus_sam_hbond = c(220, 223) + +aa_ligplus_amp = c(123, 125, 213, 214) +aa_ligplus_amp_hbond = c(125, 123, 213) + +aa_ligplus_rna = c(137, 47, 48, 38, 35, 36, 37, 94, 33, 97, 139, 138, 163, 165, 164, 199) +aa_ligplus_rna_hbond = c(33, 97, 37, 47, 137) + +#========== +# PLIP +#=========== +aa_plip_sry = c(118, 220, 223) +aa_plip_sry_hbond = c(118, 220, 223) + +aa_plip_sam = c(92, 118, 119, 120, 139, 220, 223, 148) +aa_plip_sam_hbond = c(92, 118, 119, 120, 139, 220, 223) + +aa_plip_amp = c(123, 125, 213) +aa_plip_amp_hbond = c(123, 125, 213) + +aa_plip_rna = c(33, 34, 36, 37, 47, 48, 51, 97, 137, 199) +aa_plip_rna_hbond = c(33, 34, 36, 37, 47, 51, 137, 199) + +#========== +# Arpeggio +#=========== +aa_arpeg_sry = c(118, 148, 220, 223, 224) +aa_arpeg_sam = c(68, 69, 92, 93, 97, 117 + , 118, 119, 120, 136, 137 + , 138, 139, 140, 148, 218 + , 219, 220, 221, 222, 223) +aa_arpeg_amp = c(123, 125, 213) +############################################################## +#============= +# Active site +#============= +active_aa_pos = sort(unique(c( +#rna_bind_aa_pos +#, binding_aa_pos +aa_ligplus_sry +, aa_ligplus_sam +, aa_ligplus_amp +, aa_ligplus_rna +, aa_plip_sry +, aa_plip_sam +, aa_plip_amp +, aa_plip_rna +, aa_arpeg_sry +, aa_arpeg_sam +, aa_arpeg_amp +))) + +############################################################## cat("\nNo. of active site residues for gene" , gene, ":" , length(active_aa_pos) , "\nThese are:\n" , active_aa_pos) +############################################################## +aa_pos_sry = sort(unique(c( + aa_ligplus_sry + , aa_plip_sry + , aa_arpeg_sry))) + +aa_pos_sry_hbond = sort(unique(c( + aa_ligplus_sry_hbond + , aa_plip_sry_hbond))) + +aa_pos_drug = aa_pos_sry + +aa_pos_rna = sort(unique(c( + aa_ligplus_rna + , aa_plip_rna))) + +aa_pos_rna_hbond = sort(unique(c( + aa_ligplus_rna_hbond + , aa_plip_rna_hbond))) + +aa_pos_sam = sort(unique(c( + aa_ligplus_sam + , aa_plip_sam + , aa_arpeg_sam))) + +aa_pos_sam_hbond = sort(unique(c( + aa_ligplus_sam_hbond + , aa_plip_sam_hbond))) + +aa_pos_amp = sort(unique(c( + aa_ligplus_amp + , aa_plip_amp + , aa_arpeg_amp))) + +aa_pos_amp_hbond = sort(unique(c( + aa_ligplus_amp_hbond + , aa_plip_amp_hbond))) + + cat("\n===================================================" , "\nActive site residues for", gene, "comprise of..." , "\n===================================================" - , "\nRNA binding residues:" - , rna_bind_aa_pos - , "\nBinding site residues:" - , binding_aa_pos) + , "\nNo. of", drug, "binding residues:" , length(aa_pos_sry), "\n" + , aa_pos_sry + , "\nNo. of RNA binding residues:" , length(aa_pos_rna), "\n" + , aa_pos_rna + , "\nNo. of ligand 'SAM' binding residues:", length(aa_pos_sam), "\n" + , aa_pos_sam + , "\nNo. of ligand 'AMP' binding residues:", length(aa_pos_amp), "\n" + , aa_pos_amp, "\n") + +############################################################## diff --git a/config/katg.R b/config/katg.R index cf73f39..7924596 100644 --- a/config/katg.R +++ b/config/katg.R @@ -1,6 +1,104 @@ gene = "katG" drug = "isoniazid" -aa_plip = c(104, 229, 230) -aa_ligplus = c(107, 108, 137, 229, 230) -active_aa_pos = sort(unique(c(aa_plip, aa_ligplus))) +#========== +# LIGPLUS +#=========== +# hem (1500) +aa_ligplus_inh = c(107, 108, 137, 229, 230) +#aa_ligplus_inh_hbond # none + +aa_ligplus_hem = c(94, 276, 315, 274, 270, 381, 273, 104, 314, 275, + 100, 101, 321, 103, 269, 107, 266, 230, 380, 275, 314) + +aa_ligplus_hem_hbond = c(94, 276, 315, 274, 270, 381) +aa_ligplus_hem_other = aa_ligplus_hem[!aa_ligplus_hem%in%aa_ligplus_hem_hbond] + +c1 = length(aa_ligplus_hem_other) == length(aa_ligplus_hem) - length(aa_ligplus_hem_hbond) + +#========== +# PLIP +#=========== +aa_plip_inh = c(104, 229, 230) +aa_plip_inh_hbond = c(104, 229, 230) + +aa_plip_hem = c(104, 107, 248, 252, 265, 275, 321, 412, 274, 276, 315) +aa_plip_hem_hbond = c(274, 276, 315) +#aa_plip_hem_sb = c(104, 276) +#aa_plip_hem_pi = c(107) +aa_plip_hem_other = aa_plip_hem[!aa_plip_hem%in%aa_plip_hem_hbond] + +c2 = length(aa_plip_hem_other) == length(aa_plip_hem) - length(aa_plip_hem_hbond) + +#========== +# Arpeggio +#=========== +aa_arpeg_inh = c(104, 107, 108, 136, 137, 228, 229, 230, 232, 315) +aa_arpeg_inh_hbond = c(104, 137) + +aa_arpeg_hem = c(94, 100, 101, 103, 104, 107, 230, 231, 232, 248 + , 252, 265, 266, 269, 270, 272, 273, 274, 275, 276, 314, 315 + , 317, 321, 378, 380, 408, 412) + +#from here + +############################################################## +#=============== +# Active site aa +#=============== +active_aa_pos = sort(unique(c(aa_ligplus_inh + , aa_plip_inh + , aa_arpeg_inh + + , aa_ligplus_hem + , aa_plip_hem + , aa_arpeg_hem + ))) +cat("\nNo. of active site residues for gene" + , gene, ":" + , length(active_aa_pos) + , "\nThese are:\n" + , active_aa_pos) + +#================= +# Drug binding aa +#================= +aa_pos_inh = sort(unique(c( aa_ligplus_inh + , aa_plip_inh + , aa_arpeg_inh))) +aa_pos_drug = aa_pos_inh + + +#=============== +# Hbond aa +#=============== +aa_pos_inh_hbond = sort(unique(c( aa_plip_inh_hbond + , aa_arpeg_inh_hbond))) + +#======================= +# Other interactions aa +#======================= + + + +#--------------------------------------------- + +aa_pos_hem = sort(unique(c( aa_ligplus_hem + , aa_plip_hem + , aa_arpeg_hem))) + +aa_pos_hem_hbond = sort(unique(c( aa_ligplus_hem_hbond + , aa_plip_hem_hbond + #, aa_arpeg_hem_hbond + ))) + + +cat("\n===================================================" + , "\nActive site residues for", gene, "comprise of..." + , "\n===================================================" + , "\nNo. of", drug, "binding residues:" , length(aa_pos_inh) , "\n" + , aa_pos_inh + , "\nNo. of 'HEM' binding residues:" , length(aa_pos_hem) , "\n" + , aa_pos_hem, "\n") + +############################################################## diff --git a/config/pnca.R b/config/pnca.R index 349278e..de15189 100644 --- a/config/pnca.R +++ b/config/pnca.R @@ -11,16 +11,26 @@ drug = "pyrazinamide" #aa_ligplus = c(8, 13 , 49 , 133, 134 , 138, 137) #active_aa_pos = sort(unique(c(aa_plip, aa_ligplus))) -metal_aa_pos = c(49, 51, 57, 71) -catalytic_aa_pos = c(8, 96, 138) -substrate_aa_pos = c(13, 68, 103, 137) -hbond_aa_pos = c(133, 134, 8, 138) +#aa_pos_substrate = c(13, 68, 103, 137) +aa_pos_pza = c(13, 68, 103, 137) +aa_pos_fe = c(49, 51, 57, 71) +aa_pos_catalytic = c(8, 96, 138) +aa_pos_hbond = c(133, 134, 8, 138) +aa_pos_drug = aa_pos_pza +#========== +# Arpeggio +#=========== +# all same except one extra +aa_arpeg = c(102) + +############################################################## active_aa_pos = sort(unique(c(metal_aa_pos - , catalytic_aa_pos - , substrate_aa_pos - , hbond_aa_pos))) - + , catalytic_aa_pos + , substrate_aa_pos + , hbond_aa_pos + , aa_arpeg))) +############################################################## cat("\nNo. of active site residues for gene" , gene, ":" , length(active_aa_pos) @@ -30,11 +40,13 @@ cat("\nNo. of active site residues for gene" cat("\n===================================================" , "\nActive site residues for", gene, "comprise of..." , "\n===================================================" - , "\nMetal coordination centre residues:" - , metal_aa_pos - , "\nCatalytic triad residues:" - , catalytic_aa_pos - , "\nSubstrate binding residues:" - , substrate_aa_pos - , "\nH-bonding residues:" - , hbond_aa_pos) + , "\nNo. of", drug, "binding residues:" , length(aa_pos_pza) , "\n" + , aa_pos_pza + , "\nMetal coordination centre residues:" , length(aa_pos_fe) , "\n" + , aa_pos_fe + , "\nCatalytic triad residues:" , length(aa_pos_catalytic) , "\n" + , aa_pos_catalytic + , "\nH-bonding residues:" , length(aa_pos_hbond) , "\n" + , aa_pos_hbond , "\n") + +############################################################## \ No newline at end of file diff --git a/config/rpob.R b/config/rpob.R index 5a8db53..d3fa494 100644 --- a/config/rpob.R +++ b/config/rpob.R @@ -1,6 +1,57 @@ gene = "rpoB" drug = "rifampicin" -aa_plip = c(429, 432, 491, 487) -aa_plip_5uhc = c(430, 452, 483, 491, 432, 433, 448, 450, 459, 487) -active_aa_pos = sort(unique(c(aa_plip, aa_plip_5uhc))) +#========== +# LIGPLUS +#=========== +# Error! No atom records found! + +#========== +# PLIP +#=========== +aa_plip_rfp = c(429, 432, 491, 487) +aa_plip_rfp_hbond = c(429, 432, 487) + +# chainC: equivalent with offset (-6 from 5uhc) accounted +aa_plip_5uhc_rfp = c(430, 452, 483 + , 491, 432, 433 + , 448, 450, 459, 487) +aa_plip_5uhc_rfp_hbond = c(432, 433, 448, 450, 459, 487) + +#========== +# Arpeggio +#=========== +# rfp: 1894 +aa_arpeg_rfp = c(170, 428, 429, 430, 431, 432 + , 433, 435, 445, 448, 450, 452 + , 453, 458, 483, 487, 491, 604 + , 607, 674) + +############################################################## +active_aa_pos = sort(unique(c(aa_plip_rfp + , aa_plip_5uhc_rfp + , aa_arpeg_rfp))) +############################################################## +cat("\nNo. of active site residues for gene" + , gene, ":" + , length(active_aa_pos) + , "\nThese are:\n" + , active_aa_pos) +############################################################## +aa_pos_rfp = sort(unique(c(aa_plip_rfp + , aa_plip_5uhc_rfp + , aa_arpeg_rfp))) + +aa_pos_drug = aa_pos_rfp + +aa_pos_rfp_hbond = sort(unique(c(aa_plip_rfp_hbond + , aa_plip_5uhc_rfp_hbond))) + +cat("\n===================================================" + , "\nActive site residues for", gene, "comprise of..." + , "\n===================================================" + , "\nNo. of", drug, "binding residues:" , length(aa_pos_rfp), "\n" + , aa_pos_rfp + , "\n\nNO other co-factors or ligands present\n") + +############################################################## diff --git a/scripts/Header_TT.R b/scripts/Header_TT.R index acb2e5e..4bc5792 100755 --- a/scripts/Header_TT.R +++ b/scripts/Header_TT.R @@ -193,6 +193,29 @@ map(paste0(func_path, source_files), source) # source all your R scripts! # set plot script dir plot_script_path = "~/git/LSHTM_analysis/scripts/plotting/" +#################################################### +consurf_palette1 = c("0" = "yellow2" + , "1" = "cyan1" + , "2" = "steelblue2" + , "3" = "cadetblue2" + , "4" = "paleturquoise2" + , "5" = "thistle3" + , "6" = "thistle2" + , "7" = "plum2" + , "8" = "maroon" + , "9" = "violetred2") + +consurf_palette2 = c("0" = "yellow2" + , "1" = "forestgreen" + , "2" = "seagreen3" + , "3" = "palegreen1" + , "4" = "darkseagreen2" + , "5" = "thistle3" + , "6" = "lightpink1" + , "7" = "orchid3" + , "8" = "orchid4" + , "9" = "darkorchid4") + ################################################## # Function name clashes with plyr and dplyr diff --git a/scripts/plotting/barplots_subcolours_aa_PS.R b/scripts/plotting/barplots_subcolours_aa_PS.R index 7b08882..6a31af2 100755 --- a/scripts/plotting/barplots_subcolours_aa_PS.R +++ b/scripts/plotting/barplots_subcolours_aa_PS.R @@ -213,7 +213,7 @@ g = ggplot(df, aes(factor(position, ordered = T))) # This is the key to generating the geom_tiles OUTside the plotting area on the x-axis! OutPlot_aa_PS = g + coord_cartesian(xlim = c(1, my_xlim) - #, ylim = c(0, 6) + #, ylim = c(0, 6)0.85 , ylim = c(0, max(snp_count)) , clip = "off") + geom_bar(aes(fill = group), colour = "grey") + diff --git a/scripts/plotting/consurf_plot.R b/scripts/plotting/consurf_plot.R index 916aee3..86c1ec1 100644 --- a/scripts/plotting/consurf_plot.R +++ b/scripts/plotting/consurf_plot.R @@ -1,30 +1,3 @@ -consurf_palette1 = c("0" = "yellow2" - , "1" = "cyan1" - , "2" = "steelblue2" - , "3" = "cadetblue2" - , "4" = "paleturquoise2" - , "5" = "thistle3" - , "6" = "thistle2" - , "7" = "plum2" - , "8" = "maroon" - , "9" = "violetred2") - -consurf_palette2 = c("0" = "yellow2" - , "1" = "forestgreen" - , "2" = "seagreen3" - , "3" = "palegreen1" - , "4" = "darkseagreen2" - , "5" = "thistle3" - , "6" = "lightpink1" - , "7" = "orchid3" - , "8" = "orchid4" - , "9" = "darkorchid4") - - -#myCOL <- colorRampPalette(c("yellow2", "palegreen1", "darkorchid4"))(10) -#plot(1:100, col = myCOL, pch = 19, cex = 2) -#myColors <- scale_color_brewer(palette = "Cyan-Magenta") - consurf_cols = consurf_palette1 consurf_cols = consurf_palette2 #consurf_cols = myCOL @@ -42,17 +15,20 @@ length(unique(merged_df3[[aa_position_colname]])) positionF <- levels(as.factor(merged_df3[[aa_position_colname]])) length(positionF) -aa_pos_colours = ifelse(positionF%in%rna_bind_aa_pos, "purple" - , ifelse(positionF%in%binding_aa_pos, "orange", "black" )) +aa_pos_colours = ifelse(positionF%in%aa_pos_sry, "purple" + , ifelse(positionF%in%aa_pos_rna, "orange", "black" )) bar = merged_df3 -bar[['lab_bg']] = ifelse(bar[[aa_position_colname]]%in%rna_bind_aa_pos +aa_colour_colname = "lab_bg" + +bar[[aa_colour_colname]] = ifelse(bar[[aa_position_colname]]%in%aa_pos_sry , "purple" - , ifelse(bar[[aa_position_colname]]%in%binding_aa_pos + , ifelse(bar[[aa_position_colname]]%in%aa_pos_rna , "orange", "white" )) head(bar[[aa_position_colname]]) -head(bar[['lab_bg']]) +head(bar[[aa_colour_colname]]) -my_xlim = length(unique(bar$position)); my_xlim +###################################################################### +my_xlim = length(unique(bar[[aa_position_colname]])); my_xlim ymin = min(bar$consurf_score) ymax = max(bar$consurf_score) @@ -70,15 +46,38 @@ g = ggplot(bar, aes(x = factor(position) ) + geom_errorbar(aes(ymin = consurf_ci_lower, ymax = consurf_ci_upper)) - g0 = g + geom_tile(aes(,-2, width = 0.95, height = -0.2) - , fill = bar$lab_bg - , colour = "white") - g0 - -g1 = g0 + theme( axis.text.x = element_text(size = 10 + # g0 = g + geom_tile(aes(,-2, width = 0.95, height = -0.2) + # #, fill = bar$lab_bg + # , fill = bar[[aa_colour_colname]] + # , colour = "white") + + g0 = g + + geom_tile(aes(,-1.7, width = 0.95, height = 0.3) + , fill = bar$ligD_colours + , colour = "black") + + # g0 = g + + # geom_tile(aes(,-1.9, width = 0.95, height = 0.2) + # , fill = bar[[aa_colour_colname4]] + # , colour = "white") + + # geom_tile(aes(,-2, width = 0.95, height = 0.3) + # , fill = bar[[aa_colour_colname3]] + # , colour = "white" + # )+ + # geom_tile(aes(,-2.1, width = 0.95, height = 0.35) + # , fill = bar[[aa_colour_colname2]] + # , colour = "white" + # )+ + # geom_tile(aes(,-2.2, width = 0.95, height = 0.45) + # , fill = bar[[aa_colour_colname1]] + # , colour = "white" + # ) + +g1 = g + theme( axis.text.x = element_text(size = 10 , angle = 90 , hjust = 1 , vjust = 0.4 + , face = "bold" #, colour = aa_pos_colours ) , axis.text.y = element_text(size = 12 @@ -92,12 +91,128 @@ g1 = g0 + theme( axis.text.x = element_text(size = 10 , panel.grid.minor = element_line(color = "black")) + guides(colour = guide_legend(title = "Consurf" , position = "top" - #, direction = "horizontal" + #, direction = "horizontal" )) + labs(title = "" - , x = "Wild-type position" + #, x = "Wild-type position" + , x = "" , y = "Consurf score") g1 -########################################## +# add tile afterwards +g2 = g1 + geom_tile(aes(,-1.7, width = 0.95, height = 0.3) + , fill = bar$ligD_colours + , colour = "black") +g2 + +g3 = g2 + + geom_tile(aes(,-1.9, width = 0.95, height = 0.3) + , fill = bar[[aa_colour_colname4]] + , colour = "white") + + geom_tile(aes(,-2, width = 0.95, height = 0.3) + , fill = bar[[aa_colour_colname3]] + , colour = "white" + )+ + geom_tile(aes(,-2.1, width = 0.95, height = 0.35) + , fill = bar[[aa_colour_colname2]] + , colour = "white" + )+ + geom_tile(aes(,-2.2, width = 0.95, height = 0.35) + , fill = bar[[aa_colour_colname1]] + , colour = "white" + ) + +g3 + +#============================================== +#Multiple legends in a plot with geom_tile +#============================================== + +# https://stackoverflow.com/questions/24822621/multiple-legends-in-a-plot-with-geom-tile +g_legend<-function(a.gplot){ + tmp <- ggplot_gtable(ggplot_build(a.gplot)) + leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box") + legend <- tmp$grobs[[leg]] + legend +} + +legend1 = g_legend(g1) + +g2 = g1 + geom_tile(aes(,-1.7, width = 0.95, height = 0.3) + , fill = bar$ligD_colours + , colour = "black") + +g2 + +legend2 = g_legend(g2) + +grid.arrange(g1+theme(legend.position = 'none') + #, legend1 + , legend2 + , ncol = 2 + , widths=c(4/6, 1/6)) +############################################################### +############################################################### + +lig_min = min(round(bar[[LigDist_colname]])); lig_min +lig_max = max(round(bar[[LigDist_colname]])); lig_max +lig_mean = round(mean(bar[[LigDist_colname]])); lig_mean +labels = seq(lig_min, lig_max, len = 5); labels +labelsD = round(labels, digits = 0); labelsD + +g = ggplot(bar, aes( x = factor(.data[[aa_position_colname]]) + , y = .data[[LigDist_colname]])) +g +# yayy +g1 = g + geom_tile(aes(fill = .data[[LigDist_colname]]) + , colour = "white") + + #scale_fill_gradient(low = "green", high = "red") + scale_fill_gradient2(midpoint = lig_mean + , low = "green" + , mid = "yellow" + , high = "red" + , breaks = labels + #, n.breaks = 11 + #, minor_breaks = c(2, 4, 6, 8, 10) + , limits = c(lig_min, lig_max) + , labels = labelsD + , name = "Ligand Distance") +g1 + + +main_leg = g_legend(g2) +main_leg + +ligD_leg = g_legend(g1) +grid.arrange(ligD_leg + , ncol = 2 + , widths=c(4/6, 1/6)) + +grid.arrange(g2+theme(legend.position = 'none') + , main_leg + , ligD_leg + , ncol = 3 + , widths=c(9/10, 0.5/10, 0.5/10)) + + +########################## +g3 = g2 + theme(legend.position = 'none') +g3 +g4 = g3 + geom_tile(aes(fill = .data[['consurf_score']]) + , colour = "white") + +g4 + geom_tile(scale_fill_gradient(consurf_palette2) + + + midpoint = lig_mean + , low = "green" + , mid = "yellow" + , high = "red" + , breaks = labels + #, n.breaks = 11 + #, minor_breaks = c(2, 4, 6, 8, 10) + , limits = c(lig_min, lig_max) + , labels = labelsD + , name = "Ligand Distance") +g1