diff --git a/scripts/functions/consurfP.R b/scripts/functions/consurfP.R new file mode 100644 index 0000000..b80a095 --- /dev/null +++ b/scripts/functions/consurfP.R @@ -0,0 +1,570 @@ +#!/usr/bin/env Rscript + +######################################################### +# TASK: function for wide plot +#with consurf score and error bars +#position numbers coloured by +# - ligand distance +# - active site residues +######################################################### + +#========================================================== +# wideP(): +# input args +#========================================================== +wideP_point <- function(plotdf + , xvar_colname = "position" + , yvar_colname = "consurf_score" + , yvar_colourN_colname = "consurf_colour_rev" # num from 0-1 + , plot_error_bars = T + , upper_EB_colname = "consurf_ci_upper" + , lower_EB_colname = "consurf_ci_lower" + + , plot_type = "point" # default is point + , point_colours + , p_size = 2 + , leg_title1 = "" + , leg_labels = c("0": "Insufficient Data" + , "1": "Variable" + , "2", "3", "4", "5", "6", "7", "8" + , "9": "Conserved") + + # axes title and label sizes + , x_axls = 12 # x-axis label size + , y_axls = 15 # y-axis label size + , x_axts = 12 # x-axis text size + , y_axts = 12 # y-axis text size + , default_xtc = "black" # x-axis text colour + , ptitle = "" + , xlab = "" + , ylab = "" + + # plot margins + , t_margin = 0.5 + , r_margin = 0.5 + , b_margin = 1 + , l_margin = 1 + , unit_margin = "cm" + + # Custom 1: x-axis: text colour + , xtext_colour_aa = F + , xtext_colour_aa1 = active_aa_pos + , xtext_colour_aa2 = aa_pos_drug + , xtext_colours = c("purple", "brown", "black") + + # Custom 2: x-axis: geom tiles ~ lig distance + , A_xvar_lig = T + , leg_title2 = "Ligand Distance" + , lig_dist_colname = LigDist_colname # from globals + , lig_dist_colours = c("green", "yellow", "orange", "red") + , tpos0 = 0 # 0 is a magic number that does my sensible default + , tW0 = 1 + , tH0 = 0.3 + + # Custom 3: x-axis: geom tiles ~ active sites and ligand + , A_xvar_aa = F + , aa_pos_drug = NULL + , drug_aa_colour = "purple" + , tW = 1 + , tH = 0.2 + , active_aa_pos = NULL + , active_aa_colour = "brown" + + , aa_pos_lig1 = NULL + , aa_colour_lig1 = "blue" + , tpos1 = 0 + + , aa_pos_lig2 = NULL + , aa_colour_lig2 = "cyan" + , tpos2 = 0 + + , aa_pos_lig3 = NULL + , aa_colour_lig3 = "cornflowerblue" + , tpos3 = 0 + + , default_gt_clr = "white" + ){ + + if(missing(point_colours)){ + temp_cols = colorRampPalette(c("seagreen", "sienna3"))(30) + point_colours = temp_cols + }else{ + point_colours = point_colours + } + + ############################### + # custom 1: x-axis text colour + ############################## + + if (xtext_colour_aa){ + positionF <- levels(as.factor(plotdf[[xvar_colname]])) + length(positionF) + aa_pos_colours = ifelse(positionF%in%xtext_colour_aa1, xtext_colours[1] + , ifelse(positionF%in%xtext_colour_aa2 + , xtext_colours[2] + , xtext_colours[3])) + }else{ + aa_pos_colours = default_xtc + } + + ################################################ + # Custom 2: x-axis geom tiles ~ lig distance + ################################################ + + #========================= + # Build data with colours + # ~ ligand distance + #========================= + if (A_xvar_lig){ + cat("\nAnnotating x-axis ~", lig_dist_colname, "requested...") + + #------------------------------------- + # round column values: to colour by + #-------------------------------------- + #plotdf = plotdf[order(plotdf[[lig_dist_colname]]),] + plotdf['lig_distR'] = round(plotdf[[lig_dist_colname]], digits = 0) + head(plotdf['lig_distR']) + + #------------------------------------- + # ligand distance range, min, max, etc + #-------------------------------------- + lig_min = min(round(plotdf[[lig_dist_colname]]), na.rm = T); lig_min + lig_max = max(round(plotdf[[lig_dist_colname]]), na.rm = T); lig_max + lig_mean = round(mean(round(plotdf[[lig_dist_colname]]), na.rm = T)); lig_mean + + #------------------------------------- + # Create mapping colour key + #-------------------------------------- + # sorting removes NA, so that n_colours == length(ligD_valsR) + n_colours = length(sort(unique(round(plotdf[[lig_dist_colname]], digits = 0)))); n_colours + + lig_cols = colorRampPalette(lig_dist_colours)(n_colours); lig_cols + ligD_valsR = sort(unique(round(plotdf[[lig_dist_colname]], digits = 0))); ligD_valsR + length(ligD_valsR) + + if (n_colours == length(ligD_valsR)) { + cat("\nStarting: mapping b/w" + , lig_dist_colname + , "and colours") + }else{ + cat("\nCannot start mapping b/w", lig_dist_colname, "and colours..." + , "\nLength mismatch:" + , "No. of colours: ", n_colours + , "\nValues to map:", length(ligD_valsR)) + } + + ligDcolKey <- data.frame(ligD_colours = lig_cols + , lig_distR = ligD_valsR); ligDcolKey + names(ligDcolKey) + cat("\nSuccessful: Mapping b/w", lig_dist_colname, "and colours") + + #------------------------------------- + # merge colour key with plotdf + #-------------------------------------- + plotdf = merge(plotdf, ligDcolKey, by = 'lig_distR') + + plotdf_check = as.data.frame(cbind(position = plotdf[[xvar_colname]] + , ligD = plotdf[[lig_dist_colname]] + , ligDR = plotdf$lig_distR + , ligD_cols = plotdf$ligD_colours)) + } else{ + plotdf = plotdf + } + + ############################################### + # Custom 3: x-axis geom tiles ~ active sites + ################################################ + + #========================== + # Build Data with colours + # ~ on active sites + #========================== + + if(A_xvar_aa) { + cat("\nAnnotation for xvar requested. Building colours for annotation...") + + aa_colour_colname = "bg_all" + aa_colour_colname1 = "col_bg1" + aa_colour_colname2 = "col_bg2" + aa_colour_colname3 = "col_bg3" + + #-------------------------------------------------- + # column colour 0: Active site + drug binding sites + #-------------------------------------------------- + plotdf[[aa_colour_colname]] = ifelse(plotdf[[xvar_colname]]%in%aa_pos_drug + , drug_aa_colour + , ifelse(plotdf[[xvar_colname]]%in%active_aa_pos + , active_aa_colour, default_gt_clr )) + plotdf[[aa_colour_colname]] + cat("\nColumn created 'bg_all':", length(plotdf[[aa_colour_colname]])) + + #------------------------------------------------ + # column colour 1: Ligand 1 + drug binding sites + #------------------------------------------------ + cat("\nAssigning colours to drug binding and ligand-1 binding residues") + plotdf[[aa_colour_colname1]] = plotdf[[aa_colour_colname]] + plotdf[[aa_colour_colname1]] = ifelse(plotdf[[xvar_colname]]%in%aa_pos_lig1 + , aa_colour_lig1, plotdf[[aa_colour_colname]]) + # plotdf[[aa_colour_colname1]] = ifelse( plotdf[[xvar_colname]]%in%active_aa_pos + # , drug_aa_colour + # , ifelse(plotdf[[xvar_colname]]%in%aa_pos_lig1 + # , aa_colour_lig1, default_gt_clr)) + #------------------------------------------------ + # column colour 2: Ligand 2 + #------------------------------------------------ + plotdf[[aa_colour_colname2]] = plotdf[[aa_colour_colname1]] + plotdf[[aa_colour_colname2]] = ifelse(plotdf[[xvar_colname]]%in%aa_pos_lig2 + , aa_colour_lig2, plotdf[[aa_colour_colname1]]) + + #------------------------------------------------ + # column colour 3: Ligand 3 + #------------------------------------------------ + plotdf[[aa_colour_colname3]] = plotdf[[aa_colour_colname2]] + plotdf[[aa_colour_colname3]] = ifelse(plotdf[[xvar_colname]]%in%aa_pos_lig3 + , aa_colour_lig3, plotdf[[aa_colour_colname2]]) + + } + ################### + # start plot + ################### + + #------------------- + # x and y axis + # range, scale, etc + #------------------- + my_xlim = length(unique(plotdf[[xvar_colname]])); my_xlim + ymin = min(plotdf[[yvar_colname]]); ymin + ymax = max(plotdf[[yvar_colname]]); ymax + + g = ggplot(plotdf, aes_string(x = sprintf("factor(%s)", xvar_colname) + , y = yvar_colname + , colour = sprintf("factor(%s)", yvar_colourN_colname) + )) + # "if SPECIAL do SPECIAL THING, otherwise do NORMAL THING" + if (plot_type == "bar"){ + g0 = g + + geom_bar(stat = "identity") + } + else{ + g0 = g + + coord_cartesian(xlim = c(1, my_xlim) + , ylim = c(ymin, ymax) + , clip = "off") + + geom_point(size = p_size) + + scale_colour_manual(values = point_colours) + # , labels = leg_labels + # , name = leg_title1) + } + + if (plot_error_bars){ + g0 = g0 + + geom_errorbar(aes(ymin = eval(parse(text = lower_EB_colname)) + , ymax = eval(parse(text = upper_EB_colname)) + )) + }else{ + + g0 = g0 + + } + + #--------------------- + # add axis formatting + #--------------------- + g1 = g0 + theme( axis.text.x = element_text(size = x_axts + , angle = 90 + , hjust = 1 + , vjust = 0.4 + , face = "bold" + , colour = aa_pos_colours) + , axis.text.y = element_text(size = y_axts + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = x_axls) + , axis.title.y = element_text(size = y_axls ) + , panel.background = element_rect(fill = "black", color = "black") + , panel.grid.major = element_line(color = "black") + , panel.grid.minor = element_line(color = "black") + , plot.margin = margin(t = t_margin + , r = r_margin + , b = b_margin + , l = l_margin + , unit = unit_margin))+ + guides(colour = guide_legend(title = "ConsurfXXXX")) + + + labs(title = ptitle + , x = xlab + , y = ylab) + + #------------------ + #Extract legend1 + #------------------ + # yayy + g1_leg = ggplot(plotdf, aes_string(x = sprintf("factor(%s)" + , xvar_colname) )) + g1_leg = g1_leg + geom_bar(); g1_leg + g1_leg = g1_leg + geom_bar(aes_string(fill = sprintf("factor(%s)" + , yvar_colourN_colname))) + + g1_leg = g1_leg + scale_fill_manual(values = consurf_palette2 , name = leg_title1) + g1_leg + + legend1 = get_legend(g1_leg) + + + ##################################################### + #============================================ + # x-axis: geom_tiles ~ ligand distance + #============================================ + #------- + # plot + #------- + if(A_xvar_lig){ # 0 is a magic number that does my sensible default + if (tpos0 == 0){ + tpos0 = ymin-0.5 + } + if (tpos1 == 0){ + tpos1 = ymin-0.65 + } + if (tpos2 == 0){ + tpos2 = ymin-0.75 + } + if (tpos3 == 0){ + tpos3 = ymin-0.85 + } + + + cat("\nColouring x-axis aa based on", lig_dist_colname + , "\nNo. of colours:", n_colours) + + g2 = g1 + geom_tile(aes(, tpos0 + , width = tW0 + , height = tH0) + , fill = plotdf$ligD_colours + , colour = plotdf$ligD_colours + , linetype = "blank") + + #cat("Nrows of plot df", length(plotdf$ligD_colours)) + out = g2 + # + # #------------------ + # # Extract legend2 + # #------------------ + # labels = seq(lig_min, lig_max, len = 5); labels + # labelsD = round(labels, digits = 0); labelsD + # + # g2_leg = g1 + + # geom_tile(aes(fill = .data[[lig_dist_colname]]) + # , colour = "white") + + # scale_fill_gradient2(midpoint = lig_mean + # , low = "green" + # , mid = "yellow" + # , high = "red" + # , breaks = labels + # #, n.breaks = 11 + # #, minor_breaks = c(2, 4, 6, 8, 10) + # , limits = c(lig_min, lig_max) + # , labels = labelsD + # , name = leg_title2) + # + # legend2 = get_legend(g2_leg) + # + # }else{ + # out = g1 + # } + ###################################################### + #------------------ + # Extract legend2 + #------------------ + labels = seq(lig_min, lig_max, len = 5); labels + labelsD = round(labels, digits = 0); labelsD + g2_leg = ggplot(plotdf, aes_string(x = sprintf("factor(%s)", xvar_colname) + , y = yvar_colname) + ) + + geom_tile(aes(fill = .data[[lig_dist_colname]]) + , colour = "white") + + scale_fill_gradient2(midpoint = lig_mean + , low = "green" + , mid = "yellow" + , high = "red" + , breaks = labels + #, n.breaks = 11 + #, minor_breaks = c(2, 4, 6, 8, 10) + , limits = c(lig_min, lig_max) + , labels = labelsD + , name = leg_title2) + + legend2 = get_legend(g2_leg) + + }else{ + out = g1 + } + + + + ##################################################### + # #============================================ + # # x-axis: geom_tiles ~ ligand distance + # #============================================ + # if(A_xvar_lig){ + # cat("\nColouring x-axis aa based on", lig_dist_colname + # , "\nNo. of colours:", n_colours) + # + # g2 = g1 + geom_tile(aes(, tpos0 + # , width = tW0 + # , height = tH0) + # , fill = plotdf$ligD_colours + # , colour = plotdf$ligD_colours + # , linetype = "blank") + # + # #cat("Nrows of plot df", length(plotdf$ligD_colours)) + # out = g2 + # }else{ + # out = g1 + # } + # +############################################################################################ + #============================================== + # x-axis: geom_tiles ~ active sites and others + #============================================== + if(A_xvar_aa){ + #tpos = 0 + #tW = 1 + #tH = 0.2 + + #--------------------- + # Add2plot: 3 ligands + #--------------------- + if (all(!is.null(active_aa_pos) && + !is.null(aa_pos_drug) && + !is.null(aa_pos_lig1) && !is.null(aa_pos_lig2) && !is.null(aa_pos_lig3))) { + cat("\n\nAnnotating xvar with active, drug binding, and Lig 1&2&3 sites") + cat("\nCreating column colours, column name:", aa_colour_colname3) + + cat("\nDoing Plot with 3 ligands") + out = out + geom_tile(aes(,tpos3 + , width = tW + , height = tH ) + , fill = plotdf[[aa_colour_colname3]] + , colour = plotdf[[aa_colour_colname3]] + , linetype = "solid") + + geom_tile(aes(, tpos2 + , width = tW + , height = tH ) + , fill = plotdf[[aa_colour_colname2]] + , colour = plotdf[[aa_colour_colname2]] + , linetype = "solid")+ + + geom_tile(aes(, tpos1 + , width = tW + , height = tH) + , fill = plotdf[[aa_colour_colname1]] + , colour = plotdf[[aa_colour_colname1]] + , linetype = "solid") + + cat("\nDone Plot with 3 ligands") + } + #--------------------- + # Add2plot: 2 ligands + #--------------------- + if (all(!is.null(active_aa_pos) && + !is.null(aa_pos_drug) && + !is.null(aa_pos_lig1) && !is.null(aa_pos_lig2) && is.null(aa_pos_lig3))) { + cat("\n\nAnnotating xvar with active, drug binding, and Lig 1&2 sites") + cat("\nCreating column colours, column name:", aa_colour_colname2) + + cat("\nDoing Plot with 2 ligands") + out = out + + geom_tile(aes(, tpos2 + , width = tW + , height = tH) + , fill = plotdf[[aa_colour_colname2]] + , colour = plotdf[[aa_colour_colname2]] + , linetype = "solid")+ + geom_tile(aes(, tpos1 + , width = tW + , height = tH) + , fill = plotdf[[aa_colour_colname1]] + , colour = plotdf[[aa_colour_colname1]] + , linetype = "solid") + + cat("\nDone Plot with 2 ligands") + } + + #--------------------- + # Add2plot: 1 ligand + #--------------------- + if (all(!is.null(active_aa_pos) && + !is.null(aa_pos_drug) && + !is.null(aa_pos_lig1) && is.null(aa_pos_lig2) && is.null(aa_pos_lig3))) { + cat("\n\nAnnotating xvar with active, drug binding, and Lig 1 sites") + cat("\nCreating column colours, column name:", aa_colour_colname1) + + cat("\nDoing Plot with 1 ligands") + + out = out + + geom_tile(aes(, tpos1 + , width = tW + , height = tH) + , fill = plotdf[[aa_colour_colname1]] + , colour = plotdf[[aa_colour_colname1]] + , linetype = "solid") + + cat("\nDone Plot with 1 ligand") + + } + + #----------------------------------- + # Add2plot:NO ligands + # No Ligs: Just drug and active site + # DEFAULT: A_xvar_aa == TRUE + #---------------------------------- + if (all(!is.null(active_aa_pos) && + !is.null(aa_pos_drug) && + is.null(aa_pos_lig1) && + is.null(aa_pos_lig2) && + is.null(aa_pos_lig3))) { + + cat("\n\nAnnotating xvar with active and drug binding sites") + cat("\nCreating column colours, column name:", aa_colour_colname) + cat("\nDoing Plot with 0 ligands: active and drug site only") + + out = out + geom_tile(aes(, tpos3 + , width = tW + , height = tH) + , fill = plotdf[[aa_colour_colname]] + , colour = plotdf[[aa_colour_colname]] + , linetype = "solid") + + cat("\nDone Plot with: Active and Drug sites") + + } + }else{ + cat("\nNo annotation for xvar requested") + } + if (A_xvar_lig){ + legs = grid.arrange(legend1 + , legend2 + , ncol = 1 + , heights = c(3/4,1)) + out2 = grid.arrange( out + theme(legend.position = "none") + , legs + , ncol = 2 + , widths = c(9/10, 0.5/10) + ) + }else{ + out2 = grid.arrange( out + theme(legend.position = "none") + , legend1 + , ncol = 2 + , widths = c(9/10, 0.5/10) + ) + } + +return(out2) +#return(out2) + +} + +############################################################# +# end of function +############################################################# diff --git a/scripts/functions/tests/test_consurfP.R b/scripts/functions/tests/test_consurfP.R new file mode 100644 index 0000000..ed32a50 --- /dev/null +++ b/scripts/functions/tests/test_consurfP.R @@ -0,0 +1,149 @@ + +## ...opt args +library(ggplot2) +library(tidyverse) +library(cowplot) +library(gridExtra) +source("consurf_plot_func.R") + +# ########################################################################### +# merged_df3 = read.csv("~/git/Data/cycloserine/output/alr_all_params.csv"); source("~/git/LSHTM_analysis/config/alr.R") +# if ( tolower(gene) == "alr") { +# aa_pos_lig1 = NULL +# aa_pos_lig2 = NULL +# aa_pos_lig3 = NULL +# p_title = gene +# } +########################################################################### +# merged_df3 = read.csv("~/git/Data/ethambutol/output/embb_all_params.csv"); source("~/git/LSHTM_analysis/config/embb.R") +# if ( tolower(gene) == "embb") { +# aa_pos_lig1 = aa_pos_ca +# aa_pos_lig2 = aa_pos_cdl +# aa_pos_lig3 = aa_pos_dsl +# p_title = gene +# } +########################################################################### +# merged_df3 = read.csv("~/git/Data/streptomycin/output/gid_all_params.csv"); source("~/git/LSHTM_analysis/config/gid.R") +# if ( tolower(gene) == "gid") { +# aa_pos_lig1 = aa_pos_rna +# aa_pos_lig2 = aa_pos_sam +# aa_pos_lig3 = aa_pos_amp +# p_title = gene +# } +########################################################################### +# merged_df3 = read.csv("~/git/Data/isoniazid/output/katg_all_params.csv"); source("~/git/LSHTM_analysis/config/katg.R") +# if ( tolower(gene) == "katg") { +# aa_pos_lig1 = aa_pos_hem +# aa_pos_lig2 = NULL +# aa_pos_lig3 = NULL +# p_title = gene +# } +########################################################################### +# merged_df3 = read.csv("~/git/Data/pyrazinamide/output/pnca_all_params.csv"); source("~/git/LSHTM_analysis/config/pnca.R") +# if ( tolower(gene) == "pnca") { +# aa_pos_lig1 = aa_pos_fe +# aa_pos_lig2 = NULL +# aa_pos_lig3 = NULL +# p_title = gene +# } +########################################################################### +merged_df3 = read.csv("~/git/Data/rifampicin/output/rpob_all_params.csv"); source("~/git/LSHTM_analysis/config/rpob.R") +if ( tolower(gene) == "rpob") { + aa_pos_lig1 = NULL + aa_pos_lig2 = NULL + aa_pos_lig3 = NULL + p_title = gene +} +######################################################################### + +consurf_palette1 = c("0" = "yellow2" + , "1" = "cyan1" + , "2" = "steelblue2" + , "3" = "cadetblue2" + , "4" = "paleturquoise2" + , "5" = "thistle3" + , "6" = "thistle2" + , "7" = "plum2" + , "8" = "maroon" + , "9" = "violetred2") + +consurf_palette2 = c("0" = "yellow2" + , "1" = "forestgreen" + , "2" = "seagreen3" + , "3" = "palegreen1" + , "4" = "darkseagreen2" + , "5" = "thistle3" + , "6" = "lightpink1" + , "7" = "orchid3" + , "8" = "orchid4" + , "9" = "darkorchid4") + +################################################# +#active_aa_pos = c(2, 4) +#aa_pos_drug = c(3, 4, 14, 10) +aa_pos_hbond = c(2, 4) +aa_pos_other = c(3, 4, 14, 10) + +wideP_point (plotdf = merged_df3 + , xvar_colname = "position" + , yvar_colname = "consurf_score" + , yvar_colourN_colname = "consurf_colour_rev" + , ylab = "Consurf score" + , plot_error_bars = F + , upper_EB_colname = "consurf_ci_upper" + , lower_EB_colname = "consurf_ci_lower" + , plot_type = "point" + , point_colours = consurf_palette2 + , leg_title1 = "Consurf" + , leg_labels = c("0"="Insufficient Data" + , "1"= "Variable" + , "2", "3", "4", "5", "6", "7", "8" + , "9"= "Conserved") + + # axes title and label sizes + , x_axts = 8 + , y_axts = 12 + , x_axls = 12 + , y_axls = 15 + , default_xtc = "black" + , ptitle = p_title + , xlab = "" + + # x-axis: text colour + , xtext_colour_aa = F + , xtext_colour_aa1 = active_aa_pos + , xtext_colour_aa2 = aa_pos_drug + , xtext_colours = c("purple", "brown", "black") + + # x-axis: geom tiles ~ lig distance + , A_xvar_lig = T + , leg_title2 = "Ligand\nDistance" + , lig_dist_colname = "ligand_distance" + , lig_dist_colours = c("green", "yellow", "orange", "red") + #, tpos0 = 0 #-1.9 #-1.7 + #, tpos0 = 2.5 + #, tW0 = 1 + #, tH0 = 0.2 #0.3 + + # x-axis: geom tiles ~ active sites and ligand + , A_xvar_aa = T + , active_aa_pos = active_aa_pos + , active_aa_colour = "brown" + , aa_pos_drug = aa_pos_drug + , drug_aa_colour = "purple" + + , aa_pos_lig1 = aa_pos_lig1 + , aa_colour_lig1 = "blue" + #, tpos1 = 2 #-0.4-1.7 #-0.3-1.65 + + , aa_pos_lig2 = aa_pos_lig2 + , aa_colour_lig2 = "cyan" + #, tpos2 = 1.5#-0.3-1.7 #-0.2-1.65 + + , aa_pos_lig3 = aa_pos_lig3 + , aa_colour_lig3 = "cornflowerblue" + #, tpos3 = 1 #-0.2-1.7#-0.1-1.65 + + , default_gt_clr = "white" + + )