From 4315adc5562ab087e8bf2fd353723d5226a8d5b7 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Wed, 10 Aug 2022 14:07:11 +0100 Subject: [PATCH] wideP_consurf3 --- scripts/functions/wideP_consurf3.R | 391 +++++++++++++++++++++++++++++ 1 file changed, 391 insertions(+) create mode 100644 scripts/functions/wideP_consurf3.R diff --git a/scripts/functions/wideP_consurf3.R b/scripts/functions/wideP_consurf3.R new file mode 100644 index 0000000..358418c --- /dev/null +++ b/scripts/functions/wideP_consurf3.R @@ -0,0 +1,391 @@ +#!/usr/bin/env Rscript + +######################################################### +# TASK: function for wide plot +#with consurf score and error bars +#position numbers coloured by +# - ligand distance +# - active site residues +######################################################### + +#========================================================== +# wideP(): +# input args +#========================================================== +wideP_consurf3 <- function(plot_df + , aa_pos_drug = NULL + , aa_pos_lig1 = NULL + , aa_pos_lig2 = NULL + , aa_pos_lig3 = NULL + , active_aa_pos = NULL + + , xvar_colname = "position" + , yvar_colname = "consurf_score" + , yvar_colourN_colname = "consurf_colour_rev" # num from 0-1 + , plot_error_bars = T + , upper_EB_colname = "consurf_ci_upper" + , lower_EB_colname = "consurf_ci_lower" + + , plot_type = "point" # default is point + , point_colours = consurf_bp_colours + , p_size = 2 + , leg_title1 = "" + , leg_labels = c("0": "Insufficient Data" + , "1": "Variable" + , "2", "3", "4", "5", "6", "7", "8" + , "9": "Conserved") + , panel_col = "grey" + , panel_col_fill = "grey" + + # axes title and label sizes + , x_axls = 12 # x-axis label size + , y_axls = 15 # y-axis label size + , x_axts = 12 # x-axis text size + , y_axts = 12 # y-axis text size + , default_xtc = "black" # x-axis text colour + , ptitle = "" + , xlab = "" + , ylab = "" + , pts = 20 + + # plot margins + , t_margin = 0 + , r_margin = 0 + , b_margin = 0 + , l_margin = 0 + , unit_margin = "cm" + + # Custom 1: x-axis: text colour + , xtext_colour_aa = F + , xtext_colour_aa1 = active_aa_pos + , xtext_colour_aa2 = aa_pos_drug + , xtext_colours = c("purple", "brown", "black") + + # Custom 2: x-axis: geom tiles ~ lig distance + , annotate_ligand_distance = T + , leg_title2 = "Ligand Distance" + , lig_dist_colname = LigDist_colname # from globals + , lig_dist_colours = c("green", "yellow", "orange", "red") + , tpos0 = 0 # 0 is a magic number that does my sensible default + , tW0 = 1 + , tH0 = 0.3 + + # Custom 3: x-axis: geom tiles ~ active sites and ligand + , annotate_active_sites = T + + , drug_aa_colour = "purple" + , active_aa_colour = "brown" + + , aa_colour_lig1 = "blue" + , tpos1 = 0 + + , aa_colour_lig2 = "cyan" + , tpos2 = 0 + + , aa_colour_lig3 = "cornflowerblue" + , tpos3 = 0 + + , default_gt_clr = "white" + , build_plot_df=FALSE + , debug=FALSE +){ + + if(missing(point_colours)){ + temp_cols = colorRampPalette(c("seagreen", "sienna3"))(30) + point_colours = temp_cols + }else{ + point_colours = point_colours + } + + ############################### + # custom 1: x-axis text colour + ############################## + + if (xtext_colour_aa){ + positionF <- levels(as.factor(plot_df[[xvar_colname]])) + length(positionF) + aa_pos_colours = ifelse(positionF%in%xtext_colour_aa1, xtext_colours[1] + , ifelse(positionF%in%xtext_colour_aa2 + , xtext_colours[2] + , xtext_colours[3])) + }else{ + aa_pos_colours = default_xtc + } + + ################################################ + # Custom 2: x-axis geom tiles ~ lig distance + ################################################ + + #========================= + # Build data with colours + # ~ ligand distance + #========================= + if (annotate_ligand_distance){ + cat("\nAnnotating x-axis ~", lig_dist_colname, "requested...") + + #------------------------------------- + # round column values: to colour by + #-------------------------------------- + #plot_df = plot_df[order(plot_df[[lig_dist_colname]]),] + plot_df['lig_distR'] = round(plot_df[[lig_dist_colname]], digits = 0) + head(plot_df['lig_distR']) + + #------------------------------------- + # ligand distance range, min, max, etc + #-------------------------------------- + lig_min = min(round(plot_df[[lig_dist_colname]]), na.rm = T); lig_min + lig_max = max(round(plot_df[[lig_dist_colname]]), na.rm = T); lig_max + lig_mean = round(mean(round(plot_df[[lig_dist_colname]]), na.rm = T)); lig_mean + + #------------------------------------- + # Create mapping colour key + #-------------------------------------- + # sorting removes NA, so that n_colours == length(ligD_valsR) + n_colours = length(sort(unique(round(plot_df[[lig_dist_colname]], digits = 0)))); n_colours + + lig_cols = colorRampPalette(lig_dist_colours)(n_colours); lig_cols + ligD_valsR = sort(unique(round(plot_df[[lig_dist_colname]], digits = 0))); ligD_valsR + length(ligD_valsR) + + if (n_colours == length(ligD_valsR)) { + cat("\nStarting: mapping b/w" + , lig_dist_colname + , "and colours") + }else{ + cat("\nCannot start mapping b/w", lig_dist_colname, "and colours..." + , "\nLength mismatch:" + , "No. of colours: ", n_colours + , "\nValues to map:", length(ligD_valsR)) + } + + ligDcolKey <- data.frame(ligD_colours = lig_cols + , lig_distR = ligD_valsR); ligDcolKey + #names(ligDcolKey) + cat("\nSuccessful: Mapping b/w", lig_dist_colname, "and colours") + + #------------------------------------- + # merge colour key with plot_df + #-------------------------------------- + plot_df = merge(plot_df, ligDcolKey, by = 'lig_distR') + + plot_df_check = as.data.frame(cbind(position = plot_df[[xvar_colname]] + , ligD = plot_df[[lig_dist_colname]] + , ligDR = plot_df$lig_distR + , ligD_cols = plot_df$ligD_colours)) + } + + ############################################### + # Custom 3: x-axis geom tiles ~ active sites + ################################################ + + #========================== + # Build Data with colours + # ~ on active sites + #========================== + aa_colour_colname = "bg_all" + aa_colour_colname1 = "col_bg1" + aa_colour_colname2 = "col_bg2" + aa_colour_colname3 = "col_bg3" + + if (build_plot_df) { + if(annotate_active_sites) { + cat("\nAnnotation for xvar requested. Building colours for annotation...") + + + #-------------------------------------------------- + # column colour 0: Active site + drug binding sites + #-------------------------------------------------- + plot_df[[aa_colour_colname]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_drug + , drug_aa_colour + , ifelse(plot_df[[xvar_colname]]%in%active_aa_pos + , active_aa_colour, default_gt_clr )) + plot_df[[aa_colour_colname]] + cat("\nColumn created 'bg_all':", length(plot_df[[aa_colour_colname]])) + + #------------------------------------------------ + # column colour 1: Ligand 1 + drug binding sites + #------------------------------------------------ + cat("\nAssigning colours to drug binding and ligand-1 binding residues") + plot_df[[aa_colour_colname1]] = plot_df[[aa_colour_colname]] + plot_df[[aa_colour_colname1]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_lig1 + , aa_colour_lig1, plot_df[[aa_colour_colname]]) + #------------------------------------------------ + # column colour 2: Ligand 2 + #------------------------------------------------ + plot_df[[aa_colour_colname2]] = plot_df[[aa_colour_colname1]] + plot_df[[aa_colour_colname2]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_lig2 + , aa_colour_lig2, plot_df[[aa_colour_colname1]]) + + #------------------------------------------------ + # column colour 3: Ligand 3 + #------------------------------------------------ + plot_df[[aa_colour_colname3]] = plot_df[[aa_colour_colname2]] + plot_df[[aa_colour_colname3]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_lig3 + , aa_colour_lig3, plot_df[[aa_colour_colname2]]) + + } + } else { + # set these to the string "DUMMY" so that the build-up-the-tiles bit works + aa_pos_drug = "DUMMY" + aa_pos_lig1 = "DUMMY" + active_aa_pos = "DUMMY" + if (aa_colour_colname2 %in% colnames(merged_df3)) { + aa_pos_lig2 = "DUMMY" + if (aa_colour_colname3 %in% colnames(merged_df3)) { + aa_pos_lig2 = "DUMMY" + } + } + } + ################### + # start plot + ################### + + #------------------- + # x and y axis + # range, scale, etc + #------------------- + my_xlim = length(unique(plot_df[[xvar_colname]])); my_xlim + ymin = min(plot_df[[yvar_colname]]); ymin + ymax = max(plot_df[[yvar_colname]]); ymax + + g = ggplot(plot_df, aes_string(x = sprintf("factor(%s)", xvar_colname) + , y = yvar_colname + , colour = sprintf("factor(%s)", yvar_colourN_colname) + )) + + + #"if SPECIAL do SPECIAL THING, otherwise do NORMAL THING" + if (plot_type == "bar"){ + cat('\ndoing bar plot') + g0 = g + + geom_bar(stat = "identity") + } + else{ + g0 = g + + coord_cartesian(xlim = c(1, my_xlim) + , ylim = c(ymin, ymax) + , clip = "off") + + geom_point(size = p_size, show.legend = FALSE) + + scale_colour_manual(values = point_colours) + } + + if (plot_error_bars){ + cat('\nplotting error bars') + g0 = g0 + + geom_errorbar(aes(ymin = eval(parse(text = lower_EB_colname)) + , ymax = eval(parse(text = upper_EB_colname)) + ), show.legend = FALSE) + }else{ + + g0 = g0 + + } + + #--------------------- + # add axis formatting + #--------------------- + g1 = g0 + theme( axis.text.x = element_text(size = x_axts + , angle = 90 + , hjust = 1 + , vjust = 0.4 + , face = "bold" + , colour = aa_pos_colours) + , axis.text.y = element_text(size = y_axts + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = x_axls) + , axis.title.y = element_text(size = y_axls ) + , panel.background = element_rect(fill = panel_col_fill, color = "transparent") + , plot.background = element_rect(fill = panel_col_fill, color = "transparent") + , panel.grid = element_blank() + + #, panel.grid.major = element_line(color = "black") + #, panel.grid.minor = element_line(color = "black") + , plot.title = element_text(size = pts + , hjust = 0.5) + , plot.margin = margin(t = t_margin + , r = r_margin + , b = b_margin + , l = l_margin + , unit = unit_margin))+ + #guides(colour = guide_legend(title = "ConsurfXXXX")) + + labs(x = NULL, y = NULL) + # labs(title = ptitle + # , x = xlab + # , y = ylab) + + #------------------ + #Extract legend1 + #------------------ + # yayy + g1_leg = ggplot(plot_df + , aes_string( + x = sprintf("factor(%s)" + , xvar_colname)) + ) + g1_leg = g1_leg + geom_bar(); g1_leg + g1_leg = g1_leg + geom_bar(aes_string(fill = sprintf("factor(%s)" + , yvar_colourN_colname))) + + g1_leg = g1_leg + scale_fill_manual(values = consurf_bp_colours , name = leg_title1) + g1_leg + + legend1 = get_legend(g1_leg) + + + out = g1 + + # x-axis: geom_tiles ~ active sites and others + if(annotate_active_sites){ + aligned = align_plots(out,position_annotation(plot_df)) + + out=cowplot::plot_grid( + out, + NULL, + ggplot(plot_df, + aes(x=factor(position), # THIS STUPID FUCKING FACTOR THING + ) + ) + + geom_tile(aes(y=0), + fill=plot_df$ligD_colours) + + scale_x_discrete("Position", labels=factor(plot_df$position)) + + theme_nothing() + + theme(plot.background = element_rect(fill = panel_col, colour=NA), + plot.margin = margin(t=0,b=0)) + + labs(x = NULL, y = NULL), #end of distance-heat-bar + NULL, + position_annotation(plot_df, bg = panel_col), + ncol=1, + align='v', + axis='lr', + rel_heights = c(10,0,1,-0.1,1) + + ) +} + if (annotate_ligand_distance){ + cat('\nOutput: Plot + distance heat-bar + other stuff') + legs = cowplot::plot_grid(legend1 + , generate_distance_legend(plot_df) + , ncol = 1 + , align = "hv" + , rel_heights = c(2/4,3/4)) + + out2 = cowplot::plot_grid( out + theme(legend.position = "none") + , legs + , ncol = 2 + , align = "hv" + , rel_widths = c(9/10, 0.4/10) + ) + }else{ + out2 = cowplot::plot_grid( out + theme(legend.position = "none") + , legend1 + , ncol = 2 + , align = "hv" + , rel_widths = c(9/10, 0.5/10) + ) + } + return(out2) +} + +#wideP_consurf3(small_df3, point_colours = consurf_bp_colours)