From c9d7ea9fad00f16f420094db29d542363a18e23e Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Wed, 17 Aug 2022 18:42:15 +0100 Subject: [PATCH] AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA --- config/alr.R | 2 +- config/embb.R | 4 +- config/gid.R | 2 +- config/katg.R | 2 +- config/pnca.R | 2 +- config/rpob.R | 2 +- scripts/functions/bp_subcolours.R | 115 +++++++------ scripts/functions/logoP_or.R | 44 +++-- scripts/functions/position_annotation.R | 152 ++++++++++++------ .../plotting/plotting_thesis/gg_pairs_all.R | 11 +- 10 files changed, 208 insertions(+), 128 deletions(-) diff --git a/config/alr.R b/config/alr.R index 2e3c04a..bfbc350 100644 --- a/config/alr.R +++ b/config/alr.R @@ -107,6 +107,6 @@ if ( all(c1, c2, c3) ) { aa_pos_lig1 = NULL aa_pos_lig2 = NULL aa_pos_lig3 = NULL -tile_map=data.frame(tile=c("ALR","DSL","CDL","Ca"), +tile_map=data.frame(tile=c("ALR","DPA","CDL","Ca"), tile_colour=c("green","darkslategrey","navyblue","purple")) diff --git a/config/embb.R b/config/embb.R index 965b857..f7a18e8 100644 --- a/config/embb.R +++ b/config/embb.R @@ -104,7 +104,7 @@ cat("\n===================================================" , aa_pos_ca , "\nNo. of ligand 'CDL' binding residues:" , length(aa_pos_cdl), "\n" , aa_pos_cdl - , "\nNo. of ligand 'DSL' binding residues:" , length(aa_pos_dsl), "\n" + , "\nNo. of ligand 'DPA' binding residues:" , length(aa_pos_dsl), "\n" , aa_pos_dsl, "\n" ) ############################################################## @@ -117,6 +117,6 @@ aa_pos_lig1 = aa_pos_dsl #slategray aa_pos_lig2 = aa_pos_cdl #navy blue aa_pos_lig3 = aa_pos_ca #purple -tile_map=data.frame(tile=c("EMB","DSL","CDL","Ca"), +tile_map=data.frame(tile=c("EMB","DPA","CDL","Ca"), tile_colour=c("green","darkslategrey","navyblue","purple")) diff --git a/config/gid.R b/config/gid.R index 18c6328..34ff829 100644 --- a/config/gid.R +++ b/config/gid.R @@ -132,6 +132,6 @@ cat("\n===================================================" aa_pos_lig1 = aa_pos_rna aa_pos_lig2 = aa_pos_sam aa_pos_lig3 = aa_pos_amp -tile_map=data.frame(tile=c("GID","DSL","CDL","Ca"), +tile_map=data.frame(tile=c("GID","DPA","CDL","Ca"), tile_colour=c("green","darkslategrey","navyblue","purple")) diff --git a/config/katg.R b/config/katg.R index 5cee244..89506b5 100644 --- a/config/katg.R +++ b/config/katg.R @@ -106,6 +106,6 @@ cat("\n===================================================" aa_pos_lig1 = aa_pos_hem aa_pos_lig2 = NULL aa_pos_lig3 = NULL -tile_map=data.frame(tile=c("KAT","DSL","CDL","Ca"), +tile_map=data.frame(tile=c("KAT","DPA","CDL","Ca"), tile_colour=c("green","darkslategrey","navyblue","purple")) diff --git a/config/pnca.R b/config/pnca.R index 09a988c..40bb1e7 100644 --- a/config/pnca.R +++ b/config/pnca.R @@ -56,6 +56,6 @@ aa_pos_lig2 = NULL aa_pos_lig3 = NULL #aa_pos_lig2 = aa_pos_catalytic #aa_pos_lig3 = aa_pos_hbond -tile_map=data.frame(tile=c("PZA","DSL","CDL","Ca"), +tile_map=data.frame(tile=c("PZA","DPA","CDL","Ca"), tile_colour=c("green","darkslategrey","navyblue","purple")) diff --git a/config/rpob.R b/config/rpob.R index b5478ab..252d2ff 100644 --- a/config/rpob.R +++ b/config/rpob.R @@ -59,6 +59,6 @@ cat("\n===================================================" aa_pos_lig1 = NULL aa_pos_lig2 = NULL aa_pos_lig3 = NULL -tile_map=data.frame(tile=c("RPO","DSL","CDL","Ca"), +tile_map=data.frame(tile=c("RPO","DPA","CDL","Ca"), tile_colour=c("green","darkslategrey","navyblue","purple")) diff --git a/scripts/functions/bp_subcolours.R b/scripts/functions/bp_subcolours.R index 45842a3..a9e3ae3 100755 --- a/scripts/functions/bp_subcolours.R +++ b/scripts/functions/bp_subcolours.R @@ -36,10 +36,10 @@ ColourPalleteMulti = function(df, group, subgroup){ bp_stability_hmap <- function(plot_df = merged_df3 , xvar_colname = "position" - , yvar_colname = 'duet_scaled' # Only here so that you can do function(df) + , yvar_colname = 'avg_stability_scaled' # Only here so that you can do function(df) #, bar_col_colname = "group" - , stability_colname = "duet_scaled" # Only here so that you can do function(df) - , stability_outcome_colname = "duet_outcome" # Only here so that you can do function(df) + , stability_colname = "avg_stability_scaled" # Only here so that you can do function(df) + , stability_outcome_colname = "avg_stability_outcome" # Only here so that you can do function(df) , p_title = "DUMMY TITLE", # Only here so that you can do function(df) my_xaxls = 6, # x-axis label size my_yaxls = 6, # y-axis label size @@ -54,15 +54,18 @@ bp_stability_hmap <- function(plot_df = merged_df3 , lig_dist_colname = LigDist_colname # from globals , tpos0 = 0 # 0 is a magic number that does my sensible default , tW0 = 1 - , tH0 = 0.2 + , tH0 = 0.2, + y_max_override = 1, # an override for tidily plotting multiple different-ranged plots together + reorder_position = FALSE # enable to reorder according to plot_df$pos_count + ) { # Custom 2: x-axis geom tiles ~ lig distance - - # order the df by position and ensure it is a factor + + # order the df by position and ensure it is a factor plot_df = plot_df[order(plot_df[[xvar_colname]]), ] plot_df[[xvar_colname]] = factor(plot_df[[xvar_colname]]) @@ -76,10 +79,18 @@ bp_stability_hmap <- function(plot_df = merged_df3 # # Add col: 'group' plot_df$group = paste0(plot_df[[stability_outcome_colname]], "_", my_grp, sep = "") + plot_df=plot_df %>% dplyr::add_count(position) + plot_df$pos_count=plot_df$n + plot_df$n=NULL - # check unique values in normalised data - # cat("\nNo. of unique values in", stability_colname, "no rounding:" - # , length(unique(plot_df[[stability_colname]]))) + # define a "max Y" in case the user didn't supply one + if(reorder_position) { + y_max = max(plot_df$pos_count) + } + else{ + y_max = 1 # boring default + } + y_axis_limit = round_any(y_max, y_max_override, ceiling) # Call the function to create the palette based on the group defined above #subcols_ps @@ -87,55 +98,63 @@ bp_stability_hmap <- function(plot_df = merged_df3 cat("\nNo. of sub colours generated:", length(subcols_bp_hmap)) anno_bar=position_annotation(plot_df, - aa_pos_drug=aa_pos_drug, - active_aa_pos=active_aa_pos, - aa_pos_lig1=aa_pos_lig1, - aa_pos_lig2=aa_pos_lig2, - aa_pos_lig3=aa_pos_lig3 + reorder_position=reorder_position, + aa_pos_drug=aa_pos_drug, + active_aa_pos=active_aa_pos, + aa_pos_lig1=aa_pos_lig1, + aa_pos_lig2=aa_pos_lig2, + aa_pos_lig3=aa_pos_lig3 ) - # Generate the subcols barplot - cowplot::plot_grid( - ggplot(plot_df, aes_string(x = xvar_colname - #, ordered = T - ) - ) + - geom_bar(aes(fill = group) - , colour = "grey", size=0.125) + - - scale_fill_manual( values = subcols_bp_hmap - , guide = "none") + - # scale_x_discrete("Position", labels=factor(plot_df$position)) + - - theme( - panel.grid = element_line(color="lightgrey", size=0.125) - , axis.text.x = element_text(size = my_xaxls + subcols_plot = ggplot(plot_df) + + scale_fill_manual( values = subcols_bp_hmap + , guide = "none") + + # scale_x_discrete("Position", labels=factor(plot_df$position)) + + scale_y_continuous(limits=c(0,y_axis_limit)) + + theme( + panel.grid = element_line(color="lightgrey", size=0.125) + , axis.text.x = element_text(size = my_xaxls , angle = 90 , hjust = 1 , vjust = 0.4) - , axis.text.y = element_text(size = my_yaxls - , angle = 0 - , hjust = 1 - , vjust = 0) - , axis.title.x = element_blank() - , axis.ticks = element_blank() - #, axis.title.x = element_text(size = my_xaxts) - , axis.title.y = element_text(size = my_yaxts ) - , plot.title = element_text(size = my_pts - , hjust = 0.5) - # , panel.grid = element_blank() - , panel.background = element_rect(fill = "transparent", colour=NA) - ) + - labs(title = p_title - , x = my_xlab - , y = my_ylab), + , axis.text.y = element_text(size = my_yaxls + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_blank() + , axis.ticks = element_blank() + #, axis.title.x = element_text(size = my_xaxts) + , axis.title.y = element_text(size = my_yaxts ) + , plot.title = element_text(size = my_pts + , hjust = 0.5) + # , panel.grid = element_blank() + , panel.background = element_rect(fill = "transparent", colour=NA) + ) + + labs(title = p_title + , x = my_xlab + , y = my_ylab) + + if(reorder_position) { + geom_bar(aes(x=reorder(position,-pos_count), fill = group), + colour = "grey", + size=0.125 + ) + + }else{ + geom_bar(aes(x=position, fill = group), + colour = "grey", + size=0.125 + ) + } + + + # Generate the subcols barplot + cowplot::plot_grid( + subcols_plot, NULL, anno_bar, - #generate_distance_legend(plot_df), ncol = 1, align = "v", rel_heights = c(6,-0.1,1) - #rel_widths = c(9/10, 0.4/10) ) } diff --git a/scripts/functions/logoP_or.R b/scripts/functions/logoP_or.R index a73db03..7069afa 100644 --- a/scripts/functions/logoP_or.R +++ b/scripts/functions/logoP_or.R @@ -81,13 +81,11 @@ LogoPlotCustomH <- function(plot_df if (rm_empty_y){ + cat(paste0("Original Rows: ",nrow(plot_df))) plot_df = plot_df[!is.na(plot_df[y_axis_colname]),] - cat("\nRemoving empty positions...\n") + cat(paste0("Plotting Rows after removing NAs: ",nrow(plot_df))) } - y_max = max(plot_df[[y_axis_colname]], na.rm = T) - cat("\nRemoving y scale incremenet:", y_axis_increment) - y_lim = round_any(y_max, y_axis_increment, f = ceiling) #------------------- # logo data: LogOR @@ -98,8 +96,10 @@ LogoPlotCustomH <- function(plot_df plot_df[log_colname] = log_value(plot_df[y_axis_colname]) logo_df = plot_df[, c(x_axis_colname, symbol_colname, log_colname)] logo_df_plot = logo_df[, c(x_axis_colname, symbol_colname, log_colname)] - logo_dfP_wf = as.matrix(logo_df_plot %>% spread(x_axis_colname, log_colname, fill = 0.0)) - + logo_df_plot = logo_df_plot %>% spread(x_axis_colname, y_axis_colname, fill = 0.0) + rownames(logo_df_plot) = logo_df_plot$mutant_type + logo_df_plot$mutant_type = NULL + logo_dfP_wf=as.matrix(logo_df_plot) #!!! For consideration: to add y_axis 'breaks' and 'limits' !!! #y_max = max(plot_df[[log_colname]], na.rm = T) #y_axis_increment = @@ -114,18 +114,28 @@ LogoPlotCustomH <- function(plot_df #------------------- logo_df = plot_df[, c(x_axis_colname, symbol_colname, y_axis_colname)] logo_df_plot = logo_df[, c(x_axis_colname, symbol_colname, y_axis_colname)] - logo_dfP_wf = as.matrix(logo_df_plot %>% spread(x_axis_colname, y_axis_colname, fill = 0.0)) + logo_df_plot = logo_df_plot %>% spread(x_axis_colname, y_axis_colname, fill = 0.0) + rownames(logo_df_plot) = logo_df_plot$mutant_type + logo_df_plot$mutant_type = NULL + logo_dfP_wf=as.matrix(logo_df_plot) + #logo_dfP_wf = as.matrix(logo_df_plot %>% spread(x_axis_colname, y_axis_colname, fill = 0.0)) } #class(logo_dfP_wf) - rownames(logo_dfP_wf) = logo_dfP_wf[,1] + #rownames(logo_dfP_wf) = logo_dfP_wf[,1] #dim(logo_dfP_wf) - logo_dfP_wf = logo_dfP_wf[,-1] + #logo_dfP_wf = logo_dfP_wf[,-1] #str(logo_dfP_wf) + #y_max = max(plot_df[[y_axis_colname]], na.rm = T) + y_max = max(colSums(logo_dfP_wf)) + cat("\nRemoving y scale incremenet:", y_axis_increment) + y_lim = round_any(y_max, y_axis_increment, f = ceiling) + + #colnames(logo_dfP_wf) position_or = as.numeric(colnames(logo_dfP_wf)) @@ -167,11 +177,12 @@ LogoPlotCustomH <- function(plot_df ggplot() + geom_logo(logo_dfP_wf , method = "custom" + #, method = "bits" , col_scheme = my_logo_col , seq_type = "aa") + #ylab("my custom height") + - theme( axis.ticks.x = element_blank() - , axis.ticks.length = unit(0, "pt") + theme( axis.ticks = element_blank() + #, axis.ticks.length = unit(0, "pt") , axis.title.x = element_blank() # , axis.text.x = element_blank() # turn this off and the below on if you want to visually # verify positions. @@ -206,10 +217,13 @@ LogoPlotCustomH <- function(plot_df , labels = position_or , limits = factor(1:length(position_or))) + - scale_y_continuous(y_lab - , breaks = seq(0, (y_lim), by = y_axis_increment) - #, labels = seq(0, (y_lim), by = y_axis_increment) - , limits = c(0, y_lim)) + + scale_y_continuous(y_lab, + breaks = seq(0, + (y_lim), + by = y_axis_increment + ), + limits = c(0, y_lim) + ) + labs(y=y_lab), position_annotation(plot_df, bg = theme_bgc, diff --git a/scripts/functions/position_annotation.R b/scripts/functions/position_annotation.R index ea69746..068da6e 100644 --- a/scripts/functions/position_annotation.R +++ b/scripts/functions/position_annotation.R @@ -3,6 +3,7 @@ # This plots position tiles for the (up to) three ligands as well as drug position_annotation=function(plot_df, bg="transparent", + reorder_position = FALSE, # enable to reorder according to plot_df$pos_count aa_pos_drug=1:100, active_aa_pos=1:100, aa_pos_lig1=1:100, @@ -47,65 +48,112 @@ position_annotation=function(plot_df, plot_df$col_bg3 = ifelse(plot_df[["position"]]%in%aa_pos_lig3 , "purple", plot_df$col_bg3) + # the call to generate_distance_colour_map should probably be + # wherever the outer DF is built, and not here. plot_df = generate_distance_colour_map(plot_df, debug=TRUE) - heat_bar = ggplot(plot_df, - aes(x=factor(position)) # THIS STUPID FUCKING FACTOR THING - ) + - geom_tile(aes(y=0), - fill=plot_df$ligD_colours) + + heat_bar = ggplot(plot_df) + # THIS STUPID FUCKING FACTOR THING + # scale_x_discrete("Position", labels=factor(plot_df$position)) + theme_nothing() + theme(#axis.text.x = element_text(angle = 90, size = 6), title = element_blank() - ) + # enable for alignment debug - labs(x = NULL, y = NULL) #end of distance-heat-bar - #NULL, - pos_tiles = ggplot(plot_df, - aes(x=factor(position), # THIS STUPID FUCKING FACTOR THING - ) - ) + - # geom_tile(aes(y = 0, fill = col_aa, colour = col_aa) - # ) + - geom_tile(aes(y = 1, fill = bg_all, colour = bg_all) - ) + - geom_tile(aes(y = 2, fill = col_bg1, colour = col_bg1) - ) + - geom_tile(aes(y = 3, fill = col_bg2, colour = col_bg2) - ) + - geom_tile(aes(y = 4, fill = col_bg3, colour = col_bg3) - ) + + ) + # enable for alignment debug + labs(x = NULL, y = NULL) + - #scale_x_discrete("Position", labels=factor(plot_df$position)) + - scale_color_manual(values = c( - "brown"="brown", - "green"="#00ff00", - "transparent"="transparent", - "slategrey"="#2f4f4f", - "navyblue"="#000080", - "purple"="#a020f0" - ), - #expand=c(0,0) - ) + - scale_fill_manual(values = c( - "brown"="brown", - "green"="#00ff00", - "transparent"="transparent", - "slategrey"="#2f4f4f", - "navyblue"="#000080", - "purple"="#a020f0" - ), - #expand=c(0,0) - ) + - theme_nothing() + - theme(plot.background = element_rect(fill = bg, colour=NA), - #plot.margin = margin(t=0,b=0), - panel.background = element_rect(fill = bg, colour=NA), - legend.position = "none" - ) + - labs(x = NULL, y= NULL) - - heat_legend=get_legend(heat_bar) + # if reorder_position is turned on then we need to reorder 'x' + # according to the pos_count column (creating this column is + # left as a fun exercise to whoever reads this next) + if(reorder_position) { + geom_tile(aes(y=0, x=reorder(position,-pos_count)), + fill=plot_df$ligD_colours) + } else { + geom_tile(aes(y=0, x=factor(position)), + fill=plot_df$ligD_colours) + } + #end of distance-heat-bar + #NULL, + if(reorder_position) { + pos_tiles = ggplot(plot_df) + + #scale_x_discrete("Position", labels=factor(plot_df$position)) + + scale_color_manual(values = c( + "brown"="brown", + "green"="#00ff00", + "transparent"="transparent", + "slategrey"="#2f4f4f", + "navyblue"="#000080", + "purple"="#a020f0" + ), + #expand=c(0,0) + ) + + scale_fill_manual(values = c( + "brown"="brown", + "green"="#00ff00", + "transparent"="transparent", + "slategrey"="#2f4f4f", + "navyblue"="#000080", + "purple"="#a020f0" + ), + #expand=c(0,0) + ) + + theme_nothing() + + theme(plot.background = element_rect(fill = bg, colour=NA), + #plot.margin = margin(t=0,b=0), + panel.background = element_rect(fill = bg, colour=NA), + legend.position = "none" + ) + + labs(x = NULL, y= NULL) + + geom_tile(aes(y = 1,x=reorder(position,-pos_count), fill = bg_all, colour = bg_all) + ) + + geom_tile(aes(y = 2, x=reorder(position,-pos_count), fill = col_bg1, colour = col_bg1) + ) + + geom_tile(aes(y = 3, x=reorder(position,-pos_count), fill = col_bg2, colour = col_bg2) + ) + + geom_tile(aes(y = 4, x=reorder(position,-pos_count), fill = col_bg3, colour = col_bg3) + ) + + } else { + pos_tiles = ggplot(plot_df) + + #scale_x_discrete("Position", labels=factor(plot_df$position)) + + scale_color_manual(values = c( + "brown"="brown", + "green"="#00ff00", + "transparent"="transparent", + "slategrey"="#2f4f4f", + "navyblue"="#000080", + "purple"="#a020f0" + ), + #expand=c(0,0) + ) + + scale_fill_manual(values = c( + "brown"="brown", + "green"="#00ff00", + "transparent"="transparent", + "slategrey"="#2f4f4f", + "navyblue"="#000080", + "purple"="#a020f0" + ), + #expand=c(0,0) + ) + + theme_nothing() + + theme(plot.background = element_rect(fill = bg, colour=NA), + #plot.margin = margin(t=0,b=0), + panel.background = element_rect(fill = bg, colour=NA), + legend.position = "none" + ) + + labs(x = NULL, y= NULL) + + geom_tile(aes(y = 1, x=factor(position), fill = bg_all, colour = bg_all) + ) + + geom_tile(aes(y = 2, x=factor(position), fill = col_bg1, colour = col_bg1) + ) + + geom_tile(aes(y = 3, x=factor(position), fill = col_bg2, colour = col_bg2) + ) + + geom_tile(aes(y = 4, x=factor(position), fill = col_bg3, colour = col_bg3) + ) + } +# tile thingies end + + heat_legend=get_legend(heat_bar) out_plot=cowplot::plot_grid( heat_bar, NULL, diff --git a/scripts/plotting/plotting_thesis/gg_pairs_all.R b/scripts/plotting/plotting_thesis/gg_pairs_all.R index f28e406..a0d28fe 100644 --- a/scripts/plotting/plotting_thesis/gg_pairs_all.R +++ b/scripts/plotting/plotting_thesis/gg_pairs_all.R @@ -14,9 +14,8 @@ my_gg_pairs=function(plot_df, plot_title title="ρ", digits=2, justify_labels = "centre", - #title_args=c(colour="black"), - title_args=c(size=tt_args_size),#2.5 - group_args=c(size=gp_args_size)#2.5 + title_args=list(size=tt_args_size, colour="black"),#2.5 + group_args=list(size=gp_args_size)#2.5 ) ), lower = list( @@ -93,7 +92,7 @@ corr_ps_colnames = c(static_cols corr_df_ps = corr_plotdf[, corr_ps_colnames] # Plot #1 -plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability features") +plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates") ########################################################## #================ # Conservation @@ -109,7 +108,7 @@ corr_conservation_cols = c( static_cols corr_df_cons = corr_plotdf[, corr_conservation_cols] # Plot #2 -plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation features") +plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation estimates") ########################################################## #================ @@ -139,7 +138,7 @@ corr_df_aff = corr_affinity_df[, corr_aff_colnames] colnames(corr_df_aff) # Plot #3 -plot_corr_df_aff = my_gg_pairs(corr_df_aff, plot_title="Affinity features", tt_args_size = 4, gp_args_size =4) +plot_corr_df_aff = my_gg_pairs(corr_df_aff, plot_title="Affinity estimates", tt_args_size = 4, gp_args_size =4) #============= # combine