From 6f354ab39064c2ca16f107ecf4895846685435a5 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Sun, 14 Aug 2022 12:17:36 +0100 Subject: [PATCH] oops! --- scripts/dm_om_data.R | 0 scripts/functions/bp_subcolours.R | 44 +-- .../functions/generate_distance_colour_map.R | 8 +- scripts/functions/logoP_or.R | 12 +- scripts/functions/logoP_snp.R | 246 ++++++------ scripts/functions/position_annotation.R | 157 ++++++-- scripts/functions/wideP_consurf3.R | 92 +---- .../corr_plots_thesis_ggpairs.R | 366 ++++++++++++++++++ scripts/plotting/plotting_thesis/gg_pairs.R | 49 +++ .../plotting/plotting_thesis/gg_pairs_all.R | 144 +++++++ 10 files changed, 833 insertions(+), 285 deletions(-) create mode 100644 scripts/dm_om_data.R create mode 100644 scripts/plotting/plotting_thesis/corr_plots_thesis_ggpairs.R create mode 100644 scripts/plotting/plotting_thesis/gg_pairs.R create mode 100644 scripts/plotting/plotting_thesis/gg_pairs_all.R diff --git a/scripts/dm_om_data.R b/scripts/dm_om_data.R new file mode 100644 index 0000000..e69de29 diff --git a/scripts/functions/bp_subcolours.R b/scripts/functions/bp_subcolours.R index 29e79df..98f39d5 100755 --- a/scripts/functions/bp_subcolours.R +++ b/scripts/functions/bp_subcolours.R @@ -40,12 +40,12 @@ bp_stability_hmap <- function(plot_df = merged_df3 #, bar_col_colname = "group" , stability_colname = "duet_scaled" # Only here so that you can do function(df) , stability_outcome_colname = "duet_outcome" # Only here so that you can do function(df) - , p_title = "DUMMY TITLE" # Only here so that you can do function(df) - , my_xaxls = 12 # x-axis label size - , my_yaxls = 20 # y-axis label size - , my_xaxts = 18 # x-axis text size - , my_yaxts = 20 # y-axis text size - , my_pts = 20 # plot-title size + , p_title = "DUMMY TITLE", # Only here so that you can do function(df) + my_xaxls = 6, # x-axis label size + my_yaxls = 6, # y-axis label size + my_xaxts = 9, # x-axis text size + my_yaxts = 10, # y-axis text size + my_pts = 10 # plot-title size , my_xlab = "Position" , my_ylab = "No. of nsSNPs" @@ -68,7 +68,7 @@ bp_stability_hmap <- function(plot_df = merged_df3 # Build data with colours # ~ ligand distance #========================= - plot_df = generate_distance_colour_map(plot_df, debug=TRUE) + # plot_df = generate_distance_colour_map(plot_df, debug=TRUE) # order the df by position and ensure it is a factor plot_df = plot_df[order(plot_df[[xvar_colname]]), ] @@ -104,7 +104,7 @@ bp_stability_hmap <- function(plot_df = merged_df3 # , ordered = T) )) + geom_bar(aes(fill = group) - , colour = "grey") + + , colour = "grey", size=0.125) + scale_fill_manual( values = subcols_bp_hmap , guide = "none") + @@ -120,11 +120,12 @@ bp_stability_hmap <- function(plot_df = merged_df3 , hjust = 1 , vjust = 0) , axis.title.x = element_blank() + , axis.ticks = element_blank() #, axis.title.x = element_text(size = my_xaxts) , axis.title.y = element_text(size = my_yaxts ) , plot.title = element_text(size = my_pts , hjust = 0.5) - , panel.grid = element_blank() + # , panel.grid = element_blank() , panel.background = element_rect(fill = "transparent", colour=NA) ) + @@ -132,25 +133,20 @@ bp_stability_hmap <- function(plot_df = merged_df3 , x = my_xlab , y = my_ylab), NULL, - ggplot(plot_df, - aes(x=factor(position), # THIS STUPID FUCKING FACTOR THING - ) - ) + - geom_tile(aes(y=0), - fill=plot_df$ligD_colours) + - scale_x_discrete("Position", labels=factor(plot_df$position)) + - theme_nothing() + - theme(plot.background = element_rect(fill = "transparent", colour=NA), - plot.margin = margin(t=0,b=0)) + - labs(x = NULL, y = NULL), #end of distance-heat-bar - NULL, - position_annotation(plot_df), + position_annotation(plot_df, + aa_pos_drug=aa_pos_drug, + active_aa_pos=active_aa_pos, + aa_pos_lig1=aa_pos_lig1, + aa_pos_lig2=aa_pos_lig2, + aa_pos_lig3=aa_pos_lig3 + ) + , #generate_distance_legend(plot_df), ncol = 1, align = "v", - rel_heights = c(10,-0.1,1,-0.1,1) + rel_heights = c(10,-0.1,1) #rel_widths = c(9/10, 0.4/10) ) } -#bp_stability_hmap(small_df3) +#bp_stability_hmap(merged_df3) diff --git a/scripts/functions/generate_distance_colour_map.R b/scripts/functions/generate_distance_colour_map.R index 301dfb7..e21f476 100644 --- a/scripts/functions/generate_distance_colour_map.R +++ b/scripts/functions/generate_distance_colour_map.R @@ -3,7 +3,7 @@ generate_distance_colour_map = function(plot_df, xvar_colname = "position", lig_dist_colname = "ligand_distance", #lig_dist_colours = c("green", "yellow", "orange", "red"), - lig_dist_colours = c("tan", "black"), + lig_dist_colours = c("green", "yellow", "magenta"), debug = TRUE ) { @@ -75,9 +75,9 @@ generate_distance_legend = function(plot_df, geom_tile(aes(fill = .data[[lig_dist_colname]]) , colour = "white") + scale_fill_gradient2(midpoint = lig_mean - , low = "tan" - , mid = "grey50" - , high = "black" + , low = "green" + , mid = "yellow" + , high = "magenta" , breaks = labels , limits = c(lig_min, lig_max) , labels = labelsD diff --git a/scripts/functions/logoP_or.R b/scripts/functions/logoP_or.R index 965f5d7..d3d75ca 100644 --- a/scripts/functions/logoP_or.R +++ b/scripts/functions/logoP_or.R @@ -250,8 +250,16 @@ LogoPlotCustomH <- function(plot_df #scale_x_discrete(x_lab, labels=factor(unique_colour_map$position)) + scale_color_manual(values=unique_colour_map$ligD_colours) + scale_fill_manual(values=unique_colour_map$ligD_colours) + - labs(y = NULL), NULL, - position_annotation(plot_df, bg=theme_bgc), + labs(y = NULL), + NULL, + position_annotation(plot_df, + bg = theme_bgc, + aa_pos_drug=aa_pos_drug, + active_aa_pos=active_aa_pos, + aa_pos_lig1=aa_pos_lig1, + aa_pos_lig2=aa_pos_lig2, + aa_pos_lig3=aa_pos_lig3 + ), ncol=1, align='v', rel_heights = c(16,0,1,0,1) ) diff --git a/scripts/functions/logoP_snp.R b/scripts/functions/logoP_snp.R index 1046f45..314c9fd 100644 --- a/scripts/functions/logoP_snp.R +++ b/scripts/functions/logoP_snp.R @@ -1,7 +1,7 @@ ########################a########################################################### # Input: # Data -# plot_df: merged_df3 containing the OR column to use as y-axis or any other relevant column +# mutable_df: merged_df3 containing the OR column to use as y-axis or any other relevant column # x_axis_colname = "position" # symbol_mut_colname = "mutant_type" @@ -38,16 +38,16 @@ LogoPlotSnps <- function(plot_df , my_logo_col = "chemistry" , x_lab = "Position" , y_lab = "Count" - , x_ats = 14 # text size + , x_ats = 7 # text size , x_tangle = 90 # text angle - , y_ats = 22 + , y_ats = 10 , y_tangle = 0 - , x_tts = 20 # title size - , y_tts = 23 + , x_tts = 10 # title size + , y_tts = 10 , leg_pos = "none" # can be top, left, right and bottom or c(0.8, 0.9) , leg_dir = "horizontal" #can be vertical or horizontal - , leg_ts = 20 # leg text size - , leg_tts = 16 # leg title size + , leg_ts = 10 # leg text size + , leg_tts = 8 # leg title size , tpos0 = 0 # 0 is a magic number that does my sensible default , tW0 = 1 , tH0 = 0.2 @@ -56,6 +56,7 @@ LogoPlotSnps <- function(plot_df ) { + mutable_df=cbind(plot_df) # handle funky omit_snp_count. DOES NOT WORK YET if (class(omit_snp_count) != "numeric"){ omit_snp_count <- as.numeric(unlist(str_extract_all(omit_snp_count, regex("[0-9]+")))) @@ -65,51 +66,51 @@ LogoPlotSnps <- function(plot_df ############################################ # Generate "ligand distance" colour map - plot_df = generate_distance_colour_map(plot_df, debug=TRUE) - unique_colour_map = unique(plot_df[,c("position","ligD_colours")]) - unique_colour_map = unique_colour_map[order(unique_colour_map$position), ] - rownames(unique_colour_map) = unique_colour_map$position - unique_colour_map2 = unique_colour_map - unique_colour_map2$position=as.factor(unique_colour_map2$position) - unique_colour_map2$ligD_colours = as.factor(unique_colour_map2$ligD_colours) + # mutable_df = generate_distance_colour_map(mutable_df, debug=TRUE) + # unique_colour_map = unique(mutable_df[,c("position","ligD_colours")]) + # unique_colour_map = unique_colour_map[order(unique_colour_map$position), ] + # rownames(unique_colour_map) = unique_colour_map$position + # unique_colour_map2 = unique_colour_map + # unique_colour_map2$position=as.factor(unique_colour_map2$position) + # unique_colour_map2$ligD_colours = as.factor(unique_colour_map2$ligD_colours) # - setDT(plot_df)[, mut_pos_occurrence := .N, by = .(eval(parse(text=x_axis_colname)))] + setDT(mutable_df)[, mut_pos_occurrence := .N, by = .(eval(parse(text=x_axis_colname)))] if (debug) { - table(plot_df[[x_axis_colname]]) - table(plot_df$mut_pos_occurrence) + table(mutable_df[[x_axis_colname]]) + table(mutable_df$mut_pos_occurrence) } - max_mut = max(table(plot_df[[x_axis_colname]])) + max_mut = max(table(mutable_df[[x_axis_colname]])) # Subset Data as specified by user cat("\nDisplaying nsSNP position frequency:\n") - print(table(plot_df$mut_pos_occurrence)) + print(table(mutable_df$mut_pos_occurrence)) if ( (length(omit_snp_count) ==1) && (omit_snp_count == 0) ){ - my_data_snp = plot_df + my_data_snp = mutable_df u = unique(my_data_snp[[x_axis_colname]]) max_mult_mut = max(table(my_data_snp[[x_axis_colname]])) if (debug) { cat("\nNo filtering requested:" - , "\nTotal no. of nsSNPs:", sum(table(plot_df$mut_pos_occurrence)) - , "\nTotal no. of nsSNPs omitted:", sum(table(plot_df$mut_pos_occurrence)[omit_snp_count]) + , "\nTotal no. of nsSNPs:", sum(table(mutable_df$mut_pos_occurrence)) + , "\nTotal no. of nsSNPs omitted:", sum(table(mutable_df$mut_pos_occurrence)[omit_snp_count]) , "\nDim of data:", dim(my_data_snp) , "\nNo. of positions:", length(u) , "\nMax no. of muts at any position:", max_mult_mut) } } else { - my_data_snp = subset(plot_df, !(mut_pos_occurrence%in%omit_snp_count) ) + my_data_snp = subset(mutable_df, !(mut_pos_occurrence%in%omit_snp_count) ) - exp_nrows = sum(table(plot_df$mut_pos_occurrence)) - sum(table(plot_df$mut_pos_occurrence)[omit_snp_count]) + exp_nrows = sum(table(mutable_df$mut_pos_occurrence)) - sum(table(mutable_df$mut_pos_occurrence)[omit_snp_count]) got_rows = sum(table(my_data_snp$mut_pos_occurrence)) u = unique(my_data_snp[[x_axis_colname]]) max_mult_mut = max(table(my_data_snp[[x_axis_colname]])) if (debug) { if (got_rows == exp_nrows) { cat("\nPass: Position with the stated nsSNP frequency filtered:", omit_snp_count - , "\nTotal no. of nsSNPs:", sum(table(plot_df$mut_pos_occurrence)) - , "\nTotal no. of nsSNPs omitted:", sum(table(plot_df$mut_pos_occurrence)[omit_snp_count]) + , "\nTotal no. of nsSNPs:", sum(table(mutable_df$mut_pos_occurrence)) + , "\nTotal no. of nsSNPs omitted:", sum(table(mutable_df$mut_pos_occurrence)[omit_snp_count]) , "\nDim of subsetted data:", dim(my_data_snp) , "\nNo. of positions:", length(u) , "\nMax no. of muts at any position:", max_mult_mut) @@ -145,7 +146,7 @@ LogoPlotSnps <- function(plot_df if (is.matrix(tab_mt)){ if (debug) { cat("\nCreating mutant matrix..." - #, "\nRownames of mutant matrix:", rownames(tab_mt) + #, "\nRowna mes of mutant matrix:", rownames(tab_mt) #, "\nColnames of mutant matrix:", colnames(tab_mt) ) } @@ -211,114 +212,95 @@ LogoPlotSnps <- function(plot_df ##################################### # Generating logo plots for nsSNPs ##################################### - cowplot::plot_grid( - #------------------- - # Mutant logo plot - #------------------- - ggseqlogo(tab_mt - , method = 'custom' - , col_scheme = my_logo_col - , seq_type = 'aa') + - - scale_x_continuous(breaks = 1:ncol(tab_mt) - , expand = c(0.01,0) - , labels = colnames(tab_mt))+ - - scale_y_continuous(breaks = 0:(max_mult_mut-1) - , labels = c(1:max_mult_mut) - , limits = c(0, max_mult_mut)) + - ylab(y_lab) + - theme(text=element_text(family="FreeSans") - , legend.position = leg_pos - , legend.direction = leg_dir - , legend.title = element_text(size = leg_tts - , colour = ytt_col) - , legend.text = element_text(size = leg_ts) - - , axis.text.x = element_text(size = x_ats - , angle = x_tangle - , hjust = 1 - , vjust = 0.4 - , colour = xfont_bgc) - , axis.text.y = element_text(size = y_ats - , angle = y_tangle - , hjust = 1 - , vjust = -1.0 - , colour = yfont_bgc) - , axis.title.x = element_text(size = x_tts - , colour = xtt_col) - , axis.title.y = element_text(size = y_tts - , colour = ytt_col) - - , plot.background = element_rect(fill = theme_bgc, colour=NA) - ), - ggseqlogo(tab_wt - , method = 'custom' - , col_scheme = my_logo_col - , seq_type = 'aa') + - scale_x_continuous(breaks = 1:ncol(tab_wt) - , expand = c(0.01,0) - , labels = as.factor(colnames(tab_wt))) + - theme(text = element_text(family="FreeSans") - , legend.position = "none" - , axis.text.x = element_blank() - , axis.text.y = element_blank() - , axis.title.x = element_blank() - , axis.title.y = element_blank() - , plot.background = element_rect(fill = theme_bgc, colour=NA) - ) + - labs(x=NULL, y=NULL), - ggplot( - data=unique_colour_map2, - aes( - x=factor(position), 0 # heat-mapped distance tiles along the bot - , fill = position - , colour = position - , linetype = "blank" - ) + #------------------- + # Mutant logo plot + #------------------- + logo_top =ggseqlogo(tab_mt + , method = 'custom' + , col_scheme = my_logo_col + , seq_type = 'aa') + + + scale_x_continuous(breaks = 1:ncol(tab_mt) + , expand = c(0.01,0) + , labels = colnames(tab_mt))+ + + scale_y_continuous(breaks = 0:(max_mult_mut-1) + , labels = c(1:max_mult_mut) + , limits = c(0, max_mult_mut)) + + ylab(y_lab) + + theme(text=element_text(family="FreeSans") + , legend.position = leg_pos + , legend.direction = leg_dir + , legend.title = element_text(size = leg_tts + , colour = ytt_col) + , legend.text = element_text(size = leg_ts) + + , axis.text.x = element_text(size = x_ats + , angle = x_tangle + #, hjust = 1 + #, vjust = 0.4 + , colour = xfont_bgc) + , axis.text.y = element_text(size = y_ats + , angle = y_tangle + , hjust = 1 + , vjust = -1.0 + , colour = yfont_bgc) + # , axis.title.x = element_text(size = x_tts + # , colour = xtt_col) + , axis.title.x = element_blank() + , axis.title.y = element_text(size = y_tts + , colour = ytt_col) + + , plot.background = element_rect(fill = theme_bgc, colour=NA) + ) + logo_bottom = ggseqlogo(tab_wt + , method = 'custom' + , col_scheme = my_logo_col + , seq_type = 'aa') + + scale_x_continuous(breaks = 1:ncol(tab_wt) + , expand = c(0.01,0) + , labels = as.factor(colnames(tab_wt))) + + theme(text = element_text(family="FreeSans") + , legend.position = "none" + , axis.text.x = element_blank() + , axis.text.y = element_blank() + , axis.title.x = element_blank() + , axis.title.y = element_blank() + , plot.background = element_rect(fill = theme_bgc, colour=NA) ) + - geom_tile() + - theme( - axis.text.x = element_blank() - , axis.ticks.x = element_blank() - # axis.text.x = element_text(size = x_ats - # , angle = x_tangle - # , hjust = 1 - # , vjust = 0.4 - # , colour = xfont_bgc) - , axis.text.y = element_blank() - , axis.ticks.y = element_blank() - , axis.title.x = element_blank() - - # , axis.title.x = element_text(size = x_tts - # , colour = xtt_col) - # , axis.title.y = element_text(size = y_tts - # , colour = ytt_col) - # , legend.title = element_text(size = leg_tts - # , colour = ytt_col) - , legend.text = element_text(size = leg_ts) - - , legend.position = leg_pos - , legend.direction = leg_dir - , plot.background = element_rect(fill = theme_bgc, colour=NA) - , plot.margin = margin(t=0) - , panel.grid=element_blank() - , panel.background = element_rect(fill = theme_bgc, colour=NA) - ) + - scale_x_discrete(x_lab, labels=unique_colour_map$position) + - #scale_x_discrete(x_lab, labels=factor(unique_colour_map$position)) + - scale_color_manual(values=unique_colour_map$ligD_colours) + - scale_fill_manual(values=unique_colour_map$ligD_colours) + - labs(y = NULL) - , NULL - , position_annotation(plot_df, bg=theme_bgc) - , ncol=1 - , align = "v" - , axis='lr' - , rel_heights = c(7/10, 2/7,1/7, -0.1, 0.5/7)) + labs(x=NULL, y=NULL) + + anno_bar = position_annotation(plot_df, + bg = theme_bgc, + aa_pos_drug=aa_pos_drug, + active_aa_pos=active_aa_pos, + aa_pos_lig1=aa_pos_lig1, + aa_pos_lig2=aa_pos_lig2, + aa_pos_lig3=aa_pos_lig3) + + aligned=align_plots(logo_top, logo_bottom, anno_bar, align='v', axis='lr') + cowplot::plot_grid( + aligned[[1]], aligned[[2]], aligned[[3]], + ncol=1, + #align = "v", + rel_heights = c(7, 1,1), + rel_widths = c(1,1,0.75) + ) + + # cowplot::plot_grid( + # logo_top, + # #NULL, + # logo_bottom, + # #NULL, + # anno_bar, + # ncol=1, + # align = "v", + # rel_heights = c(7, 1,1) + # ) + # top logo, bottom logo, heat bar, NULL, position annotation #------------------ # Wild logo plot #------------------ } -#LogoPlotSnps(small_df3) +#LogoPlotSnps(mutable_df3) diff --git a/scripts/functions/position_annotation.R b/scripts/functions/position_annotation.R index 706ddd4..5e4567f 100644 --- a/scripts/functions/position_annotation.R +++ b/scripts/functions/position_annotation.R @@ -1,7 +1,15 @@ # position_annotation takes a Data Frame (df) and returns a ggplot object. # # This plots position tiles for the (up to) three ligands as well as drug -position_annotation=function(plot_df, bg="transparent"){ +position_annotation=function(plot_df, + bg="transparent", + aa_pos_drug=1:100, + active_aa_pos=1:100, + aa_pos_lig1=1:100, + aa_pos_lig2=1:100, + aa_pos_lig3=1:100 +) +{ x_ats = 12 x_tangle = 90 x_tts = 20 @@ -13,48 +21,117 @@ position_annotation=function(plot_df, bg="transparent"){ leg_tts = 16 leg_pos = "none" + # plot_df=plot_df[order(plot_df$ligand_distance),] + # + # plot_df$position = factor(plot_df$position) #plot_df = generate_distance_colour_map(plot_df, debug=TRUE) + # plot_df$col_aa = ifelse(plot_df[["position"]]%in%active_aa_pos, + # "brown", "transparent") + plot_df$col_aa = ifelse(plot_df[["position"]]%in%active_aa_pos, + "transparent", "transparent") - ggplot(plot_df, - aes(x=factor(position), # THIS STUPID FUCKING FACTOR THING - ) - ) + - geom_tile(aes(y=0, fill= bg_all, colour = bg_all) + plot_df$bg_all = plot_df$col_aa + plot_df$bg_all = ifelse(plot_df[["position"]]%in%aa_pos_drug, + "green", plot_df$bg_all) + + plot_df$col_bg1 = plot_df$bg_all + plot_df$col_bg1 = ifelse(plot_df[["position"]]%in%aa_pos_lig1, + "slategrey", plot_df$col_bg1) + + plot_df$col_bg2 = plot_df$col_bg1 + plot_df$col_bg2 = ifelse(plot_df[["position"]]%in%aa_pos_lig2, + "navyblue", plot_df$col_bg2) + + + plot_df$col_bg3 = plot_df$col_bg2 + plot_df$col_bg3 = ifelse(plot_df[["position"]]%in%aa_pos_lig3 + , "purple", plot_df$col_bg3) + + plot_df = generate_distance_colour_map(plot_df, debug=TRUE) + + cowplot::plot_grid( + ggplot(plot_df, + aes(x=factor(position), # THIS STUPID FUCKING FACTOR THING + ) ) + - geom_tile(aes(y=1, fill= col_bg1, colour = col_bg1) + geom_tile(aes(y=0), + fill=plot_df$ligD_colours) + + #scale_x_discrete("Position", labels=factor(plot_df$position)) + + #theme_nothing() + + theme(plot.background = element_rect(fill = "transparent", colour=NA), + plot.margin = margin(t=0,b=0), + axis.ticks.x = element_blank(), + axis.ticks.y = element_blank(), + axis.text.y = element_blank(), + panel.grid = element_blank(), + panel.background = element_rect(fill = "transparent", colour=NA), + ) + + labs(x = NULL, y = NULL), #end of distance-heat-bar + #NULL, + ggplot(plot_df, + aes(x=factor(position), # THIS STUPID FUCKING FACTOR THING + #reorder(ligand_distance) + ) ) + - geom_tile(aes(y=2, fill= col_bg2, colour = col_bg2) - ) + - geom_tile(aes(y=3, fill= col_bg3, colour = col_bg3) - ) + - - scale_x_discrete("Position", labels=factor(plot_df$position)) + - scale_color_manual(values = c( - "brown"="brown", - "green"="green", - "transparent"="transparent", - "slategrey"="slategrey", - "navyblue"="navyblue", - "purple"="purple" - ), - expand=c(0,0) - ) + - scale_fill_manual(values = c( - "brown"="brown", - "green"="green", - "transparent"="transparent", - "slategrey"="slategrey", - "navyblue"="navyblue", - "purple"="purple" - ), - expand=c(0,0) - ) + - #scale_x_continuous(expand=c(0,0)) + - #scale_y_continuous(expand=c(0,0)) + - theme_nothing() + - - theme(plot.background = element_rect(fill = bg, colour=NA), - plot.margin = margin(t=0,b=0)) + - labs(x = NULL, y = NULL) + # geom_tile(aes(y = 0, fill = col_aa, colour = col_aa) + # ) + + geom_tile(aes(y = 1, fill = bg_all, colour = bg_all) + ) + + geom_tile(aes(y = 2, fill = col_bg1, colour = col_bg1) + ) + + geom_tile(aes(y = 3, fill = col_bg2, colour = col_bg2) + ) + + geom_tile(aes(y = 4, fill = col_bg3, colour = col_bg3) + ) + + + #scale_x_discrete("Position", labels=factor(plot_df$position)) + + scale_color_manual(values = c( + "brown"="brown", + "green"="#00ff00", + "transparent"="transparent", + "slategrey"="#2f4f4f", + "navyblue"="#000080", + "purple"="#a020f0" + ), + expand=c(0,0) + ) + + scale_fill_manual(values = c( + "brown"="brown", + "green"="#00ff00", + "transparent"="transparent", + "slategrey"="#2f4f4f", + "navyblue"="#000080", + "purple"="#a020f0" + ), + expand=c(0,0) + ) + + #scale_x_continuous(expand=c(0,0)) + + #scale_y_continuous(expand=c(0,0)) + + theme_nothing() + + + theme(plot.background = element_rect(fill = bg, colour=NA), + plot.margin = margin(t=0,b=0)) + + labs(x = NULL, y = NULL), + ncol=1, + rel_heights = c(1, + #-0.1, + 1) + ) } +position_annotation(merged_df3, + aa_pos_drug=aa_pos_drug, + active_aa_pos=active_aa_pos, + aa_pos_lig1=aa_pos_lig1, + aa_pos_lig2=aa_pos_lig2, + aa_pos_lig3=aa_pos_lig3 +) +# +# # proof that you can use this function to pass arbitrary lists of numbers :-) +# position_annotation(merged_df3, +# aa_pos_drug=1:1000, +# active_aa_pos=1:1000, +# aa_pos_lig1=1:1000, +# aa_pos_lig2=1:1000, +# aa_pos_lig3=1:1000 +# ) \ No newline at end of file diff --git a/scripts/functions/wideP_consurf3.R b/scripts/functions/wideP_consurf3.R index ac6c925..4053591 100644 --- a/scripts/functions/wideP_consurf3.R +++ b/scripts/functions/wideP_consurf3.R @@ -13,12 +13,6 @@ # input args #========================================================== wideP_consurf3 <- function(plot_df - , aa_pos_drug = NULL - , aa_pos_lig1 = NULL - , aa_pos_lig2 = NULL - , aa_pos_lig3 = NULL - , active_aa_pos = NULL - , xvar_colname = "position" , yvar_colname = "consurf_score" , yvar_colourN_colname = "consurf_colour_rev" # num from 0-1 @@ -65,26 +59,13 @@ wideP_consurf3 <- function(plot_df , annotate_ligand_distance = T , leg_title2 = "Ligand Distance" , lig_dist_colname = LigDist_colname # from globals - , lig_dist_colours = c("green", "yellow", "orange", "red") + , lig_dist_colours = c("tan", "black") , tpos0 = 0 # 0 is a magic number that does my sensible default , tW0 = 1 , tH0 = 0.3 # Custom 3: x-axis: geom tiles ~ active sites and ligand , annotate_active_sites = T - - , drug_aa_colour = "purple" - , active_aa_colour = "brown" - - , aa_colour_lig1 = "blue" - , tpos1 = 0 - - , aa_colour_lig2 = "cyan" - , tpos2 = 0 - - , aa_colour_lig3 = "cornflowerblue" - , tpos3 = 0 - , default_gt_clr = "white" , build_plot_df=FALSE , debug=FALSE @@ -174,68 +155,6 @@ wideP_consurf3 <- function(plot_df , ligD_cols = plot_df$ligD_colours)) } - ############################################### - # Custom 3: x-axis geom tiles ~ active sites - ################################################ - - #========================== - # Build Data with colours - # ~ on active sites - #========================== - aa_colour_colname = "bg_all" - aa_colour_colname1 = "col_bg1" - aa_colour_colname2 = "col_bg2" - aa_colour_colname3 = "col_bg3" - - if (build_plot_df) { - if(annotate_active_sites) { - cat("\nAnnotation for xvar requested. Building colours for annotation...") - - - #-------------------------------------------------- - # column colour 0: Active site + drug binding sites - #-------------------------------------------------- - plot_df[[aa_colour_colname]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_drug - , drug_aa_colour - , ifelse(plot_df[[xvar_colname]]%in%active_aa_pos - , active_aa_colour, default_gt_clr )) - plot_df[[aa_colour_colname]] - cat("\nColumn created 'bg_all':", length(plot_df[[aa_colour_colname]])) - - #------------------------------------------------ - # column colour 1: Ligand 1 + drug binding sites - #------------------------------------------------ - cat("\nAssigning colours to drug binding and ligand-1 binding residues") - plot_df[[aa_colour_colname1]] = plot_df[[aa_colour_colname]] - plot_df[[aa_colour_colname1]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_lig1 - , aa_colour_lig1, plot_df[[aa_colour_colname]]) - #------------------------------------------------ - # column colour 2: Ligand 2 - #------------------------------------------------ - plot_df[[aa_colour_colname2]] = plot_df[[aa_colour_colname1]] - plot_df[[aa_colour_colname2]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_lig2 - , aa_colour_lig2, plot_df[[aa_colour_colname1]]) - - #------------------------------------------------ - # column colour 3: Ligand 3 - #------------------------------------------------ - plot_df[[aa_colour_colname3]] = plot_df[[aa_colour_colname2]] - plot_df[[aa_colour_colname3]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_lig3 - , aa_colour_lig3, plot_df[[aa_colour_colname2]]) - - } - } else { - # set these to the string "DUMMY" so that the build-up-the-tiles bit works - aa_pos_drug = "DUMMY" - aa_pos_lig1 = "DUMMY" - active_aa_pos = "DUMMY" - if (aa_colour_colname2 %in% colnames(merged_df3)) { - aa_pos_lig2 = "DUMMY" - if (aa_colour_colname3 %in% colnames(merged_df3)) { - aa_pos_lig2 = "DUMMY" - } - } - } ################### # start plot ################### @@ -355,7 +274,14 @@ wideP_consurf3 <- function(plot_df plot.margin = margin(t=0,b=0)) + labs(x = NULL, y = NULL), #end of distance-heat-bar NULL, - position_annotation(plot_df, bg = panel_col), + position_annotation(plot_df, + bg = panel_col, + aa_pos_drug=aa_pos_drug, + active_aa_pos=active_aa_pos, + aa_pos_lig1=aa_pos_lig1, + aa_pos_lig2=aa_pos_lig2, + aa_pos_lig3=aa_pos_lig3 + ), ncol=1, align='v', axis='lr', diff --git a/scripts/plotting/plotting_thesis/corr_plots_thesis_ggpairs.R b/scripts/plotting/plotting_thesis/corr_plots_thesis_ggpairs.R new file mode 100644 index 0000000..d6e820f --- /dev/null +++ b/scripts/plotting/plotting_thesis/corr_plots_thesis_ggpairs.R @@ -0,0 +1,366 @@ +#!/usr/bin/env Rscript +#source("~/git/LSHTM_analysis/config/alr.R") +source("~/git/LSHTM_analysis/config/embb.R") +#source("~/git/LSHTM_analysis/config/katg.R") +#source("~/git/LSHTM_analysis/config/gid.R") +#source("~/git/LSHTM_analysis/config/pnca.R") +#source("~/git/LSHTM_analysis/config/rpob.R") + +# get plottting dfs +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") +source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") +#################################################### + +# ggpairs wrapper + +my_gg_pairs=function(plot_df){ + ggpairs(plot_df, columns = 1:(ncol(plot_df)-1), + upper = list(continuous = wrap('cor', + method = "spearman", + title="ρ", + digits=2, + title_args=c(colour="black") + ) + ), + lower = list( + continuous = wrap("points", alpha = 0.7, size=0.5), + combo = wrap("dot", alpha = 0.7, size=0.5) + ), + aes(colour = factor(ifelse(plot_df$dst_mode==0, "S", "R")), alpha = 0.5), + title="Stability") + + + scale_colour_manual(values = c("red", "blue")) + + scale_fill_manual(values = c("red", "blue")) + + theme( + text = element_text(size=12, face="bold") + ) +} + + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") + +#======= +# Input +#======= +merged_df3 = as.data.frame(merged_df3) +corr_plotdf = corr_data_extract(merged_df3 + , gene = gene + , drug = drug + , extract_scaled_cols = F) +colnames(corr_plotdf) + +if (all(colnames(corr_df_m3_f) == colnames(corr_plotdf))){ + cat("PASS: corr plot colnames match for dashboard") +}else{ + stop("Abort: corr plot colnames DO NOT match for dashboard") +} + +#corr_plotdf = corr_df_m3_f #for downstream code + +aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))] +aff_dist_cols + + +static_cols = c("Log10(MAF)" + , "Log10(OR)" + #, "-Log10(P)" +) + +#================ +# stability +#================ +#affinity_dist_colnames# lIg DIst and ppi Di +corr_ps_colnames = c(static_cols + , "DUET" + , "FoldX" + , "DeepDDG" + , "Dynamut2" + , aff_dist_cols + , "dst_mode") + +if (all(corr_ps_colnames%in%colnames(corr_plotdf))){ + cat("PASS: all colnames exist for correlation") +}else{ + stop("Abort: all colnames DO NOT exist for correlation") +} +corr_df_ps = corr_plotdf[, corr_ps_colnames] +complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps$`Log(OR)`)) +cat("\nComplete muts for Conservation for", gene, ":", complete_obs_ps) + +color_coln = which(colnames(corr_df_ps) == "dst_mode") +#end = which(colnames(corr_df_ps) == drug) +#ncol_omit = 2 +#corr_end = end-ncol_omit +corr_end = color_coln-1 + +#------------------------ +# Output: stability corrP +#------------------------ +corr_psP = paste0(outdir_images + ,tolower(gene) + ,"_corr_stability.svg" ) + +cat("Corr plot stability with coloured dots:", corr_psP) +svg(corr_psP, width = 15, height = 15) + +my_corr_pairs(corr_data_all = corr_df_ps + , corr_cols = colnames(corr_df_ps[1:corr_end]) + , corr_method = "spearman" + , colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode" + , categ_colour = c("red", "blue") + , density_show = F + , hist_col = "coral4" + , dot_size = 1.6 + , ats = 1.5 + , corr_lab_size =2.5 + , corr_value_size = 1) + +dev.off() +#=============== +# CONSERVATION +#============== +corr_conservation_cols = c( static_cols + , "ConSurf" + , "SNAP2" + , "PROVEAN" + , aff_dist_cols + , "dst_mode" + , drug) + +if (all(corr_conservation_cols%in%colnames(corr_plotdf))){ + cat("PASS: all colnames exist for ConSurf-correlation") +}else{ + stop("Abort: all colnames DO NOT exist for ConSurf-correlation") +} + +corr_df_cons = corr_plotdf[, corr_conservation_cols] +complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons$`Log(OR)`)) +cat("\nComplete muts for Conservation for", gene, ":", complete_obs_cons) + +color_coln = which(colnames(corr_df_cons) == "dst_mode") +# end = which(colnames(corr_df_cons) == drug) +# ncol_omit = 2 +# corr_end = end-ncol_omit +corr_end = color_coln-1 + + +#--------------------------- +# Output: Conservation corrP +#---------------------------- +corr_consP = paste0(outdir_images + ,tolower(gene) + ,"_corr_conservation.svg" ) + +cat("Corr plot conservation coloured dots:", corr_consP) +svg(corr_consP, width = 10, height = 10) + +my_corr_pairs(corr_data_all = corr_df_cons + , corr_cols = colnames(corr_df_cons[1:corr_end]) + , corr_method = "spearman" + , colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode" + , categ_colour = c("red", "blue") + , density_show = F + , hist_col = "coral4" + , dot_size =1.1 + , ats = 1.5 + , corr_lab_size = 1.8 + , corr_value_size = 1) + +dev.off() + +##################################################### +#DistCutOff = 10 +#LigDist_colname # = "ligand_distance" # from globals +#ppi2Dist_colname = "interface_dist" +#naDist_colname = "TBC" +##################################################### + +#================ +# ligand affinity +#================ +corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]