diff --git a/scripts/Header_TT.R b/scripts/Header_TT.R index 9a4aa76..26a418d 100755 --- a/scripts/Header_TT.R +++ b/scripts/Header_TT.R @@ -222,22 +222,9 @@ consurf_palette2 = c("0" = "yellow2" , "8" = "orchid4" , "9" = "darkorchid4") - -consurf_colours = c( - "0" = rgb(1.00,1.00,0.59) - , "9" = rgb(0.63,0.16,0.37) - , "8" = rgb(0.94,0.49,0.67) - , "7" = rgb(0.98,0.78,0.86) - , "6" = rgb(0.98,0.92,0.96) - , "5" = rgb(1.00,1.00,1.00) - , "4" = rgb(0.84,0.94,0.94) - , "3" = rgb(0.65,0.86,0.90) - , "2" = rgb(0.29,0.69,0.75) - , "1" = rgb(0.04,0.49,0.51) - ) - -# consurf_bp_colours = c( -# "0" = rgb(1.00,1.00,0.59) +# decreasing levels mess legend +# consurf_colours_LEVEL = c( +# "0" = rgb(1.00,1.00,0.59) # , "9" = rgb(0.63,0.16,0.37) # , "8" = rgb(0.94,0.49,0.67) # , "7" = rgb(0.98,0.78,0.86) @@ -247,7 +234,20 @@ consurf_colours = c( # , "3" = rgb(0.65,0.86,0.90) # , "2" = rgb(0.29,0.69,0.75) # , "1" = rgb(0.04,0.49,0.51) -# ) +# ) + +consurf_colours = c( + "0" = rgb(1.00,1.00,0.59) + , "1" = rgb(0.04,0.49,0.51) + , "2" = rgb(0.29,0.69,0.75) + , "3" = rgb(0.65,0.86,0.90) + , "4" = rgb(0.84,0.94,0.94) + , "5" = rgb(1.00,1.00,1.00) + , "6" = rgb(0.98,0.92,0.96) + , "7" = rgb(0.98,0.78,0.86) + , "8" = rgb(0.94,0.49,0.67) + , "9" = rgb(0.63,0.16,0.37) + ) ################################################## diff --git a/scripts/functions/position_count_bp.R b/scripts/functions/position_count_bp.R index 876a0fc..26a19e1 100755 --- a/scripts/functions/position_count_bp.R +++ b/scripts/functions/position_count_bp.R @@ -36,6 +36,8 @@ site_snp_count_bp <- function (plotdf , subtitle_size = 20 , subtitle_colour = "pink") { + + plotdf = as.data.frame(plotdf) # dim of plotdf cat(paste0("\noriginal df dimensions:" , "\nNo. of rows:", nrow(plotdf) @@ -45,14 +47,32 @@ site_snp_count_bp <- function (plotdf #------------------------------------------- # adding column: snpcount for each position #------------------------------------------- - setDT(plotdf)[, pos_count := .N, by = .(eval(parse(text = df_colname)))] + #setDT(plotdf)[, pos_count_check := .N, by = .(eval(parse(text = df_colname)))] + + # from dplyr + plotdf = plotdf %>% + dplyr::add_count(eval(parse(text = df_colname))) + class(plotdf) + plotdf = as.data.frame(plotdf) + class(plotdf) + nc_change = which(colnames(plotdf) == "n") + colnames(plotdf)[nc_change] <- "pos_count" + class(plotdf) + # if (all(plotdf$pos_count==plotdf$pos_count_check) ){ + # cat("\nPASS: pos_count column created") + # plotdf = plotdf[, !colnames(plotdf)%in%c("pos_count_check")] + # }else{ + # stop("\nAbort: pos count numbes mismatch from dplyr and data.table") + # } + cat("\nCumulative nssnp count\n" , table(plotdf$pos_count)) # calculating total no. of mutations tot_muts = sum(table(plotdf$pos_count)) + # sanity check if(tot_muts == nrow(plotdf)){ cat("\nPASS: total number of mutations match" diff --git a/scripts/functions/stability_count_bp.R b/scripts/functions/stability_count_bp.R index 14b0c9d..76858c7 100644 --- a/scripts/functions/stability_count_bp.R +++ b/scripts/functions/stability_count_bp.R @@ -24,7 +24,7 @@ stability_count_bp <- function(plotdf , geom_ls = 10 # geom_label size , yaxis_title = "Number of nsSNPs" , bp_plot_title = "" - , label_categories = c("LEVEL1", "LEVEL2") + , label_categories #= c("LEVEL1", "LEVEL2") , title_colour = "chocolate4" , subtitle_text = NULL , sts = 20 @@ -69,7 +69,8 @@ stability_count_bp <- function(plotdf scale_fill_manual(name = "" # name = leg_title , values = bar_fill_values - , labels = label_categories) + , labels = label_categories # problem with consurf decreasing level + ) return(OutPlot_count) diff --git a/scripts/plotting/plotting_colnames.R b/scripts/plotting/plotting_colnames.R index ea15973..d417de1 100644 --- a/scripts/plotting/plotting_colnames.R +++ b/scripts/plotting/plotting_colnames.R @@ -133,6 +133,23 @@ outcome_stability_cols = c("duet_outcome" , "foldx_outcome" , "avg_stability_outcome") +raw_conservation_cols = c("consurf_score" + , "provean_score" + , "snap2_score") + +scaled_conservation_cols = c("consurf_scaled" + ,"provean_scaled" + , "snap2_scaled") + +outcome_conservation_cols = c("consurf_outcome", "consurf_colour_rev" + ,"provean_outcome" + , "snap2_outcome") + + + all_stability_cols = c(raw_stability_cols , scaled_stability_cols - , outcome_stability_cols) + , outcome_stability_cols + , raw_conservation_cols + , scaled_conservation_cols + , outcome_conservation_cols) diff --git a/scripts/plotting/plotting_thesis/basic_barplots.R b/scripts/plotting/plotting_thesis/basic_barplots.R index f9d3c5e..0d75ae3 100755 --- a/scripts/plotting/plotting_thesis/basic_barplots.R +++ b/scripts/plotting/plotting_thesis/basic_barplots.R @@ -35,6 +35,26 @@ source("~/git/LSHTM_analysis/config/embb.R") source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") +class(merged_df3) +merged_df3 = as.data.frame(merged_df3) + +class(df3) +head(df3$pos_count) + +nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count") +colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all" +head(merged_df3$pos_count) +head(merged_df3$pos_count_all) + +# DROP pos_count column +# merged_df3$pos_count <-NULL +merged_df3 = merged_df3[, !colnames(merged_df3)%in%c("pos_count")] +head(merged_df3$pos_count) + +df3 = merged_df3[, colnames(merged_df3)%in%plotting_cols] + + + #======= # output #======= @@ -42,36 +62,21 @@ outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/ cat("plots will output to:", outdir_images) ########################################################### - # ConSurf labels -# consurf_colOld = "consurf_colour_rev" -# consurf_colNew = "consurf_outcome" -# df3[[consurf_colNew]] = df3[[consurf_colOld]] -# df3[[consurf_colNew]] = as.factor(df3[[consurf_colNew]]) -# df3[[consurf_colNew]] -consurf_colname = "consurf_outcome" -levels(df3[[consurf_colname]]) -# SNAP2 labels -snap2_colname = "snap2_outcome" -levels(df3[[snap2_colname]]) - -############################################################## -gene_all_cols = colnames(df3)[colnames(df3)%in%all_cols] -gene_outcome_cols = colnames(df3)[colnames(df3)%in%c(outcome_cols_stability - , outcome_cols_affinity - , outcome_cols_conservation)] -gene_outcome_cols -#======================================================================= #------------------------------ -# stability barplots: -outcome_cols_stability -# label_categories should be = levels(as.factor(plot_df[[df_colname]])) +# plot default sizes #------------------------------ sts = 22 subtitle_colour = "black" geom_ls = 10 +############################################################## +#------------------------------ +# stability barplots: +outcome_stability_cols +# label_categories should be = levels(as.factor(plot_df[[df_colname]])) +#------------------------- # duetP duetP = stability_count_bp(plotdf = df3 @@ -158,6 +163,95 @@ dynamut2P # , rel_heights = c(0.4/10,9/10)) # # dev.off() +########################################################### +#========================= +# Conservation outcome +# check this var: +outcome_conservation_cols +all(df3$consurf_colour_rev == df3$consurf_outcome) +#df3["consurf_outcome"] = as.factor(df3["consurf_outcome"]) +levels(df3[["consurf_outcome"]]) + +#========================== +table(df3$consurf_outcome) +ggplot(df3, aes_string(x = "consurf_outcome")) + + geom_bar(aes(fill = eval(parse(text = "consurf_outcome"))) + , show.legend = TRUE) + + scale_fill_manual(name = "" + , values = consurf_colours + #, labels = levels(df3[["snap2_outcome"]]) + ) + + +# consurf# had to turn label categories off for consurf +consurfP = stability_count_bp(plotdf = df3 + , df_colname = "consurf_outcome" + #, leg_title = "ConSurf" + #, label_categories = labels_consurf + , yaxis_title = "Number of nsSNPs" + , leg_position = "top" + , subtitle_text = "ConSurf" + , geom_ls = 5 + , bar_fill_values = consurf_colours # from globals + , sts = sts + , subtitle_colour= subtitle_colour) + +consurfP + +# provean +proveanP = stability_count_bp(plotdf = df3 + , df_colname = "provean_outcome" + #, leg_title = "PROVEAN" + #, label_categories = labels_provean + , yaxis_title = "" + , leg_position = "top" + , subtitle_text = "PROVEAN" + , geom_ls = geom_ls + , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep + , sts = sts + , subtitle_colour= subtitle_colour) + +# snap2 +snap2P = stability_count_bp(plotdf = df3 + , df_colname = "snap2_outcome" + #, leg_title = "SNAP2" + #, label_categories = labels_snap2 + , yaxis_title = "" + , leg_position = "top" + , subtitle_text = "SNAP2" + , geom_ls = geom_ls + , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep + , sts = sts + , subtitle_colour= subtitle_colour) + + +#============================ +# output: CONSERVATION PLOTS +#============================ +# bp_conservation_CLP = paste0(outdir_images +# ,tolower(gene) +# ,"_bp_conservation_CL.svg" ) +# +# print(paste0("plot filename:", bp_conservation_CLP)) +# svg(bp_conservation_CLP, width = 15, height = 6.5) +# +# cowplot::plot_grid(proveanP, snap2P, consurfP +# , nrow = 1 +# , ncol = 3 +# #, labels = c("(a)", "(b)", "(c)", "(d)") +# , labels = "AUTO" +# , label_size = 25 +# #, rel_heights = c(0.4/10,9/10)) +# , rel_widths = c(0.9, 0.9, 1.1)) +# +# +# dev.off() + + + + + + ########################################################### #========================= # Affinity outcome @@ -264,74 +358,7 @@ ppi2P = stability_count_bp(plotdf = df3_ppi2 # dev.off() ################################################################ -#========================= -# Conservation outcome -# check this var: -outcome_cols_conservation -#========================== -# consurf -consurfP = stability_count_bp(plotdf = df3 - , df_colname = "consurf_outcome" - #, leg_title = "ConSurf" - #, label_categories = labels_consurf - , yaxis_title = "Number of nsSNPs" - , leg_position = "top" - , subtitle_text = "ConSurf" - , geom_ls = 5 - , bar_fill_values = consurf_colours # from globals - , sts = sts - , subtitle_colour= subtitle_colour) -consurfP - -# provean -proveanP = stability_count_bp(plotdf = df3 - , df_colname = "provean_outcome" - #, leg_title = "PROVEAN" - #, label_categories = labels_provean - , yaxis_title = "" - , leg_position = "top" - , subtitle_text = "PROVEAN" - , geom_ls = geom_ls - , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep - , sts = sts - , subtitle_colour= subtitle_colour) - -# snap2 -snap2P = stability_count_bp(plotdf = df3 - , df_colname = "snap2_outcome" - #, leg_title = "SNAP2" - #, label_categories = labels_snap2 - , yaxis_title = "" - , leg_position = "top" - , subtitle_text = "SNAP2" - , geom_ls = geom_ls - , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep - , sts = sts - , subtitle_colour= subtitle_colour) - - -#============================ -# output: CONSERVATION PLOTS -#============================ -# bp_conservation_CLP = paste0(outdir_images -# ,tolower(gene) -# ,"_bp_conservation_CL.svg" ) -# -# print(paste0("plot filename:", bp_conservation_CLP)) -# svg(bp_conservation_CLP, width = 15, height = 6.5) -# -# cowplot::plot_grid(proveanP, snap2P, consurfP -# , nrow = 1 -# , ncol = 3 -# #, labels = c("(a)", "(b)", "(c)", "(d)") -# , labels = "AUTO" -# , label_size = 25 -# #, rel_heights = c(0.4/10,9/10)) -# , rel_widths = c(0.9, 0.9, 1.1)) -# -# -# dev.off() ##################################################################### #============ # Plot labels @@ -457,6 +484,41 @@ OutPlotBP() dev.off() ##################################################################### +# test + +setDT(df3)[, pos_count2 := .N, by = .(eval(parse(text = "position")))] +foo = df3[, c("mutationinformation", "position")] +df4 = foo[, c("mutationinformation", "position")] + + +var_pos = "position" +df4 = + df4 %>% + dplyr::add_count(eval(parse(text = var_pos))) + +class(df4) +df4 = as.data.frame(df4) +class(df4) +nc_change = which(colnames(df4) == "n") +colnames(df4)[nc_change] <- "pos_count" +class(df4) + +setDT(df4)[, pos_count2 := .N, by = .(eval(parse(text = "position")))] +class(df4) + +all(df4$pos_count==df4$pos_count2) + +# %>% + #group_by(pos_count = position) + +# df4 = +# df4 %>% +# dplyr::group_by(position) %>% +# count(position) + + +foo2 = df4[, c("mutationinformation", "position", "pos_count")] + ##################################################################### # ------------------------------ # bp site site count: ALL