removed setDT and replaced with dplyr alt in position_count_bp.R
This commit is contained in:
parent
65d697d3a2
commit
da8f8d90d4
5 changed files with 210 additions and 110 deletions
|
@ -222,22 +222,9 @@ consurf_palette2 = c("0" = "yellow2"
|
||||||
, "8" = "orchid4"
|
, "8" = "orchid4"
|
||||||
, "9" = "darkorchid4")
|
, "9" = "darkorchid4")
|
||||||
|
|
||||||
|
# decreasing levels mess legend
|
||||||
consurf_colours = c(
|
# consurf_colours_LEVEL = c(
|
||||||
"0" = rgb(1.00,1.00,0.59)
|
# "0" = rgb(1.00,1.00,0.59)
|
||||||
, "9" = rgb(0.63,0.16,0.37)
|
|
||||||
, "8" = rgb(0.94,0.49,0.67)
|
|
||||||
, "7" = rgb(0.98,0.78,0.86)
|
|
||||||
, "6" = rgb(0.98,0.92,0.96)
|
|
||||||
, "5" = rgb(1.00,1.00,1.00)
|
|
||||||
, "4" = rgb(0.84,0.94,0.94)
|
|
||||||
, "3" = rgb(0.65,0.86,0.90)
|
|
||||||
, "2" = rgb(0.29,0.69,0.75)
|
|
||||||
, "1" = rgb(0.04,0.49,0.51)
|
|
||||||
)
|
|
||||||
|
|
||||||
# consurf_bp_colours = c(
|
|
||||||
# "0" = rgb(1.00,1.00,0.59)
|
|
||||||
# , "9" = rgb(0.63,0.16,0.37)
|
# , "9" = rgb(0.63,0.16,0.37)
|
||||||
# , "8" = rgb(0.94,0.49,0.67)
|
# , "8" = rgb(0.94,0.49,0.67)
|
||||||
# , "7" = rgb(0.98,0.78,0.86)
|
# , "7" = rgb(0.98,0.78,0.86)
|
||||||
|
@ -247,7 +234,20 @@ consurf_colours = c(
|
||||||
# , "3" = rgb(0.65,0.86,0.90)
|
# , "3" = rgb(0.65,0.86,0.90)
|
||||||
# , "2" = rgb(0.29,0.69,0.75)
|
# , "2" = rgb(0.29,0.69,0.75)
|
||||||
# , "1" = rgb(0.04,0.49,0.51)
|
# , "1" = rgb(0.04,0.49,0.51)
|
||||||
# )
|
# )
|
||||||
|
|
||||||
|
consurf_colours = c(
|
||||||
|
"0" = rgb(1.00,1.00,0.59)
|
||||||
|
, "1" = rgb(0.04,0.49,0.51)
|
||||||
|
, "2" = rgb(0.29,0.69,0.75)
|
||||||
|
, "3" = rgb(0.65,0.86,0.90)
|
||||||
|
, "4" = rgb(0.84,0.94,0.94)
|
||||||
|
, "5" = rgb(1.00,1.00,1.00)
|
||||||
|
, "6" = rgb(0.98,0.92,0.96)
|
||||||
|
, "7" = rgb(0.98,0.78,0.86)
|
||||||
|
, "8" = rgb(0.94,0.49,0.67)
|
||||||
|
, "9" = rgb(0.63,0.16,0.37)
|
||||||
|
)
|
||||||
|
|
||||||
##################################################
|
##################################################
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,8 @@ site_snp_count_bp <- function (plotdf
|
||||||
, subtitle_size = 20
|
, subtitle_size = 20
|
||||||
, subtitle_colour = "pink")
|
, subtitle_colour = "pink")
|
||||||
{
|
{
|
||||||
|
|
||||||
|
plotdf = as.data.frame(plotdf)
|
||||||
# dim of plotdf
|
# dim of plotdf
|
||||||
cat(paste0("\noriginal df dimensions:"
|
cat(paste0("\noriginal df dimensions:"
|
||||||
, "\nNo. of rows:", nrow(plotdf)
|
, "\nNo. of rows:", nrow(plotdf)
|
||||||
|
@ -45,7 +47,24 @@ site_snp_count_bp <- function (plotdf
|
||||||
#-------------------------------------------
|
#-------------------------------------------
|
||||||
# adding column: snpcount for each position
|
# adding column: snpcount for each position
|
||||||
#-------------------------------------------
|
#-------------------------------------------
|
||||||
setDT(plotdf)[, pos_count := .N, by = .(eval(parse(text = df_colname)))]
|
#setDT(plotdf)[, pos_count_check := .N, by = .(eval(parse(text = df_colname)))]
|
||||||
|
|
||||||
|
# from dplyr
|
||||||
|
plotdf = plotdf %>%
|
||||||
|
dplyr::add_count(eval(parse(text = df_colname)))
|
||||||
|
class(plotdf)
|
||||||
|
plotdf = as.data.frame(plotdf)
|
||||||
|
class(plotdf)
|
||||||
|
nc_change = which(colnames(plotdf) == "n")
|
||||||
|
colnames(plotdf)[nc_change] <- "pos_count"
|
||||||
|
class(plotdf)
|
||||||
|
|
||||||
|
# if (all(plotdf$pos_count==plotdf$pos_count_check) ){
|
||||||
|
# cat("\nPASS: pos_count column created")
|
||||||
|
# plotdf = plotdf[, !colnames(plotdf)%in%c("pos_count_check")]
|
||||||
|
# }else{
|
||||||
|
# stop("\nAbort: pos count numbes mismatch from dplyr and data.table")
|
||||||
|
# }
|
||||||
|
|
||||||
cat("\nCumulative nssnp count\n"
|
cat("\nCumulative nssnp count\n"
|
||||||
, table(plotdf$pos_count))
|
, table(plotdf$pos_count))
|
||||||
|
@ -53,6 +72,7 @@ site_snp_count_bp <- function (plotdf
|
||||||
# calculating total no. of mutations
|
# calculating total no. of mutations
|
||||||
tot_muts = sum(table(plotdf$pos_count))
|
tot_muts = sum(table(plotdf$pos_count))
|
||||||
|
|
||||||
|
|
||||||
# sanity check
|
# sanity check
|
||||||
if(tot_muts == nrow(plotdf)){
|
if(tot_muts == nrow(plotdf)){
|
||||||
cat("\nPASS: total number of mutations match"
|
cat("\nPASS: total number of mutations match"
|
||||||
|
|
|
@ -24,7 +24,7 @@ stability_count_bp <- function(plotdf
|
||||||
, geom_ls = 10 # geom_label size
|
, geom_ls = 10 # geom_label size
|
||||||
, yaxis_title = "Number of nsSNPs"
|
, yaxis_title = "Number of nsSNPs"
|
||||||
, bp_plot_title = ""
|
, bp_plot_title = ""
|
||||||
, label_categories = c("LEVEL1", "LEVEL2")
|
, label_categories #= c("LEVEL1", "LEVEL2")
|
||||||
, title_colour = "chocolate4"
|
, title_colour = "chocolate4"
|
||||||
, subtitle_text = NULL
|
, subtitle_text = NULL
|
||||||
, sts = 20
|
, sts = 20
|
||||||
|
@ -69,7 +69,8 @@ stability_count_bp <- function(plotdf
|
||||||
scale_fill_manual(name = ""
|
scale_fill_manual(name = ""
|
||||||
# name = leg_title
|
# name = leg_title
|
||||||
, values = bar_fill_values
|
, values = bar_fill_values
|
||||||
, labels = label_categories)
|
, labels = label_categories # problem with consurf decreasing level
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
return(OutPlot_count)
|
return(OutPlot_count)
|
||||||
|
|
|
@ -133,6 +133,23 @@ outcome_stability_cols = c("duet_outcome"
|
||||||
, "foldx_outcome"
|
, "foldx_outcome"
|
||||||
, "avg_stability_outcome")
|
, "avg_stability_outcome")
|
||||||
|
|
||||||
|
raw_conservation_cols = c("consurf_score"
|
||||||
|
, "provean_score"
|
||||||
|
, "snap2_score")
|
||||||
|
|
||||||
|
scaled_conservation_cols = c("consurf_scaled"
|
||||||
|
,"provean_scaled"
|
||||||
|
, "snap2_scaled")
|
||||||
|
|
||||||
|
outcome_conservation_cols = c("consurf_outcome", "consurf_colour_rev"
|
||||||
|
,"provean_outcome"
|
||||||
|
, "snap2_outcome")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
all_stability_cols = c(raw_stability_cols
|
all_stability_cols = c(raw_stability_cols
|
||||||
, scaled_stability_cols
|
, scaled_stability_cols
|
||||||
, outcome_stability_cols)
|
, outcome_stability_cols
|
||||||
|
, raw_conservation_cols
|
||||||
|
, scaled_conservation_cols
|
||||||
|
, outcome_conservation_cols)
|
||||||
|
|
|
@ -35,6 +35,26 @@ source("~/git/LSHTM_analysis/config/embb.R")
|
||||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||||
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
|
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
|
||||||
|
|
||||||
|
class(merged_df3)
|
||||||
|
merged_df3 = as.data.frame(merged_df3)
|
||||||
|
|
||||||
|
class(df3)
|
||||||
|
head(df3$pos_count)
|
||||||
|
|
||||||
|
nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count")
|
||||||
|
colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all"
|
||||||
|
head(merged_df3$pos_count)
|
||||||
|
head(merged_df3$pos_count_all)
|
||||||
|
|
||||||
|
# DROP pos_count column
|
||||||
|
# merged_df3$pos_count <-NULL
|
||||||
|
merged_df3 = merged_df3[, !colnames(merged_df3)%in%c("pos_count")]
|
||||||
|
head(merged_df3$pos_count)
|
||||||
|
|
||||||
|
df3 = merged_df3[, colnames(merged_df3)%in%plotting_cols]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#=======
|
#=======
|
||||||
# output
|
# output
|
||||||
#=======
|
#=======
|
||||||
|
@ -42,36 +62,21 @@ outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/
|
||||||
cat("plots will output to:", outdir_images)
|
cat("plots will output to:", outdir_images)
|
||||||
|
|
||||||
###########################################################
|
###########################################################
|
||||||
|
|
||||||
# ConSurf labels
|
# ConSurf labels
|
||||||
|
|
||||||
# consurf_colOld = "consurf_colour_rev"
|
|
||||||
# consurf_colNew = "consurf_outcome"
|
|
||||||
# df3[[consurf_colNew]] = df3[[consurf_colOld]]
|
|
||||||
# df3[[consurf_colNew]] = as.factor(df3[[consurf_colNew]])
|
|
||||||
# df3[[consurf_colNew]]
|
|
||||||
consurf_colname = "consurf_outcome"
|
|
||||||
levels(df3[[consurf_colname]])
|
|
||||||
|
|
||||||
# SNAP2 labels
|
|
||||||
snap2_colname = "snap2_outcome"
|
|
||||||
levels(df3[[snap2_colname]])
|
|
||||||
|
|
||||||
##############################################################
|
|
||||||
gene_all_cols = colnames(df3)[colnames(df3)%in%all_cols]
|
|
||||||
gene_outcome_cols = colnames(df3)[colnames(df3)%in%c(outcome_cols_stability
|
|
||||||
, outcome_cols_affinity
|
|
||||||
, outcome_cols_conservation)]
|
|
||||||
gene_outcome_cols
|
|
||||||
#=======================================================================
|
|
||||||
#------------------------------
|
#------------------------------
|
||||||
# stability barplots:
|
# plot default sizes
|
||||||
outcome_cols_stability
|
|
||||||
# label_categories should be = levels(as.factor(plot_df[[df_colname]]))
|
|
||||||
#------------------------------
|
#------------------------------
|
||||||
sts = 22
|
sts = 22
|
||||||
subtitle_colour = "black"
|
subtitle_colour = "black"
|
||||||
geom_ls = 10
|
geom_ls = 10
|
||||||
|
##############################################################
|
||||||
|
#------------------------------
|
||||||
|
# stability barplots:
|
||||||
|
outcome_stability_cols
|
||||||
|
# label_categories should be = levels(as.factor(plot_df[[df_colname]]))
|
||||||
|
#-------------------------
|
||||||
|
|
||||||
# duetP
|
# duetP
|
||||||
duetP = stability_count_bp(plotdf = df3
|
duetP = stability_count_bp(plotdf = df3
|
||||||
|
@ -158,6 +163,95 @@ dynamut2P
|
||||||
# , rel_heights = c(0.4/10,9/10))
|
# , rel_heights = c(0.4/10,9/10))
|
||||||
#
|
#
|
||||||
# dev.off()
|
# dev.off()
|
||||||
|
###########################################################
|
||||||
|
#=========================
|
||||||
|
# Conservation outcome
|
||||||
|
# check this var:
|
||||||
|
outcome_conservation_cols
|
||||||
|
all(df3$consurf_colour_rev == df3$consurf_outcome)
|
||||||
|
#df3["consurf_outcome"] = as.factor(df3["consurf_outcome"])
|
||||||
|
levels(df3[["consurf_outcome"]])
|
||||||
|
|
||||||
|
#==========================
|
||||||
|
table(df3$consurf_outcome)
|
||||||
|
ggplot(df3, aes_string(x = "consurf_outcome")) +
|
||||||
|
geom_bar(aes(fill = eval(parse(text = "consurf_outcome")))
|
||||||
|
, show.legend = TRUE) +
|
||||||
|
scale_fill_manual(name = ""
|
||||||
|
, values = consurf_colours
|
||||||
|
#, labels = levels(df3[["snap2_outcome"]])
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# consurf# had to turn label categories off for consurf
|
||||||
|
consurfP = stability_count_bp(plotdf = df3
|
||||||
|
, df_colname = "consurf_outcome"
|
||||||
|
#, leg_title = "ConSurf"
|
||||||
|
#, label_categories = labels_consurf
|
||||||
|
, yaxis_title = "Number of nsSNPs"
|
||||||
|
, leg_position = "top"
|
||||||
|
, subtitle_text = "ConSurf"
|
||||||
|
, geom_ls = 5
|
||||||
|
, bar_fill_values = consurf_colours # from globals
|
||||||
|
, sts = sts
|
||||||
|
, subtitle_colour= subtitle_colour)
|
||||||
|
|
||||||
|
consurfP
|
||||||
|
|
||||||
|
# provean
|
||||||
|
proveanP = stability_count_bp(plotdf = df3
|
||||||
|
, df_colname = "provean_outcome"
|
||||||
|
#, leg_title = "PROVEAN"
|
||||||
|
#, label_categories = labels_provean
|
||||||
|
, yaxis_title = ""
|
||||||
|
, leg_position = "top"
|
||||||
|
, subtitle_text = "PROVEAN"
|
||||||
|
, geom_ls = geom_ls
|
||||||
|
, bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
|
||||||
|
, sts = sts
|
||||||
|
, subtitle_colour= subtitle_colour)
|
||||||
|
|
||||||
|
# snap2
|
||||||
|
snap2P = stability_count_bp(plotdf = df3
|
||||||
|
, df_colname = "snap2_outcome"
|
||||||
|
#, leg_title = "SNAP2"
|
||||||
|
#, label_categories = labels_snap2
|
||||||
|
, yaxis_title = ""
|
||||||
|
, leg_position = "top"
|
||||||
|
, subtitle_text = "SNAP2"
|
||||||
|
, geom_ls = geom_ls
|
||||||
|
, bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
|
||||||
|
, sts = sts
|
||||||
|
, subtitle_colour= subtitle_colour)
|
||||||
|
|
||||||
|
|
||||||
|
#============================
|
||||||
|
# output: CONSERVATION PLOTS
|
||||||
|
#============================
|
||||||
|
# bp_conservation_CLP = paste0(outdir_images
|
||||||
|
# ,tolower(gene)
|
||||||
|
# ,"_bp_conservation_CL.svg" )
|
||||||
|
#
|
||||||
|
# print(paste0("plot filename:", bp_conservation_CLP))
|
||||||
|
# svg(bp_conservation_CLP, width = 15, height = 6.5)
|
||||||
|
#
|
||||||
|
# cowplot::plot_grid(proveanP, snap2P, consurfP
|
||||||
|
# , nrow = 1
|
||||||
|
# , ncol = 3
|
||||||
|
# #, labels = c("(a)", "(b)", "(c)", "(d)")
|
||||||
|
# , labels = "AUTO"
|
||||||
|
# , label_size = 25
|
||||||
|
# #, rel_heights = c(0.4/10,9/10))
|
||||||
|
# , rel_widths = c(0.9, 0.9, 1.1))
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
###########################################################
|
###########################################################
|
||||||
#=========================
|
#=========================
|
||||||
# Affinity outcome
|
# Affinity outcome
|
||||||
|
@ -264,74 +358,7 @@ ppi2P = stability_count_bp(plotdf = df3_ppi2
|
||||||
# dev.off()
|
# dev.off()
|
||||||
|
|
||||||
################################################################
|
################################################################
|
||||||
#=========================
|
|
||||||
# Conservation outcome
|
|
||||||
# check this var:
|
|
||||||
outcome_cols_conservation
|
|
||||||
#==========================
|
|
||||||
# consurf
|
|
||||||
consurfP = stability_count_bp(plotdf = df3
|
|
||||||
, df_colname = "consurf_outcome"
|
|
||||||
#, leg_title = "ConSurf"
|
|
||||||
#, label_categories = labels_consurf
|
|
||||||
, yaxis_title = "Number of nsSNPs"
|
|
||||||
, leg_position = "top"
|
|
||||||
, subtitle_text = "ConSurf"
|
|
||||||
, geom_ls = 5
|
|
||||||
, bar_fill_values = consurf_colours # from globals
|
|
||||||
, sts = sts
|
|
||||||
, subtitle_colour= subtitle_colour)
|
|
||||||
|
|
||||||
consurfP
|
|
||||||
|
|
||||||
# provean
|
|
||||||
proveanP = stability_count_bp(plotdf = df3
|
|
||||||
, df_colname = "provean_outcome"
|
|
||||||
#, leg_title = "PROVEAN"
|
|
||||||
#, label_categories = labels_provean
|
|
||||||
, yaxis_title = ""
|
|
||||||
, leg_position = "top"
|
|
||||||
, subtitle_text = "PROVEAN"
|
|
||||||
, geom_ls = geom_ls
|
|
||||||
, bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
|
|
||||||
, sts = sts
|
|
||||||
, subtitle_colour= subtitle_colour)
|
|
||||||
|
|
||||||
# snap2
|
|
||||||
snap2P = stability_count_bp(plotdf = df3
|
|
||||||
, df_colname = "snap2_outcome"
|
|
||||||
#, leg_title = "SNAP2"
|
|
||||||
#, label_categories = labels_snap2
|
|
||||||
, yaxis_title = ""
|
|
||||||
, leg_position = "top"
|
|
||||||
, subtitle_text = "SNAP2"
|
|
||||||
, geom_ls = geom_ls
|
|
||||||
, bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
|
|
||||||
, sts = sts
|
|
||||||
, subtitle_colour= subtitle_colour)
|
|
||||||
|
|
||||||
|
|
||||||
#============================
|
|
||||||
# output: CONSERVATION PLOTS
|
|
||||||
#============================
|
|
||||||
# bp_conservation_CLP = paste0(outdir_images
|
|
||||||
# ,tolower(gene)
|
|
||||||
# ,"_bp_conservation_CL.svg" )
|
|
||||||
#
|
|
||||||
# print(paste0("plot filename:", bp_conservation_CLP))
|
|
||||||
# svg(bp_conservation_CLP, width = 15, height = 6.5)
|
|
||||||
#
|
|
||||||
# cowplot::plot_grid(proveanP, snap2P, consurfP
|
|
||||||
# , nrow = 1
|
|
||||||
# , ncol = 3
|
|
||||||
# #, labels = c("(a)", "(b)", "(c)", "(d)")
|
|
||||||
# , labels = "AUTO"
|
|
||||||
# , label_size = 25
|
|
||||||
# #, rel_heights = c(0.4/10,9/10))
|
|
||||||
# , rel_widths = c(0.9, 0.9, 1.1))
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# dev.off()
|
|
||||||
#####################################################################
|
#####################################################################
|
||||||
#============
|
#============
|
||||||
# Plot labels
|
# Plot labels
|
||||||
|
@ -457,6 +484,41 @@ OutPlotBP()
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
#####################################################################
|
#####################################################################
|
||||||
|
# test
|
||||||
|
|
||||||
|
setDT(df3)[, pos_count2 := .N, by = .(eval(parse(text = "position")))]
|
||||||
|
foo = df3[, c("mutationinformation", "position")]
|
||||||
|
df4 = foo[, c("mutationinformation", "position")]
|
||||||
|
|
||||||
|
|
||||||
|
var_pos = "position"
|
||||||
|
df4 =
|
||||||
|
df4 %>%
|
||||||
|
dplyr::add_count(eval(parse(text = var_pos)))
|
||||||
|
|
||||||
|
class(df4)
|
||||||
|
df4 = as.data.frame(df4)
|
||||||
|
class(df4)
|
||||||
|
nc_change = which(colnames(df4) == "n")
|
||||||
|
colnames(df4)[nc_change] <- "pos_count"
|
||||||
|
class(df4)
|
||||||
|
|
||||||
|
setDT(df4)[, pos_count2 := .N, by = .(eval(parse(text = "position")))]
|
||||||
|
class(df4)
|
||||||
|
|
||||||
|
all(df4$pos_count==df4$pos_count2)
|
||||||
|
|
||||||
|
# %>%
|
||||||
|
#group_by(pos_count = position)
|
||||||
|
|
||||||
|
# df4 =
|
||||||
|
# df4 %>%
|
||||||
|
# dplyr::group_by(position) %>%
|
||||||
|
# count(position)
|
||||||
|
|
||||||
|
|
||||||
|
foo2 = df4[, c("mutationinformation", "position", "pos_count")]
|
||||||
|
|
||||||
#####################################################################
|
#####################################################################
|
||||||
# ------------------------------
|
# ------------------------------
|
||||||
# bp site site count: ALL
|
# bp site site count: ALL
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue