Removed setDT() and replaced it with a dplyr alternative in position_count_bp.R

This commit is contained in:
Tanushree Tunstall 2022-08-14 14:19:09 +01:00
parent 65d697d3a2
commit da8f8d90d4
5 changed files with 210 additions and 110 deletions

View file

@ -35,6 +35,26 @@ source("~/git/LSHTM_analysis/config/embb.R")
# Load the plotting data frames and the plotting column-name definitions.
# NOTE(review): presumably these scripts define merged_df3, df3 and
# plotting_cols used below — confirm against the sourced files.
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
# Inspect the class, then coerce merged_df3 to a plain data.frame.
class(merged_df3)
merged_df3 = as.data.frame(merged_df3)
class(df3)
head(df3$pos_count)
# Rename the existing "pos_count" column to "df2_pos_count_all".
nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count")
colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all"
# Interactive checks after the rename.
# NOTE(review): the renamed column is "df2_pos_count_all", so
# `$pos_count_all` does not refer to it unless merged_df3 already has a
# column of that name — confirm which column was meant to be inspected.
head(merged_df3$pos_count)
head(merged_df3$pos_count_all)
# DROP pos_count column
# merged_df3$pos_count <-NULL
# Keeps every column whose name is not exactly "pos_count".
merged_df3 = merged_df3[, !colnames(merged_df3)%in%c("pos_count")]
head(merged_df3$pos_count)
# Rebuild df3 from the columns of merged_df3 that are listed in plotting_cols.
df3 = merged_df3[, colnames(merged_df3)%in%plotting_cols]
#=======
# output
#=======
@ -42,36 +62,21 @@ outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/
# Report the directory that plot images will be written to.
cat("plots will output to:", outdir_images)
###########################################################
# ConSurf labels
# consurf_colOld = "consurf_colour_rev"
# consurf_colNew = "consurf_outcome"
# df3[[consurf_colNew]] = df3[[consurf_colOld]]
# df3[[consurf_colNew]] = as.factor(df3[[consurf_colNew]])
# df3[[consurf_colNew]]
# Column holding the ConSurf outcome; print its factor levels as a check.
consurf_colname = "consurf_outcome"
levels(df3[[consurf_colname]])
# SNAP2 labels
# Column holding the SNAP2 outcome; print its factor levels as a check.
snap2_colname = "snap2_outcome"
levels(df3[[snap2_colname]])
##############################################################
# Columns of df3 that also appear in all_cols.
gene_all_cols = colnames(df3)[colnames(df3)%in%all_cols]
# Columns of df3 that appear in any of the three outcome column sets.
gene_outcome_cols = colnames(df3)[colnames(df3)%in%c(outcome_cols_stability
, outcome_cols_affinity
, outcome_cols_conservation)]
gene_outcome_cols
#=======================================================================
#------------------------------
# stability barplots:
# NOTE(review): both `outcome_cols_stability` (here) and
# `outcome_stability_cols` (further down) appear in this diff view; confirm
# which name plotting_colnames.R actually defines.
outcome_cols_stability
# label_categories should be = levels(as.factor(plot_df[[df_colname]]))
# plot default sizes
#------------------------------
# Shared defaults passed as `sts`, `subtitle_colour` and `geom_ls` to the
# stability_count_bp() calls in this script.
# NOTE(review): presumably subtitle text size, subtitle colour and geom
# label size — confirm against stability_count_bp().
sts = 22
subtitle_colour = "black"
geom_ls = 10
##############################################################
#------------------------------
# stability barplots:
outcome_stability_cols
# label_categories should be = levels(as.factor(plot_df[[df_colname]]))
#-------------------------
# duetP
duetP = stability_count_bp(plotdf = df3
@ -158,6 +163,95 @@ dynamut2P
# , rel_heights = c(0.4/10,9/10))
#
# dev.off()
###########################################################
#=========================
# Conservation outcome
# check this var:
outcome_conservation_cols
# Sanity check: the two ConSurf columns should agree element-wise.
# NOTE(review): all() yields NA rather than FALSE if either column
# contains NA — confirm the columns are complete.
all(df3$consurf_colour_rev == df3$consurf_outcome)
#df3["consurf_outcome"] = as.factor(df3["consurf_outcome"])
levels(df3[["consurf_outcome"]])
#==========================
# Counts per ConSurf category, followed by a quick bar chart of the same.
table(df3$consurf_outcome)
# Map the column directly in aes(): aes_string() is deprecated and
# eval(parse(text = ...)) is unnecessary for a fixed column name.
# The legend title is set explicitly below, so the plot is unchanged.
ggplot(df3, aes(x = consurf_outcome)) +
  geom_bar(aes(fill = consurf_outcome)
           , show.legend = TRUE) +
  scale_fill_manual(name = ""
                    , values = consurf_colours
                    #, labels = levels(df3[["snap2_outcome"]])
  )
# ConSurf barplot.
# Label categories had to be turned off for ConSurf.
consurfP <- stability_count_bp(
  plotdf = df3,
  df_colname = "consurf_outcome",
  # leg_title = "ConSurf",
  # label_categories = labels_consurf,
  yaxis_title = "Number of nsSNPs",
  leg_position = "top",
  subtitle_text = "ConSurf",
  geom_ls = 5,
  bar_fill_values = consurf_colours, # from globals
  sts = sts,
  subtitle_colour = subtitle_colour
)
consurfP

# PROVEAN barplot.
proveanP <- stability_count_bp(
  plotdf = df3,
  df_colname = "provean_outcome",
  # leg_title = "PROVEAN",
  # label_categories = labels_provean,
  yaxis_title = "",
  leg_position = "top",
  subtitle_text = "PROVEAN",
  geom_ls = geom_ls,
  bar_fill_values = c("#D01C8B", "#F1B6DA"), # deep pink, light pink
  sts = sts,
  subtitle_colour = subtitle_colour
)

# SNAP2 barplot.
snap2P <- stability_count_bp(
  plotdf = df3,
  df_colname = "snap2_outcome",
  # leg_title = "SNAP2",
  # label_categories = labels_snap2,
  yaxis_title = "",
  leg_position = "top",
  subtitle_text = "SNAP2",
  geom_ls = geom_ls,
  bar_fill_values = c("#D01C8B", "#F1B6DA"), # deep pink, light pink
  sts = sts,
  subtitle_colour = subtitle_colour
)
#============================
# output: CONSERVATION PLOTS
#============================
# bp_conservation_CLP = paste0(outdir_images
# ,tolower(gene)
# ,"_bp_conservation_CL.svg" )
#
# print(paste0("plot filename:", bp_conservation_CLP))
# svg(bp_conservation_CLP, width = 15, height = 6.5)
#
# cowplot::plot_grid(proveanP, snap2P, consurfP
# , nrow = 1
# , ncol = 3
# #, labels = c("(a)", "(b)", "(c)", "(d)")
# , labels = "AUTO"
# , label_size = 25
# #, rel_heights = c(0.4/10,9/10))
# , rel_widths = c(0.9, 0.9, 1.1))
#
#
# dev.off()
###########################################################
#=========================
# Affinity outcome
@ -264,74 +358,7 @@ ppi2P = stability_count_bp(plotdf = df3_ppi2
# dev.off()
################################################################
#=========================
# Conservation outcome
# check this var:
outcome_cols_conservation
#==========================
# ConSurf barplot.
consurfP <- stability_count_bp(
  plotdf = df3,
  df_colname = "consurf_outcome",
  # leg_title = "ConSurf",
  # label_categories = labels_consurf,
  yaxis_title = "Number of nsSNPs",
  leg_position = "top",
  subtitle_text = "ConSurf",
  geom_ls = 5,
  bar_fill_values = consurf_colours, # from globals
  sts = sts,
  subtitle_colour = subtitle_colour
)
consurfP

# PROVEAN barplot.
proveanP <- stability_count_bp(
  plotdf = df3,
  df_colname = "provean_outcome",
  # leg_title = "PROVEAN",
  # label_categories = labels_provean,
  yaxis_title = "",
  leg_position = "top",
  subtitle_text = "PROVEAN",
  geom_ls = geom_ls,
  bar_fill_values = c("#D01C8B", "#F1B6DA"), # deep pink, light pink
  sts = sts,
  subtitle_colour = subtitle_colour
)

# SNAP2 barplot.
snap2P <- stability_count_bp(
  plotdf = df3,
  df_colname = "snap2_outcome",
  # leg_title = "SNAP2",
  # label_categories = labels_snap2,
  yaxis_title = "",
  leg_position = "top",
  subtitle_text = "SNAP2",
  geom_ls = geom_ls,
  bar_fill_values = c("#D01C8B", "#F1B6DA"), # deep pink, light pink
  sts = sts,
  subtitle_colour = subtitle_colour
)
#============================
# output: CONSERVATION PLOTS
#============================
# bp_conservation_CLP = paste0(outdir_images
# ,tolower(gene)
# ,"_bp_conservation_CL.svg" )
#
# print(paste0("plot filename:", bp_conservation_CLP))
# svg(bp_conservation_CLP, width = 15, height = 6.5)
#
# cowplot::plot_grid(proveanP, snap2P, consurfP
# , nrow = 1
# , ncol = 3
# #, labels = c("(a)", "(b)", "(c)", "(d)")
# , labels = "AUTO"
# , label_size = 25
# #, rel_heights = c(0.4/10,9/10))
# , rel_widths = c(0.9, 0.9, 1.1))
#
#
# dev.off()
#####################################################################
#============
# Plot labels
@ -457,6 +484,41 @@ OutPlotBP()
dev.off()
#####################################################################
# test
# Checks that dplyr::add_count() reproduces the per-position row counts
# computed with data.table's `.N`.
# NOTE(review): setDT() converts df3 to a data.table by reference and adds
# pos_count2 to it; kept because later code may rely on it — confirm.
setDT(df3)[, pos_count2 := .N, by = "position"]
foo <- df3[, c("mutationinformation", "position")]
df4 <- foo[, c("mutationinformation", "position")]
var_pos <- "position"
# Inject the column name as a symbol so add_count() groups by the existing
# column; the earlier eval(parse()) form added a stray column named after
# the expression. `name` sets the count column directly, so no rename of
# "n" is needed afterwards.
df4 <-
  df4 %>%
  dplyr::add_count(!!dplyr::sym(var_pos), name = "pos_count")
class(df4)
df4 <- as.data.frame(df4)
class(df4)
# Reference count computed the data.table way, for comparison.
setDT(df4)[, pos_count2 := .N, by = "position"]
class(df4)
# TRUE when both approaches give the same count on every row.
all(df4$pos_count==df4$pos_count2)
# %>%
#group_by(pos_count = position)
# df4 =
# df4 %>%
# dplyr::group_by(position) %>%
# count(position)
foo2 <- df4[, c("mutationinformation", "position", "pos_count")]
#####################################################################
# ------------------------------
# bp site site count: ALL