removed setDT and replaced with dplyr alt in position_count_bp.R

2022-08-14 14:19:09 +01:00 · 2022-08-14 14:19:09 +01:00 · da8f8d90d4
commit da8f8d90d4
parent 65d697d3a2
5 changed files with 210 additions and 110 deletions
--- a/scripts/plotting/plotting_thesis/basic_barplots.R
+++ b/scripts/plotting/plotting_thesis/basic_barplots.R
@ -35,6 +35,26 @@ source("~/git/LSHTM_analysis/config/embb.R")
 source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
 source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")

+class(merged_df3)
+merged_df3 = as.data.frame(merged_df3)
+
+class(df3)
+head(df3$pos_count)
+
+nc_pc_CHANGE = which(colnames(merged_df3) == "pos_count")
+colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all"
+head(merged_df3[["pos_count"]])         # [[ ]] matches exactly: NULL once the rename has happened
+head(merged_df3[["df2_pos_count_all"]]) # inspect the renamed column under its new name
+
+# DROP any pos_count column still present (nothing to drop if the rename above caught it)
+# merged_df3$pos_count <-NULL
+merged_df3 = merged_df3[, !colnames(merged_df3) %in% c("pos_count"), drop = FALSE]
+head(merged_df3[["pos_count"]])
+
+df3 = merged_df3[, colnames(merged_df3) %in% plotting_cols, drop = FALSE] # drop = FALSE: stay a data.frame
+
+
+
 #=======
 # output
 #=======
@ -42,36 +62,21 @@ outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/
 cat("plots will output to:", outdir_images)

 ###########################################################
-
 # ConSurf labels

-# consurf_colOld = "consurf_colour_rev"
-# consurf_colNew = "consurf_outcome"
-# df3[[consurf_colNew]] = df3[[consurf_colOld]]
-# df3[[consurf_colNew]] = as.factor(df3[[consurf_colNew]])
-# df3[[consurf_colNew]]
-consurf_colname = "consurf_outcome"
-levels(df3[[consurf_colname]])

-# SNAP2 labels
-snap2_colname = "snap2_outcome"
-levels(df3[[snap2_colname]])
-
-##############################################################
-gene_all_cols = colnames(df3)[colnames(df3)%in%all_cols]
-gene_outcome_cols = colnames(df3)[colnames(df3)%in%c(outcome_cols_stability
-                                                     , outcome_cols_affinity
-                                                     , outcome_cols_conservation)]
-gene_outcome_cols
-#=======================================================================
 #------------------------------
-# stability barplots:
-outcome_cols_stability
-# label_categories should be  = levels(as.factor(plot_df[[df_colname]]))
+# plot default sizes
 #------------------------------
 sts = 22
 subtitle_colour = "black"
 geom_ls = 10
+##############################################################
+#------------------------------
+# stability barplots:
+outcome_stability_cols
+# label_categories should be  = levels(as.factor(plot_df[[df_colname]]))
+#-------------------------

 # duetP
 duetP = stability_count_bp(plotdf = df3
@ -158,6 +163,95 @@ dynamut2P
 #   , rel_heights = c(0.4/10,9/10))
 # 
 # dev.off()
+###########################################################
+#=========================
+# Conservation outcome
+# check this var:
+outcome_conservation_cols
+all(df3$consurf_colour_rev == df3$consurf_outcome)
+#df3["consurf_outcome"] = as.factor(df3["consurf_outcome"])
+levels(df3[["consurf_outcome"]])
+
+#==========================
+table(df3$consurf_outcome)
+# Quick-look bar chart of consurf_outcome counts, bars filled per category from consurf_colours
+ggplot(df3, aes(x = consurf_outcome, fill = consurf_outcome)) +
+  geom_bar(show.legend = TRUE) +
+  scale_fill_manual(name = ""
+                    , values = consurf_colours
+                    #, labels = levels(df3[["snap2_outcome"]])
+                    )
+
+
+# ConSurf count barplot: label_categories had to be turned off (left commented out) for ConSurf
+consurfP = stability_count_bp(plotdf = df3
+                              , df_colname = "consurf_outcome"
+                              #, leg_title = "ConSurf"
+                              #, label_categories = labels_consurf
+                              , yaxis_title = "Number of nsSNPs"
+                              , leg_position = "top"
+                              , subtitle_text = "ConSurf"
+                              , geom_ls = 5
+                              , bar_fill_values = consurf_colours # from globals
+                              , sts = sts
+                              , subtitle_colour= subtitle_colour)
+
+consurfP
+
+# PROVEAN count barplot (no y-axis title; shares the default geom_ls and sts sizes)
+proveanP = stability_count_bp(plotdf = df3
+                              , df_colname = "provean_outcome"
+                              #, leg_title = "PROVEAN"
+                              #, label_categories = labels_provean
+                              , yaxis_title = ""
+                              , leg_position = "top"
+                              , subtitle_text = "PROVEAN"
+                              , geom_ls = geom_ls
+                              , bar_fill_values = c("#D01C8B", "#F1B6DA") # deep pink, then light pink
+                              , sts = sts
+                              , subtitle_colour= subtitle_colour)
+
+# SNAP2 count barplot (same settings and fill colours as the PROVEAN plot)
+snap2P = stability_count_bp(plotdf = df3
+                            , df_colname = "snap2_outcome"
+                            #, leg_title = "SNAP2"
+                            #, label_categories = labels_snap2
+                            , yaxis_title = ""
+                            , leg_position = "top"
+                            , subtitle_text = "SNAP2"
+                            , geom_ls = geom_ls
+                            , bar_fill_values = c("#D01C8B", "#F1B6DA") # deep pink, then light pink
+                            , sts = sts
+                            , subtitle_colour= subtitle_colour)
+
+
+#============================
+# output: CONSERVATION PLOTS
+#============================
+# bp_conservation_CLP =  paste0(outdir_images
+#                               ,tolower(gene)
+#                               ,"_bp_conservation_CL.svg" )
+# 
+# print(paste0("plot filename:", bp_conservation_CLP))
+# svg(bp_conservation_CLP,  width = 15, height = 6.5)
+# 
+# cowplot::plot_grid(proveanP, snap2P, consurfP
+#                      , nrow = 1
+#                      , ncol = 3
+#                      #, labels = c("(a)", "(b)", "(c)", "(d)")
+#                      , labels = "AUTO"
+#                      , label_size = 25
+#                      #, rel_heights = c(0.4/10,9/10))
+#                      , rel_widths  = c(0.9, 0.9, 1.1))
+# 
+# 
+# dev.off()
+
+
+
+
+
+
 ###########################################################
 #=========================
 # Affinity outcome
@ -264,74 +358,7 @@ ppi2P = stability_count_bp(plotdf = df3_ppi2
 # dev.off()

 ################################################################
-#=========================
-# Conservation outcome
-# check this var:
-outcome_cols_conservation
-#==========================
-# consurf 
-consurfP = stability_count_bp(plotdf = df3
-                              , df_colname = "consurf_outcome"
-                              #, leg_title = "ConSurf"
-                              #, label_categories = labels_consurf
-                              , yaxis_title = "Number of nsSNPs"
-                              , leg_position = "top"
-                              , subtitle_text = "ConSurf"
-                              , geom_ls = 5
-                              , bar_fill_values = consurf_colours # from globals
-                              , sts = sts
-                              , subtitle_colour= subtitle_colour)

-consurfP
-
-# provean
-proveanP = stability_count_bp(plotdf = df3
-                              , df_colname = "provean_outcome"
-                              #, leg_title = "PROVEAN"
-                              #, label_categories = labels_provean
-                              , yaxis_title = ""
-                              , leg_position = "top"
-                              , subtitle_text = "PROVEAN"
-                              , geom_ls = geom_ls
-                              , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
-                              , sts = sts
-                              , subtitle_colour= subtitle_colour)
-
-# snap2
-snap2P = stability_count_bp(plotdf = df3
-                            , df_colname = "snap2_outcome"
-                            #, leg_title = "SNAP2"
-                            #, label_categories = labels_snap2
-                            , yaxis_title = ""
-                            , leg_position = "top"
-                            , subtitle_text = "SNAP2"
-                            , geom_ls = geom_ls
-                            , bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
-                            , sts = sts
-                            , subtitle_colour= subtitle_colour)
-
-
-#============================
-# output: CONSERVATION PLOTS
-#============================
-# bp_conservation_CLP =  paste0(outdir_images
-#                               ,tolower(gene)
-#                               ,"_bp_conservation_CL.svg" )
-# 
-# print(paste0("plot filename:", bp_conservation_CLP))
-# svg(bp_conservation_CLP,  width = 15, height = 6.5)
-# 
-# cowplot::plot_grid(proveanP, snap2P, consurfP
-#                      , nrow = 1
-#                      , ncol = 3
-#                      #, labels = c("(a)", "(b)", "(c)", "(d)")
-#                      , labels = "AUTO"
-#                      , label_size = 25
-#                      #, rel_heights = c(0.4/10,9/10))
-#                      , rel_widths  = c(0.9, 0.9, 1.1))
-# 
-# 
-# dev.off()
 #####################################################################
 #============
 # Plot labels
@ -457,6 +484,41 @@ OutPlotBP()
 dev.off()

 #####################################################################
+# test: the dplyr per-position count should equal the data.table count it replaces
+
+# NOTE(review): setDT() converts df3 to a data.table by reference and adds pos_count2 to it
+setDT(df3)[, pos_count2 := .N, by = "position"]
+foo = df3[, c("mutationinformation", "position")]
+df4 = foo[, c("mutationinformation", "position")]
+
+var_pos = "position"
+df4 = 
+  df4 %>% 
+  dplyr::add_count(.data[[var_pos]]) # count by the column named in var_pos; adds only `n`
+
+class(df4)
+df4 = as.data.frame(df4)
+class(df4)
+nc_change = which(colnames(df4) == "n")
+colnames(df4)[nc_change] <- "pos_count"
+class(df4)
+
+setDT(df4)[, pos_count2 := .N, by = "position"]
+class(df4)
+
+all(df4$pos_count==df4$pos_count2)
+
+# %>% 
+  #group_by(pos_count = position)
+
+# df4 = 
+#   df4 %>% 
+#   dplyr::group_by(position) %>%
+#   count(position)
+
+
+foo2 = df4[, c("mutationinformation", "position", "pos_count")]
+
 #####################################################################
 # ------------------------------
 # bp site site count: ALL