LSHTM_analysis/scripts/plotting/plotting_thesis/basic_barplots.R

586 lines
19 KiB
R
Executable file

#!/usr/bin/env Rscript
#########################################################
# TASK: Barplots for mCSM DUET, ligand affinity, and foldX
# basic barplots with count of mutations
# basic barplots with frequency of count of mutations
# , df_colname = ""
# , leg_title = ""
# , ats = 25 # axis text size
# , als = 22 # axis label size
# , lts = 20 # legend text size
# , ltis = 22 # label title size
# , geom_ls = 10 # geom_label size
# , yaxis_title = "Number of nsSNPs"
# , bp_plot_title = ""
# , label_categories = c("Destabilising", "Stabilising")
# , title_colour = "chocolate4"
# , subtitle_text = NULL
# , sts = 20
# , subtitle_colour = "pink"
# #, leg_position = c(0.73,0.8) # within plot area
# , leg_position = "top"
# , bar_fill_values = c("#F8766D", "#00BFC4")
#########################################################
#=============
# Data: Input
#==============
#source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
# Inspect the merged data and coerce it to a plain data.frame.
class(merged_df3)
merged_df3 <- as.data.frame(merged_df3)
class(df3)

# Rename the "pos_count" column to "df2_pos_count_all".
# The head() calls print the column before and after the rename.
head(merged_df3$pos_count)
nc_pc_CHANGE <- which(names(merged_df3) == "pos_count")
names(merged_df3)[nc_pc_CHANGE] <- "df2_pos_count_all"
head(merged_df3$pos_count)
head(merged_df3$df2_pos_count_all)

# Drop any column that is still called "pos_count"
# (alternative form: merged_df3$pos_count <- NULL).
merged_df3 <- merged_df3[, !(names(merged_df3) %in% c("pos_count"))]
head(merged_df3$pos_count)

# Plotting data: only the columns listed in plotting_cols.
df3 <- merged_df3[, names(merged_df3) %in% plotting_cols]
#=======
# output
#=======
# Directory that receives the figures. `gene` is not defined in this file;
# it is expected to come from the sourced config -- TODO confirm.
# The trailing "/" matters: file names are appended with paste0() below.
outdir_images <- paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
# End the message with a newline so later console output starts on its own line.
cat("plots will output to: ", outdir_images, "\n", sep = "")
###########################################################
# ConSurf labels
#------------------------------
# plot default sizes
#------------------------------
# Shared defaults passed to the barplot calls below.
sts <- 22                     # forwarded as `sts` (presumably subtitle text size)
subtitle_colour <- "black"    # forwarded as `subtitle_colour`
geom_ls <- 10                 # forwarded as `geom_ls` (geom_label size per file header)
##############################################################
#------------------------------
# Stability barplots
#------------------------------
# Print the stability outcome columns for reference.
outcome_stability_cols
# NOTE: if label_categories is supplied it should be
# levels(as.factor(plot_df[[df_colname]])); it is left at its default here.

# mCSM-DUET
duetP <- stability_count_bp(
  plotdf = df3,
  df_colname = "duet_outcome",
  leg_title = "mCSM-DUET",
  yaxis_title = "Number of nsSNPs",
  leg_position = "none",
  subtitle_text = "mCSM-DUET",
  geom_ls = geom_ls,
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = sts,
  subtitle_colour = subtitle_colour
)

# FoldX
foldxP <- stability_count_bp(
  plotdf = df3,
  df_colname = "foldx_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "FoldX",
  geom_ls = geom_ls,
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = sts,
  subtitle_colour = subtitle_colour
)

# DeepDDG
deepddgP <- stability_count_bp(
  plotdf = df3,
  df_colname = "deepddg_outcome",
  yaxis_title = "Number of nsSNPs",
  leg_position = "none",
  subtitle_text = "DeepDDG",
  geom_ls = geom_ls,
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = sts,
  subtitle_colour = subtitle_colour
)

# Dynamut2
dynamut2P <- stability_count_bp(
  plotdf = df3,
  df_colname = "ddg_dynamut2_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "Dynamut2",
  geom_ls = geom_ls,
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = sts,
  subtitle_colour = subtitle_colour
)
# Display the last plot as a visual check.
dynamut2P
# # extract common legend
# common_legend = get_legend(duetP +
# guides(color = guide_legend(nrow = 1)) +
# theme(legend.position = "top"))
#
# #==========================
# #output: STABILITY PLOTS
# #===========================
# bp_stability_CLP = paste0(outdir_images
# , tolower(gene)
# ,"_bp_stability_CL.svg")
#
# svg(bp_stability_CLP, width = 15, height = 12)
# print(paste0("plot filename:", bp_stability_CLP))
#
# cowplot::plot_grid(
# common_legend,
# cowplot::plot_grid(duetP, foldxP
# , deepddgP, dynamut2P
# , nrow = 2
# , ncol = 2
# #, labels = c("(a)", "(b)", "(c)", "(d)")
# , labels = "AUTO"
# , label_size = 25)
# , ncol = 1
# , nrow = 2
# , rel_heights = c(0.4/10,9/10))
#
# dev.off()
###########################################################
#=========================
# Conservation outcome
# check this var:
# Print the conservation outcome columns for reference.
outcome_conservation_cols
# Sanity checks on the ConSurf columns: do the two columns agree, which
# factor levels are present, and how many mutations fall in each category.
all(df3$consurf_colour_rev == df3$consurf_outcome)
levels(df3[["consurf_outcome"]])
table(df3$consurf_outcome)

# Quick-look bar chart of ConSurf categories, filled with consurf_colours.
# Plain aes() column references replace the deprecated aes_string() and the
# eval(parse(text = ...)) construct; axis label and legend are unchanged
# because the legend name is set explicitly to "".
ggplot(df3, aes(x = consurf_outcome)) +
  geom_bar(aes(fill = consurf_outcome), show.legend = TRUE) +
  scale_fill_manual(name = "", values = consurf_colours)
# ConSurf (label categories are deliberately not passed for this plot)
consurfP <- stability_count_bp(
  plotdf = df3,
  df_colname = "consurf_outcome",
  yaxis_title = "Number of nsSNPs",
  leg_position = "top",
  subtitle_text = "ConSurf",
  geom_ls = 5,
  bar_fill_values = consurf_colours, # from globals
  sts = sts,
  subtitle_colour = subtitle_colour
)
# Display the ConSurf plot as a visual check.
consurfP

# PROVEAN
proveanP <- stability_count_bp(
  plotdf = df3,
  df_colname = "provean_outcome",
  yaxis_title = "",
  leg_position = "top",
  subtitle_text = "PROVEAN",
  geom_ls = geom_ls,
  bar_fill_values = c("#D01C8B", "#F1B6DA"), # deep pink, light pink
  sts = sts,
  subtitle_colour = subtitle_colour
)

# SNAP2
snap2P <- stability_count_bp(
  plotdf = df3,
  df_colname = "snap2_outcome",
  yaxis_title = "",
  leg_position = "top",
  subtitle_text = "SNAP2",
  geom_ls = geom_ls,
  bar_fill_values = c("#D01C8B", "#F1B6DA"), # deep pink, light pink
  sts = sts,
  subtitle_colour = subtitle_colour
)
#============================
# output: CONSERVATION PLOTS
#============================
# bp_conservation_CLP = paste0(outdir_images
# ,tolower(gene)
# ,"_bp_conservation_CL.svg" )
#
# print(paste0("plot filename:", bp_conservation_CLP))
# svg(bp_conservation_CLP, width = 15, height = 6.5)
#
# cowplot::plot_grid(proveanP, snap2P, consurfP
# , nrow = 1
# , ncol = 3
# #, labels = c("(a)", "(b)", "(c)", "(d)")
# , labels = "AUTO"
# , label_size = 25
# #, rel_heights = c(0.4/10,9/10))
# , rel_widths = c(0.9, 0.9, 1.1))
#
#
# dev.off()
###########################################################
#=========================
# Affinity outcome
# check this var: outcome_cols_affinity
# get from preformatting or put in globals
#==========================
# Print the distance cut-off and the distance column names for reference.
DistCutOff
LigDist_colname # = "ligand_distance" # from globals
ppi2Dist_colname
naDist_colname
###########################################################
# Keep only the rows whose distance is below the cut-off.
# which() is used on purpose: a logical row index containing NA makes
# `[.data.frame` return an all-NA row for every NA, which would add phantom
# mutations to the counts. which() keeps only the rows that are TRUE.
df3_lig <- df3[which(df3[[LigDist_colname]] < DistCutOff), , drop = FALSE]
df3_ppi2 <- df3[which(df3[[ppi2Dist_colname]] < DistCutOff), , drop = FALSE]
df3_na <- df3[which(df3[[naDist_colname]] < DistCutOff), , drop = FALSE]
# Title fragment shared by the distance-filtered plots.
common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)
#------------------------------
# Ligand affinity (mCSM-lig)
# data: df3_lig (sites within the distance cut-off of the ligand)
#------------------------------
mLigP <- stability_count_bp(
  plotdf = df3_lig,
  df_colname = "ligand_outcome",
  yaxis_title = "Number of nsSNPs",
  leg_position = "none",
  subtitle_text = "mCSM-lig",
  geom_ls = geom_ls,
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = sts,
  subtitle_colour = subtitle_colour
)

#------------------------------
# Ligand affinity (mmCSM-lig)
# Same df3_lig sites as mCSM-lig, so the number of sites is the same,
# but the predicted effect will differ.
#------------------------------
mmLigP <- stability_count_bp(
  plotdf = df3_lig,
  df_colname = "mmcsm_lig_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "mmCSM-lig",
  geom_ls = geom_ls,
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = sts,
  subtitle_colour = subtitle_colour
)

#------------------------------
# Protein-protein interface affinity (mCSM-ppi2)
# data: df3_ppi2 (sites within the distance cut-off of the interface)
# This is the only affinity plot that sets bp_plot_title.
#------------------------------
ppi2P <- stability_count_bp(
  plotdf = df3_ppi2,
  df_colname = "mcsm_ppi2_outcome",
  yaxis_title = "",
  leg_position = "none",
  subtitle_text = "mCSM-ppi2",
  geom_ls = geom_ls,
  bar_fill_values = c("#F8766D", "#00BFC4"),
  sts = sts,
  subtitle_colour = subtitle_colour,
  bp_plot_title = paste(common_bp_title, "interface")
)
# # extract common legend
# common_legend_aff = get_legend(mLigP +
# guides(color = guide_legend(nrow = 1)) +
# theme(legend.position = "top"))
#
# #==========================
# # output: AFFINITY PLOTS
# #==========================
# bp_affinity_CLP = paste0(outdir_images
# ,tolower(gene)
# ,"_bp_affinity_CL.svg" )
#
# print(paste0("plot filename:", bp_stability_CLP))
# svg(bp_affinity_CLP, width = 15, height = 6.5)
#
# cowplot::plot_grid(
# common_legend,
# cowplot::plot_grid(mLigP, mmLigP
# , ppi2P
# , nrow = 1
# , ncol = 3
# #, labels = c("(a)", "(b)", "(c)", "(d)")
# , labels = "AUTO"
# , label_size = 25)
# , ncol = 1
# , nrow = 2
# , rel_heights = c(0.4/10,9/10))
# #, rel_widths = c(1,1,1))
#
#
# dev.off()
################################################################
#####################################################################
#============
# Plot labels
#============
# Section titles for the three rows of the combined figure.
tit1 <- "Stability outcome"
tit2 <- "Affinity outcome"
tit3 <- "Conservation outcome"
# Font size used for all three section titles.
pt_size <- 30

# theme_gray() with a "sans" base family and a bold plot title.
# Extra arguments are forwarded to theme_gray().
theme_georgia <- function(...) {
  bold_title <- theme(plot.title = element_text(face = "bold"))
  theme_gray(base_family = "sans", ...) + bold_title
}

# Resolved plot.title element; supplies font family and face for the titles.
title_theme <- calc_element("plot.title", theme_georgia())

# One title panel per section. Size comes from pt_size, not title_theme$size.
pt1 <- ggdraw() +
  draw_label(tit1,
             fontfamily = title_theme$family,
             fontface = title_theme$face,
             size = pt_size)

pt2 <- ggdraw() +
  draw_label(tit2,
             fontfamily = title_theme$family,
             fontface = title_theme$face,
             size = pt_size)

pt3 <- ggdraw() +
  draw_label(tit3,
             fontfamily = title_theme$family,
             fontface = title_theme$face,
             size = pt_size)

# Shared legend, taken from the mCSM-lig plot with the legend moved to the top.
common_legend_outcome <- get_legend(
  mLigP +
    guides(color = guide_legend(nrow = 1)) +
    theme(legend.position = "top")
)

# Size of the panel letters (A, B, ...) in the combined figure.
my_label_size <- 25
#======================
# Output plot function
#======================
# Assemble the combined figure: stability (A-D), affinity (E-G) and
# conservation (H-J) panels stacked in three rows.
#
# Reads the plot objects, title panels, legend and my_label_size from the
# calling environment (globals defined earlier in this script).
#
# x: unused; kept so existing calls and the signature stay unchanged.
# Returns the object produced by cowplot::plot_grid(). The caller must
# print/draw it.
OutPlotBP <- function(x) {
  # Row 1: title, shared legend, then a 2 x 2 grid of stability plots.
  stability_grid <- cowplot::plot_grid(
    duetP, foldxP, deepddgP, dynamut2P,
    nrow = 2,
    ncol = 2,
    labels = c("A", "B", "C", "D"),
    label_size = my_label_size
  )
  stability_row <- cowplot::plot_grid(
    pt1, common_legend_outcome, stability_grid,
    ncol = 1,
    rel_heights = c(7, 3, 90)
  )

  # Row 2: title, then the three affinity plots side by side.
  affinity_grid <- cowplot::plot_grid(
    mLigP, mmLigP, ppi2P,
    nrow = 1,
    ncol = 3,
    labels = c("E", "F", "G"),
    label_size = my_label_size
  )
  affinity_row <- cowplot::plot_grid(
    pt2, affinity_grid,
    ncol = 1,
    rel_heights = c(1, 9)
  )

  # Row 3: title, then the three conservation plots side by side.
  # The argument is `label_x`, not `labels_x`: the misspelt name is not a
  # formal of plot_grid(), so it was swallowed by `...` and treated as an
  # extra "plot" instead of shifting the panel letters.
  conservation_grid <- cowplot::plot_grid(
    consurfP, proveanP, snap2P,
    nrow = 1,
    ncol = 3,
    labels = c("H", "I", "J"),
    label_x = 0.2,
    label_size = my_label_size,
    rel_widths = c(0.2, 0.2, 0.2)
  )
  conservation_row <- cowplot::plot_grid(
    pt3, conservation_grid,
    ncol = 1,
    rel_heights = c(0.07, 0.93)
  )

  # Stack the three rows; heights are relative and need not sum to 1.
  cowplot::plot_grid(
    stability_row, affinity_row, conservation_row,
    nrow = 3,
    rel_heights = c(0.58, 0.25, 0.27),
    align = "hv"
  )
}
#=====================
# OutPlot: svg and png
#======================
#ratio 11.69 by 8.27
# Page size in inches: twice the 8.27 x 11.69 ratio noted above.
w <- 8.27 * 2
h <- 11.69 * 2

# svg
bp_all_CLP <- paste0(outdir_images, tolower(gene), "_bp_all_CL.svg")
cat("plot filename:", bp_all_CLP, "\n", sep = "")
svg(bp_all_CLP, width = w, height = h)
# print() explicitly: a plot object is only drawn by top-level auto-printing,
# which does not happen when this script is run through source(), so the
# device would otherwise be closed with nothing drawn on it.
print(OutPlotBP())
dev.off()

# png
bp_all_CLP_png <- paste0(outdir_images, tolower(gene), "_bp_all_CL.png")
cat("plot filename:", bp_all_CLP_png, "\n", sep = "")
png(bp_all_CLP_png, width = w, height = h, units = "in", res = 300)
print(OutPlotBP())
dev.off()
#####################################################################
# test
#
# setDT(df3)[, pos_count2 := .N, by = .(eval(parse(text = "position")))]
# foo = df3[, c("mutationinformation", "position")]
# df4 = foo[, c("mutationinformation", "position")]
#
#
# var_pos = "position"
# df4 =
# df4 %>%
# dplyr::add_count(eval(parse(text = var_pos)))
#
# class(df4)
# df4 = as.data.frame(df4)
# class(df4)
# nc_change = which(colnames(df4) == "n")
# colnames(df4)[nc_change] <- "pos_count"
# class(df4)
#
# setDT(df4)[, pos_count2 := .N, by = .(eval(parse(text = "position")))]
# class(df4)
#
# all(df4$pos_count==df4$pos_count2)
#
# # %>%
# #group_by(pos_count = position)
#
# # df4 =
# # df4 %>%
# # dplyr::group_by(position) %>%
# # count(position)
#foo2 = df4[, c("mutationinformation", "position", "pos_count")]
#####################################################################
# ------------------------------
# Site SNP count: ALL sites
# data: df3 (not distance-filtered)
# ------------------------------
posC_all <- site_snp_count_bp(
  plotdf = df3,
  df_colname = "position",
  xaxis_title = "Number of nsSNPs",
  yaxis_title = "Number of Sites",
  subtitle_size = 20
)

# ------------------------------
# Site SNP count: ligand sites
# data: df3_lig (sites within the distance cut-off of the ligand)
# No subtitle text is passed for this plot.
# ------------------------------
common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)
posC_lig <- site_snp_count_bp(
  plotdf = df3_lig,
  df_colname = "position",
  xaxis_title = "Number of nsSNPs",
  yaxis_title = "Number of Sites",
  subtitle_size = 8,
  subtitle_colour = subtitle_colour
)
# Display as a visual check.
posC_lig

# ------------------------------
# Site SNP count: interface sites
# data: df3_ppi2 (sites within the distance cut-off of the interface)
# ------------------------------
posC_ppi2 <- site_snp_count_bp(
  plotdf = df3_ppi2,
  df_colname = "position",
  xaxis_title = "Number of nsSNPs",
  yaxis_title = "Number of Sites",
  subtitle_text = paste0(common_bp_title, " interface"),
  subtitle_size = 20,
  subtitle_colour = subtitle_colour
)
# Display as a visual check.
posC_ppi2
# ------------------------------
#FIXME: bp site site count: na
# < 10 Ang TBC
# ------------------------------
# posC_na = site_snp_count_bp(plotdf = df3_na
# , df_colname = "position"
# , xaxis_title = ""
# , yaxis_title = "")
#===========================
# output: SITE SNP count:
# all + affinity
#==========================
# my_label_size = 25
# pos_count_combined_CLP = paste0(outdir_images
# ,tolower(gene)
# ,"_pos_count_PS_AFF.svg")
#
#
# svg(pos_count_combined_CLP, width = 20, height = 5.5)
# print(paste0("plot filename:", pos_count_combined_CLP))
#
# cowplot::plot_grid(posC_all, posC_lig, posC_ppi2
# #, posC_na
# , nrow = 1
# , ncol = 3
# , labels = "AUTO"
# , label_size = my_label_size)
#
# dev.off()
#===============================================================