modified bp with option for adding stats and boxplplots. Moved old one to redundant

This commit is contained in:
Tanushree Tunstall 2021-09-02 12:50:24 +01:00
parent 826d3c72b7
commit fcb4b85747
8 changed files with 443 additions and 102 deletions

193
scripts/functions/lf_bp.R Normal file
View file

@ -0,0 +1,193 @@
#############################
# Barplots: ggplot
# stats +/-
# violin +/-
# barplot +/
# beeswarm
#############################
lf_bp <- function(lf_df
, p_title = ""
, colour_categ = ""
, x_grp = "mutation_info"
, y_var = "param_value"
, facet_var = "param_type"
, n_facet_row = 1
, y_scales = "free_y"
, colour_bp_strip = "khaki2"
, dot_size = 3
, dot_transparency = 0.3
, violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL
, my_ats = 22 # axis text size
, my_als = 20 # axis label size
, my_fls = 20 # facet label size
, my_pts = 22 # plot title size)
, make_boxplot = FALSE
, bp_width = c("auto", 0.5)
, add_stats = FALSE
, stat_grp_comp = c("DM", "OM")
, stat_method = "wilcox.test"
, my_paired = FALSE
, stat_label = c("p.format", "p.signif") ){
p1 <- ggplot(lf_df, aes(x = eval(parse(text = x_grp))
, y = eval(parse(text = y_var)) )) +
facet_wrap(~ eval(parse(text = facet_var))
, nrow = n_facet_row
, scales = y_scales) +
geom_violin(trim = T
, scale = "width"
#, position = position_dodge(width = 0.9)
, draw_quantiles = violin_quantiles)
if (make_boxplot){
if (bp_width == "auto"){
bp_width = 0.5/length(unique(lf_df[[x_grp]]))
cat("\nAutomatically calculated boxplot width, using bp_width:\n", bp_width, "\n")
}else{
cat("\nBoxplot width value provided, using:", bp_width, "\n")
bp_width = bp_width}
p2 = p1 + geom_boxplot(fill = "white"
, outlier.colour = NA
#, position = position_dodge(width = 0.9)
, width = bp_width) +
geom_beeswarm(priority = "density"
#, shape = 21
, size = dot_size
, alpha = dot_transparency
, show.legend = FALSE
, cex = 0.8
, aes(colour = factor(eval(parse(text = colour_categ))) ))
} else {
# ggbeeswarm (better than geom_point)
p2 = p1 + geom_beeswarm(priority = "density"
#, shape = 21
, size = dot_size
, alpha = dot_transparency
, show.legend = FALSE
, cex = 0.8
, aes(colour = factor(eval(parse(text = colour_categ))) ))
}
# Add foramtting to graph
OutPlot = p2 + theme(axis.text.x = element_text(size = my_ats)
, axis.text.y = element_text(size = my_ats
, angle = 0
, hjust = 1
, vjust = 0)
, axis.title.x = element_text(size = my_ats)
, axis.title.y = element_text(size = my_ats)
, plot.title = element_text(size = my_pts
, hjust = 0.5
, colour = "black"
, face = "bold")
, strip.background = element_rect(fill = colour_bp_strip)
, strip.text.x = element_text(size = my_fls
, colour = "black")
, legend.title = element_text(color = "black"
, size = my_als)
, legend.text = element_text(size = my_ats)
, legend.direction = "vertical") +
labs(title = p_title
, x = ""
, y = "")
if (add_stats){
my_comparisonsL <- list( stat_grp_comp )
OutPlot = OutPlot + stat_compare_means(comparisons = my_comparisonsL
, method = stat_method
, paired = my_paired
, label = stat_label[1])
}
return(OutPlot)
}
#############################
# Barplot NO stats: plotly
# violin +/-
# barplot +/
# beeswarm
# TODO: plot_ly()
#############################
lf_bp_plotly <- function(lf_df
, p_title = ""
, colour_categ = ""
, x_grp = mutation_info
, y_var = param_value
, facet_var = param_type
, n_facet_row = 1
, y_scales = "free_y"
, colour_bp_strip = "khaki2"
, dot_size = 3
, dot_transparency = 0.3
, violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL
, my_ats = 20 # axis text size
, my_als = 18 # axis label size
, my_fls = 18 # facet label size
, my_pts = 22 # plot title size)
#, make_boxplot = FALSE
, bp_width = c("auto", 0.5)
#, add_stats = FALSE
#, stat_grp_comp = c("DM", "OM")
#, stat_method = "wilcox.test"
#, my_paired = FALSE
#, stat_label = c("p.format", "p.signif")
){
OutPlotly = ggplot(lf_df, aes(x = eval(parse(text = x_grp))
, y = eval(parse(text = y_var))
, label1 = x_grp
, label2 = y_var
, lable3 = colour_categ) ) +
facet_wrap(~ eval(parse(text = facet_var))
, nrow = n_facet_row
, scales = y_scales) +
geom_violin(trim = T
, scale = "width"
, draw_quantiles = violin_quantiles) +
geom_beeswarm(priority = "density"
, size = dot_size
, alpha = dot_transparency
, show.legend = FALSE
, cex = 0.8
, aes(colour = factor(eval(parse(text = colour_categ) ) ) ) ) +
theme(axis.text.x = element_text(size = my_ats)
, axis.text.y = element_text(size = my_ats
, angle = 0
, hjust = 1
, vjust = 0)
, axis.title.x = element_text(size = my_ats)
, axis.title.y = element_text(size = my_ats)
, plot.title = element_text(size = my_pts
, hjust = 0.5
, colour = "black"
, face = "bold")
, strip.background = element_rect(fill = colour_bp_strip)
, strip.text.x = element_text(size = my_fls
, colour = "black")
, legend.title = element_text(color = "black"
, size = my_als)
, legend.text = element_text(size = my_ats)
, legend.position = "none")+
labs(title = p_title
, x = ""
, y = "")
OutPlotly = ggplotly(OutPlotly
#, tooltip = c("label")
)
return(OutPlotly)
}

View file

@ -14,13 +14,23 @@ lf_bp_with_stats <- function(lf_df
, stat_grp_comp = c("DM", "OM")
, stat_method = "wilcox.test"
, my_paired = FALSE
#, stat_label = "p.format")
, bp_width = c("auto", 0.5)
, dot_size = 3
, dot_transparency = 0.3
, stat_label = c("p.format", "p.signif")
, my_ats = 22 # axis text size
, my_als = 20 # axis label size
, my_fls = 20 # facet label size
, my_pts = 22 # plot title size
) {
if (bp_width == "auto"){
bp_width = 0.5/length(unique(lf_df[[x_grp]]))
cat("\nAutomatically calculated boxplot width, using bp_width:\n", bp_width, "\n")
}else{
cat("\nBoxplot width value provided, using:", bp_width, "\n")
bp_width = bp_width
}
my_comparisonsL <- list( stat_grp_comp )
bp_statP <- ggplot(lf_df, aes(x = eval(parse(text = x_grp))
@ -30,15 +40,30 @@ lf_bp_with_stats <- function(lf_df
, nrow = n_facet_row
, scales = y_scales) +
geom_boxplot(fill = "white", outlier.colour = NA
#, position = position_dodge(width = 0.9)
, width = 0.2) +
geom_point(position = position_jitterdodge(dodge.width = 0.01)
, alpha = 0.5
, show.legend = FALSE
, aes(colour = factor(eval(parse(text = colour_categ))) )) +
geom_violin(trim = T
, scale = "width"
#, position = position_dodge(width = 0.9)
, draw_quantiles = c(0.25, 0.5, 0.75)) +
# geom_boxplot(fill = "white"
# , outlier.colour = NA
# #, position = position_dodge(width = 0.9)
# , width = bp_width) +
# geom_point(position = position_jitterdodge(dodge.width = 0.5)
# , alpha = 0.5
# , show.legend = FALSE
# , aes(colour = factor(eval(parse(text = colour_categ))) )) +
# ggbeeswarm (better than geom_point)
geom_beeswarm(priority = "density"
#, shape = 21
, size = dot_size
, alpha = dot_transparency
, show.legend = FALSE
, cex = 0.8
, aes(colour = factor(eval(parse(text = colour_categ))) )) +
theme(axis.text.x = element_text(size = my_ats)
, axis.text.y = element_text(size = my_ats
, angle = 0
@ -46,10 +71,15 @@ lf_bp_with_stats <- function(lf_df
, vjust = 0)
, axis.title.x = element_text(size = my_ats)
, axis.title.y = element_text(size = my_ats)
, plot.title = element_text(size = my_pts , hjust = 0.5, colour = "black", face = "bold")
, plot.title = element_text(size = my_pts
, hjust = 0.5
, colour = "black"
, face = "bold")
, strip.background = element_rect(fill = colour_bp_strip)
, strip.text.x = element_text(size = my_fls, colour = "black")
, legend.title = element_text(color = "black", size = my_als)
, strip.text.x = element_text(size = my_fls
, colour = "black")
, legend.title = element_text(color = "black"
, size = my_als)
, legend.text = element_text(size = my_ats)
, legend.direction = "vertical") +
@ -60,7 +90,6 @@ lf_bp_with_stats <- function(lf_df
stat_compare_means(comparisons = my_comparisonsL
, method = stat_method
, paired = my_paired
#, label = "p.format")
, label = stat_label[1])
return(bp_statP)

View file

@ -0,0 +1,83 @@
setwd("~/git/LSHTM_analysis/scripts/plotting/")
source("../functions/lf_bp_with_stats.R")
source("../functions/lf_bp.R")
######################
# Make plot
######################
# Note: Data
# run other_plots_data.R
# to get the long format data to test this function
lf_bp(lf_df = lf_dynamut2
, p_title = "Dynamut2"
, colour_categ = "ddg_dynamut2_outcome"
, x_grp = "mutation_info"
, y_var = "param_value"
, facet_var = "param_type"
, n_facet_row = 1
, y_scales = "free_y"
, colour_bp_strip = "khaki2"
, dot_size = 3
, dot_transparency = 0.3
, violin_quantiles = c(0.25, 0.5, 0.75)
, my_ats = 22 # axis text size
, my_als = 20 # axis label size
, my_fls = 20 # facet label size
, my_pts = 22 # plot title size
, make_boxplot = F
, bp_width = "auto"
, add_stats = T
, stat_grp_comp = c("DM", "OM")
, stat_method = "wilcox.test"
, my_paired = FALSE
, stat_label = c("p.format", "p.signif") )
# foo = lf_dynamut2 %>%
# group_by(mutation_info, param_type) %>%
# summarise( Mean = mean(param_value, na.rm = T)
# , SD = sd(param_value, na.rm = T)
# , Median = median(param_value, na.rm = T)
# , IQR = IQR(param_value, na.rm = T) )
# Quick tests
plotdata_sel = subset(lf_dynamut2
, lf_dynamut2$param_type == "ASA")
plot_sum = plotdata_sel %>%
group_by(mutation_info, param_type) %>%
summarise(n = n()
, Mean = mean(param_value, na.rm = T)
, SD = sd(param_value, na.rm = T)
, Min = min(param_value, na.rm = T)
, Q1 = quantile(param_value, na.rm = T, 0.25)
, Median = median(param_value, na.rm = T)
, Q3 = quantile(param_value, na.rm = T, 0.75)
, Max = max(param_value, na.rm = T) ) %>%
rename('Mutation Class' = mutation_info
, Parameter = param_type)
plot_sum = as.data.frame(plot_sum, row.names = NULL)
plot_sum
bar = compare_means(param_value ~ mutation_info
, group.by = "param_type"
, data = plotdata_sel
, paired = FALSE
, p.adjust.method = "BH")
bar2 = bar[c("param_type"
, "group1"
, "group2"
, "p.format"
, "p.signif"
, "p.adj")] %>%
rename(Parameter = param_type
, Group1 = group1
, Group2 = group2
, "P-value" = p.format
, "P-sig" = p.signif
, "P-adj" = p.adj)
bar2 = data.frame(bar2); bar2
library(Hmisc)
describe(lf_dynamut2)

View file

@ -0,0 +1,55 @@
setwd("~/git/LSHTM_analysis/scripts/plotting/")
source("Header_TT.R")
source("../functions/lf_bp.R")
# ================================================
# Data: run get_plotting_data.R
# to get the long format data to test this function
# drug = "streptomycin"
# gene = "gid"
# source("get_plotting_dfs.R")
# ==================================================
######################
# Make plot: ggplot
######################
lf_bp(lf_df = lf_dynamut2
, p_title = "Dynamut2"
, colour_categ = "ddg_dynamut2_outcome"
, x_grp = "mutation_info"
, y_var = "param_value"
, facet_var = "param_type"
, n_facet_row = 1
, y_scales = "free_y"
, colour_bp_strip = "khaki2"
, dot_size = 3
, dot_transparency = 0.3
, violin_quantiles = c(0.25, 0.5, 0.75)
, my_ats = 22 # axis text size
, my_als = 20 # axis label size
, my_fls = 20 # facet label size
, my_pts = 22 # plot title size
, make_boxplot = F
, bp_width = "auto"
, add_stats = T
, stat_grp_comp = c("DM", "OM")
, stat_method = "wilcox.test"
, my_paired = FALSE
, stat_label = c("p.format", "p.signif") )
######################
# Make plot: plotly
######################
# FIXME: This labels are not working as I want!
# lf_bp_plotly(lf_df = lf_deepddg
# , p_title = "DeepDDG"
# , colour_categ = "deepddg_outcome"
# , x_grp = "mutation_info"
# , y_var = "param_value"
# , facet_var = "param_type"
# , n_facet_row = 1
# , y_scales = "free_y"
# , colour_bp_strip = "khaki2"
# , dot_size = 3
# , dot_transparency = 0.3
# , violin_quantiles = c(0.25, 0.5, 0.75)
# )

View file

@ -1,28 +0,0 @@
setwd("~/git/LSHTM_analysis/scripts/plotting/")
source("../functions/lf_bp_with_stats.R")
######################
# call function
######################
# Note: Data
# run other_plots_data.R
# to get the long format data to test this function
lf_bp_with_stats(lf_df = lf_dynamut2
, x_grp = "mutation_info"
, y_var = "param_value"
, facet_var = "param_type"
, n_facet_row = 1
, y_scales = "free_y"
, p_title = "Dynamut2"
, colour_categ = "ddg_dynamut2_outcome"
, stat_grp_comp = c("DM", "OM")
, stat_method = "wilcox.test"
, my_paired = FALSE
#, stat_label = "p.format")
, stat_label = c("p.format", "p.signif")
, my_ats = 22 # axis text size
, my_als = 20 # axis label size
, my_fls = 20 # facet label size
, my_pts = 22 )# plot title size