359 lines
14 KiB
R
359 lines
14 KiB
R
#!/usr/bin/env Rscript
|
|
#########################################################
|
|
# TASK: producing histogram of AF and OR
|
|
# output svgs
|
|
# 1) hist of af
|
|
# 2) hist of or
|
|
# 3) hist of af_or combined
|
|
# 4) hist of af: from muts and samples dfs combined (EXPLORATORY!)
|
|
# 5) hist and barplots for af_or combined with median line
|
|
# 6) hist and barplots for af_or combined withOUT median line (Turned OFF)
|
|
|
|
#########################################################
|
|
#=======================================================================
|
|
# working dir and loading libraries
|
|
getwd()
|
|
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
|
getwd()
|
|
|
|
#source("Header_TT.R")
|
|
library(ggplot2)
|
|
library(data.table)
|
|
library(dplyr)
|
|
library(plyr)
|
|
#source("plotting_data.R")
|
|
source("combining_dfs_plotting.R")
|
|
|
|
#=======================================================================
|
|
#=======
|
|
# output
|
|
#=======
|
|
# plot: with median line on hist
|
|
af_or_combined_med = "hist_bp_muts_combined_median_labelled.svg"
|
|
plot_af_or_combined_med = paste0(plotdir, "/", af_or_combined_med)
|
|
|
|
# plot 6: without median line on hist
|
|
af_or_combined = "hist_bp_muts_combined.svg"
|
|
plot_af_or_combined = paste0(plotdir, "/", af_or_combined)
|
|
|
|
#=======================================================================
|
|
merged_df3_comp$mutation_info_labels = ifelse(merged_df3_comp$mutation_info == dr_muts_col, "DM", "OM")
|
|
table(merged_df3_comp$mutation_info_labels)
|
|
table(merged_df3_comp$mutation_info) == table(merged_df3_comp$mutation_info_labels)
|
|
sum(table(merged_df3_comp$mutation_info))
|
|
|
|
merged_df2_comp$mutation_info_labels = ifelse(merged_df2_comp$mutation_info == dr_muts_col, "DM", "OM")
|
|
table(merged_df2_comp$mutation_info_labels)
|
|
table(merged_df2_comp$mutation_info) == table(merged_df2_comp$mutation_info_labels)
|
|
sum(table(merged_df2_comp$mutation_info))
|
|
#================
|
|
# Data for plots
|
|
#================
|
|
# REASSIGNMENT as necessary
|
|
#df = my_df_u
|
|
|
|
df3 = merged_df3_comp
|
|
|
|
# Contains duplicated samples, so you need to remove that for AF hist
|
|
merged_df2_comp_u = merged_df2_comp[!duplicated(merged_df2_comp$id),]
|
|
if ( nrow(merged_df2_comp_u) == length(unique(merged_df2_comp$id)) ){
|
|
cat("PASS: duplicated samples ommitted. Assiging for plotting...")
|
|
df2 = merged_df2_comp_u
|
|
}else{
|
|
cat("FAIL: duplicated samples could not be ommitted. Length mismatch"
|
|
, "Expected nrows:", length(unique(merged_df2_comp$id))
|
|
, "Got nrows:", nrow(merged_df2_comp_u))
|
|
}
|
|
|
|
df2_af_median <- ddply(df2, "mutation_info_labels", summarise, grp.median = median(af, na.rm = T))
|
|
head(df2_af_median)
|
|
|
|
########################################################
|
|
#############
|
|
# ggplots
|
|
#############
|
|
my_ats = 15 # axis text size
|
|
my_als = 18 # axis label size
|
|
#theme_set(theme_grey())
|
|
|
|
#-----------
|
|
# AF: hist
|
|
#-----------
|
|
g_af_hist = ggplot(df3, aes(x = af)) +
|
|
geom_histogram(colour = "white") +
|
|
theme(axis.text.x = element_text(size = my_ats)
|
|
, axis.text.y = element_text(size = my_ats)
|
|
#, axis.title.y = element_blank()
|
|
, axis.title.x = element_text(size = my_ats)
|
|
#, axis.title.x = element_blank()
|
|
, axis.title.y = element_text(size = my_ats)
|
|
, axis.ticks.y = element_blank()
|
|
, plot.title = element_blank())+
|
|
#, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5))+
|
|
labs(title = "Minor Allele Frequency (MAF)"
|
|
, x = "MAF"
|
|
, y = "Count")
|
|
|
|
g_af_hist
|
|
#=====================================================================
|
|
#------------------------
|
|
# AF: hist coloured by
|
|
# mutation class in the
|
|
# same graph
|
|
#---------------------
|
|
#library(plyr)
|
|
df3_af_median <- ddply(df3, "mutation_info_labels", summarise, grp.median = median(af, na.rm = T))
|
|
head(df3_af_median)
|
|
|
|
g_af_hist_col = ggplot(df3, aes(x = af, fill = mutation_info_labels)) +
|
|
geom_histogram(position = "stack") +
|
|
scale_fill_manual(values = c("#E69F00", "#999999")) +
|
|
theme(axis.text.x = element_text(size = my_ats)
|
|
, axis.text.y = element_text(size = my_ats)
|
|
#, axis.title.y = element_blank()
|
|
, axis.title.x = element_text(size = my_ats)
|
|
#, axis.title.x = element_blank()
|
|
, axis.title.y = element_text(size = my_ats)
|
|
, axis.ticks.y = element_blank()
|
|
, plot.title = element_blank())+
|
|
labs(y = "Count"
|
|
, x = "Minor Allele Frequency"
|
|
)
|
|
g_af_hist_col
|
|
g_af_hist_col_med = g_af_hist_col +
|
|
geom_vline(data = df3_af_median, aes(xintercept = grp.median),linetype = "dashed")
|
|
|
|
g_af_hist_col_med
|
|
#=====================================================================
|
|
#---------------------------------
|
|
# AF: hist facet by mutation_class
|
|
#---------------------------------
|
|
g_af_mutinfo = ggplot(df3, aes(x = af
|
|
, fill = mutation_info_labels)) +
|
|
scale_fill_manual(values = c("#E69F00", "#999999")) +
|
|
|
|
geom_histogram() +
|
|
|
|
facet_grid(mutation_info_labels ~ ., scales = "free") +
|
|
#facet_wrap(mutation_info_labels ~ ., scales = "free") +
|
|
|
|
theme(axis.text.x = element_text(size = my_ats)
|
|
, axis.text.y = element_text(size = my_ats)
|
|
, axis.title.x = element_text(size = my_ats)
|
|
#, axis.title.y = element_blank()
|
|
, axis.title.y = element_text(size = my_ats)
|
|
, axis.ticks.y = element_blank()
|
|
#, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5)
|
|
, plot.title = element_blank()
|
|
#, strip.text = element_text(size = my_als)
|
|
, strip.text = element_blank()
|
|
, strip.background = element_blank()
|
|
, legend.text = element_text(size = my_als-4)
|
|
, legend.title = element_text(size = my_als-4)
|
|
, legend.position = c(0.8, 0.9)) +
|
|
|
|
labs(title = "Minor Allele Frequency (MAF)"
|
|
, x = "MAF"
|
|
, y = "Count"
|
|
, fill = "Mutation class")
|
|
|
|
g_af_mutinfo
|
|
g_af_mutinfo_med = g_af_mutinfo +
|
|
geom_vline(data = df3_af_median, aes(xintercept = grp.median),linetype = "dashed")
|
|
g_af_mutinfo_med
|
|
|
|
#=====================================================================
|
|
#------------------------
|
|
# AF: boxplot with stats
|
|
#------------------------
|
|
my_comparisons <- list( c("DM", "OM") )
|
|
|
|
g_af_bp = ggplot(df3, aes(x = mutation_info_labels
|
|
, y = af
|
|
, fill = mutation_info_labels))+
|
|
scale_fill_manual(values = c("#E69F00", "#999999")) +
|
|
geom_boxplot() +
|
|
theme(axis.text.x = element_text(size = my_ats)
|
|
, axis.text.y = element_text(size = my_ats)
|
|
, axis.title.x = element_text(size = my_ats)
|
|
#, axis.title.y = element_blank()
|
|
, axis.title.y = element_text(size = my_ats)
|
|
, axis.ticks.y = element_blank()
|
|
, plot.title = element_blank()
|
|
, strip.text = element_text(size = my_als)
|
|
, legend.text = element_text(size = my_als-4)
|
|
, legend.title = element_text(size = my_als-4)
|
|
, legend.position = "none") +
|
|
labs(y = "MAF"
|
|
, x = ""
|
|
, fill = "Mutation class")
|
|
|
|
g_af_bp_stats = g_af_bp + stat_compare_means(comparisons = my_comparisons
|
|
, method = "wilcox.test"
|
|
, paired = FALSE
|
|
#, label = "p.format"
|
|
, label = "p.signif")
|
|
g_af_bp_stats
|
|
|
|
######################################################################
|
|
# OR plots
|
|
######################################################################
|
|
#-----------
|
|
# OR: hist
|
|
#-----------
|
|
g_or_hist = ggplot(df3, aes(x = or_mychisq)) +
|
|
geom_histogram(colour = "white") +
|
|
theme(axis.text.x = element_text(size = my_ats)
|
|
, axis.text.y = element_text(size = my_ats)
|
|
#, axis.title.y = element_blank()
|
|
, axis.title.x = element_text(size = my_ats)
|
|
#, axis.title.x = element_blank()
|
|
, axis.title.y = element_text(size = my_ats)
|
|
, axis.ticks.y = element_blank()
|
|
, plot.title = element_blank())+
|
|
#, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5))+
|
|
labs(title = "Odds Ratio (OR)"
|
|
, x = "OR"
|
|
, y = "Count")
|
|
|
|
g_or_hist
|
|
#=====================================================================
|
|
#---------------------------------
|
|
# OR: hist facet by mutation_class
|
|
#---------------------------------
|
|
df3_or_median <- ddply(df3, "mutation_info_labels", summarise, grp.median = median(or_mychisq, na.rm = T))
|
|
head(df3_or_median)
|
|
|
|
g_or_mutinfo = ggplot(df3, aes(x = or_mychisq
|
|
, fill = mutation_info_labels)) +
|
|
scale_fill_manual(values = c("#E69F00", "#999999")) +
|
|
|
|
geom_histogram() +
|
|
|
|
facet_grid(mutation_info_labels ~ ., scales = "free") +
|
|
#facet_wrap(mutation_info_labels ~ ., scales = "free") +
|
|
scale_y_continuous(breaks = c(0, 15, 30, 45, 60, 75)) +
|
|
|
|
theme(axis.text.x = element_text(size = my_ats)
|
|
, axis.text.y = element_text(size = my_ats)
|
|
, axis.title.x = element_text(size = my_ats)
|
|
, axis.title.y = element_text(size = my_ats)
|
|
#, axis.title.y = element_blank()
|
|
, axis.ticks.y = element_blank()
|
|
#, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5)
|
|
, plot.title = element_blank()
|
|
#, strip.text = element_text(size = my_als)
|
|
, strip.text = element_blank()
|
|
, strip.background = element_blank()
|
|
, legend.text = element_text(size = my_als-4)
|
|
, legend.title = element_text(size = my_als-4)
|
|
, legend.position = c(0.8, 0.9))+
|
|
labs(title = "Odds Ratio (OR)"
|
|
, x = "OR"
|
|
, y = "Count"
|
|
, fill = "Mutation class")
|
|
|
|
g_or_mutinfo
|
|
g_or_mutinfo_med = g_or_mutinfo + geom_vline(data = df3_or_median, aes(xintercept = grp.median),
|
|
linetype = "dashed")
|
|
g_or_mutinfo_med
|
|
#=====================================================================
|
|
#------------------------
|
|
# OR: boxplot with stats
|
|
#------------------------
|
|
g_or_bp = ggplot(df3, aes(x = mutation_info_labels
|
|
, y = or_mychisq
|
|
, fill = mutation_info_labels))+
|
|
scale_fill_manual(values = c("#E69F00", "#999999")) +
|
|
geom_boxplot()+
|
|
theme(axis.text.x = element_text(size = my_ats)
|
|
, axis.text.y = element_text(size = my_ats)
|
|
, axis.title.x = element_text(size = my_ats)
|
|
#, axis.title.y = element_blank()
|
|
, axis.title.y = element_text(size = my_ats)
|
|
, axis.ticks.y = element_blank()
|
|
, plot.title = element_blank()
|
|
, strip.text = element_text(size = my_als)
|
|
, legend.text = element_text(size = my_als-4)
|
|
, legend.title = element_text(size = my_als-4)
|
|
, legend.position = "none") +
|
|
labs(y = "OR"
|
|
, x = ""
|
|
, fill = "Mutation class")
|
|
|
|
g_or_bp_stats = g_or_bp + stat_compare_means(comparisons = my_comparisons
|
|
, method = "wilcox.test"
|
|
, paired = FALSE
|
|
#, label = "p.format"
|
|
, label = "p.signif")
|
|
g_or_bp_stats
|
|
############################################################################
|
|
#==============================
|
|
# combine plots for outputs
|
|
#==============================
|
|
|
|
#-----------------------------------------
|
|
# combine the AF plots with overall title
|
|
#-----------------------------------------
|
|
p1_af = cowplot::plot_grid(g_af_hist
|
|
, g_af_mutinfo_med
|
|
#, g_af_mutinfo # without median line
|
|
, g_af_bp_stats
|
|
, nrow = 1
|
|
#, labels = c("(a)", "(b)", "(c)")
|
|
, labels = c("A", "B", "C")
|
|
, hjust = -1.5
|
|
, vjust = 0.2
|
|
, rel_widths = c(1.2/4, 2/4, 0.8/4)
|
|
, label_size = 18)
|
|
p1_af
|
|
|
|
title_af = ggdraw() + draw_label("Minor Allele Frequency (MAF)", fontface='bold', size = my_ats + 5)
|
|
p1_af_title = plot_grid(title_af, p1_af, ncol = 1, rel_heights = c(0.1, 1)) # rel_heights values control title margins
|
|
p1_af_title
|
|
|
|
#-----------------------------------------
|
|
# combine the OR plots with overall title
|
|
#-----------------------------------------
|
|
p2_or = cowplot::plot_grid(g_or_hist
|
|
, g_or_mutinfo_med
|
|
#, g_or_mutinfo # without median line
|
|
, g_or_bp_stats
|
|
, nrow = 1
|
|
#, labels = c("(d)", "(e)", "(f)")
|
|
, labels = c("D", "E", "F")
|
|
, hjust = -1.5
|
|
, vjust = 0.2
|
|
, rel_widths = c(1.2/4, 2/4, 0.8/4)
|
|
, label_size = 18)
|
|
p2_or
|
|
|
|
title_or = ggdraw() + draw_label("Odds Ratio (OR)", fontface = 'bold', size = my_ats + 5)
|
|
p2_or_title = plot_grid(title_or, p2_or, ncol = 1, rel_heights = c(0.1, 1)) # rel_heights values control title margins
|
|
p2_or_title
|
|
|
|
|
|
#----------------------------------------
|
|
# Plot 1: Combined AF and OR hist plots
|
|
# with median line on hist
|
|
#----------------------------------------
|
|
print(paste0("plot combined filename:", plot_af_or_combined_med))
|
|
svg(plot_af_or_combined_med, width = 15, height = 9)
|
|
|
|
p_combined_med = cowplot::plot_grid(p1_af_title, p2_or_title, nrow = 2)
|
|
p_combined_med
|
|
|
|
dev.off()
|
|
|
|
#----------------------------------------
|
|
# Plot 2: Combined AF and OR hist plots
|
|
# with median line on hist
|
|
#----------------------------------------
|
|
# uncomment the correct vars in p1_af and p2_or to generate this
|
|
#print(paste0("plot combined filename:", plot_af_or_combined))
|
|
#svg(plot_af_or_combined, width = 15, height = 9)
|
|
|
|
#p_combined = cowplot::plot_grid(p1_af_title, p2_or_title, nrow = 2)
|
|
#p_combined
|
|
#dev.off()
|
|
######################################################################
|