saving work minor changes perhaps
This commit is contained in:
parent
7158f5b2c9
commit
bf3e830f64
2 changed files with 149 additions and 216 deletions
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: producing boxplots for dr and other muts
|
||||
# TASK: checking muts with extreme effects and other
|
||||
# quick analysis
|
||||
|
||||
#########################################################
|
||||
#=======================================================================
|
||||
|
@ -26,6 +27,34 @@ rm(df, merged_df3_short, df_output)
|
|||
#===============================================================
|
||||
df_comp = df_ordered[!is.na(df_ordered$af),]
|
||||
|
||||
#===============================================================
|
||||
#============
|
||||
# quick checks
|
||||
#============
|
||||
merged_df3_comp = merged_df3[!is.na(merged_df3$af),]
|
||||
|
||||
# AF
|
||||
round(summary(merged_df3_comp$af), 4)*100
|
||||
round(tapply(merged_df3_comp$af, list(merged_df3_comp$mutation_info), median), 4)*100
|
||||
ks.test(merged_df3_comp$af[merged_df3_comp$mutation_info == dr_muts_col]
|
||||
, merged_df3_comp$af[merged_df3_comp$mutation_info == other_muts_col])
|
||||
|
||||
# OR
|
||||
round(summary(merged_df3_comp$or_mychisq), 4)
|
||||
tapply(merged_df3_comp$or_mychisq, list(merged_df3_comp$mutation_info), median)
|
||||
ks.test(merged_df3_comp$or_mychisq[merged_df3_comp$mutation_info == dr_muts_col]
|
||||
, merged_df3_comp$or_mychisq[merged_df3_comp$mutation_info == other_muts_col])
|
||||
|
||||
# adjusted OR
|
||||
merged_df3_comp_v2 = merged_df3[!is.na(merged_df3$or_kin),]
|
||||
|
||||
round(summary(merged_df3_comp_v2$or_kin), 4)
|
||||
tapply(merged_df3_comp_v2$or_kin, list(merged_df3_comp_v2$mutation_info), median)
|
||||
ks.test(merged_df3_comp_v2$or_kin[merged_df3_comp_v2$mutation_info == dr_muts_col]
|
||||
, merged_df3_comp_v2$or_kin[merged_df3_comp_v2$mutation_info == other_muts_col])
|
||||
|
||||
|
||||
|
||||
#%%%%%%%%%%%%%%%%%%%%%
|
||||
# REASSIGNMENT
|
||||
df = df_comp
|
||||
|
|
|
@ -24,30 +24,11 @@ library(plyr)
|
|||
#source("plotting_data.R")
|
||||
source("combining_dfs_plotting.R")
|
||||
|
||||
# clear excess variable
|
||||
rm(my_df, upos, dup_muts, my_df_u_lig)
|
||||
|
||||
#=======================================================================
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
# plot 1
|
||||
hist_af_muts = "hist_mutations_AF.svg"
|
||||
plot_hist_af_muts = paste0(plotdir,"/", hist_af_muts)
|
||||
|
||||
# plot 2
|
||||
hist_or_muts = "hist_mutations_OR.svg"
|
||||
plot_hist_or_muts = paste0(plotdir,"/", hist_or_muts)
|
||||
|
||||
# plot 3
|
||||
hist_af_muts_sample = "hist_af_muts_sample_combined.svg"
|
||||
plot_hist_af_muts_sample = paste0(plotdir,"/", hist_af_muts_sample)
|
||||
|
||||
# plot 4
|
||||
hist_af_or = "hist_af_or_combined.svg"
|
||||
plot_hist_af_or = paste0(plotdir,"/", hist_af_or)
|
||||
|
||||
# plot 5
|
||||
# plot: with median line on hist
|
||||
af_or_combined_med = "hist_bp_muts_combined_median.svg"
|
||||
plot_af_or_combined_med = paste0(plotdir, "/", af_or_combined_med)
|
||||
|
||||
|
@ -59,16 +40,17 @@ plot_af_or_combined = paste0(plotdir, "/", af_or_combined)
|
|||
merged_df3_comp$mutation_info_labels = ifelse(merged_df3_comp$mutation_info == dr_muts_col, "DM", "OM")
|
||||
table(merged_df3_comp$mutation_info_labels)
|
||||
table(merged_df3_comp$mutation_info) == table(merged_df3_comp$mutation_info_labels)
|
||||
sum(table(merged_df3_comp$mutation_info))
|
||||
|
||||
merged_df2_comp$mutation_info_labels = ifelse(merged_df2_comp$mutation_info == dr_muts_col, "DM", "OM")
|
||||
table(merged_df2_comp$mutation_info_labels)
|
||||
table(merged_df2_comp$mutation_info) == table(merged_df2_comp$mutation_info_labels)
|
||||
|
||||
sum(table(merged_df2_comp$mutation_info))
|
||||
#================
|
||||
# Data for plots
|
||||
#================
|
||||
# REASSIGNMENT as necessary
|
||||
df = my_df_u
|
||||
#df = my_df_u
|
||||
|
||||
df3 = merged_df3_comp
|
||||
|
||||
|
@ -86,164 +68,17 @@ if ( nrow(merged_df2_comp_u) == length(unique(merged_df2_comp$id)) ){
|
|||
df2_af_median <- ddply(df2, "mutation_info_labels", summarise, grp.median = median(af, na.rm = T))
|
||||
head(df2_af_median)
|
||||
|
||||
|
||||
#=======================================================================
|
||||
#****************
|
||||
# Plot 1: AF distribution: mutations
|
||||
#****************
|
||||
svg(plot_hist_af_muts)
|
||||
print(paste0("plot1 filename:", plot_hist_af_muts))
|
||||
|
||||
#--------------
|
||||
# start plot 1
|
||||
#--------------
|
||||
#par(mar=c(b, l, t, r))
|
||||
par(mar=c(5,6,1,0))
|
||||
|
||||
hist(df3$af
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "Minor Allele Frequency"
|
||||
, ylab = "Frequency"
|
||||
, main = ""
|
||||
, cex.lab = 1.7
|
||||
, cex.axis = 1.5
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
|
||||
dev.off()
|
||||
|
||||
#****************
|
||||
# Plot 2: AF distribution: samples
|
||||
#****************
|
||||
#--------------
|
||||
# start plot 2
|
||||
#--------------
|
||||
#par(mar=c(b, l, t, r))
|
||||
par(mar=c(5,6,1,0))
|
||||
|
||||
hist(df2$af
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "Minor Allele Frequency"
|
||||
, ylab = "Frequency"
|
||||
, main = ""
|
||||
, cex.lab = 1.7
|
||||
, cex.axis = 1.5
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
|
||||
#****************
|
||||
# Plot 3: OF distribution: mutations
|
||||
#****************
|
||||
svg(plot_hist_or_muts)
|
||||
print(paste0("plot3 filename:", plot_hist_or_muts))
|
||||
|
||||
#--------------
|
||||
# start plot 3
|
||||
#--------------
|
||||
#par(mar=c(b, l, t, r))
|
||||
par(mar=c(5,6,1,0))
|
||||
|
||||
hist(df3$or_mychisq
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "Odds Ratio"
|
||||
, ylab = "Frequency"
|
||||
, main = ""
|
||||
, cex.lab = 1.7
|
||||
, cex.axis = 1.5
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
|
||||
dev.off()
|
||||
#====================================================================
|
||||
#==========
|
||||
# combine and output
|
||||
#==========
|
||||
#--------------
|
||||
# combine: af and or
|
||||
#-------------
|
||||
svg(plot_hist_af_or, width = 10, height = 8)
|
||||
print(paste0("plot3 filename:", plot_hist_af_or))
|
||||
#par(bty = "l")
|
||||
par(mfrow=c(2,1))
|
||||
par(mar=c(4.5, 5.5, 2, 0))
|
||||
hist(df3$af
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "Minor Allele Frequency (MAF)"
|
||||
, ylab = "Frequency"
|
||||
, main = ""
|
||||
, cex.lab = 1.3
|
||||
, cex.axis = 1.3
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
|
||||
# print the overall labels
|
||||
mtext(expression(bold('(a)')), side = 3, adj = -0.1, cex = 1.8)
|
||||
|
||||
hist(df3$or_mychisq
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "Odds Ratio (OR)"
|
||||
, ylab = "Frequency"
|
||||
, main = ""
|
||||
, cex.lab = 1.3
|
||||
, cex.axis = 1.3
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
|
||||
# print the overall labels
|
||||
mtext(expression(bold('(b)')), side = 3, adj = -0.1, cex = 1.8)
|
||||
dev.off()
|
||||
|
||||
|
||||
#--------------
|
||||
# combine: af (mutations and samples)
|
||||
#-------------
|
||||
svg(plot_hist_af_muts_sample, width = 10, height = 8)
|
||||
print(paste0("plot3 filename:", plot_hist_af_muts_sample))
|
||||
#par(bty = "l")
|
||||
par(mfrow = c(1,2))
|
||||
par(mar=c(4.5, 5.5, 2, 0))
|
||||
hist(df3$af
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "Minor Allele Frequency (MAF)"
|
||||
, ylab = "Frequency"
|
||||
, main = paste0(nrow(df3),"_pnca_mutations")
|
||||
, cex.lab = 1.3
|
||||
, cex.axis = 1.3
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
|
||||
hist(df2$af
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "Minor Allele Frequency (MAF)"
|
||||
, ylab = "Frequency"
|
||||
, main = paste0(nrow(df2),"_pnca_samples")
|
||||
, cex.lab = 1.3
|
||||
, cex.axis = 1.3
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
dev.off()
|
||||
|
||||
|
||||
########################################################
|
||||
#############
|
||||
# ggplots
|
||||
#############
|
||||
#library(plyr)
|
||||
df3_af_median <- ddply(df3, "mutation_info_labels", summarise, grp.median = median(af, na.rm = T))
|
||||
head(df3_af_median)
|
||||
|
||||
my_ats = 15 # axis text size
|
||||
my_als = 18 # axis label size
|
||||
#theme_set(theme_grey())
|
||||
|
||||
theme_set(theme_grey())
|
||||
|
||||
#-----------
|
||||
# AF: hist
|
||||
#-----------
|
||||
g_af_hist = ggplot(df3, aes(x = af)) +
|
||||
geom_histogram(colour = "white") +
|
||||
theme(axis.text.x = element_text(size = my_ats)
|
||||
|
@ -254,12 +89,19 @@ g_af_hist = ggplot(df3, aes(x = af)) +
|
|||
, axis.title.y = element_text(size = my_ats)
|
||||
, axis.ticks.y = element_blank()
|
||||
, plot.title = element_blank())+
|
||||
labs(y = "Count"
|
||||
, x = "Minor Allele Frequency"
|
||||
)
|
||||
#, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5))+
|
||||
labs(title = "Minor Allele Frequency (MAF)"
|
||||
, x = "MAF"
|
||||
, y = "Count")
|
||||
|
||||
g_af_hist
|
||||
#=====================================================================
|
||||
#------------------------
|
||||
# AF: hist coloured by
|
||||
# mutation class in the
|
||||
# same graph
|
||||
#---------------------
|
||||
#library(plyr)
|
||||
df3_af_median <- ddply(df3, "mutation_info_labels", summarise, grp.median = median(af, na.rm = T))
|
||||
head(df3_af_median)
|
||||
|
||||
|
@ -278,10 +120,14 @@ g_af_hist_col = ggplot(df3, aes(x = af, fill = mutation_info_labels)) +
|
|||
, x = "Minor Allele Frequency"
|
||||
)
|
||||
g_af_hist_col
|
||||
g_af_hist_col_med = g_af_hist_col + geom_vline(data = df3_af_median, aes(xintercept = grp.median),
|
||||
linetype = "dashed")
|
||||
g_af_hist_col_med = g_af_hist_col +
|
||||
geom_vline(data = df3_af_median, aes(xintercept = grp.median),linetype = "dashed")
|
||||
|
||||
g_af_hist_col_med
|
||||
#=====================================================================
|
||||
#---------------------------------
|
||||
# AF: hist facet by mutation_class
|
||||
#---------------------------------
|
||||
g_af_mutinfo = ggplot(df3, aes(x = af
|
||||
, fill = mutation_info_labels)) +
|
||||
scale_fill_manual(values = c("#E69F00", "#999999")) +
|
||||
|
@ -297,7 +143,8 @@ g_af_mutinfo = ggplot(df3, aes(x = af
|
|||
#, axis.title.y = element_blank()
|
||||
, axis.title.y = element_text(size = my_ats)
|
||||
, axis.ticks.y = element_blank()
|
||||
, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5)
|
||||
#, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5)
|
||||
, plot.title = element_blank()
|
||||
#, strip.text = element_text(size = my_als)
|
||||
, strip.text = element_blank()
|
||||
, strip.background = element_blank()
|
||||
|
@ -311,18 +158,21 @@ g_af_mutinfo = ggplot(df3, aes(x = af
|
|||
, fill = "Mutation class")
|
||||
|
||||
g_af_mutinfo
|
||||
g_af_mutinfo_med = g_af_mutinfo + geom_vline(data = df3_af_median, aes(xintercept = grp.median),
|
||||
linetype = "dashed")
|
||||
g_af_mutinfo_med = g_af_mutinfo +
|
||||
geom_vline(data = df3_af_median, aes(xintercept = grp.median),linetype = "dashed")
|
||||
g_af_mutinfo_med
|
||||
|
||||
#=====================================================================
|
||||
#------------------------
|
||||
# AF: boxplot with stats
|
||||
#------------------------
|
||||
my_comparisons <- list( c("DM", "OM") )
|
||||
|
||||
g_af_bp = ggplot(df3, aes(x = mutation_info_labels
|
||||
, y = af
|
||||
, fill = mutation_info_labels))+
|
||||
scale_fill_manual(values = c("#E69F00", "#999999")) +
|
||||
geom_boxplot()+
|
||||
geom_boxplot() +
|
||||
theme(axis.text.x = element_text(size = my_ats)
|
||||
, axis.text.y = element_text(size = my_ats)
|
||||
, axis.title.x = element_text(size = my_ats)
|
||||
|
@ -344,9 +194,33 @@ g_af_bp_stats = g_af_bp + stat_compare_means(comparisons = my_comparisons
|
|||
#, label = "p.format"
|
||||
, label = "p.signif")
|
||||
g_af_bp_stats
|
||||
|
||||
######################################################################
|
||||
# OR
|
||||
# OR plots
|
||||
######################################################################
|
||||
#-----------
|
||||
# OR: hist
|
||||
#-----------
|
||||
g_or_hist = ggplot(df3, aes(x = or_mychisq)) +
|
||||
geom_histogram(colour = "white") +
|
||||
theme(axis.text.x = element_text(size = my_ats)
|
||||
, axis.text.y = element_text(size = my_ats)
|
||||
#, axis.title.y = element_blank()
|
||||
, axis.title.x = element_text(size = my_ats)
|
||||
#, axis.title.x = element_blank()
|
||||
, axis.title.y = element_text(size = my_ats)
|
||||
, axis.ticks.y = element_blank()
|
||||
, plot.title = element_blank())+
|
||||
#, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5))+
|
||||
labs(title = "Odds Ratio (OR)"
|
||||
, x = "OR"
|
||||
, y = "Count")
|
||||
|
||||
g_or_hist
|
||||
#=====================================================================
|
||||
#---------------------------------
|
||||
# OR: hist facet by mutation_class
|
||||
#---------------------------------
|
||||
df3_or_median <- ddply(df3, "mutation_info_labels", summarise, grp.median = median(or_mychisq, na.rm = T))
|
||||
head(df3_or_median)
|
||||
|
||||
|
@ -358,6 +232,7 @@ g_or_mutinfo = ggplot(df3, aes(x = or_mychisq
|
|||
|
||||
facet_grid(mutation_info_labels ~ ., scales = "free") +
|
||||
#facet_wrap(mutation_info_labels ~ ., scales = "free") +
|
||||
scale_y_continuous(breaks = c(0, 15, 30, 45, 60, 75)) +
|
||||
|
||||
theme(axis.text.x = element_text(size = my_ats)
|
||||
, axis.text.y = element_text(size = my_ats)
|
||||
|
@ -365,7 +240,8 @@ g_or_mutinfo = ggplot(df3, aes(x = or_mychisq
|
|||
, axis.title.y = element_text(size = my_ats)
|
||||
#, axis.title.y = element_blank()
|
||||
, axis.ticks.y = element_blank()
|
||||
, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5)
|
||||
#, plot.title = element_text(size = my_ats+5, face ="bold", hjust = 0.5)
|
||||
, plot.title = element_blank()
|
||||
#, strip.text = element_text(size = my_als)
|
||||
, strip.text = element_blank()
|
||||
, strip.background = element_blank()
|
||||
|
@ -382,6 +258,9 @@ g_or_mutinfo_med = g_or_mutinfo + geom_vline(data = df3_or_median, aes(xintercep
|
|||
linetype = "dashed")
|
||||
g_or_mutinfo_med
|
||||
#=====================================================================
|
||||
#------------------------
|
||||
# OR: boxplot with stats
|
||||
#------------------------
|
||||
g_or_bp = ggplot(df3, aes(x = mutation_info_labels
|
||||
, y = or_mychisq
|
||||
, fill = mutation_info_labels))+
|
||||
|
@ -412,42 +291,67 @@ g_or_bp_stats
|
|||
#==============================
|
||||
# combine plots for outputs
|
||||
#==============================
|
||||
#------------------------------------
|
||||
# Plot 1: hist withOUT median line
|
||||
#------------------------------------
|
||||
# combined plots without median
|
||||
#print(paste0("plot combined filename:", plot_af_or_combined))
|
||||
#svg(plot_af_or_combined, width = 16, height = 9)
|
||||
|
||||
c_combined = cowplot::plot_grid(g_af_mutinfo
|
||||
#-----------------------------------------
|
||||
# combine the AF plots with overall title
|
||||
#-----------------------------------------
|
||||
p1_af = cowplot::plot_grid(g_af_hist
|
||||
, g_af_mutinfo_med
|
||||
#, g_af_mutinfo # without median line
|
||||
, g_af_bp_stats
|
||||
, g_or_mutinfo
|
||||
, g_or_bp_stats
|
||||
, nrow = 2
|
||||
, labels = c("(a)", "(b)", "(c)", "(d)")
|
||||
, rel_widths = c(2/3, 1/3)
|
||||
, label_size = 20)
|
||||
, nrow = 1
|
||||
, labels = c("(a)", "(b)", "(c)")
|
||||
, hjust = -0.5
|
||||
, vjust = 0.2
|
||||
, rel_widths = c(1.2/4, 2/4, 0.8/4)
|
||||
, label_size = 18)
|
||||
p1_af
|
||||
|
||||
#print(c_combined)
|
||||
#dev.off()
|
||||
title_af = ggdraw() + draw_label("Minor Allele Frequency (MAF)", fontface='bold', size = my_ats + 5)
|
||||
p1_af_title = plot_grid(title_af, p1_af, ncol = 1, rel_heights = c(0.1, 1)) # rel_heights values control title margins
|
||||
p1_af_title
|
||||
|
||||
#-------------------------------
|
||||
# Plot 2: hist WITH median line
|
||||
#-------------------------------
|
||||
print(paste0("plot combined filename:", plot_af_or_combined_med))
|
||||
svg(plot_af_or_combined_med, width = 16, height = 9)
|
||||
c_combined_med = cowplot::plot_grid(g_af_mutinfo_med
|
||||
, g_af_bp_stats
|
||||
#-----------------------------------------
|
||||
# combine the OR plots with overall title
|
||||
#-----------------------------------------
|
||||
p2_or = cowplot::plot_grid(g_or_hist
|
||||
, g_or_mutinfo_med
|
||||
#, g_or_mutinfo # without median line
|
||||
, g_or_bp_stats
|
||||
, nrow = 2
|
||||
, labels = c("(a)", "(b)", "(c)", "(d)")
|
||||
, rel_widths = c(3/4, 1/4)
|
||||
, label_size = 20)
|
||||
, nrow = 1
|
||||
, labels = c("(d)", "(e)", "(f)")
|
||||
, hjust = -0.5
|
||||
, vjust = 0.2
|
||||
, rel_widths = c(1.2/4, 2/4, 0.8/4)
|
||||
, label_size = 18)
|
||||
p2_or
|
||||
|
||||
title_or = ggdraw() + draw_label("Odds Ratio (OR)", fontface = 'bold', size = my_ats + 5)
|
||||
p2_or_title = plot_grid(title_or, p2_or, ncol = 1, rel_heights = c(0.1, 1)) # rel_heights values control title margins
|
||||
p2_or_title
|
||||
|
||||
|
||||
#----------------------------------------
|
||||
# Plot 1: Combined AF and OR hist plots
|
||||
# with median line on hist
|
||||
#----------------------------------------
|
||||
print(paste0("plot combined filename:", plot_af_or_combined_med))
|
||||
svg(plot_af_or_combined_med, width = 15, height = 9)
|
||||
|
||||
p_combined_med = cowplot::plot_grid(p1_af_title, p2_or_title, nrow = 2)
|
||||
p_combined_med
|
||||
|
||||
print(c_combined_med)
|
||||
dev.off()
|
||||
|
||||
#----------------------------------------
|
||||
# Plot 2: Combined AF and OR hist plots
|
||||
# with median line on hist
|
||||
#----------------------------------------
|
||||
# uncomment the correct vars in p1_af and p2_or to generate this
|
||||
#print(paste0("plot combined filename:", plot_af_or_combined))
|
||||
#svg(plot_af_or_combined, width = 15, height = 9)
|
||||
|
||||
#p_combined = cowplot::plot_grid(p1_af_title, p2_or_title, nrow = 2)
|
||||
#p_combined
|
||||
#dev.off()
|
||||
######################################################################
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue