added base histogram script for af and or
This commit is contained in:
parent
1f9ea3f789
commit
b2284f7216
1 changed files with 211 additions and 0 deletions
211
scripts/plotting/hist_af_or_base.R
Normal file
211
scripts/plotting/hist_af_or_base.R
Normal file
|
@ -0,0 +1,211 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: producing histogram of AF and OR
|
||||
# output svgs
|
||||
# 1) hist of af
|
||||
# 2) hist of or
|
||||
# 3) hist of af_or combined
|
||||
# 4) hist of af: from muts and samples dfs combined (EXPLORATORY!)
|
||||
# 5) hist and barplots for af_or combined with median line
|
||||
# 6) hist and barplots for af_or combined withOUT median line (Turned OFF)
|
||||
|
||||
#########################################################
|
||||
#=======================================================================
|
||||
# working dir and loading libraries
|
||||
getwd()
|
||||
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||
getwd()
|
||||
|
||||
#source("Header_TT.R")
|
||||
library(ggplot2)
|
||||
library(data.table)
|
||||
library(dplyr)
|
||||
library(plyr)
|
||||
#source("plotting_data.R")
|
||||
source("combining_dfs_plotting.R")
|
||||
|
||||
#=======================================================================
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
# plot 1
|
||||
hist_af_muts = "hist_mutations_AF.svg"
|
||||
plot_hist_af_muts = paste0(plotdir,"/", hist_af_muts)
|
||||
|
||||
# plot 2
|
||||
hist_or_muts = "hist_mutations_OR.svg"
|
||||
plot_hist_or_muts = paste0(plotdir,"/", hist_or_muts)
|
||||
|
||||
# plot 3
|
||||
hist_af_muts_sample = "hist_af_muts_sample_combined.svg"
|
||||
plot_hist_af_muts_sample = paste0(plotdir,"/", hist_af_muts_sample)
|
||||
|
||||
# plot 4
|
||||
hist_af_or = "hist_af_or_combined.svg"
|
||||
plot_hist_af_or = paste0(plotdir,"/", hist_af_or)
|
||||
|
||||
# plot 5
|
||||
af_or_combined_med = "hist_bp_muts_combined_median.svg"
|
||||
plot_af_or_combined_med = paste0(plotdir, "/", af_or_combined_med)
|
||||
|
||||
# plot 6: without median line on hist
|
||||
af_or_combined = "hist_bp_muts_combined.svg"
|
||||
plot_af_or_combined = paste0(plotdir, "/", af_or_combined)
|
||||
|
||||
#=======================================================================
|
||||
merged_df3_comp$mutation_info_labels = ifelse(merged_df3_comp$mutation_info == dr_muts_col, "DM", "OM")
|
||||
table(merged_df3_comp$mutation_info_labels)
|
||||
table(merged_df3_comp$mutation_info) == table(merged_df3_comp$mutation_info_labels)
|
||||
sum(table(merged_df3_comp$mutation_info))
|
||||
|
||||
merged_df2_comp$mutation_info_labels = ifelse(merged_df2_comp$mutation_info == dr_muts_col, "DM", "OM")
|
||||
table(merged_df2_comp$mutation_info_labels)
|
||||
table(merged_df2_comp$mutation_info) == table(merged_df2_comp$mutation_info_labels)
|
||||
sum(table(merged_df2_comp$mutation_info))
|
||||
#================
|
||||
# Data for plots
|
||||
#================
|
||||
# REASSIGNMENT as necessary
|
||||
#df = my_df_u
|
||||
|
||||
df3 = merged_df3_comp
|
||||
|
||||
# Contains duplicated samples, so you need to remove that for AF hist
|
||||
merged_df2_comp_u = merged_df2_comp[!duplicated(merged_df2_comp$id),]
|
||||
if ( nrow(merged_df2_comp_u) == length(unique(merged_df2_comp$id)) ){
|
||||
cat("PASS: duplicated samples ommitted. Assiging for plotting...")
|
||||
df2 = merged_df2_comp_u
|
||||
}else{
|
||||
cat("FAIL: duplicated samples could not be ommitted. Length mismatch"
|
||||
, "Expected nrows:", length(unique(merged_df2_comp$id))
|
||||
, "Got nrows:", nrow(merged_df2_comp_u))
|
||||
}
|
||||
#=======================================================================
|
||||
#****************
|
||||
# Plot 1: AF distribution: mutations
|
||||
#****************
|
||||
svg(plot_hist_af_muts)
|
||||
print(paste0("plot1 filename:", plot_hist_af_muts))
|
||||
|
||||
#par(mar=c(b, l, t, r))
|
||||
par(mar = c(4.5, 6, 1.8, 0))
|
||||
hist(df3$af
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "MAF"
|
||||
, ylab = "Frequency"
|
||||
, main = "Minor Allele Frequency (MAF)"
|
||||
, cex.lab = 1.7
|
||||
, cex.axis = 1.5
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
|
||||
dev.off()
|
||||
|
||||
#****************
|
||||
# Plot 2: AF distribution: samples
|
||||
#****************
|
||||
par(mar = c(4.5, 6, 1.8, 0))
|
||||
hist(df2$af
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "MAF"
|
||||
, ylab = "Frequency"
|
||||
, main = "Minor Allele Frequency (MAF)"
|
||||
, cex.lab = 1.7
|
||||
, cex.lab = 1.7
|
||||
, cex.axis = 1.5
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
|
||||
#****************
|
||||
# Plot 3: OF distribution: mutations
|
||||
#****************
|
||||
svg(plot_hist_or_muts)
|
||||
print(paste0("plot3 filename:", plot_hist_or_muts))
|
||||
|
||||
par(mar = c(4.5, 6, 1.8, 0))
|
||||
hist(df3$or_mychisq
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "OR"
|
||||
, ylab = "Frequency"
|
||||
, main = "Odds Ratio (OR)"
|
||||
, cex.lab = 1.7
|
||||
, cex.axis = 1.5
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
|
||||
dev.off()
|
||||
#====================================================================
|
||||
#==========
|
||||
# combine and output
|
||||
#==========
|
||||
#--------------
|
||||
# combine: af and or
|
||||
#-------------
|
||||
svg(plot_hist_af_or, width = 10, height = 8)
|
||||
print(paste0("plot3 filename:", plot_hist_af_or))
|
||||
|
||||
par(mfrow = c(1,2))
|
||||
par(mar = c(4.5, 5.5, 2, 0))
|
||||
hist(df3$af
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "Minor Allele Frequency (MAF)"
|
||||
, ylab = "Frequency"
|
||||
, main = ""
|
||||
, cex.lab = 1.3
|
||||
, cex.axis = 1.3
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
|
||||
# print the overall labels
|
||||
mtext(expression(bold('(a)')), side = 3, adj = -0.1, cex = 1.8)
|
||||
|
||||
hist(df3$or_mychisq
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "Odds Ratio (OR)"
|
||||
, ylab = "Frequency"
|
||||
, main = ""
|
||||
, cex.lab = 1.3
|
||||
, cex.axis = 1.3
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
|
||||
# print the overall labels
|
||||
mtext(expression(bold('(b)')), side = 3, adj = -0.1, cex = 1.8)
|
||||
dev.off()
|
||||
|
||||
#--------------
|
||||
# combine: af (mutations and samples)
|
||||
#-------------
|
||||
svg(plot_hist_af_muts_sample, width = 10, height = 8)
|
||||
print(paste0("plot3 filename:", plot_hist_af_muts_sample))
|
||||
|
||||
par(mfrow = c(1,2))
|
||||
par(mar = c(4.5, 5.5, 2, 0))
|
||||
hist(df3$af
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "Minor Allele Frequency (MAF)"
|
||||
, ylab = "Frequency"
|
||||
, main = paste0(nrow(df3),"_pnca_mutations")
|
||||
, cex.lab = 1.3
|
||||
, cex.axis = 1.3
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
|
||||
hist(df2$af
|
||||
, freq = T
|
||||
, breaks = 30
|
||||
, xlab = "Minor Allele Frequency (MAF)"
|
||||
, ylab = "Frequency"
|
||||
, main = paste0(nrow(df2),"_pnca_samples")
|
||||
, cex.lab = 1.3
|
||||
, cex.axis = 1.3
|
||||
, cex.main = 1.5
|
||||
, cex.sub = 1.5)
|
||||
dev.off()
|
||||
########################################################
|
Loading…
Add table
Add a link
Reference in a new issue