diff --git a/scripts/plotting/dist_plots.R b/scripts/plotting/dist_plots.R new file mode 100644 index 0000000..f8ce2b0 --- /dev/null +++ b/scripts/plotting/dist_plots.R @@ -0,0 +1,291 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: producing boxplots for dr and other muts + +######################################################### +#======================================================================= +# working dir and loading libraries +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting") +getwd() + +#source("Header_TT.R") +library(tidyverse) +library(ggplot2) +library(data.table) +library(dplyr) + +#========= +# Input +#========= +#source("combining_dfs_plotting.R") + +source("output_tables.R") + +#grid(nx=NA,ny=NULL,lty=1,lwd=1,col="gray") + +########################################################## +#==================== +# OR distributions +#==================== + +par(mfrow=c(2,2)) + +#----------- +# or_mychisq: raw +#----------- +ylim_or = max(density(merged_df3$or_mychisq, na.rm = T)$y) +ylim_or + +hist(merged_df3$or_mychisq + , prob = TRUE + , col = "grey" + , border= "black" + , ylim = c(0, ylim_or) + , xlab = "Odds Ratio" + , main = "Odds Ratio") +box(bty="l") +lines(density(merged_df3$or_mychisq, na.rm = T, bw = "nrd0") # default + , col = "black" + , lwd = 2) + +#plot(density(merged_df3$or_mychisq, na.rm = T, bw = "nrd0")) +#----------- +# or_mychisq: log10 +#----------- +ylim_or_log = max(density(log10(merged_df3$or_mychisq), na.rm = T)$y) + +hist(log10(merged_df3$or_mychisq) + , prob = TRUE + , col = "grey" + , border= "black" + , ylim = c(0, ylim_or_log) + , xlab = "Log10 (Odds Ratio)" + , main = "Odds Ratio") +box(bty="l") +lines(density(log10(merged_df3$or_mychisq), na.rm = T, bw = "nrd0") # default + , col = "black" + , lwd = 2) + +#====================================================================== +#------------------- +# adjusted OR: raw +#------------------- +ylim_or_ad = max(density(merged_df3$or_kin, na.rm = T)$y) +ylim_or_ad + +hist(merged_df3$or_kin + , prob = TRUE + , col = "grey" + , border= "black" + , ylim = c(0, ylim_or_ad) + , xlab = "Adjusted Odds Ratio" + , main = "Adjusted Odds Ratio") +box(bty="l") +lines(density(merged_df3$or_kin, na.rm = T, bw = "nrd0") # default + , col = "black" + , lwd = 2) + +#plot(density(merged_df3$or_kin, na.rm = T, bw = "nrd0")) +#-------------------- +# adjusted OR: log10 +#--------------------- +ylim_or_ad_log = max(density(log10(merged_df3$or_kin), na.rm = T)$y) + +hist(log10(merged_df3$or_kin) + , prob = TRUE + , col = "grey" + , border= "black" + , ylim = c(0, ylim_or_ad_log) + , xlab = "Log10 (Adjusted Odds Ratio)" + , main = "Adjusted Odds Ratio") +box(bty="l") +lines(density(log10(merged_df3$or_kin), na.rm = T, bw = "nrd0") # default + , col = "black" + , lwd = 2) + +#-------------------- +# logistic OR: raw +#------------------- +#ylim_or_logistic = max(density(merged_df3$or_logistic, na.rm = T)$y) +#ylim_or_logistic + +#hist(merged_df3$or_logistic +# , prob = TRUE +# , col = "grey" +# , border= "black" +# , ylim = c(0, ylim_or) +# , xlab = "Odds Ratio" +# , main = "Logistic Odds Ratio") +#box(bty="l") +#lines(density(merged_df3$or_logistic, na.rm = T, bw = "nrd0") # default +# , col = "black" +# , lwd = 2) + +#plot(density(merged_df3$or_logistic, na.rm = T, bw = "nrd0")) +###################################################################### +#================== +# Foldx and DUET +#================== +par(mfrow = c(2,2)) + +#----------------- +# Foldx: scaled +#----------------- + +ylim_foldx = max(density(merged_df3$ddg, na.rm = T)$y) +ylim_foldx + +hist(merged_df3$ddg + , prob = TRUE + , col = "grey" + , border= "black" + , ylim = c(0, ylim_foldx ) + , xlab = "Foldx ddg" + , main = "Foldx") +box(bty="l") +lines(density(merged_df3$ddg, na.rm = T, bw = "nrd0") # default + , col = "black" + , lwd = 2) + + +#----------------- +# Foldx: scaled +#----------------- +ylim_foldx_scaled = max(density(merged_df3$foldx_scaled, na.rm = T)$y) +ylim_foldx_scaled + +hist(merged_df3$foldx_scaled + , prob = TRUE + , col = "grey" + , border= "black" + , ylim = c(0, ylim_foldx_scaled) + , xlab = "Foldx ddg scaled" + , main = "Foldx") +box(bty="l") +lines(density(merged_df3$foldx_scaled, na.rm = T, bw = "nrd0") # default + , col = "black" + , lwd = 2) + +#=============================================================== +#------------- +# DUET: raw +#------------- + +ylim_duet = max(density(merged_df3$duet_stability_change, na.rm = T)$y) +ylim_duet + +hist(merged_df3$duet_stability_change + , prob = TRUE + , col = "grey" + , border= "black" + , ylim = c(0, ylim_duet ) + , xlab = "DUET ddg" + , main = "DUET ") +box(bty="l") +lines(density(merged_df3$duet_stability_change, na.rm = T, bw = "nrd0") # default + , col = "black" + , lwd = 2) + + +#--------------- +# DUET: scaled +#--------------- +ylim_duet_scaled = max(density(merged_df3$duet_scaled, na.rm = T)$y) +ylim_duet_scaled + +hist(merged_df3$duet_scaled + , prob = TRUE + , col = "grey" + , border= "black" + , ylim = c(0, ylim_duet_scaled) + , xlab = "DUET ddg scaled" + , main = "DUET") +box(bty="l") +lines(density(merged_df3$duet_scaled, na.rm = T, bw = "nrd0") # default + , col = "black" + , lwd = 2) + + + +################### +summary(merged_df3$duet_stability_change) +range(merged_df3$duet_stability_change) + +summary(merged_df3$ddg) +range(merged_df3$ddg) + +foldx_seq = seq(min(merged_df3$ddg), max(merged_df3$ddg)) +length(foldx_seq) + +duet_seq = seq(min(merged_df3$duet_stability_change), max(merged_df3$duet_stability_change)) +duet_seq + +wilcox.test(merged_df3$duet_stability_change + , merged_df3$ddg + , paired = T) + +boxplot(merged_df3$duet_stability_change + , merged_df3$ddg) + + +# scaled +summary(merged_df3$foldx_scaled) + +foldx_seq_scaled = seq(min(merged_df3$foldx_scaled) + , max(merged_df3$foldx_scaled)) + +length(foldx_seq_scaled) + + +summary(merged_df3$duet_scaled) +duet_seq_scaled = seq(min(merged_df3$duet_scaled) + , max(merged_df3$duet_scaled)) +length(duet_seq_scaled) + + +#===================== +# corr with mutaton class +table(merged_df3$mutation_info) +merged_df3$mutation_info_categ = ifelse(merged_df3$mutation_info == dr_muts_col, 1, 0) +table(merged_df3$mutation_info_categ) + +table(merged_df3$mutation_info) +merged_df3$mutation_info_categ = ifelse(merged_df3$mutation_info == dr_muts_col, 1, 0) +table(merged_df3$mutation_info_categ) + + +cor.test(merged_df3$mutation_info_categ, merged_df3$duet_scaled, method = "spearman") +cor.test(merged_df3$mutation_info_categ, merged_df3$foldx_scaled, method = "spearman") + + +cor.test(merged_df3$duet_scaled, merged_df3$foldx_scaled, method = "spearman") + +cor.test(merged_df3$duet_scaled, merged_df3$af, method = "spearman") + + +# normality test +#P-value > 0.05: we can assume the normality. +shapiro.test(merged_df3$or_mychisq) +shapiro.test(log10(merged_df3$or_mychisq)) +shapiro.test(merged_df3$or_kin) + + +shapiro.test(merged_df3$duet_stability_change) +shapiro.test(merged_df3$ddg) + +shapiro.test(merged_df3$duet_scaled) +shapiro.test(merged_df3$foldx_scaled) + + +summary(merged_df3$or_kin) +summary(log10(merged_df3$or_mychisq)) + +table(merged_df3$or_mychisq > 1) +table(log10(merged_df3$or_mychisq > 1)) + + +# percentage +(table(merged_df3$or_mychisq > 1)[[2]]/sum(table(merged_df3$or_mychisq > 1))*100) + +(table(merged_df3$or_kin > 1)[[2]]/sum(table(merged_df3$or_kin > 1))*100)