291 lines
7.2 KiB
R
291 lines
7.2 KiB
R
#!/usr/bin/env Rscript
|
|
#########################################################
|
|
# TASK: producing boxplots for dr and other muts
|
|
|
|
#########################################################
|
|
#=======================================================================
|
|
# working dir and loading libraries
|
|
getwd()
|
|
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
|
getwd()
|
|
|
|
#source("Header_TT.R")
|
|
library(tidyverse)
|
|
library(ggplot2)
|
|
library(data.table)
|
|
library(dplyr)
|
|
|
|
#=========
|
|
# Input
|
|
#=========
|
|
#source("combining_dfs_plotting.R")
|
|
|
|
source("output_tables.R")
|
|
|
|
#grid(nx=NA,ny=NULL,lty=1,lwd=1,col="gray")
|
|
|
|
##########################################################
|
|
#====================
|
|
# OR distributions
|
|
#====================
|
|
|
|
par(mfrow=c(2,2))
|
|
|
|
#-----------
|
|
# or_mychisq: raw
|
|
#-----------
|
|
ylim_or = max(density(merged_df3$or_mychisq, na.rm = T)$y)
|
|
ylim_or
|
|
|
|
hist(merged_df3$or_mychisq
|
|
, prob = TRUE
|
|
, col = "grey"
|
|
, border= "black"
|
|
, ylim = c(0, ylim_or)
|
|
, xlab = "Odds Ratio"
|
|
, main = "Odds Ratio")
|
|
box(bty="l")
|
|
lines(density(merged_df3$or_mychisq, na.rm = T, bw = "nrd0") # default
|
|
, col = "black"
|
|
, lwd = 2)
|
|
|
|
#plot(density(merged_df3$or_mychisq, na.rm = T, bw = "nrd0"))
|
|
#-----------
|
|
# or_mychisq: log10
|
|
#-----------
|
|
ylim_or_log = max(density(log10(merged_df3$or_mychisq), na.rm = T)$y)
|
|
|
|
hist(log10(merged_df3$or_mychisq)
|
|
, prob = TRUE
|
|
, col = "grey"
|
|
, border= "black"
|
|
, ylim = c(0, ylim_or_log)
|
|
, xlab = "Log10 (Odds Ratio)"
|
|
, main = "Odds Ratio")
|
|
box(bty="l")
|
|
lines(density(log10(merged_df3$or_mychisq), na.rm = T, bw = "nrd0") # default
|
|
, col = "black"
|
|
, lwd = 2)
|
|
|
|
#======================================================================
|
|
#-------------------
|
|
# adjusted OR: raw
|
|
#-------------------
|
|
ylim_or_ad = max(density(merged_df3$or_kin, na.rm = T)$y)
|
|
ylim_or_ad
|
|
|
|
hist(merged_df3$or_kin
|
|
, prob = TRUE
|
|
, col = "grey"
|
|
, border= "black"
|
|
, ylim = c(0, ylim_or_ad)
|
|
, xlab = "Adjusted Odds Ratio"
|
|
, main = "Adjusted Odds Ratio")
|
|
box(bty="l")
|
|
lines(density(merged_df3$or_kin, na.rm = T, bw = "nrd0") # default
|
|
, col = "black"
|
|
, lwd = 2)
|
|
|
|
#plot(density(merged_df3$or_kin, na.rm = T, bw = "nrd0"))
|
|
#--------------------
|
|
# adjusted OR: log10
|
|
#---------------------
|
|
ylim_or_ad_log = max(density(log10(merged_df3$or_kin), na.rm = T)$y)
|
|
|
|
hist(log10(merged_df3$or_kin)
|
|
, prob = TRUE
|
|
, col = "grey"
|
|
, border= "black"
|
|
, ylim = c(0, ylim_or_ad_log)
|
|
, xlab = "Log10 (Adjusted Odds Ratio)"
|
|
, main = "Adjusted Odds Ratio")
|
|
box(bty="l")
|
|
lines(density(log10(merged_df3$or_kin), na.rm = T, bw = "nrd0") # default
|
|
, col = "black"
|
|
, lwd = 2)
|
|
|
|
#--------------------
|
|
# logistic OR: raw
|
|
#-------------------
|
|
#ylim_or_logistic = max(density(merged_df3$or_logistic, na.rm = T)$y)
|
|
#ylim_or_logistic
|
|
|
|
#hist(merged_df3$or_logistic
|
|
# , prob = TRUE
|
|
# , col = "grey"
|
|
# , border= "black"
|
|
# , ylim = c(0, ylim_or)
|
|
# , xlab = "Odds Ratio"
|
|
# , main = "Logistic Odds Ratio")
|
|
#box(bty="l")
|
|
#lines(density(merged_df3$or_logistic, na.rm = T, bw = "nrd0") # default
|
|
# , col = "black"
|
|
# , lwd = 2)
|
|
|
|
#plot(density(merged_df3$or_logistic, na.rm = T, bw = "nrd0"))
|
|
######################################################################
|
|
#==================
|
|
# Foldx and DUET
|
|
#==================
|
|
par(mfrow = c(2,2))
|
|
|
|
#-----------------
|
|
# Foldx: scaled
|
|
#-----------------
|
|
|
|
ylim_foldx = max(density(merged_df3$ddg, na.rm = T)$y)
|
|
ylim_foldx
|
|
|
|
hist(merged_df3$ddg
|
|
, prob = TRUE
|
|
, col = "grey"
|
|
, border= "black"
|
|
, ylim = c(0, ylim_foldx )
|
|
, xlab = "Foldx ddg"
|
|
, main = "Foldx")
|
|
box(bty="l")
|
|
lines(density(merged_df3$ddg, na.rm = T, bw = "nrd0") # default
|
|
, col = "black"
|
|
, lwd = 2)
|
|
|
|
|
|
#-----------------
|
|
# Foldx: scaled
|
|
#-----------------
|
|
ylim_foldx_scaled = max(density(merged_df3$foldx_scaled, na.rm = T)$y)
|
|
ylim_foldx_scaled
|
|
|
|
hist(merged_df3$foldx_scaled
|
|
, prob = TRUE
|
|
, col = "grey"
|
|
, border= "black"
|
|
, ylim = c(0, ylim_foldx_scaled)
|
|
, xlab = "Foldx ddg scaled"
|
|
, main = "Foldx")
|
|
box(bty="l")
|
|
lines(density(merged_df3$foldx_scaled, na.rm = T, bw = "nrd0") # default
|
|
, col = "black"
|
|
, lwd = 2)
|
|
|
|
#===============================================================
|
|
#-------------
|
|
# DUET: raw
|
|
#-------------
|
|
|
|
ylim_duet = max(density(merged_df3$duet_stability_change, na.rm = T)$y)
|
|
ylim_duet
|
|
|
|
hist(merged_df3$duet_stability_change
|
|
, prob = TRUE
|
|
, col = "grey"
|
|
, border= "black"
|
|
, ylim = c(0, ylim_duet )
|
|
, xlab = "DUET ddg"
|
|
, main = "DUET ")
|
|
box(bty="l")
|
|
lines(density(merged_df3$duet_stability_change, na.rm = T, bw = "nrd0") # default
|
|
, col = "black"
|
|
, lwd = 2)
|
|
|
|
|
|
#---------------
|
|
# DUET: scaled
|
|
#---------------
|
|
ylim_duet_scaled = max(density(merged_df3$duet_scaled, na.rm = T)$y)
|
|
ylim_duet_scaled
|
|
|
|
hist(merged_df3$duet_scaled
|
|
, prob = TRUE
|
|
, col = "grey"
|
|
, border= "black"
|
|
, ylim = c(0, ylim_duet_scaled)
|
|
, xlab = "DUET ddg scaled"
|
|
, main = "DUET")
|
|
box(bty="l")
|
|
lines(density(merged_df3$duet_scaled, na.rm = T, bw = "nrd0") # default
|
|
, col = "black"
|
|
, lwd = 2)
|
|
|
|
|
|
|
|
###################
|
|
summary(merged_df3$duet_stability_change)
|
|
range(merged_df3$duet_stability_change)
|
|
|
|
summary(merged_df3$ddg)
|
|
range(merged_df3$ddg)
|
|
|
|
foldx_seq = seq(min(merged_df3$ddg), max(merged_df3$ddg))
|
|
length(foldx_seq)
|
|
|
|
duet_seq = seq(min(merged_df3$duet_stability_change), max(merged_df3$duet_stability_change))
|
|
duet_seq
|
|
|
|
wilcox.test(merged_df3$duet_stability_change
|
|
, merged_df3$ddg
|
|
, paired = T)
|
|
|
|
boxplot(merged_df3$duet_stability_change
|
|
, merged_df3$ddg)
|
|
|
|
|
|
# scaled
|
|
summary(merged_df3$foldx_scaled)
|
|
|
|
foldx_seq_scaled = seq(min(merged_df3$foldx_scaled)
|
|
, max(merged_df3$foldx_scaled))
|
|
|
|
length(foldx_seq_scaled)
|
|
|
|
|
|
summary(merged_df3$duet_scaled)
|
|
duet_seq_scaled = seq(min(merged_df3$duet_scaled)
|
|
, max(merged_df3$duet_scaled))
|
|
length(duet_seq_scaled)
|
|
|
|
|
|
#=====================
|
|
# corr with mutaton class
|
|
table(merged_df3$mutation_info)
|
|
merged_df3$mutation_info_categ = ifelse(merged_df3$mutation_info == dr_muts_col, 1, 0)
|
|
table(merged_df3$mutation_info_categ)
|
|
|
|
table(merged_df3$mutation_info)
|
|
merged_df3$mutation_info_categ = ifelse(merged_df3$mutation_info == dr_muts_col, 1, 0)
|
|
table(merged_df3$mutation_info_categ)
|
|
|
|
|
|
cor.test(merged_df3$mutation_info_categ, merged_df3$duet_scaled, method = "spearman")
|
|
cor.test(merged_df3$mutation_info_categ, merged_df3$foldx_scaled, method = "spearman")
|
|
|
|
|
|
cor.test(merged_df3$duet_scaled, merged_df3$foldx_scaled, method = "spearman")
|
|
|
|
cor.test(merged_df3$duet_scaled, merged_df3$af, method = "spearman")
|
|
|
|
|
|
# normality test
|
|
#P-value > 0.05: we can assume the normality.
|
|
shapiro.test(merged_df3$or_mychisq)
|
|
shapiro.test(log10(merged_df3$or_mychisq))
|
|
shapiro.test(merged_df3$or_kin)
|
|
|
|
|
|
shapiro.test(merged_df3$duet_stability_change)
|
|
shapiro.test(merged_df3$ddg)
|
|
|
|
shapiro.test(merged_df3$duet_scaled)
|
|
shapiro.test(merged_df3$foldx_scaled)
|
|
|
|
|
|
summary(merged_df3$or_kin)
|
|
summary(log10(merged_df3$or_mychisq))
|
|
|
|
table(merged_df3$or_mychisq > 1)
|
|
table(log10(merged_df3$or_mychisq > 1))
|
|
|
|
|
|
# percentage
|
|
(table(merged_df3$or_mychisq > 1)[[2]]/sum(table(merged_df3$or_mychisq > 1))*100)
|
|
|
|
(table(merged_df3$or_kin > 1)[[2]]/sum(table(merged_df3$or_kin > 1))*100)
|