LSHTM_analysis/scripts/plotting/dist_plots_check.R

291 lines
7.2 KiB
R

#!/usr/bin/env Rscript
#########################################################
# TASK: producing boxplots for dr and other muts
#########################################################
#=======================================================================
# working dir and loading libraries
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting")
getwd()
#source("Header_TT.R")
library(tidyverse)
library(ggplot2)
library(data.table)
library(dplyr)
#=========
# Input
#=========
#source("combining_dfs_plotting.R")
source("output_tables.R")
#grid(nx=NA,ny=NULL,lty=1,lwd=1,col="gray")
##########################################################
#====================
# OR distributions
#====================
par(mfrow=c(2,2))
#-----------
# or_mychisq: raw
#-----------
ylim_or = max(density(merged_df3$or_mychisq, na.rm = T)$y)
ylim_or
hist(merged_df3$or_mychisq
, prob = TRUE
, col = "grey"
, border= "black"
, ylim = c(0, ylim_or)
, xlab = "Odds Ratio"
, main = "Odds Ratio")
box(bty="l")
lines(density(merged_df3$or_mychisq, na.rm = T, bw = "nrd0") # default
, col = "black"
, lwd = 2)
#plot(density(merged_df3$or_mychisq, na.rm = T, bw = "nrd0"))
#-----------
# or_mychisq: log10
#-----------
ylim_or_log = max(density(log10(merged_df3$or_mychisq), na.rm = T)$y)
hist(log10(merged_df3$or_mychisq)
, prob = TRUE
, col = "grey"
, border= "black"
, ylim = c(0, ylim_or_log)
, xlab = "Log10 (Odds Ratio)"
, main = "Odds Ratio")
box(bty="l")
lines(density(log10(merged_df3$or_mychisq), na.rm = T, bw = "nrd0") # default
, col = "black"
, lwd = 2)
#======================================================================
#-------------------
# adjusted OR: raw
#-------------------
ylim_or_ad = max(density(merged_df3$or_kin, na.rm = T)$y)
ylim_or_ad
hist(merged_df3$or_kin
, prob = TRUE
, col = "grey"
, border= "black"
, ylim = c(0, ylim_or_ad)
, xlab = "Adjusted Odds Ratio"
, main = "Adjusted Odds Ratio")
box(bty="l")
lines(density(merged_df3$or_kin, na.rm = T, bw = "nrd0") # default
, col = "black"
, lwd = 2)
#plot(density(merged_df3$or_kin, na.rm = T, bw = "nrd0"))
#--------------------
# adjusted OR: log10
#---------------------
ylim_or_ad_log = max(density(log10(merged_df3$or_kin), na.rm = T)$y)
hist(log10(merged_df3$or_kin)
, prob = TRUE
, col = "grey"
, border= "black"
, ylim = c(0, ylim_or_ad_log)
, xlab = "Log10 (Adjusted Odds Ratio)"
, main = "Adjusted Odds Ratio")
box(bty="l")
lines(density(log10(merged_df3$or_kin), na.rm = T, bw = "nrd0") # default
, col = "black"
, lwd = 2)
#--------------------
# logistic OR: raw
#-------------------
#ylim_or_logistic = max(density(merged_df3$or_logistic, na.rm = T)$y)
#ylim_or_logistic
#hist(merged_df3$or_logistic
# , prob = TRUE
# , col = "grey"
# , border= "black"
# , ylim = c(0, ylim_or)
# , xlab = "Odds Ratio"
# , main = "Logistic Odds Ratio")
#box(bty="l")
#lines(density(merged_df3$or_logistic, na.rm = T, bw = "nrd0") # default
# , col = "black"
# , lwd = 2)
#plot(density(merged_df3$or_logistic, na.rm = T, bw = "nrd0"))
######################################################################
#==================
# Foldx and DUET
#==================
par(mfrow = c(2,2))
#-----------------
# Foldx: scaled
#-----------------
ylim_foldx = max(density(merged_df3$ddg, na.rm = T)$y)
ylim_foldx
hist(merged_df3$ddg
, prob = TRUE
, col = "grey"
, border= "black"
, ylim = c(0, ylim_foldx )
, xlab = "Foldx ddg"
, main = "Foldx")
box(bty="l")
lines(density(merged_df3$ddg, na.rm = T, bw = "nrd0") # default
, col = "black"
, lwd = 2)
#-----------------
# Foldx: scaled
#-----------------
ylim_foldx_scaled = max(density(merged_df3$foldx_scaled, na.rm = T)$y)
ylim_foldx_scaled
hist(merged_df3$foldx_scaled
, prob = TRUE
, col = "grey"
, border= "black"
, ylim = c(0, ylim_foldx_scaled)
, xlab = "Foldx ddg scaled"
, main = "Foldx")
box(bty="l")
lines(density(merged_df3$foldx_scaled, na.rm = T, bw = "nrd0") # default
, col = "black"
, lwd = 2)
#===============================================================
#-------------
# DUET: raw
#-------------
ylim_duet = max(density(merged_df3$duet_stability_change, na.rm = T)$y)
ylim_duet
hist(merged_df3$duet_stability_change
, prob = TRUE
, col = "grey"
, border= "black"
, ylim = c(0, ylim_duet )
, xlab = "DUET ddg"
, main = "DUET ")
box(bty="l")
lines(density(merged_df3$duet_stability_change, na.rm = T, bw = "nrd0") # default
, col = "black"
, lwd = 2)
#---------------
# DUET: scaled
#---------------
ylim_duet_scaled = max(density(merged_df3$duet_scaled, na.rm = T)$y)
ylim_duet_scaled
hist(merged_df3$duet_scaled
, prob = TRUE
, col = "grey"
, border= "black"
, ylim = c(0, ylim_duet_scaled)
, xlab = "DUET ddg scaled"
, main = "DUET")
box(bty="l")
lines(density(merged_df3$duet_scaled, na.rm = T, bw = "nrd0") # default
, col = "black"
, lwd = 2)
###################
summary(merged_df3$duet_stability_change)
range(merged_df3$duet_stability_change)
summary(merged_df3$ddg)
range(merged_df3$ddg)
foldx_seq = seq(min(merged_df3$ddg), max(merged_df3$ddg))
length(foldx_seq)
duet_seq = seq(min(merged_df3$duet_stability_change), max(merged_df3$duet_stability_change))
duet_seq
wilcox.test(merged_df3$duet_stability_change
, merged_df3$ddg
, paired = T)
boxplot(merged_df3$duet_stability_change
, merged_df3$ddg)
# scaled
summary(merged_df3$foldx_scaled)
foldx_seq_scaled = seq(min(merged_df3$foldx_scaled)
, max(merged_df3$foldx_scaled))
length(foldx_seq_scaled)
summary(merged_df3$duet_scaled)
duet_seq_scaled = seq(min(merged_df3$duet_scaled)
, max(merged_df3$duet_scaled))
length(duet_seq_scaled)
#=====================
# corr with mutaton class
table(merged_df3$mutation_info)
merged_df3$mutation_info_categ = ifelse(merged_df3$mutation_info == dr_muts_col, 1, 0)
table(merged_df3$mutation_info_categ)
table(merged_df3$mutation_info)
merged_df3$mutation_info_categ = ifelse(merged_df3$mutation_info == dr_muts_col, 1, 0)
table(merged_df3$mutation_info_categ)
cor.test(merged_df3$mutation_info_categ, merged_df3$duet_scaled, method = "spearman")
cor.test(merged_df3$mutation_info_categ, merged_df3$foldx_scaled, method = "spearman")
cor.test(merged_df3$duet_scaled, merged_df3$foldx_scaled, method = "spearman")
cor.test(merged_df3$duet_scaled, merged_df3$af, method = "spearman")
# normality test
#P-value > 0.05: we can assume the normality.
shapiro.test(merged_df3$or_mychisq)
shapiro.test(log10(merged_df3$or_mychisq))
shapiro.test(merged_df3$or_kin)
shapiro.test(merged_df3$duet_stability_change)
shapiro.test(merged_df3$ddg)
shapiro.test(merged_df3$duet_scaled)
shapiro.test(merged_df3$foldx_scaled)
summary(merged_df3$or_kin)
summary(log10(merged_df3$or_mychisq))
table(merged_df3$or_mychisq > 1)
table(log10(merged_df3$or_mychisq > 1))
# percentage
(table(merged_df3$or_mychisq > 1)[[2]]/sum(table(merged_df3$or_mychisq > 1))*100)
(table(merged_df3$or_kin > 1)[[2]]/sum(table(merged_df3$or_kin > 1))*100)