renamed dist_plots.R to dist_plots_check.R as its exploratory
This commit is contained in:
parent
cc8443c7d4
commit
711781933c
1 changed files with 0 additions and 0 deletions
291
scripts/plotting/dist_plots_check.R
Normal file
291
scripts/plotting/dist_plots_check.R
Normal file
|
@ -0,0 +1,291 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: producing boxplots for dr and other muts
|
||||
|
||||
#########################################################
|
||||
#=======================================================================
|
||||
# working dir and loading libraries
|
||||
getwd()
|
||||
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||
getwd()
|
||||
|
||||
#source("Header_TT.R")
|
||||
library(tidyverse)
|
||||
library(ggplot2)
|
||||
library(data.table)
|
||||
library(dplyr)
|
||||
|
||||
#=========
|
||||
# Input
|
||||
#=========
|
||||
#source("combining_dfs_plotting.R")
|
||||
|
||||
source("output_tables.R")
|
||||
|
||||
#grid(nx=NA,ny=NULL,lty=1,lwd=1,col="gray")
|
||||
|
||||
##########################################################
|
||||
#====================
|
||||
# OR distributions
|
||||
#====================
|
||||
|
||||
par(mfrow=c(2,2))
|
||||
|
||||
#-----------
|
||||
# or_mychisq: raw
|
||||
#-----------
|
||||
ylim_or = max(density(merged_df3$or_mychisq, na.rm = T)$y)
|
||||
ylim_or
|
||||
|
||||
hist(merged_df3$or_mychisq
|
||||
, prob = TRUE
|
||||
, col = "grey"
|
||||
, border= "black"
|
||||
, ylim = c(0, ylim_or)
|
||||
, xlab = "Odds Ratio"
|
||||
, main = "Odds Ratio")
|
||||
box(bty="l")
|
||||
lines(density(merged_df3$or_mychisq, na.rm = T, bw = "nrd0") # default
|
||||
, col = "black"
|
||||
, lwd = 2)
|
||||
|
||||
#plot(density(merged_df3$or_mychisq, na.rm = T, bw = "nrd0"))
|
||||
#-----------
|
||||
# or_mychisq: log10
|
||||
#-----------
|
||||
ylim_or_log = max(density(log10(merged_df3$or_mychisq), na.rm = T)$y)
|
||||
|
||||
hist(log10(merged_df3$or_mychisq)
|
||||
, prob = TRUE
|
||||
, col = "grey"
|
||||
, border= "black"
|
||||
, ylim = c(0, ylim_or_log)
|
||||
, xlab = "Log10 (Odds Ratio)"
|
||||
, main = "Odds Ratio")
|
||||
box(bty="l")
|
||||
lines(density(log10(merged_df3$or_mychisq), na.rm = T, bw = "nrd0") # default
|
||||
, col = "black"
|
||||
, lwd = 2)
|
||||
|
||||
#======================================================================
|
||||
#-------------------
|
||||
# adjusted OR: raw
|
||||
#-------------------
|
||||
ylim_or_ad = max(density(merged_df3$or_kin, na.rm = T)$y)
|
||||
ylim_or_ad
|
||||
|
||||
hist(merged_df3$or_kin
|
||||
, prob = TRUE
|
||||
, col = "grey"
|
||||
, border= "black"
|
||||
, ylim = c(0, ylim_or_ad)
|
||||
, xlab = "Adjusted Odds Ratio"
|
||||
, main = "Adjusted Odds Ratio")
|
||||
box(bty="l")
|
||||
lines(density(merged_df3$or_kin, na.rm = T, bw = "nrd0") # default
|
||||
, col = "black"
|
||||
, lwd = 2)
|
||||
|
||||
#plot(density(merged_df3$or_kin, na.rm = T, bw = "nrd0"))
|
||||
#--------------------
|
||||
# adjusted OR: log10
|
||||
#---------------------
|
||||
ylim_or_ad_log = max(density(log10(merged_df3$or_kin), na.rm = T)$y)
|
||||
|
||||
hist(log10(merged_df3$or_kin)
|
||||
, prob = TRUE
|
||||
, col = "grey"
|
||||
, border= "black"
|
||||
, ylim = c(0, ylim_or_ad_log)
|
||||
, xlab = "Log10 (Adjusted Odds Ratio)"
|
||||
, main = "Adjusted Odds Ratio")
|
||||
box(bty="l")
|
||||
lines(density(log10(merged_df3$or_kin), na.rm = T, bw = "nrd0") # default
|
||||
, col = "black"
|
||||
, lwd = 2)
|
||||
|
||||
#--------------------
|
||||
# logistic OR: raw
|
||||
#-------------------
|
||||
#ylim_or_logistic = max(density(merged_df3$or_logistic, na.rm = T)$y)
|
||||
#ylim_or_logistic
|
||||
|
||||
#hist(merged_df3$or_logistic
|
||||
# , prob = TRUE
|
||||
# , col = "grey"
|
||||
# , border= "black"
|
||||
# , ylim = c(0, ylim_or)
|
||||
# , xlab = "Odds Ratio"
|
||||
# , main = "Logistic Odds Ratio")
|
||||
#box(bty="l")
|
||||
#lines(density(merged_df3$or_logistic, na.rm = T, bw = "nrd0") # default
|
||||
# , col = "black"
|
||||
# , lwd = 2)
|
||||
|
||||
#plot(density(merged_df3$or_logistic, na.rm = T, bw = "nrd0"))
|
||||
######################################################################
|
||||
#==================
|
||||
# Foldx and DUET
|
||||
#==================
|
||||
par(mfrow = c(2,2))
|
||||
|
||||
#-----------------
|
||||
# Foldx: scaled
|
||||
#-----------------
|
||||
|
||||
ylim_foldx = max(density(merged_df3$ddg, na.rm = T)$y)
|
||||
ylim_foldx
|
||||
|
||||
hist(merged_df3$ddg
|
||||
, prob = TRUE
|
||||
, col = "grey"
|
||||
, border= "black"
|
||||
, ylim = c(0, ylim_foldx )
|
||||
, xlab = "Foldx ddg"
|
||||
, main = "Foldx")
|
||||
box(bty="l")
|
||||
lines(density(merged_df3$ddg, na.rm = T, bw = "nrd0") # default
|
||||
, col = "black"
|
||||
, lwd = 2)
|
||||
|
||||
|
||||
#-----------------
|
||||
# Foldx: scaled
|
||||
#-----------------
|
||||
ylim_foldx_scaled = max(density(merged_df3$foldx_scaled, na.rm = T)$y)
|
||||
ylim_foldx_scaled
|
||||
|
||||
hist(merged_df3$foldx_scaled
|
||||
, prob = TRUE
|
||||
, col = "grey"
|
||||
, border= "black"
|
||||
, ylim = c(0, ylim_foldx_scaled)
|
||||
, xlab = "Foldx ddg scaled"
|
||||
, main = "Foldx")
|
||||
box(bty="l")
|
||||
lines(density(merged_df3$foldx_scaled, na.rm = T, bw = "nrd0") # default
|
||||
, col = "black"
|
||||
, lwd = 2)
|
||||
|
||||
#===============================================================
|
||||
#-------------
|
||||
# DUET: raw
|
||||
#-------------
|
||||
|
||||
ylim_duet = max(density(merged_df3$duet_stability_change, na.rm = T)$y)
|
||||
ylim_duet
|
||||
|
||||
hist(merged_df3$duet_stability_change
|
||||
, prob = TRUE
|
||||
, col = "grey"
|
||||
, border= "black"
|
||||
, ylim = c(0, ylim_duet )
|
||||
, xlab = "DUET ddg"
|
||||
, main = "DUET ")
|
||||
box(bty="l")
|
||||
lines(density(merged_df3$duet_stability_change, na.rm = T, bw = "nrd0") # default
|
||||
, col = "black"
|
||||
, lwd = 2)
|
||||
|
||||
|
||||
#---------------
|
||||
# DUET: scaled
|
||||
#---------------
|
||||
ylim_duet_scaled = max(density(merged_df3$duet_scaled, na.rm = T)$y)
|
||||
ylim_duet_scaled
|
||||
|
||||
hist(merged_df3$duet_scaled
|
||||
, prob = TRUE
|
||||
, col = "grey"
|
||||
, border= "black"
|
||||
, ylim = c(0, ylim_duet_scaled)
|
||||
, xlab = "DUET ddg scaled"
|
||||
, main = "DUET")
|
||||
box(bty="l")
|
||||
lines(density(merged_df3$duet_scaled, na.rm = T, bw = "nrd0") # default
|
||||
, col = "black"
|
||||
, lwd = 2)
|
||||
|
||||
|
||||
|
||||
###################
|
||||
summary(merged_df3$duet_stability_change)
|
||||
range(merged_df3$duet_stability_change)
|
||||
|
||||
summary(merged_df3$ddg)
|
||||
range(merged_df3$ddg)
|
||||
|
||||
foldx_seq = seq(min(merged_df3$ddg), max(merged_df3$ddg))
|
||||
length(foldx_seq)
|
||||
|
||||
duet_seq = seq(min(merged_df3$duet_stability_change), max(merged_df3$duet_stability_change))
|
||||
duet_seq
|
||||
|
||||
wilcox.test(merged_df3$duet_stability_change
|
||||
, merged_df3$ddg
|
||||
, paired = T)
|
||||
|
||||
boxplot(merged_df3$duet_stability_change
|
||||
, merged_df3$ddg)
|
||||
|
||||
|
||||
# scaled
|
||||
summary(merged_df3$foldx_scaled)
|
||||
|
||||
foldx_seq_scaled = seq(min(merged_df3$foldx_scaled)
|
||||
, max(merged_df3$foldx_scaled))
|
||||
|
||||
length(foldx_seq_scaled)
|
||||
|
||||
|
||||
summary(merged_df3$duet_scaled)
|
||||
duet_seq_scaled = seq(min(merged_df3$duet_scaled)
|
||||
, max(merged_df3$duet_scaled))
|
||||
length(duet_seq_scaled)
|
||||
|
||||
|
||||
#=====================
|
||||
# corr with mutaton class
|
||||
table(merged_df3$mutation_info)
|
||||
merged_df3$mutation_info_categ = ifelse(merged_df3$mutation_info == dr_muts_col, 1, 0)
|
||||
table(merged_df3$mutation_info_categ)
|
||||
|
||||
table(merged_df3$mutation_info)
|
||||
merged_df3$mutation_info_categ = ifelse(merged_df3$mutation_info == dr_muts_col, 1, 0)
|
||||
table(merged_df3$mutation_info_categ)
|
||||
|
||||
|
||||
cor.test(merged_df3$mutation_info_categ, merged_df3$duet_scaled, method = "spearman")
|
||||
cor.test(merged_df3$mutation_info_categ, merged_df3$foldx_scaled, method = "spearman")
|
||||
|
||||
|
||||
cor.test(merged_df3$duet_scaled, merged_df3$foldx_scaled, method = "spearman")
|
||||
|
||||
cor.test(merged_df3$duet_scaled, merged_df3$af, method = "spearman")
|
||||
|
||||
|
||||
# normality test
|
||||
#P-value > 0.05: we can assume the normality.
|
||||
shapiro.test(merged_df3$or_mychisq)
|
||||
shapiro.test(log10(merged_df3$or_mychisq))
|
||||
shapiro.test(merged_df3$or_kin)
|
||||
|
||||
|
||||
shapiro.test(merged_df3$duet_stability_change)
|
||||
shapiro.test(merged_df3$ddg)
|
||||
|
||||
shapiro.test(merged_df3$duet_scaled)
|
||||
shapiro.test(merged_df3$foldx_scaled)
|
||||
|
||||
|
||||
summary(merged_df3$or_kin)
|
||||
summary(log10(merged_df3$or_mychisq))
|
||||
|
||||
table(merged_df3$or_mychisq > 1)
|
||||
table(log10(merged_df3$or_mychisq > 1))
|
||||
|
||||
|
||||
# percentage
|
||||
(table(merged_df3$or_mychisq > 1)[[2]]/sum(table(merged_df3$or_mychisq > 1))*100)
|
||||
|
||||
(table(merged_df3$or_kin > 1)[[2]]/sum(table(merged_df3$or_kin > 1))*100)
|
Loading…
Add table
Add a link
Reference in a new issue