#!/usr/bin/env Rscript ######################################################### # TASK: producing boxplots for dr and other muts ######################################################### #======================================================================= # working dir and loading libraries getwd() setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() #source("Header_TT.R") library(tidyverse) library(ggplot2) library(data.table) library(dplyr) #========= # Input #========= #source("combining_dfs_plotting.R") source("output_tables.R") #grid(nx=NA,ny=NULL,lty=1,lwd=1,col="gray") ########################################################## #==================== # OR distributions #==================== par(mfrow=c(2,2)) #----------- # or_mychisq: raw #----------- ylim_or = max(density(merged_df3$or_mychisq, na.rm = T)$y) ylim_or hist(merged_df3$or_mychisq , prob = TRUE , col = "grey" , border= "black" , ylim = c(0, ylim_or) , xlab = "Odds Ratio" , main = "Odds Ratio") box(bty="l") lines(density(merged_df3$or_mychisq, na.rm = T, bw = "nrd0") # default , col = "black" , lwd = 2) #plot(density(merged_df3$or_mychisq, na.rm = T, bw = "nrd0")) #----------- # or_mychisq: log10 #----------- ylim_or_log = max(density(log10(merged_df3$or_mychisq), na.rm = T)$y) hist(log10(merged_df3$or_mychisq) , prob = TRUE , col = "grey" , border= "black" , ylim = c(0, ylim_or_log) , xlab = "Log10 (Odds Ratio)" , main = "Odds Ratio") box(bty="l") lines(density(log10(merged_df3$or_mychisq), na.rm = T, bw = "nrd0") # default , col = "black" , lwd = 2) #====================================================================== #------------------- # adjusted OR: raw #------------------- ylim_or_ad = max(density(merged_df3$or_kin, na.rm = T)$y) ylim_or_ad hist(merged_df3$or_kin , prob = TRUE , col = "grey" , border= "black" , ylim = c(0, ylim_or_ad) , xlab = "Adjusted Odds Ratio" , main = "Adjusted Odds Ratio") box(bty="l") lines(density(merged_df3$or_kin, na.rm = T, bw = "nrd0") # default , col = "black" , lwd = 2) #plot(density(merged_df3$or_kin, na.rm = T, bw = "nrd0")) #-------------------- # adjusted OR: log10 #--------------------- ylim_or_ad_log = max(density(log10(merged_df3$or_kin), na.rm = T)$y) hist(log10(merged_df3$or_kin) , prob = TRUE , col = "grey" , border= "black" , ylim = c(0, ylim_or_ad_log) , xlab = "Log10 (Adjusted Odds Ratio)" , main = "Adjusted Odds Ratio") box(bty="l") lines(density(log10(merged_df3$or_kin), na.rm = T, bw = "nrd0") # default , col = "black" , lwd = 2) #-------------------- # logistic OR: raw #------------------- #ylim_or_logistic = max(density(merged_df3$or_logistic, na.rm = T)$y) #ylim_or_logistic #hist(merged_df3$or_logistic # , prob = TRUE # , col = "grey" # , border= "black" # , ylim = c(0, ylim_or) # , xlab = "Odds Ratio" # , main = "Logistic Odds Ratio") #box(bty="l") #lines(density(merged_df3$or_logistic, na.rm = T, bw = "nrd0") # default # , col = "black" # , lwd = 2) #plot(density(merged_df3$or_logistic, na.rm = T, bw = "nrd0")) ###################################################################### #================== # Foldx and DUET #================== par(mfrow = c(2,2)) #----------------- # Foldx: scaled #----------------- ylim_foldx = max(density(merged_df3$ddg, na.rm = T)$y) ylim_foldx hist(merged_df3$ddg , prob = TRUE , col = "grey" , border= "black" , ylim = c(0, ylim_foldx ) , xlab = "Foldx ddg" , main = "Foldx") box(bty="l") lines(density(merged_df3$ddg, na.rm = T, bw = "nrd0") # default , col = "black" , lwd = 2) #----------------- # Foldx: scaled #----------------- ylim_foldx_scaled = max(density(merged_df3$foldx_scaled, na.rm = T)$y) ylim_foldx_scaled hist(merged_df3$foldx_scaled , prob = TRUE , col = "grey" , border= "black" , ylim = c(0, ylim_foldx_scaled) , xlab = "Foldx ddg scaled" , main = "Foldx") box(bty="l") lines(density(merged_df3$foldx_scaled, na.rm = T, bw = "nrd0") # default , col = "black" , lwd = 2) #=============================================================== #------------- # DUET: raw #------------- ylim_duet = max(density(merged_df3$duet_stability_change, na.rm = T)$y) ylim_duet hist(merged_df3$duet_stability_change , prob = TRUE , col = "grey" , border= "black" , ylim = c(0, ylim_duet ) , xlab = "DUET ddg" , main = "DUET ") box(bty="l") lines(density(merged_df3$duet_stability_change, na.rm = T, bw = "nrd0") # default , col = "black" , lwd = 2) #--------------- # DUET: scaled #--------------- ylim_duet_scaled = max(density(merged_df3$duet_scaled, na.rm = T)$y) ylim_duet_scaled hist(merged_df3$duet_scaled , prob = TRUE , col = "grey" , border= "black" , ylim = c(0, ylim_duet_scaled) , xlab = "DUET ddg scaled" , main = "DUET") box(bty="l") lines(density(merged_df3$duet_scaled, na.rm = T, bw = "nrd0") # default , col = "black" , lwd = 2) ################### summary(merged_df3$duet_stability_change) range(merged_df3$duet_stability_change) summary(merged_df3$ddg) range(merged_df3$ddg) foldx_seq = seq(min(merged_df3$ddg), max(merged_df3$ddg)) length(foldx_seq) duet_seq = seq(min(merged_df3$duet_stability_change), max(merged_df3$duet_stability_change)) duet_seq wilcox.test(merged_df3$duet_stability_change , merged_df3$ddg , paired = T) boxplot(merged_df3$duet_stability_change , merged_df3$ddg) # scaled summary(merged_df3$foldx_scaled) foldx_seq_scaled = seq(min(merged_df3$foldx_scaled) , max(merged_df3$foldx_scaled)) length(foldx_seq_scaled) summary(merged_df3$duet_scaled) duet_seq_scaled = seq(min(merged_df3$duet_scaled) , max(merged_df3$duet_scaled)) length(duet_seq_scaled) #===================== # corr with mutaton class table(merged_df3$mutation_info) merged_df3$mutation_info_categ = ifelse(merged_df3$mutation_info == dr_muts_col, 1, 0) table(merged_df3$mutation_info_categ) table(merged_df3$mutation_info) merged_df3$mutation_info_categ = ifelse(merged_df3$mutation_info == dr_muts_col, 1, 0) table(merged_df3$mutation_info_categ) cor.test(merged_df3$mutation_info_categ, merged_df3$duet_scaled, method = "spearman") cor.test(merged_df3$mutation_info_categ, merged_df3$foldx_scaled, method = "spearman") cor.test(merged_df3$duet_scaled, merged_df3$foldx_scaled, method = "spearman") cor.test(merged_df3$duet_scaled, merged_df3$af, method = "spearman") # normality test #P-value > 0.05: we can assume the normality. shapiro.test(merged_df3$or_mychisq) shapiro.test(log10(merged_df3$or_mychisq)) shapiro.test(merged_df3$or_kin) shapiro.test(merged_df3$duet_stability_change) shapiro.test(merged_df3$ddg) shapiro.test(merged_df3$duet_scaled) shapiro.test(merged_df3$foldx_scaled) summary(merged_df3$or_kin) summary(log10(merged_df3$or_mychisq)) table(merged_df3$or_mychisq > 1) table(log10(merged_df3$or_mychisq > 1)) # percentage (table(merged_df3$or_mychisq > 1)[[2]]/sum(table(merged_df3$or_mychisq > 1))*100) (table(merged_df3$or_kin > 1)[[2]]/sum(table(merged_df3$or_kin > 1))*100)