moved not required plots to scratch
This commit is contained in:
parent
9597997741
commit
4c345ea9f4
9 changed files with 2 additions and 1660 deletions
|
@ -188,6 +188,6 @@ OutPlot_lig_pos_count = g + geom_bar(aes (alpha = 0.5)
|
||||||
print(OutPlot_lig_pos_count)
|
print(OutPlot_lig_pos_count)
|
||||||
dev.off()
|
dev.off()
|
||||||
########################################################################
|
########################################################################
|
||||||
# end of lig barplots
|
# end of LIG barplots
|
||||||
########################################################################
|
########################################################################
|
||||||
|
|
||||||
|
|
|
@ -186,5 +186,5 @@ OutPlot_pos_count = g + geom_bar(aes (alpha = 0.5)
|
||||||
print(OutPlot_pos_count)
|
print(OutPlot_pos_count)
|
||||||
dev.off()
|
dev.off()
|
||||||
########################################################################
|
########################################################################
|
||||||
# end of Ligand barplots
|
# end of PS barplots
|
||||||
########################################################################
|
########################################################################
|
||||||
|
|
|
@ -1,166 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
#########################################################
|
|
||||||
# TASK: Corr plots for PS and Lig
|
|
||||||
|
|
||||||
# Output: 4 svgs (PS and LIG, one each for df2 and df3)
|
|
||||||
|
|
||||||
#=======================================================================
|
|
||||||
# working dir and loading libraries
|
|
||||||
getwd()
|
|
||||||
setwd("~/git/LSHTM_analysis/scripts/plotting/")
|
|
||||||
getwd()
|
|
||||||
|
|
||||||
#source("combining_dfs_plotting.R")
|
|
||||||
source("my_pairs_panel.R") # with lower panel turned off
|
|
||||||
source("corr_data.R")
|
|
||||||
|
|
||||||
#=======
|
|
||||||
# output
|
|
||||||
#=======
|
|
||||||
# PS
|
|
||||||
corr_ps_all_df2 = "corr_PS_ALL_df2.svg"
|
|
||||||
plot_corr_ps_all_df2 = paste0(plotdir,"/", corr_ps_all_df2)
|
|
||||||
|
|
||||||
corr_ps_all_df3 = "corr_PS_ALL_df3.svg"
|
|
||||||
plot_corr_ps_all_df3 = paste0(plotdir,"/", corr_ps_all_df3)
|
|
||||||
|
|
||||||
# LIG
|
|
||||||
corr_lig_all_df2 = "corr_LIG_ALL_df2.svg"
|
|
||||||
plot_corr_lig_all_df2 = paste0(plotdir,"/", corr_lig_all_df2)
|
|
||||||
|
|
||||||
corr_lig_all_df3 = "corr_LIG_ALL_df3.svg"
|
|
||||||
plot_corr_lig_all_df3 = paste0(plotdir,"/", corr_lig_all_df3)
|
|
||||||
|
|
||||||
####################################################################
|
|
||||||
# end of loading libraries and functions
|
|
||||||
####################################################################
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
# Data for plots
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
|
|
||||||
# PS
|
|
||||||
corr_ps_df2 = corr_ps_df2[-1]
|
|
||||||
corr_ps_df3 = corr_ps_df3[-1]
|
|
||||||
|
|
||||||
# Lig
|
|
||||||
corr_lig_df2 = corr_lig_df2[-1]
|
|
||||||
corr_lig_df3 = corr_lig_df3[-1]
|
|
||||||
|
|
||||||
#---------------------------------------
|
|
||||||
# generate corr PS plot 1: merged_df2
|
|
||||||
#---------------------------------------
|
|
||||||
cat("Corr plot PS DUET with coloured dots:", plot_corr_ps_all_df2)
|
|
||||||
svg(plot_corr_ps_all_df2, width = 30, height = 30)
|
|
||||||
|
|
||||||
OutPlot_ps_df2 = pairs.panels(corr_ps_df2[1:(length(corr_ps_df2)-2)]
|
|
||||||
, method = "spearman" # correlation method
|
|
||||||
, hist.col = "grey" ##00AFBB
|
|
||||||
, density = T # show density plots
|
|
||||||
, ellipses = F # show correlation ellipses
|
|
||||||
, stars = T
|
|
||||||
, rug = F
|
|
||||||
, breaks = "Sturges"
|
|
||||||
, show.points = T
|
|
||||||
, bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_ps_df2$duet_outcome))] # can't use colour as duet and foldx are opposite
|
|
||||||
, pch = 21 # for bg
|
|
||||||
#, pch = 19
|
|
||||||
, jitter = T
|
|
||||||
, alpha = 1
|
|
||||||
#, points(pch = 19, col = c("#f8766d", "#00bfc4"))
|
|
||||||
, cex = 1.8
|
|
||||||
, cex.axis = 2
|
|
||||||
, cex.labels = 2
|
|
||||||
, cex.cor = 1
|
|
||||||
, smooth = F)
|
|
||||||
|
|
||||||
print(OutPlot_ps_df2)
|
|
||||||
dev.off()
|
|
||||||
|
|
||||||
#----------------------------------------------
|
|
||||||
# generate corr PS plot 2: merged_df3
|
|
||||||
#----------------------------------------------
|
|
||||||
cat("Corr plot PS DUET with coloured dots:", plot_corr_ps_all_df3)
|
|
||||||
svg(plot_corr_ps_all_df3, width = 30, height = 30)
|
|
||||||
|
|
||||||
OutPlot_ps_df3 = pairs.panels(corr_ps_df3[1:(length(corr_ps_df3)-2)]
|
|
||||||
, method = "spearman" # correlation method
|
|
||||||
, hist.col = "grey" ##00AFBB
|
|
||||||
, density = T # show density plots
|
|
||||||
, ellipses = F # show correlation ellipses
|
|
||||||
, stars = T
|
|
||||||
, rug = F
|
|
||||||
, breaks = "Sturges"
|
|
||||||
, show.points = T
|
|
||||||
, bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_ps_df3$duet_outcome))] # can't use colour as duet and foldx are opposite
|
|
||||||
, pch = 21 # for bg
|
|
||||||
, cex = 2
|
|
||||||
, cex.axis = 1.6
|
|
||||||
, cex.labels = 2
|
|
||||||
, cex.cor = 1
|
|
||||||
, smooth = F
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
print(OutPlot_ps_df3)
|
|
||||||
dev.off()
|
|
||||||
|
|
||||||
################################################################################################
|
|
||||||
|
|
||||||
|
|
||||||
#---------------------------------------
|
|
||||||
# generate corr lig plot 1: merged_df2
|
|
||||||
#---------------------------------------
|
|
||||||
cat("Corr plot lig DUET with coloured dots:", plot_corr_lig_all_df2)
|
|
||||||
svg(plot_corr_lig_all_df2, width = 30, height = 30)
|
|
||||||
|
|
||||||
OutPlot_lig_df2 = pairs.panels(corr_lig_df2[1:(length(corr_lig_df2)-2)]
|
|
||||||
, method = "spearman" # correlation method
|
|
||||||
, hist.col = "grey" ##00AFBB
|
|
||||||
, density = T # show density plots
|
|
||||||
, ellipses = F # show correlation ellipses
|
|
||||||
, stars = T
|
|
||||||
, rug = F
|
|
||||||
, breaks = "Sturges"
|
|
||||||
, show.points = T
|
|
||||||
, bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_lig_df2$ligand_outcome))] # can't use colour as duet and foldx are opposite
|
|
||||||
, pch = 21 # for bg
|
|
||||||
#, pch = 19
|
|
||||||
, jitter = T
|
|
||||||
, alpha = 1
|
|
||||||
#, points(pch = 19, col = c("#f8766d", "#00bfc4"))
|
|
||||||
, cex = 1.8
|
|
||||||
, cex.axis = 2
|
|
||||||
, cex.labels = 2
|
|
||||||
, cex.cor = 1
|
|
||||||
, smooth = F)
|
|
||||||
|
|
||||||
print(OutPlot_lig_df2)
|
|
||||||
dev.off()
|
|
||||||
|
|
||||||
#----------------------------------------------
|
|
||||||
# generate corr lig plot 2: merged_df3
|
|
||||||
#----------------------------------------------
|
|
||||||
cat("Corr plot lig DUET with coloured dots:", plot_corr_lig_all_df3)
|
|
||||||
svg(plot_corr_lig_all_df3, width = 30, height = 30)
|
|
||||||
|
|
||||||
OutPlot_lig_df3 = pairs.panels(corr_lig_df3[1:(length(corr_lig_df3)-2)]
|
|
||||||
, method = "spearman" # correlation method
|
|
||||||
, hist.col = "grey" ##00AFBB
|
|
||||||
, density = T # show density plots
|
|
||||||
, ellipses = F # show correlation ellipses
|
|
||||||
, stars = T
|
|
||||||
, rug = F
|
|
||||||
, breaks = "Sturges"
|
|
||||||
, show.points = T
|
|
||||||
, bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_lig_df3$ligand_outcome))] # can't use colour as duet and foldx are opposite
|
|
||||||
, pch = 21 # for bg
|
|
||||||
, cex = 2
|
|
||||||
, cex.axis = 1.6
|
|
||||||
, cex.labels = 2
|
|
||||||
, cex.cor = 1
|
|
||||||
, smooth = F
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
print(OutPlot_lig_df3)
|
|
||||||
dev.off()
|
|
|
@ -1,176 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
#########################################################
|
|
||||||
# TASK: Corr plots for PS and Lig
|
|
||||||
|
|
||||||
# Output: 4 svgs (PS and LIG, one each for df2 and df3)
|
|
||||||
|
|
||||||
#=======================================================================
|
|
||||||
# working dir and loading libraries
|
|
||||||
getwd()
|
|
||||||
setwd("~/git/LSHTM_analysis/scripts/plotting/")
|
|
||||||
getwd()
|
|
||||||
|
|
||||||
#source("combining_dfs_plotting.R")
|
|
||||||
source("my_pairs_panel.R") # with lower panel turned off
|
|
||||||
source("corr_data.R")
|
|
||||||
|
|
||||||
#=======
|
|
||||||
# output
|
|
||||||
#=======
|
|
||||||
# PS
|
|
||||||
corrplot_ps_df2 = "corr_PS_df2.svg"
|
|
||||||
plot_corr_ps_df2 = paste0(plotdir,"/", corrplot_ps_df2)
|
|
||||||
|
|
||||||
corrplot_ps_df3 = "corr_PS_df3.svg"
|
|
||||||
plot_corr_ps_df3 = paste0(plotdir,"/", corrplot_ps_df3)
|
|
||||||
|
|
||||||
# LIG
|
|
||||||
corrplot_lig_df2 = "corr_LIG_df2.svg"
|
|
||||||
plot_corr_lig_df2 = paste0(plotdir,"/", corrplot_lig_df2)
|
|
||||||
|
|
||||||
corrplot_lig_df3 = "corr_LIG_df3.svg"
|
|
||||||
plot_corr_lig_df3 = paste0(plotdir,"/", corrplot_lig_df3)
|
|
||||||
|
|
||||||
####################################################################
|
|
||||||
# end of loading libraries and functions
|
|
||||||
####################################################################
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
# Data for plots
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
|
|
||||||
cols_to_drop = c("ASA", "AF_kin")
|
|
||||||
|
|
||||||
|
|
||||||
# PS
|
|
||||||
corr_ps_df2 = corr_ps_df2[!colnames(corr_ps_df2)%in%cols_to_drop]
|
|
||||||
corr_ps_df2 = corr_ps_df2[-1]
|
|
||||||
|
|
||||||
corr_ps_df3 = corr_ps_df3[!colnames(corr_ps_df3)%in%cols_to_drop]
|
|
||||||
corr_ps_df3 = corr_ps_df3[-1]
|
|
||||||
|
|
||||||
|
|
||||||
# Lig
|
|
||||||
corr_lig_df2 = corr_lig_df2[!colnames(corr_lig_df2)%in%cols_to_drop]
|
|
||||||
corr_lig_df2 = corr_lig_df2[-1]
|
|
||||||
|
|
||||||
corr_lig_df3 = corr_lig_df3[!colnames(corr_lig_df3)%in%cols_to_drop]
|
|
||||||
corr_lig_df3 = corr_lig_df3[-1]
|
|
||||||
|
|
||||||
#---------------------------------------
|
|
||||||
# generate corr PS plot 1: merged_df2
|
|
||||||
#---------------------------------------
|
|
||||||
cat("Corr plot PS DUET with coloured dots:", plot_corr_ps_df2)
|
|
||||||
svg(plot_corr_ps_df2, width = 30, height = 25)
|
|
||||||
|
|
||||||
OutPlot_ps_df2 = pairs.panels(corr_ps_df2[1:(length(corr_ps_df2)-2)]
|
|
||||||
, method = "spearman" # correlation method
|
|
||||||
, hist.col = "grey" ##00AFBB
|
|
||||||
, density = T # show density plots
|
|
||||||
, ellipses = F # show correlation ellipses
|
|
||||||
, stars = T
|
|
||||||
, rug = F
|
|
||||||
, breaks = "Sturges"
|
|
||||||
, show.points = T
|
|
||||||
, bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_ps_df2$duet_outcome))] # can't use colour as duet and foldx are opposite
|
|
||||||
, pch = 21 # for bg
|
|
||||||
#, pch = 19
|
|
||||||
, jitter = T
|
|
||||||
, alpha = 1
|
|
||||||
#, points(pch = 19, col = c("#f8766d", "#00bfc4"))
|
|
||||||
, cex = 1.8
|
|
||||||
, cex.axis = 2
|
|
||||||
, cex.labels = 3.8
|
|
||||||
, cex.cor = 1
|
|
||||||
, smooth = F)
|
|
||||||
|
|
||||||
print(OutPlot_ps_df2)
|
|
||||||
dev.off()
|
|
||||||
|
|
||||||
#----------------------------------------------
|
|
||||||
# generate corr PS plot 2: merged_df3
|
|
||||||
#----------------------------------------------
|
|
||||||
cat("Corr plot PS DUET with coloured dots:", plot_corr_ps_df3)
|
|
||||||
svg(plot_corr_ps_df3, width = 30, height = 25)
|
|
||||||
|
|
||||||
OutPlot_ps_df3 = pairs.panels(corr_ps_df3[1:(length(corr_ps_df3)-2)]
|
|
||||||
, method = "spearman" # correlation method
|
|
||||||
, hist.col = "grey" ##00AFBB
|
|
||||||
, density = T # show density plots
|
|
||||||
, ellipses = F # show correlation ellipses
|
|
||||||
, stars = T
|
|
||||||
, rug = F
|
|
||||||
, breaks = "Sturges"
|
|
||||||
, show.points = T
|
|
||||||
, bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_ps_df3$duet_outcome))] # can't use colour as duet and foldx are opposite
|
|
||||||
, pch = 21 # for bg
|
|
||||||
, cex = 3
|
|
||||||
, cex.axis = 1.6
|
|
||||||
, cex.labels = 3.8
|
|
||||||
, cex.cor = 1
|
|
||||||
, smooth = F
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
print(OutPlot_ps_df3)
|
|
||||||
dev.off()
|
|
||||||
|
|
||||||
################################################################################################
|
|
||||||
|
|
||||||
#---------------------------------------
|
|
||||||
# generate corr lig plot 1: merged_df2
|
|
||||||
#---------------------------------------
|
|
||||||
cat("Corr plot lig DUET with coloured dots:", plot_corr_lig_df2)
|
|
||||||
svg(plot_corr_lig_df2, width = 30, height = 25)
|
|
||||||
|
|
||||||
OutPlot_lig_df2 = pairs.panels(corr_lig_df2[1:(length(corr_lig_df2)-2)]
|
|
||||||
, method = "spearman" # correlation method
|
|
||||||
, hist.col = "grey" ##00AFBB
|
|
||||||
, density = T # show density plots
|
|
||||||
, ellipses = F # show correlation ellipses
|
|
||||||
, stars = T
|
|
||||||
, rug = F
|
|
||||||
, breaks = "Sturges"
|
|
||||||
, show.points = T
|
|
||||||
, bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_lig_df2$ligand_outcome))] # can't use colour as duet and foldx are opposite
|
|
||||||
, pch = 21 # for bg
|
|
||||||
#, pch = 19
|
|
||||||
, jitter = T
|
|
||||||
, alpha = 1
|
|
||||||
#, points(pch = 19, col = c("#f8766d", "#00bfc4"))
|
|
||||||
, cex = 1.8
|
|
||||||
, cex.axis = 2
|
|
||||||
, cex.labels = 3.8
|
|
||||||
, cex.cor = 1
|
|
||||||
, smooth = F)
|
|
||||||
|
|
||||||
print(OutPlot_lig_df2)
|
|
||||||
dev.off()
|
|
||||||
|
|
||||||
#----------------------------------------------
|
|
||||||
# generate corr lig plot 2: merged_df3
|
|
||||||
#----------------------------------------------
|
|
||||||
cat("Corr plot lig DUET with coloured dots:", plot_corr_lig_df3)
|
|
||||||
svg(plot_corr_lig_df3, width = 30, height = 25)
|
|
||||||
|
|
||||||
OutPlot_lig_df3 = pairs.panels(corr_lig_df3[1:(length(corr_lig_df3)-2)]
|
|
||||||
, method = "spearman" # correlation method
|
|
||||||
, hist.col = "grey" ##00AFBB
|
|
||||||
, density = T # show density plots
|
|
||||||
, ellipses = F # show correlation ellipses
|
|
||||||
, stars = T
|
|
||||||
, rug = F
|
|
||||||
, breaks = "Sturges"
|
|
||||||
, show.points = T
|
|
||||||
, bg = c("#f8766d", "#00bfc4")[unclass(factor(corr_lig_df3$ligand_outcome))] # can't use colour as duet and foldx are opposite
|
|
||||||
, pch = 21 # for bg
|
|
||||||
, cex = 3
|
|
||||||
, cex.axis = 1.6
|
|
||||||
, cex.labels = 3.8
|
|
||||||
, cex.cor = 1
|
|
||||||
, smooth = F
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
print(OutPlot_lig_df3)
|
|
||||||
dev.off()
|
|
||||||
|
|
|
@ -1,191 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
#########################################################
|
|
||||||
# TASK: Corr plots for PS and Lig
|
|
||||||
|
|
||||||
# Output: 1 svg
|
|
||||||
|
|
||||||
#=======================================================================
|
|
||||||
# working dir and loading libraries
|
|
||||||
getwd()
|
|
||||||
setwd("~/git/LSHTM_analysis/scripts/plotting/")
|
|
||||||
getwd()
|
|
||||||
|
|
||||||
|
|
||||||
source("Header_TT.R")
|
|
||||||
require(cowplot)
|
|
||||||
source("combining_dfs_plotting.R") # FIXME: add extra from other plots here
|
|
||||||
|
|
||||||
# should return the following dfs, directories and variables
|
|
||||||
|
|
||||||
|
|
||||||
#=======
|
|
||||||
# output
|
|
||||||
#=======
|
|
||||||
# can't combine by cowplot because not ggplots
|
|
||||||
#corr_plot_combined = "corr_combined.svg"
|
|
||||||
#plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined)
|
|
||||||
|
|
||||||
# PS foldx
|
|
||||||
corr_foldx = "corr_adjusted_foldx.svg"
|
|
||||||
plot_corr_foldx = paste0(plotdir,"/", corr_foldx)
|
|
||||||
|
|
||||||
####################################################################
|
|
||||||
# end of loading libraries and functions #
|
|
||||||
########################################################################
|
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
df_ps = merged_df3
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
|
|
||||||
rm( merged_df2, merged_df2_comp, merged_df2_lig
|
|
||||||
, merged_df2_comp_lig
|
|
||||||
, merged_df3_comp, merged_df3_comp_lig
|
|
||||||
, my_df_u, my_df_u_lig)
|
|
||||||
|
|
||||||
########################################################################
|
|
||||||
# end of data extraction and cleaning for plots #
|
|
||||||
########################################################################
|
|
||||||
|
|
||||||
#===========================
|
|
||||||
# Data for Correlation plots:foldx
|
|
||||||
#===========================
|
|
||||||
#============================
|
|
||||||
# adding foldx scaled values
|
|
||||||
# scale data b/w -1 and 1
|
|
||||||
#============================
|
|
||||||
n = which(colnames(df_ps) == "ddg"); n
|
|
||||||
|
|
||||||
my_min = min(df_ps[,n]); my_min
|
|
||||||
my_max = max(df_ps[,n]); my_max
|
|
||||||
|
|
||||||
df_ps$foldx_scaled = ifelse(df_ps[,n] < 0
|
|
||||||
, df_ps[,n]/abs(my_min)
|
|
||||||
, df_ps[,n]/my_max)
|
|
||||||
# sanity check
|
|
||||||
my_min = min(df_ps$foldx_scaled); my_min
|
|
||||||
my_max = max(df_ps$foldx_scaled); my_max
|
|
||||||
|
|
||||||
if (my_min == -1 && my_max == 1){
|
|
||||||
cat("PASS: foldx ddg successfully scaled b/w -1 and 1"
|
|
||||||
, "\nProceeding with assigning foldx outcome category")
|
|
||||||
}else{
|
|
||||||
cat("FAIL: could not scale foldx ddg values"
|
|
||||||
, "Aborting!")
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#================================
|
|
||||||
# adding foldx outcome category
|
|
||||||
# ddg<0 = "Stabilising" (-ve)
|
|
||||||
#=================================
|
|
||||||
|
|
||||||
c1 = table(df_ps$ddg < 0)
|
|
||||||
df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising")
|
|
||||||
c2 = table(df_ps$ddg < 0)
|
|
||||||
|
|
||||||
if ( all(c1 == c2) ){
|
|
||||||
cat("PASS: foldx outcome successfully created")
|
|
||||||
}else{
|
|
||||||
cat("FAIL: foldx outcome could not be created. Aborting!")
|
|
||||||
# R has no exit(); signal a real error so the script halts with a message
# that names the actual failure instead of "could not find function".
stop("foldx outcome could not be created", call. = FALSE)
|
|
||||||
}
|
|
||||||
|
|
||||||
table(df_ps$foldx_outcome)
|
|
||||||
|
|
||||||
#======================
|
|
||||||
# adding log cols
|
|
||||||
#======================
|
|
||||||
df_ps$log10_or_mychisq = log10(df_ps$or_mychisq)
|
|
||||||
df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher)
|
|
||||||
|
|
||||||
df_ps$log10_or_kin = log10(df_ps$or_kin)
|
|
||||||
df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin)
|
|
||||||
|
|
||||||
# subset data to generate pairwise correlations
|
|
||||||
cols_to_select_foldx = c("foldx_scaled"
|
|
||||||
|
|
||||||
, "duet_scaled"
|
|
||||||
|
|
||||||
, "log10_or_mychisq"
|
|
||||||
, "neglog_pval_fisher"
|
|
||||||
|
|
||||||
, "log10_or_kin"
|
|
||||||
, "neglog_pwald_kin"
|
|
||||||
|
|
||||||
, "af"
|
|
||||||
|
|
||||||
, "foldx_outcome"
|
|
||||||
, drug)
|
|
||||||
|
|
||||||
corr_data_foldx = df_ps[, cols_to_select_foldx]
|
|
||||||
|
|
||||||
dim(corr_data_foldx)
|
|
||||||
|
|
||||||
#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic
|
|
||||||
#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic
|
|
||||||
|
|
||||||
# assign nice colnames (for display)
|
|
||||||
my_corr_colnames_foldx = c("Foldx"
|
|
||||||
|
|
||||||
,"DUET"
|
|
||||||
|
|
||||||
, "Log(OR)"
|
|
||||||
, "-Log(P)"
|
|
||||||
|
|
||||||
, "Log(OR adjusted)"
|
|
||||||
, "-Log(P wald)"
|
|
||||||
|
|
||||||
, "AF"
|
|
||||||
|
|
||||||
, "foldx_outcome"
|
|
||||||
, drug)
|
|
||||||
|
|
||||||
length(my_corr_colnames_foldx)
|
|
||||||
|
|
||||||
colnames(corr_data_foldx)
|
|
||||||
colnames(corr_data_foldx) <- my_corr_colnames_foldx
|
|
||||||
colnames(corr_data_foldx)
|
|
||||||
|
|
||||||
#-----------------
|
|
||||||
# generate corr foldx plot
|
|
||||||
#-----------------
|
|
||||||
start = 1
|
|
||||||
end = which(colnames(corr_data_foldx) == drug); end # should be the last column
|
|
||||||
offset = 1
|
|
||||||
|
|
||||||
my_corr_foldx = corr_data_foldx[start:(end-offset)]
|
|
||||||
head(my_corr_foldx)
|
|
||||||
|
|
||||||
#my_cols = c("#f8766d", "#00bfc4")
|
|
||||||
# deep blue :#007d85
|
|
||||||
# deep red: #ae301e
|
|
||||||
|
|
||||||
cat("Corr plot foldx:", plot_corr_foldx)
|
|
||||||
svg(plot_corr_foldx, width = 15, height = 15)
|
|
||||||
|
|
||||||
OutPlot_foldx= pairs.panels(my_corr_foldx[1:(length(my_corr_foldx)-1)]
|
|
||||||
, method = "spearman" # correlation method
|
|
||||||
, hist.col = "grey" ##00AFBB
|
|
||||||
, density = TRUE # show density plots
|
|
||||||
, ellipses = F # show correlation ellipses
|
|
||||||
, stars = T
|
|
||||||
, rug = F
|
|
||||||
, breaks = "Sturges"
|
|
||||||
, show.points = T
|
|
||||||
, bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_foldx$foldx_outcome))]
|
|
||||||
, pch = 21
|
|
||||||
, jitter = T
|
|
||||||
#, alpha = .05
|
|
||||||
#, points(pch = 19, col = c("#f8766d", "#00bfc4"))
|
|
||||||
, cex = 3
|
|
||||||
, cex.axis = 2.5
|
|
||||||
, cex.labels = 2.1
|
|
||||||
, cex.cor = 1
|
|
||||||
, smooth = F
|
|
||||||
)
|
|
||||||
|
|
||||||
print(OutPlot_foldx)
|
|
||||||
dev.off()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,289 +0,0 @@
|
||||||
|
|
||||||
|
|
||||||
#########################################################
|
|
||||||
# 1: Installing and loading required packages
|
|
||||||
#########################################################
|
|
||||||
|
|
||||||
#source("../Header_TT.R")
|
|
||||||
# Install qqman only when it is missing, so the script does not reinstall
# the package (and require network access) on every run.
if (!requireNamespace("qqman", quietly = TRUE)) install.packages("qqman")
|
|
||||||
library(qqman)
|
|
||||||
|
|
||||||
source("combining_dfs_plotting.R")
|
|
||||||
#mcsm_data: raw file, 225, 15
|
|
||||||
#merged_df2 = 2201, 35
|
|
||||||
#merged_df3 = 205, 35 ("Can't trust non-numerical params")
|
|
||||||
|
|
||||||
#===============================================
|
|
||||||
# PLOTS: DUET vs GWAS: non-numerical
|
|
||||||
# lineage, country_code, etc
|
|
||||||
# merged_df2: 1592, 35
|
|
||||||
#===============================================
|
|
||||||
|
|
||||||
#########################
|
|
||||||
# Data for plot
|
|
||||||
#########################
|
|
||||||
df = merged_df2
|
|
||||||
#df = merged_df2_comp
|
|
||||||
|
|
||||||
|
|
||||||
#========================
|
|
||||||
# Plot 1a: Lineage barplot
|
|
||||||
# x = lineage y = No of samples
|
|
||||||
# col = Lineage
|
|
||||||
# fill = lineage
|
|
||||||
#========================
|
|
||||||
table(df$lineage)
|
|
||||||
|
|
||||||
# subset only lineages1-4
|
|
||||||
sel_lineages = c("lineage1"
|
|
||||||
, "lineage2"
|
|
||||||
, "lineage3"
|
|
||||||
, "lineage4"
|
|
||||||
#, "lineage5"
|
|
||||||
#, "lineage6"
|
|
||||||
#, "lineage7"
|
|
||||||
)
|
|
||||||
|
|
||||||
# uncomment as necessary
|
|
||||||
df_lin = subset(df, subset = lineage %in% sel_lineages )
|
|
||||||
table(df_lin$lineage)
|
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
# REASSIGNMENT
|
|
||||||
df <- df_lin
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
# REASSIGNMENT
|
|
||||||
df2 = df
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
df2 = df2%>%
|
|
||||||
add_count(country_code)
|
|
||||||
|
|
||||||
str(df2$country_code); str(df2$n)
|
|
||||||
|
|
||||||
n = which(colnames(df2) == "n")
|
|
||||||
colnames(df2)[n] = "count_country"
|
|
||||||
|
|
||||||
table(df2$count_country>100 & df$country_code!= "")
|
|
||||||
df3 = subset(df2, df2$count_country>100 & df2$country_code != "")
|
|
||||||
|
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
# REASSIGNMENT
|
|
||||||
df = df3
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
|
|
||||||
sample = sum(table(unique(df$id))); sample
|
|
||||||
table(df$country_code)
|
|
||||||
tab = sum(table(df$country_code)); tab
|
|
||||||
|
|
||||||
|
|
||||||
View(table(df$country_code))
|
|
||||||
View(t1)
|
|
||||||
|
|
||||||
############## begin plot
|
|
||||||
g = ggplot(df, aes(x = lineage))
|
|
||||||
g + geom_bar(aes(fill = lineage)) +
|
|
||||||
theme( axis.text.x = element_text(size = 13
|
|
||||||
, angle = 90
|
|
||||||
, hjust = 1
|
|
||||||
, vjust = 0.4)
|
|
||||||
, axis.text.y = element_text(size = 15
|
|
||||||
, angle = 0
|
|
||||||
, hjust = 1
|
|
||||||
, vjust = 0)
|
|
||||||
, axis.title.x = element_text(size = 15)
|
|
||||||
, axis.title.y = element_text(size = 15) ) +
|
|
||||||
labs(title = "Lineage"
|
|
||||||
, x = "Lineage"
|
|
||||||
, y = "No of samples")
|
|
||||||
|
|
||||||
|
|
||||||
#========================
|
|
||||||
# Plot 2: DUET, lineage, country_code and or_mychisq
|
|
||||||
# x = lineage y = DUET
|
|
||||||
# col = Lineage
|
|
||||||
# fill = country_code
|
|
||||||
#========================
|
|
||||||
### begin plot
|
|
||||||
g = ggplot(df, aes(x = country_code
|
|
||||||
, y = duet_scaled))
|
|
||||||
g + geom_point(aes(col = lineage
|
|
||||||
, size = or_mychisq)) +
|
|
||||||
theme(axis.text.x = element_text(size = 13
|
|
||||||
, angle = 90
|
|
||||||
, hjust = 1
|
|
||||||
, vjust = 0.4)
|
|
||||||
, axis.text.y = element_text(size = 15
|
|
||||||
, angle = 0
|
|
||||||
, hjust = 1
|
|
||||||
, vjust = 0)
|
|
||||||
, axis.title.x = element_text(size = 15)
|
|
||||||
, axis.title.y = element_text(size = 15) ) +
|
|
||||||
labs(title = "DUET, country_code, lineage, or_mychisq"
|
|
||||||
, x = "Lineage"
|
|
||||||
, y = "DUET (PS)")
|
|
||||||
|
|
||||||
|
|
||||||
#############
|
|
||||||
#========================
|
|
||||||
# Plot 3: DUET, lineage, or_mychisq
|
|
||||||
# x = lineage y = DUET
|
|
||||||
# col = Lineage
|
|
||||||
# fill = country_code
|
|
||||||
#========================
|
|
||||||
|
|
||||||
### begin plot
|
|
||||||
table(df$lineage)
|
|
||||||
|
|
||||||
g = ggplot(df_lin, aes(x = lineage
|
|
||||||
, y = duet_scaled))
|
|
||||||
g + geom_point(aes(col = lineage
|
|
||||||
, size = or_mychisq)) +
|
|
||||||
theme(axis.text.x = element_text(size = 13
|
|
||||||
, angle = 90
|
|
||||||
, hjust = 1
|
|
||||||
, vjust = 0.4)
|
|
||||||
, axis.text.y = element_text(size = 15
|
|
||||||
, angle = 0
|
|
||||||
, hjust = 1
|
|
||||||
, vjust = 0)
|
|
||||||
, axis.title.x = element_text(size = 15)
|
|
||||||
, axis.title.y = element_text(size = 15) ) +
|
|
||||||
labs(title = "DUET, lineage, or_mychisq"
|
|
||||||
, x = "Lineage"
|
|
||||||
, y = "DUET (PS)")
|
|
||||||
|
|
||||||
#========================
|
|
||||||
# Plot 4-5: Distributions
|
|
||||||
# ggridges
|
|
||||||
#========================
|
|
||||||
|
|
||||||
|
|
||||||
#==================================================
|
|
||||||
my_ats = 15 # axis text size
|
|
||||||
my_als = 20 # axis label size
|
|
||||||
|
|
||||||
my_labels = c('Lineage 1', 'Lineage 2', 'Lineage 3', 'Lineage 4'
|
|
||||||
#, 'Lineage 5', 'Lineage 6', 'Lineage 7'
|
|
||||||
)
|
|
||||||
names(my_labels) = c('lineage1', 'lineage2', 'lineage3', 'lineage4'
|
|
||||||
# , 'lineage5', 'lineage6', 'lineage7'
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
#========================
|
|
||||||
# Plot 4: Distribution
|
|
||||||
# x = duet_scaled
|
|
||||||
# y = country
|
|
||||||
# fill = country_code
|
|
||||||
# facet = lineage
|
|
||||||
#========================
|
|
||||||
# works neatly!
|
|
||||||
|
|
||||||
p1 = ggplot(df, aes(x = duet_scaled
|
|
||||||
, y = country_code))+
|
|
||||||
|
|
||||||
#printFile=geom_density_ridges_gradient(
|
|
||||||
geom_density_ridges_gradient(aes(fill = country_code)
|
|
||||||
, jittered_points = TRUE
|
|
||||||
, scale = 3
|
|
||||||
, size = 0.3 ) +
|
|
||||||
facet_wrap( ~lineage
|
|
||||||
, scales = "free"
|
|
||||||
, switch = 'x'
|
|
||||||
, labeller = labeller(lineage = my_labels)
|
|
||||||
) +
|
|
||||||
coord_cartesian( xlim = c(-1, 1)) +
|
|
||||||
#scale_fill_gradientn(colours = c("#f8766d", "white", "#00bfc4")
|
|
||||||
# , name = "DUET" ) +
|
|
||||||
theme(axis.text.x = element_text(size = my_ats
|
|
||||||
, angle = 90
|
|
||||||
, hjust = 1
|
|
||||||
, vjust = 0.4)
|
|
||||||
|
|
||||||
#, axis.text.y = element_blank()
|
|
||||||
, axis.title.x = element_blank()
|
|
||||||
, axis.title.y = element_blank()
|
|
||||||
, axis.ticks.y = element_blank()
|
|
||||||
, plot.title = element_blank()
|
|
||||||
, strip.text = element_text(size = my_als)
|
|
||||||
, legend.text = element_text(size = my_als-5)
|
|
||||||
, legend.title = element_text(size = my_als)
|
|
||||||
)
|
|
||||||
|
|
||||||
p1
|
|
||||||
|
|
||||||
|
|
||||||
#========================
|
|
||||||
# Plot 5: Distribution
|
|
||||||
# x = duet_scaled
|
|
||||||
# y = country_code
|
|
||||||
# fill = lineage
|
|
||||||
# facet = NONE
|
|
||||||
#========================
|
|
||||||
# no facet wrap
|
|
||||||
|
|
||||||
p2 = ggplot(df, aes(x = duet_scaled
|
|
||||||
, y = country_code))+
|
|
||||||
|
|
||||||
geom_density_ridges_gradient(aes(fill = factor(lineage))
|
|
||||||
, scale = 3
|
|
||||||
, size = 0.3 ) +
|
|
||||||
coord_cartesian( xlim = c(-1, 1)) +
|
|
||||||
#scale_fill_gradientn(colours = c("#f8766d", "white", "#00bfc4")
|
|
||||||
# , name = "DUET" ) +
|
|
||||||
#scale_fill_continuous(colours = c("darkgreen", "pink", "orange", "brown")
|
|
||||||
# , name = "lineage" ) +
|
|
||||||
theme(axis.text.x = element_text(size = my_ats
|
|
||||||
, angle = 90
|
|
||||||
, hjust = 1
|
|
||||||
, vjust = 0.4)
|
|
||||||
|
|
||||||
#, axis.text.y = element_blank()
|
|
||||||
, axis.title.x = element_blank()
|
|
||||||
, axis.title.y = element_blank()
|
|
||||||
, axis.ticks.y = element_blank()
|
|
||||||
, plot.title = element_blank()
|
|
||||||
, strip.text = element_text(size = my_als)
|
|
||||||
, legend.text = element_text(size = my_als-5)
|
|
||||||
, legend.title = element_text(size = my_als)
|
|
||||||
)
|
|
||||||
|
|
||||||
p2
|
|
||||||
|
|
||||||
|
|
||||||
#===============
|
|
||||||
# lineage only
|
|
||||||
#================
|
|
||||||
#svg(plot_lineage_duet)
|
|
||||||
p3 = ggplot(df, aes(x = duet_scaled
|
|
||||||
, y = duet_outcome))+
|
|
||||||
geom_density_ridges_gradient(aes(fill = ..x..)
|
|
||||||
, jittered_points = TRUE
|
|
||||||
, scale = 3
|
|
||||||
, size = 0.3 ) +
|
|
||||||
facet_wrap( ~lineage
|
|
||||||
, scales = "free"
|
|
||||||
#, switch = 'x'
|
|
||||||
, labeller = labeller(lineage = my_labels) ) +
|
|
||||||
coord_cartesian( xlim = c(-1, 1)) +
|
|
||||||
scale_fill_gradientn(colours = c("#f8766d", "white", "#00bfc4")
|
|
||||||
, name = "DUET" ) +
|
|
||||||
theme(axis.text.x = element_text(size = my_ats
|
|
||||||
, angle = 90
|
|
||||||
, hjust = 1
|
|
||||||
, vjust = 0.4)
|
|
||||||
|
|
||||||
, axis.text.y = element_blank()
|
|
||||||
, axis.title.x = element_blank()
|
|
||||||
, axis.title.y = element_blank()
|
|
||||||
, axis.ticks.y = element_blank()
|
|
||||||
, plot.title = element_blank()
|
|
||||||
, strip.text = element_text(size = my_als)
|
|
||||||
, legend.text = element_text(size = my_als-5)
|
|
||||||
, legend.title = element_text(size = my_als)
|
|
||||||
)
|
|
||||||
|
|
||||||
print(p3)
|
|
|
@ -1,426 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
#########################################################
|
|
||||||
# TASK: prediction
|
|
||||||
|
|
||||||
#=======================================================================
|
|
||||||
# working dir and loading libraries
|
|
||||||
getwd()
|
|
||||||
setwd("~/git/LSHTM_analysis/scripts/")
|
|
||||||
getwd()
|
|
||||||
|
|
||||||
source("plotting/combining_dfs_plotting.R")
|
|
||||||
|
|
||||||
####################################################################
|
|
||||||
# end of loading libraries and functions
|
|
||||||
####################################################################
|
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
# ps
|
|
||||||
# Encode the outcome as 1 when mutation_info equals dr_muts_col and 0
# otherwise, for each of the four merged data frames. The table() calls show
# the class counts before and after encoding.
table(merged_df2[["mutation_info"]])
merged_df2[["mutation_info_labels"]] <- ifelse(
  merged_df2[["mutation_info"]] == dr_muts_col, 1, 0
)
table(merged_df2[["mutation_info_labels"]])

table(merged_df3[["mutation_info"]])
merged_df3[["mutation_info_labels"]] <- ifelse(
  merged_df3[["mutation_info"]] == dr_muts_col, 1, 0
)
table(merged_df3[["mutation_info_labels"]])

# lig
table(merged_df2_lig[["mutation_info"]])
merged_df2_lig[["mutation_info_labels"]] <- ifelse(
  merged_df2_lig[["mutation_info"]] == dr_muts_col, 1, 0
)
table(merged_df2_lig[["mutation_info_labels"]])

table(merged_df3_lig[["mutation_info"]])
merged_df3_lig[["mutation_info_labels"]] <- ifelse(
  merged_df3_lig[["mutation_info"]] == dr_muts_col, 1, 0
)
table(merged_df3_lig[["mutation_info_labels"]])
|
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
# Single-predictor logistic regression of the binary label on or_mychisq,
# followed by its summary and the number of observations used in the fit.
model_ind <- glm(
  mutation_info_labels ~ or_mychisq,
  data = merged_df2,
  family = "binomial"
)
summary(model_ind)
nobs(model_ind)
|
|
||||||
|
|
||||||
#=============
|
|
||||||
# try loop
|
|
||||||
#=============
|
|
||||||
# Predictors to try one at a time against mutation_info_labels.
my_ivs <- c(
  "or_mychisq", "or_kin", "pval_fisher", "af",
  "duet_stability_change", "duet_scaled"
)

# Fit and print one single-predictor logistic model per variable.
for (i in my_ivs) {
  cat("===============================\n")
  cat(i)
  cat("\n===============================\n")

  # Build `mutation_info_labels ~ <i>` from the column name. This replaces
  # eval(parse(text = i)) inside the formula, so the printed coefficient is
  # labelled with the actual variable name.
  model_formula <- reformulate(i, response = "mutation_info_labels")

  print(summary(glm(model_formula, data = merged_df2, family = "binomial")))
}
|
|
||||||
###############################################################################
|
|
||||||
#========================================
|
|
||||||
# merged_df2: UNadjusted,loop
|
|
||||||
#========================================
|
|
||||||
# Predictors fitted one at a time against mutation_info_labels (merged_df2).
my_ivs <- c(
  "or_mychisq", "or_kin", "pval_fisher", "af",
  "ligand_distance",
  "rsa",
  "rd_values",
  "kd_values",
  "duet_stability_change",
  "duet_scaled",
  "duet_outcome",
  "ddg",
  "foldx_scaled",
  "foldx_outcome"
)

# One result row per predictor; rows are bound once after the loop rather
# than growing a data frame with rbind() on every iteration.
logistic_rows <- vector("list", length(my_ivs))

for (k in seq_along(my_ivs)) {
  i <- my_ivs[[k]]
  print(i)

  # Build `mutation_info_labels ~ <i>` from the column name (replaces
  # eval(parse(text = i)) in the formula).
  model_formula <- reformulate(i, response = "mutation_info_labels")
  model <- glm(model_formula, data = merged_df2, family = "binomial")

  var_name <- i
  number_samples <- nobs(model)

  # Compute the coefficient table once. Row 2 is the first term after the
  # intercept; if a predictor expands to several coefficients, only that
  # first one is recorded.
  coef_table <- summary(model)$coefficients
  beta_logistic <- coef_table[2, 1]
  se_logistic <- coef_table[2, 2]
  zval_logistic <- coef_table[2, 3]
  pval_logistic <- coef_table[2, 4]
  or_logistic <- exp(beta_logistic)

  # confint() limits for the same term, exponentiated to the odds-ratio scale
  ci_mod <- exp(confint(model))[2, ]
  ci_lower_logistic <- ci_mod[["2.5 %"]]
  ci_upper_logistic <- ci_mod[["97.5 %"]]

  print(c(var_name, beta_logistic, or_logistic, pval_logistic, se_logistic, zval_logistic, ci_mod))

  row_df <- data.frame(
    var_name = var_name,
    number_samples = number_samples,
    beta = beta_logistic,
    odds_ratio = or_logistic,
    pvalue = pval_logistic,
    se = se_logistic,
    zvalue = zval_logistic,
    ci_lower = ci_lower_logistic,
    ci_upper = ci_upper_logistic,
    stringsAsFactors = FALSE
  )

  print(row_df)
  logistic_rows[[k]] <- row_df
}

ps_logistic_df2 <- do.call(rbind, logistic_rows)

#--------------------
# formatting df
#--------------------
ps_logistic_df2$data_source <- "df2"
ps_logistic_df2$model <- "unadjusted"

ps_logistic_df2$odds_ratio <- round(ps_logistic_df2$odds_ratio, 2)
ps_logistic_df2$ci_lower <- round(ps_logistic_df2$ci_lower, 2)
ps_logistic_df2$ci_upper <- round(ps_logistic_df2$ci_upper, 2)

# adding pvalue_signif: "." for exactly 0.05, stars for smaller p-values,
# "ns" for everything else (conditions are tested in the order written)
ps_logistic_df2 <- dplyr::mutate(
  ps_logistic_df2,
  pvalue_signif = dplyr::case_when(
    pvalue == 0.05 ~ ".",
    pvalue <= 0.0001 ~ "****",
    pvalue <= 0.001 ~ "***",
    pvalue <= 0.01 ~ "**",
    pvalue < 0.05 ~ "*",
    TRUE ~ "ns"
  )
)

# rearranging columns
colnames(ps_logistic_df2)
ps_logistic_df2_o <- ps_logistic_df2[c(
  "var_name",
  "number_samples",
  "model",
  "odds_ratio",
  "pvalue",
  "pvalue_signif",
  "beta",
  "se",
  "zvalue",
  "ci_lower",
  "ci_upper",
  "data_source"
)]
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
#========================================
|
|
||||||
# merged_df2: adjusted (single multivariable model, no loop)
|
|
||||||
#========================================
|
|
||||||
#model_adjusted = glm(mutation_info_labels ~ or_mychisq + rsa + rd_values + kd_values +
|
|
||||||
# duet_stability_change + duet_scaled + duet_outcome + ddg + foldx_scaled + foldx_outcome
|
|
||||||
# , data = merged_df2
|
|
||||||
# , family = "binomial")
|
|
||||||
|
|
||||||
# Multivariable (adjusted) logistic regression on merged_df2.
model_adjusted_df2 <- glm(
  mutation_info_labels ~ or_mychisq + or_kin + rd_values + kd_values +
    ligand_distance + duet_stability_change,
  data = merged_df2,
  family = "binomial"
)
summary(model_adjusted_df2)

# Coefficient names as a one-column data frame (not referenced again in this
# section; kept so the object remains available).
var_names_df <- as.data.frame(names(model_adjusted_df2$coefficients))
names(var_names_df) <- c("var_name")

# Confidence limits on the odds-ratio scale, rounded to 2 decimals
ci_mod <- exp(confint(model_adjusted_df2))
ci_mod_df <- as.data.frame(ci_mod)
names(ci_mod_df) <- c("ci_lower", "ci_upper")
ci_mod_df$ci_lower <- round(ci_mod_df$ci_lower, 2)
ci_mod_df$ci_upper <- round(ci_mod_df$ci_upper, 2)

# Coefficient table plus derived and bookkeeping columns
estimates_df <- as.data.frame(summary(model_adjusted_df2)$coefficients)
names(estimates_df) <- c("beta", "se", "zvalue", "pvalue")
estimates_df$odds_ratio <- round(exp(estimates_df$beta), 2)
number_samples <- nobs(model_adjusted_df2)
estimates_df$number_samples <- number_samples
estimates_df$data_source <- "df2"
estimates_df$model <- "adjusted"

# The estimates and the intervals must describe the same terms in the same
# order. Stop here on a mismatch instead of carrying on with an undefined
# (or stale) result object.
if (!identical(rownames(estimates_df), rownames(ci_mod_df))) {
  stop("FAIL: rownames of estimates and confidence intervals do not match",
       call. = FALSE)
}
cat("PASS: rownames match. Preparing to merge...")
ps_logistic_adjusted_df2 <- merge(estimates_df, ci_mod_df,
                                  by = "row.names", all = TRUE)
names(ps_logistic_adjusted_df2)

colnames(ps_logistic_adjusted_df2)[1] <- c("var_name")

# Drop the intercept row. Guard the negative index: subsetting with an empty
# index vector would otherwise remove every row.
d1 <- which(ps_logistic_adjusted_df2$var_name == "(Intercept)")
if (length(d1) > 0) {
  ps_logistic_adjusted_df2 <- ps_logistic_adjusted_df2[-d1, ]
}

# adding pvalue_signif: "." for exactly 0.05, stars for smaller p-values,
# "ns" for everything else (conditions are tested in the order written)
ps_logistic_adjusted_df2 <- dplyr::mutate(
  ps_logistic_adjusted_df2,
  pvalue_signif = dplyr::case_when(
    pvalue == 0.05 ~ ".",
    pvalue <= 0.0001 ~ "****",
    pvalue <= 0.001 ~ "***",
    pvalue <= 0.01 ~ "**",
    pvalue < 0.05 ~ "*",
    TRUE ~ "ns"
  )
)

# rearranging columns
colnames(ps_logistic_adjusted_df2)
ps_logistic_adjusted_df2_o <- ps_logistic_adjusted_df2[c(
  "var_name",
  "number_samples",
  "model",
  "odds_ratio",
  "pvalue",
  "pvalue_signif",
  "beta",
  "se",
  "zvalue",
  "ci_lower",
  "ci_upper",
  "data_source"
)]
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
#========================================
|
|
||||||
# merged_df3: UNadjusted,loop
|
|
||||||
#========================================
|
|
||||||
|
|
||||||
# Predictors fitted one at a time against mutation_info_labels (merged_df3).
my_ivs <- c(
  "or_mychisq", "or_kin", "pval_fisher", "af",
  "ligand_distance",
  "rsa",
  "rd_values",
  "kd_values",
  "duet_stability_change",
  "duet_scaled",
  "duet_outcome",
  "ddg",
  "foldx_scaled",
  "foldx_outcome"
)

# One result row per predictor; rows are bound once after the loop rather
# than growing a data frame with rbind() on every iteration.
logistic_rows <- vector("list", length(my_ivs))

for (k in seq_along(my_ivs)) {
  i <- my_ivs[[k]]
  print(i)

  # Build `mutation_info_labels ~ <i>` from the column name (replaces
  # eval(parse(text = i)) in the formula).
  model_formula <- reformulate(i, response = "mutation_info_labels")
  model <- glm(model_formula, data = merged_df3, family = "binomial")

  var_name <- i
  number_samples <- nobs(model)

  # Compute the coefficient table once. Row 2 is the first term after the
  # intercept; if a predictor expands to several coefficients, only that
  # first one is recorded.
  coef_table <- summary(model)$coefficients
  beta_logistic <- coef_table[2, 1]
  se_logistic <- coef_table[2, 2]
  zval_logistic <- coef_table[2, 3]
  pval_logistic <- coef_table[2, 4]
  or_logistic <- exp(beta_logistic)

  # confint() limits for the same term, exponentiated to the odds-ratio scale
  ci_mod <- exp(confint(model))[2, ]
  ci_lower_logistic <- ci_mod[["2.5 %"]]
  ci_upper_logistic <- ci_mod[["97.5 %"]]

  print(c(var_name, beta_logistic, or_logistic, pval_logistic, se_logistic, zval_logistic, ci_mod))

  row_df <- data.frame(
    var_name = var_name,
    number_samples = number_samples,
    beta = beta_logistic,
    odds_ratio = or_logistic,
    pvalue = pval_logistic,
    se = se_logistic,
    zvalue = zval_logistic,
    ci_lower = ci_lower_logistic,
    ci_upper = ci_upper_logistic,
    stringsAsFactors = FALSE
  )

  print(row_df)
  logistic_rows[[k]] <- row_df
}

ps_logistic_df3 <- do.call(rbind, logistic_rows)

#--------------------
# formatting df
#--------------------
ps_logistic_df3$data_source <- "df3"
ps_logistic_df3$model <- "unadjusted"

ps_logistic_df3$odds_ratio <- round(ps_logistic_df3$odds_ratio, 2)
ps_logistic_df3$ci_lower <- round(ps_logistic_df3$ci_lower, 2)
ps_logistic_df3$ci_upper <- round(ps_logistic_df3$ci_upper, 2)

# adding pvalue_signif: "." for exactly 0.05, stars for smaller p-values,
# "ns" for everything else (conditions are tested in the order written)
ps_logistic_df3 <- dplyr::mutate(
  ps_logistic_df3,
  pvalue_signif = dplyr::case_when(
    pvalue == 0.05 ~ ".",
    pvalue <= 0.0001 ~ "****",
    pvalue <= 0.001 ~ "***",
    pvalue <= 0.01 ~ "**",
    pvalue < 0.05 ~ "*",
    TRUE ~ "ns"
  )
)

# rearranging columns
ps_logistic_df3_o <- ps_logistic_df3[c(
  "var_name",
  "number_samples",
  "model",
  "odds_ratio",
  "pvalue",
  "pvalue_signif",
  "beta",
  "se",
  "zvalue",
  "ci_lower",
  "ci_upper",
  "data_source"
)]
|
|
||||||
|
|
||||||
#========================================
|
|
||||||
# merged_df3: adjusted (single multivariable model, no loop)
|
|
||||||
#========================================
|
|
||||||
#model_adjusted_df3 = glm(mutation_info_labels ~ or_mychisq + rsa + rd_values + kd_values +
|
|
||||||
# duet_stability_change + duet_scaled + duet_outcome + ddg + foldx_scaled + foldx_outcome
|
|
||||||
# , data = merged_df3
|
|
||||||
# , family = "binomial")
|
|
||||||
|
|
||||||
# Multivariable (adjusted) logistic regression on merged_df3.
model_adjusted_df3 <- glm(
  mutation_info_labels ~ rd_values + ligand_distance + duet_stability_change,
  data = merged_df3,
  family = "binomial"
)
summary(model_adjusted_df3)

# Coefficient names as a one-column data frame (not referenced again in this
# section; kept so the object remains available).
var_names_df <- as.data.frame(names(model_adjusted_df3$coefficients))
names(var_names_df) <- c("var_name")

# Confidence limits on the odds-ratio scale, rounded to 2 decimals
ci_mod <- exp(confint(model_adjusted_df3))
ci_mod_df <- as.data.frame(ci_mod)
names(ci_mod_df) <- c("ci_lower", "ci_upper")
ci_mod_df$ci_lower <- round(ci_mod_df$ci_lower, 2)
ci_mod_df$ci_upper <- round(ci_mod_df$ci_upper, 2)

# Coefficient table plus derived and bookkeeping columns
estimates_df <- as.data.frame(summary(model_adjusted_df3)$coefficients)
names(estimates_df) <- c("beta", "se", "zvalue", "pvalue")
estimates_df$odds_ratio <- round(exp(estimates_df$beta), 2)
number_samples <- nobs(model_adjusted_df3)
estimates_df$number_samples <- number_samples
estimates_df$data_source <- "df3"
estimates_df$model <- "adjusted"

# The estimates and the intervals must describe the same terms in the same
# order. Stop here on a mismatch instead of carrying on with an undefined
# (or stale) result object.
if (!identical(rownames(estimates_df), rownames(ci_mod_df))) {
  stop("FAIL: rownames of estimates and confidence intervals do not match",
       call. = FALSE)
}
cat("PASS: rownames match. Preparing to merge...")
ps_logistic_adjusted_df3 <- merge(estimates_df, ci_mod_df,
                                  by = "row.names", all = TRUE)
names(ps_logistic_adjusted_df3)

colnames(ps_logistic_adjusted_df3)[1] <- c("var_name")

# Drop the intercept row. Guard the negative index: subsetting with an empty
# index vector would otherwise remove every row.
d2 <- which(ps_logistic_adjusted_df3$var_name == "(Intercept)")
if (length(d2) > 0) {
  ps_logistic_adjusted_df3 <- ps_logistic_adjusted_df3[-d2, ]
}

# adding pvalue_signif: "." for exactly 0.05, stars for smaller p-values,
# "ns" for everything else (conditions are tested in the order written)
ps_logistic_adjusted_df3 <- dplyr::mutate(
  ps_logistic_adjusted_df3,
  pvalue_signif = dplyr::case_when(
    pvalue == 0.05 ~ ".",
    pvalue <= 0.0001 ~ "****",
    pvalue <= 0.001 ~ "***",
    pvalue <= 0.01 ~ "**",
    pvalue < 0.05 ~ "*",
    TRUE ~ "ns"
  )
)

# rearranging columns
colnames(ps_logistic_adjusted_df3)
ps_logistic_adjusted_df3_o <- ps_logistic_adjusted_df3[c(
  "var_name",
  "number_samples",
  "model",
  "odds_ratio",
  "pvalue",
  "pvalue_signif",
  "beta",
  "se",
  "zvalue",
  "ci_lower",
  "ci_upper",
  "data_source"
)]
|
|
||||||
|
|
||||||
#-------------
|
|
||||||
# lm
|
|
||||||
#-------------
|
|
||||||
|
|
||||||
# Linear model of or_kin on structural and stability covariates plus the
# binary mutation label (merged_df3), with its summary.
model_lm <- lm(
  or_kin ~ rsa + rd_values + duet_stability_change + ddg + mutation_info_labels,
  data = merged_df3
)
summary(model_lm)

# Linear model of or_mychisq on the binary mutation label alone (merged_df2).
model_lm1 <- lm(or_mychisq ~ mutation_info_labels, data = merged_df2)
summary(model_lm1)
|
|
|
@ -1,203 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
#########################################################
|
|
||||||
# TASK: prediction lig
|
|
||||||
|
|
||||||
#=======================================================================
|
|
||||||
# working dir and loading libraries
|
|
||||||
getwd()
|
|
||||||
setwd("~/git/LSHTM_analysis/scripts/")
|
|
||||||
getwd()
|
|
||||||
|
|
||||||
source("plotting/combining_dfs_plotting.R")
|
|
||||||
|
|
||||||
#=======
|
|
||||||
# output
|
|
||||||
#=======
|
|
||||||
lig_unadjusted = paste0(outdir, "/results/", tolower(gene), "_unadjusted_logistic_LIG.csv")
|
|
||||||
lig_adjusted = paste0(outdir, "/results/", tolower(gene), "_adjusted_logistic_LIG.csv")
|
|
||||||
|
|
||||||
####################################################################
|
|
||||||
# end of loading libraries and functions
|
|
||||||
####################################################################
|
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
# lig
|
|
||||||
# Encode the outcome as 1 when mutation_info equals dr_muts_col and 0
# otherwise; table() shows the class counts before and after encoding.
table(merged_df3_lig[["mutation_info"]])
merged_df3_lig[["mutation_info_labels"]] <- ifelse(
  merged_df3_lig[["mutation_info"]] == dr_muts_col, 1, 0
)
table(merged_df3_lig[["mutation_info_labels"]])
|
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
#========================================
|
|
||||||
# merged_df3_lig: UNadjusted,loop
|
|
||||||
#========================================
|
|
||||||
# Predictors fitted one at a time against mutation_info_labels
# (merged_df3_lig).
my_ivs <- c(
  "or_mychisq", "or_kin", "pval_fisher", "af",
  # "ligand_distance",
  "rsa",
  "rd_values",
  "kd_values",
  "ligand_affinity_change",
  "affinity_scaled",
  "ligand_outcome"
)

# One result row per predictor; rows are bound once after the loop rather
# than growing a data frame with rbind() on every iteration.
logistic_rows <- vector("list", length(my_ivs))

for (k in seq_along(my_ivs)) {
  i <- my_ivs[[k]]
  print(i)

  # Build `mutation_info_labels ~ <i>` from the column name (replaces
  # eval(parse(text = i)) in the formula).
  model_formula <- reformulate(i, response = "mutation_info_labels")
  model_lig <- glm(model_formula, data = merged_df3_lig, family = "binomial")

  var_name <- i
  number_samples <- nobs(model_lig)

  # Compute the coefficient table once. Row 2 is the first term after the
  # intercept; if a predictor expands to several coefficients, only that
  # first one is recorded.
  coef_table <- summary(model_lig)$coefficients
  beta_logistic <- coef_table[2, 1]
  se_logistic <- coef_table[2, 2]
  zval_logistic <- coef_table[2, 3]
  pval_logistic <- coef_table[2, 4]
  or_logistic <- exp(beta_logistic)

  # confint() limits for the same term, exponentiated to the odds-ratio scale
  ci_mod <- exp(confint(model_lig))[2, ]
  ci_lower_logistic <- ci_mod[["2.5 %"]]
  ci_upper_logistic <- ci_mod[["97.5 %"]]

  print(c(var_name, beta_logistic, or_logistic, pval_logistic, se_logistic, zval_logistic, ci_mod))

  row_df <- data.frame(
    var_name = var_name,
    number_samples = number_samples,
    beta = beta_logistic,
    odds_ratio = or_logistic,
    pvalue = pval_logistic,
    se = se_logistic,
    zvalue = zval_logistic,
    ci_lower = ci_lower_logistic,
    ci_upper = ci_upper_logistic,
    stringsAsFactors = FALSE
  )

  print(row_df)
  logistic_rows[[k]] <- row_df
}

lig_logistic_df3 <- do.call(rbind, logistic_rows)

#--------------------
# formatting df
#--------------------
lig_logistic_df3$data_source <- "df3_lig"
# NOTE(review): this column is named "model_lig" here, whereas the PS tables
# in this chunk use "model"; kept as-is because it is written to the CSV.
lig_logistic_df3$model_lig <- "unadjusted"

lig_logistic_df3$odds_ratio <- round(lig_logistic_df3$odds_ratio, 2)
lig_logistic_df3$ci_lower <- round(lig_logistic_df3$ci_lower, 2)
lig_logistic_df3$ci_upper <- round(lig_logistic_df3$ci_upper, 2)

# adding pvalue_signif: "." for exactly 0.05, stars for smaller p-values,
# "ns" for everything else (conditions are tested in the order written)
lig_logistic_df3 <- dplyr::mutate(
  lig_logistic_df3,
  pvalue_signif = dplyr::case_when(
    pvalue == 0.05 ~ ".",
    pvalue <= 0.0001 ~ "****",
    pvalue <= 0.001 ~ "***",
    pvalue <= 0.01 ~ "**",
    pvalue < 0.05 ~ "*",
    TRUE ~ "ns"
  )
)

# rearranging columns
lig_logistic_df3_o <- lig_logistic_df3[c(
  "var_name",
  "number_samples",
  "model_lig",
  "odds_ratio",
  "pvalue",
  "pvalue_signif",
  "beta",
  "se",
  "zvalue",
  "ci_lower",
  "ci_upper",
  "data_source"
)]

# writing file
write.csv(lig_logistic_df3_o, lig_unadjusted, row.names = FALSE)
|
|
||||||
|
|
||||||
#========================================
|
|
||||||
# merged_df3_lig: adjusted (single multivariable model, no loop)
|
|
||||||
#========================================
|
|
||||||
#model_lig_adjusted_df3 = glm(mutation_info_labels ~ or_mychisq + rsa + rd_values + kd_values +
|
|
||||||
# ligand_affinity_change + affinity_scaled + ligand_outcome
|
|
||||||
# , data = merged_df3_lig
|
|
||||||
# , family = "binomial")
|
|
||||||
|
|
||||||
# Multivariable (adjusted) logistic regression on merged_df3_lig.
model_lig_adjusted_df3 <- glm(
  mutation_info_labels ~ rd_values + ligand_affinity_change,
  data = merged_df3_lig,
  family = "binomial"
)
summary(model_lig_adjusted_df3)

# Coefficient names as a one-column data frame (not referenced again in this
# section; kept so the object remains available).
var_names_df <- as.data.frame(names(model_lig_adjusted_df3$coefficients))
names(var_names_df) <- c("var_name")

# Confidence limits on the odds-ratio scale, rounded to 2 decimals
ci_mod <- exp(confint(model_lig_adjusted_df3))
ci_mod_df <- as.data.frame(ci_mod)
names(ci_mod_df) <- c("ci_lower", "ci_upper")
ci_mod_df$ci_lower <- round(ci_mod_df$ci_lower, 2)
ci_mod_df$ci_upper <- round(ci_mod_df$ci_upper, 2)

# Coefficient table plus derived and bookkeeping columns
estimates_df <- as.data.frame(summary(model_lig_adjusted_df3)$coefficients)
names(estimates_df) <- c("beta", "se", "zvalue", "pvalue")
estimates_df$odds_ratio <- round(exp(estimates_df$beta), 2)
number_samples <- nobs(model_lig_adjusted_df3)
estimates_df$number_samples <- number_samples
estimates_df$data_source <- "df3_lig"
estimates_df$model_lig <- "adjusted"

# The estimates and the intervals must describe the same terms in the same
# order. Stop here on a mismatch instead of carrying on with an undefined
# (or stale) result object.
if (!identical(rownames(estimates_df), rownames(ci_mod_df))) {
  stop("FAIL: rownames of estimates and confidence intervals do not match",
       call. = FALSE)
}
cat("PASS: rownames match. Preparing to merge...")
lig_logistic_adjusted_df3 <- merge(estimates_df, ci_mod_df,
                                   by = "row.names", all = TRUE)
names(lig_logistic_adjusted_df3)

colnames(lig_logistic_adjusted_df3)[1] <- c("var_name")

# Drop the intercept row. Guard the negative index: subsetting with an empty
# index vector would otherwise remove every row.
d2 <- which(lig_logistic_adjusted_df3$var_name == "(Intercept)")
if (length(d2) > 0) {
  lig_logistic_adjusted_df3 <- lig_logistic_adjusted_df3[-d2, ]
}

# adding pvalue_signif: "." for exactly 0.05, stars for smaller p-values,
# "ns" for everything else (conditions are tested in the order written)
lig_logistic_adjusted_df3 <- dplyr::mutate(
  lig_logistic_adjusted_df3,
  pvalue_signif = dplyr::case_when(
    pvalue == 0.05 ~ ".",
    pvalue <= 0.0001 ~ "****",
    pvalue <= 0.001 ~ "***",
    pvalue <= 0.01 ~ "**",
    pvalue < 0.05 ~ "*",
    TRUE ~ "ns"
  )
)

# rearranging columns
colnames(lig_logistic_adjusted_df3)
lig_logistic_adjusted_df3_o <- lig_logistic_adjusted_df3[c(
  "var_name",
  "number_samples",
  "model_lig",
  "odds_ratio",
  "pvalue",
  "pvalue_signif",
  "beta",
  "se",
  "zvalue",
  "ci_lower",
  "ci_upper",
  "data_source"
)]

# writing file
write.csv(lig_logistic_adjusted_df3_o, lig_adjusted, row.names = FALSE)
|
|
|
@ -1,207 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
#########################################################
|
|
||||||
# TASK: prediction_ps
|
|
||||||
|
|
||||||
#=======================================================================
|
|
||||||
# working dir and loading libraries
|
|
||||||
getwd()
|
|
||||||
setwd("~/git/LSHTM_analysis/scripts/")
|
|
||||||
getwd()
|
|
||||||
|
|
||||||
source("plotting/combining_dfs_plotting.R")
|
|
||||||
|
|
||||||
#=======
|
|
||||||
# output
|
|
||||||
#=======
|
|
||||||
ps_unadjusted = paste0(outdir, "/results/", tolower(gene), "_unadjusted_logistic_PS.csv")
|
|
||||||
ps_adjusted = paste0(outdir, "/results/", tolower(gene), "_adjusted_logistic_PS.csv")
|
|
||||||
|
|
||||||
####################################################################
|
|
||||||
# end of loading libraries and functions
|
|
||||||
####################################################################
|
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
# ps
|
|
||||||
# Encode the outcome as 1 when mutation_info equals dr_muts_col and 0
# otherwise; table() shows the class counts before and after encoding.
table(merged_df3[["mutation_info"]])
merged_df3[["mutation_info_labels"]] <- ifelse(
  merged_df3[["mutation_info"]] == dr_muts_col, 1, 0
)
table(merged_df3[["mutation_info_labels"]])
|
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
#========================================
|
|
||||||
# merged_df3: UNadjusted,loop
|
|
||||||
#========================================
|
|
||||||
# Predictors fitted one at a time against mutation_info_labels (merged_df3).
my_ivs <- c(
  "or_mychisq", "or_kin", "pval_fisher", "af",
  "ligand_distance",
  "rsa",
  "rd_values",
  "kd_values",
  "duet_stability_change",
  "duet_scaled",
  "duet_outcome",
  "ddg",
  "foldx_scaled",
  "foldx_outcome"
)

# One result row per predictor; rows are bound once after the loop rather
# than growing a data frame with rbind() on every iteration.
logistic_rows <- vector("list", length(my_ivs))

for (k in seq_along(my_ivs)) {
  i <- my_ivs[[k]]
  print(i)

  # Build `mutation_info_labels ~ <i>` from the column name (replaces
  # eval(parse(text = i)) in the formula).
  model_formula <- reformulate(i, response = "mutation_info_labels")
  model <- glm(model_formula, data = merged_df3, family = "binomial")

  var_name <- i
  number_samples <- nobs(model)

  # Compute the coefficient table once. Row 2 is the first term after the
  # intercept; if a predictor expands to several coefficients, only that
  # first one is recorded.
  coef_table <- summary(model)$coefficients
  beta_logistic <- coef_table[2, 1]
  se_logistic <- coef_table[2, 2]
  zval_logistic <- coef_table[2, 3]
  pval_logistic <- coef_table[2, 4]
  or_logistic <- exp(beta_logistic)

  # confint() limits for the same term, exponentiated to the odds-ratio scale
  ci_mod <- exp(confint(model))[2, ]
  ci_lower_logistic <- ci_mod[["2.5 %"]]
  ci_upper_logistic <- ci_mod[["97.5 %"]]

  print(c(var_name, beta_logistic, or_logistic, pval_logistic, se_logistic, zval_logistic, ci_mod))

  row_df <- data.frame(
    var_name = var_name,
    number_samples = number_samples,
    beta = beta_logistic,
    odds_ratio = or_logistic,
    pvalue = pval_logistic,
    se = se_logistic,
    zvalue = zval_logistic,
    ci_lower = ci_lower_logistic,
    ci_upper = ci_upper_logistic,
    stringsAsFactors = FALSE
  )

  print(row_df)
  logistic_rows[[k]] <- row_df
}

ps_logistic_df3 <- do.call(rbind, logistic_rows)

#--------------------
# formatting df
#--------------------
ps_logistic_df3$data_source <- "df3"
ps_logistic_df3$model <- "unadjusted"

ps_logistic_df3$odds_ratio <- round(ps_logistic_df3$odds_ratio, 2)
ps_logistic_df3$ci_lower <- round(ps_logistic_df3$ci_lower, 2)
ps_logistic_df3$ci_upper <- round(ps_logistic_df3$ci_upper, 2)

# adding pvalue_signif: "." for exactly 0.05, stars for smaller p-values,
# "ns" for everything else (conditions are tested in the order written)
ps_logistic_df3 <- dplyr::mutate(
  ps_logistic_df3,
  pvalue_signif = dplyr::case_when(
    pvalue == 0.05 ~ ".",
    pvalue <= 0.0001 ~ "****",
    pvalue <= 0.001 ~ "***",
    pvalue <= 0.01 ~ "**",
    pvalue < 0.05 ~ "*",
    TRUE ~ "ns"
  )
)

# rearranging columns
ps_logistic_df3_o <- ps_logistic_df3[c(
  "var_name",
  "number_samples",
  "model",
  "odds_ratio",
  "pvalue",
  "pvalue_signif",
  "beta",
  "se",
  "zvalue",
  "ci_lower",
  "ci_upper",
  "data_source"
)]

# writing file
write.csv(ps_logistic_df3_o, ps_unadjusted, row.names = FALSE)
|
|
||||||
|
|
||||||
#========================================
|
|
||||||
# merged_df3: adjusted (single multivariable model, no loop)
|
|
||||||
#========================================
|
|
||||||
#model_adjusted_df3 = glm(mutation_info_labels ~ or_mychisq + rsa + rd_values + kd_values +
|
|
||||||
# duet_stability_change + duet_scaled + duet_outcome + ddg + foldx_scaled + foldx_outcome
|
|
||||||
# , data = merged_df3
|
|
||||||
# , family = "binomial")
|
|
||||||
|
|
||||||
# Logistic regression of mutation_info_labels on three predictors fitted
# together (the "adjusted" model), followed by its printed summary
model_adjusted_df3 <- glm(
  mutation_info_labels ~ rd_values + ligand_distance + duet_stability_change,
  data = merged_df3,
  family = "binomial"
)
summary(model_adjusted_df3)
|
|
||||||
|
|
||||||
# One-column data frame holding the names of the fitted coefficients
var_names_df <- setNames(
  as.data.frame(names(model_adjusted_df3$coefficients)),
  "var_name"
)

# Confidence intervals exponentiated onto the odds-ratio scale;
# ci_mod keeps the unrounded values, ci_mod_df the 2-decimal version
ci_mod <- exp(confint(model_adjusted_df3))
ci_mod_df <- setNames(
  as.data.frame(round(ci_mod, 2)),
  c("ci_lower", "ci_upper")
)
|
|
||||||
|
|
||||||
|
|
||||||
# Coefficient table of the adjusted model, with short column names
estimates_df <- as.data.frame(coef(summary(model_adjusted_df3)))
colnames(estimates_df) <- c("beta", "se", "zvalue", "pvalue")

# Odds ratio = exp(beta), rounded to 2 decimal places
estimates_df[["odds_ratio"]] <- round(exp(estimates_df[["beta"]]), 2)

# Number of observations used in the fit, repeated on every row
number_samples <- nobs(model_adjusted_df3)
estimates_df[["number_samples"]] <- number_samples

# Labels identifying the dataset and the model type
estimates_df[["data_source"]] <- "df3"
estimates_df[["model"]] <- "adjusted"
|
|
||||||
|
|
||||||
# Combine the coefficient estimates with their confidence intervals.
# Both tables are built from model_adjusted_df3, so their row names (the model
# terms) are expected to be identical. Stop with a clear message if they are
# not, rather than continuing with a missing or stale data frame.
if (!identical(rownames(estimates_df), rownames(ci_mod_df))) {
  stop("FAIL: rownames of estimates_df and ci_mod_df do not match. Cannot merge."
       , call. = FALSE)
}
cat("PASS: rownames match. Preparing to merge...")
ps_logistic_adjusted_df3 <- merge(estimates_df, ci_mod_df
                                  , by = "row.names"
                                  , all = TRUE)

# merge(by = "row.names") returns the former row names as the first column
colnames(ps_logistic_adjusted_df3)[1] <- "var_name"

# Inspect the merged column names (done after the merge, once the object exists)
names(ps_logistic_adjusted_df3)

# Drop the intercept row. The guard matters: with an empty index,
# df[-integer(0), ] would select zero rows and wipe the whole table.
d2 <- which(ps_logistic_adjusted_df3$var_name == "(Intercept)")
if (length(d2) > 0) {
  ps_logistic_adjusted_df3 <- ps_logistic_adjusted_df3[-d2, ]
}
|
|
||||||
|
|
||||||
# adding pvalue_signif
# Start from a copy of the raw p-values and show its structure
ps_logistic_adjusted_df3[["pvalue_signif"]] <- ps_logistic_adjusted_df3[["pvalue"]]
str(ps_logistic_adjusted_df3[["pvalue_signif"]])

# Recode the copied p-values into significance labels; conditions are checked
# top to bottom and the first match wins, anything unmatched becomes "ns"
ps_logistic_adjusted_df3 <- dplyr::mutate(
  ps_logistic_adjusted_df3,
  pvalue_signif = case_when(
    pvalue_signif == 0.05 ~ ".",
    pvalue_signif <= 0.0001 ~ "****",
    pvalue_signif <= 0.001 ~ "***",
    pvalue_signif <= 0.01 ~ "**",
    pvalue_signif < 0.05 ~ "*",
    TRUE ~ "ns"
  )
)
|
|
||||||
# rearranging columns
colnames(ps_logistic_adjusted_df3)

# Select the columns in the fixed order used for the exported table
ps_logistic_adjusted_df3_o <- ps_logistic_adjusted_df3[c("var_name"
                                                         , "number_samples"
                                                         , "model"
                                                         , "odds_ratio"
                                                         , "pvalue"
                                                         , "pvalue_signif"
                                                         , "beta"
                                                         , "se"
                                                         , "zvalue"
                                                         , "ci_lower"
                                                         , "ci_upper"
                                                         , "data_source")]

# writing file
# ps_adjusted is defined outside this section (presumably the output path).
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned,
# which would silently switch row names back on.
write.csv(ps_logistic_adjusted_df3_o, ps_adjusted, row.names = FALSE)
|
|
||||||
###############################################################################
|
|
Loading…
Add table
Add a link
Reference in a new issue