Moved plots that are no longer required to scratch

This commit is contained in:
Tanushree Tunstall 2020-10-06 09:52:54 +01:00
parent 9597997741
commit 4c345ea9f4
9 changed files with 2 additions and 1660 deletions

View file

@ -188,6 +188,6 @@ OutPlot_lig_pos_count = g + geom_bar(aes (alpha = 0.5)
print(OutPlot_lig_pos_count) print(OutPlot_lig_pos_count)
dev.off() dev.off()
######################################################################## ########################################################################
# end of lig barplots # end of LIG barplots
######################################################################## ########################################################################

View file

@ -186,5 +186,5 @@ OutPlot_pos_count = g + geom_bar(aes (alpha = 0.5)
print(OutPlot_pos_count) print(OutPlot_pos_count)
dev.off() dev.off()
######################################################################## ########################################################################
# end of Ligand barplots # end of PS barplots
######################################################################## ########################################################################

View file

@ -1,166 +0,0 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Corr plots for PS and Lig
# Output: 4 svgs (PS and LIG, one each for df2 and df3)
#=======================================================================
# working dir and loading libraries
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting/")
getwd()
#source("combining_dfs_plotting.R")
source("my_pairs_panel.R") # with lower panel turned off
# NOTE(review): plotdir and the corr_* data frames used below are not defined
# in this script; presumably they come from corr_data.R -- confirm
source("corr_data.R")

#=======
# output
#=======
# PS
corr_ps_all_df2 <- "corr_PS_ALL_df2.svg"
plot_corr_ps_all_df2 <- paste0(plotdir, "/", corr_ps_all_df2)

corr_ps_all_df3 <- "corr_PS_ALL_df3.svg"
plot_corr_ps_all_df3 <- paste0(plotdir, "/", corr_ps_all_df3)

# LIG
corr_lig_all_df2 <- "corr_LIG_ALL_df2.svg"
plot_corr_lig_all_df2 <- paste0(plotdir, "/", corr_lig_all_df2)

corr_lig_all_df3 <- "corr_LIG_ALL_df3.svg"
plot_corr_lig_all_df3 <- paste0(plotdir, "/", corr_lig_all_df3)

####################################################################
# end of loading libraries and functions
####################################################################

# Draw one pairs.panels correlation plot into an svg file.
#   corr_df : data frame to plot; its last two columns are left out of the panels
#   outcome : vector whose factor levels pick the point fill colour
#             (1st level "#f8766d", 2nd level "#00bfc4")
#   outfile : path of the svg file to create
#   label   : text printed on the console in front of the file path
#   width, height : svg size, passed to svg()
#   ...     : plot-specific arguments forwarded to pairs.panels()
#             (e.g. cex, cex.axis, cex.labels, jitter, alpha)
# Returns (invisibly) whatever pairs.panels() returns.
plot_corr_svg <- function(corr_df, outcome, outfile, label
                          , width = 30, height = 30, ...) {
  # need at least one column left once the last two are dropped
  stopifnot(length(corr_df) > 2)

  cat(label, outfile)
  svg(outfile, width = width, height = height)
  # close the device even when plotting fails, so no device is left open
  on.exit(dev.off(), add = TRUE)

  out_plot <- pairs.panels(corr_df[seq_len(length(corr_df) - 2)]
                           , method = "spearman" # correlation method
                           , hist.col = "grey" ##00AFBB
                           , density = TRUE # show density plots
                           , ellipses = FALSE # show correlation ellipses
                           , stars = TRUE
                           , rug = FALSE
                           , breaks = "Sturges"
                           , show.points = TRUE
                           # can't use colour as duet and foldx are opposite
                           , bg = c("#f8766d", "#00bfc4")[unclass(factor(outcome))]
                           , pch = 21 # for bg
                           , cex.cor = 1
                           , smooth = FALSE
                           , ...)
  print(out_plot)
  invisible(out_plot)
}

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Data for plots
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# drop the first column of every data frame
# PS
corr_ps_df2 <- corr_ps_df2[-1]
corr_ps_df3 <- corr_ps_df3[-1]

# Lig
corr_lig_df2 <- corr_lig_df2[-1]
corr_lig_df3 <- corr_lig_df3[-1]

#---------------------------------------
# generate corr PS plot 1: merged_df2
#---------------------------------------
OutPlot_ps_df2 <- plot_corr_svg(corr_ps_df2
                                , corr_ps_df2$duet_outcome
                                , plot_corr_ps_all_df2
                                , "Corr plot PS DUET with coloured dots:"
                                , jitter = TRUE
                                , alpha = 1
                                , cex = 1.8
                                , cex.axis = 2
                                , cex.labels = 2)

#----------------------------------------------
# generate corr PS plot 2: merged_df3
#----------------------------------------------
OutPlot_ps_df3 <- plot_corr_svg(corr_ps_df3
                                , corr_ps_df3$duet_outcome
                                , plot_corr_ps_all_df3
                                , "Corr plot PS DUET with coloured dots:"
                                , cex = 2
                                , cex.axis = 1.6
                                , cex.labels = 2)

################################################################################################
#---------------------------------------
# generate corr lig plot 1: merged_df2
#---------------------------------------
OutPlot_lig_df2 <- plot_corr_svg(corr_lig_df2
                                 , corr_lig_df2$ligand_outcome
                                 , plot_corr_lig_all_df2
                                 , "Corr plot lig DUET with coloured dots:"
                                 , jitter = TRUE
                                 , alpha = 1
                                 , cex = 1.8
                                 , cex.axis = 2
                                 , cex.labels = 2)

#----------------------------------------------
# generate corr lig plot 2: merged_df3
#----------------------------------------------
OutPlot_lig_df3 <- plot_corr_svg(corr_lig_df3
                                 , corr_lig_df3$ligand_outcome
                                 , plot_corr_lig_all_df3
                                 , "Corr plot lig DUET with coloured dots:"
                                 , cex = 2
                                 , cex.axis = 1.6
                                 , cex.labels = 2)

View file

@ -1,176 +0,0 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Corr plots for PS and Lig (ASA and AF_kin columns removed)
# Output: 4 svgs (PS and LIG, one each for df2 and df3)
#=======================================================================
# working dir and loading libraries
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting/")
getwd()
#source("combining_dfs_plotting.R")
source("my_pairs_panel.R") # with lower panel turned off
# NOTE(review): plotdir and the corr_* data frames used below are not defined
# in this script; presumably they come from corr_data.R -- confirm
source("corr_data.R")

#=======
# output
#=======
# PS
corrplot_ps_df2 <- "corr_PS_df2.svg"
plot_corr_ps_df2 <- paste0(plotdir, "/", corrplot_ps_df2)

corrplot_ps_df3 <- "corr_PS_df3.svg"
plot_corr_ps_df3 <- paste0(plotdir, "/", corrplot_ps_df3)

# LIG
corrplot_lig_df2 <- "corr_LIG_df2.svg"
plot_corr_lig_df2 <- paste0(plotdir, "/", corrplot_lig_df2)

corrplot_lig_df3 <- "corr_LIG_df3.svg"
plot_corr_lig_df3 <- paste0(plotdir, "/", corrplot_lig_df3)

####################################################################
# end of loading libraries and functions
####################################################################

# Remove the unwanted columns, then the first remaining column.
#   corr_df : data frame to trim
#   drop    : character vector of column names to remove
trim_corr_df <- function(corr_df, drop) {
  kept <- corr_df[!colnames(corr_df) %in% drop]
  kept[-1]
}

# Draw one pairs.panels correlation plot into an svg file.
#   corr_df : data frame to plot; its last two columns are left out of the panels
#   outcome : vector whose factor levels pick the point fill colour
#             (1st level "#f8766d", 2nd level "#00bfc4")
#   outfile : path of the svg file to create
#   label   : text printed on the console in front of the file path
#   width, height : svg size, passed to svg()
#   ...     : plot-specific arguments forwarded to pairs.panels()
#             (e.g. cex, cex.axis, cex.labels, jitter, alpha)
# Returns (invisibly) whatever pairs.panels() returns.
plot_corr_svg <- function(corr_df, outcome, outfile, label
                          , width = 30, height = 25, ...) {
  # need at least one column left once the last two are dropped
  stopifnot(length(corr_df) > 2)

  cat(label, outfile)
  svg(outfile, width = width, height = height)
  # close the device even when plotting fails, so no device is left open
  on.exit(dev.off(), add = TRUE)

  out_plot <- pairs.panels(corr_df[seq_len(length(corr_df) - 2)]
                           , method = "spearman" # correlation method
                           , hist.col = "grey" ##00AFBB
                           , density = TRUE # show density plots
                           , ellipses = FALSE # show correlation ellipses
                           , stars = TRUE
                           , rug = FALSE
                           , breaks = "Sturges"
                           , show.points = TRUE
                           # can't use colour as duet and foldx are opposite
                           , bg = c("#f8766d", "#00bfc4")[unclass(factor(outcome))]
                           , pch = 21 # for bg
                           , cex.cor = 1
                           , smooth = FALSE
                           , ...)
  print(out_plot)
  invisible(out_plot)
}

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Data for plots
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
cols_to_drop <- c("ASA", "AF_kin")

# PS
corr_ps_df2 <- trim_corr_df(corr_ps_df2, cols_to_drop)
corr_ps_df3 <- trim_corr_df(corr_ps_df3, cols_to_drop)

# Lig
corr_lig_df2 <- trim_corr_df(corr_lig_df2, cols_to_drop)
corr_lig_df3 <- trim_corr_df(corr_lig_df3, cols_to_drop)

#---------------------------------------
# generate corr PS plot 1: merged_df2
#---------------------------------------
OutPlot_ps_df2 <- plot_corr_svg(corr_ps_df2
                                , corr_ps_df2$duet_outcome
                                , plot_corr_ps_df2
                                , "Corr plot PS DUET with coloured dots:"
                                , jitter = TRUE
                                , alpha = 1
                                , cex = 1.8
                                , cex.axis = 2
                                , cex.labels = 3.8)

#----------------------------------------------
# generate corr PS plot 2: merged_df3
#----------------------------------------------
OutPlot_ps_df3 <- plot_corr_svg(corr_ps_df3
                                , corr_ps_df3$duet_outcome
                                , plot_corr_ps_df3
                                , "Corr plot PS DUET with coloured dots:"
                                , cex = 3
                                , cex.axis = 1.6
                                , cex.labels = 3.8)

################################################################################################
#---------------------------------------
# generate corr lig plot 1: merged_df2
#---------------------------------------
OutPlot_lig_df2 <- plot_corr_svg(corr_lig_df2
                                 , corr_lig_df2$ligand_outcome
                                 , plot_corr_lig_df2
                                 , "Corr plot lig DUET with coloured dots:"
                                 , jitter = TRUE
                                 , alpha = 1
                                 , cex = 1.8
                                 , cex.axis = 2
                                 , cex.labels = 3.8)

#----------------------------------------------
# generate corr lig plot 2: merged_df3
#----------------------------------------------
OutPlot_lig_df3 <- plot_corr_svg(corr_lig_df3
                                 , corr_lig_df3$ligand_outcome
                                 , plot_corr_lig_df3
                                 , "Corr plot lig DUET with coloured dots:"
                                 , cex = 3
                                 , cex.axis = 1.6
                                 , cex.labels = 3.8)

View file

@ -1,191 +0,0 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Corr plots for PS and Lig
# Output: 1 svg
#=======================================================================
# working dir and loading libraries
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting/")
getwd()

source("Header_TT.R")
# library() stops with an error when cowplot is missing;
# require() would only return FALSE and let the script carry on
library(cowplot)

source("combining_dfs_plotting.R") # FIXME: add extra from other plots here
# should return the following dfs, directories and variables
# NOTE(review): plotdir, merged_df3 and the objects removed below are not
# defined in this script; presumably they come from the sourced files -- confirm

#=======
# output
#=======
# can't combine by cowplot because not ggplots
#corr_plot_combined = "corr_combined.svg"
#plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined)

# PS foldx
corr_foldx <- "corr_adjusted_foldx.svg"
plot_corr_foldx <- paste0(plotdir, "/", corr_foldx)

####################################################################
# end of loading libraries and functions #
########################################################################

#%%%%%%%%%%%%%%%%%%%%%%%%%
df_ps <- merged_df3
#%%%%%%%%%%%%%%%%%%%%%%%%%

# drop the data frames this script does not use
rm(merged_df2, merged_df2_comp, merged_df2_lig
   , merged_df2_comp_lig
   , merged_df3_comp, merged_df3_comp_lig
   , my_df_u, my_df_u_lig)

########################################################################
# end of data extraction and cleaning for plots #
########################################################################
#===========================
# Data for Correlation plots:foldx
#===========================

#============================
# adding foldx scaled values
# scale data b/w -1 and 1:
# negative ddg is divided by |min|, the rest by max
#============================
n <- which(colnames(df_ps) == "ddg"); n
stopifnot(length(n) == 1) # exactly one ddg column is expected

ddg_values <- df_ps[[n]]
my_min <- min(ddg_values); my_min
my_max <- max(ddg_values); my_max

df_ps$foldx_scaled <- ifelse(ddg_values < 0
                             , ddg_values / abs(my_min)
                             , ddg_values / my_max)

# sanity check: the scaled values must span exactly [-1, 1]
my_min <- min(df_ps$foldx_scaled); my_min
my_max <- max(df_ps$foldx_scaled); my_max

# all.equal() tolerates floating point noise and is not TRUE for NA,
# so missing ddg values end up in the FAIL branch as well
if (isTRUE(all.equal(my_min, -1)) && isTRUE(all.equal(my_max, 1))) {
  cat("PASS: foldx ddg successfully scaled b/w -1 and 1"
      , "\nProceeding with assigning foldx outcome category")
} else {
  # really abort: previously the message was printed and the script carried on
  stop("FAIL: could not scale foldx ddg values. Aborting!", call. = FALSE)
}

#================================
# adding foldx outcome category
# ddg<0 = "Stabilising" (-ve)
#=================================
c1 <- table(df_ps$ddg < 0)

df_ps$foldx_outcome <- ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising")

# count from the NEW column, so the comparison actually checks the assignment
c2 <- table(df_ps$foldx_outcome == "Stabilising")

if (isTRUE(all.equal(as.vector(c1), as.vector(c2)))) {
  cat("PASS: foldx outcome successfully created")
} else {
  # exit() is not an R function; stop() is the way to abort
  stop("FAIL: foldx outcome could not be created. Aborting!", call. = FALSE)
}

table(df_ps$foldx_outcome)
#======================
# adding log cols
#======================
# log10 of the odds ratios, -log10 of the p-values
df_ps$log10_or_mychisq <- with(df_ps, log10(or_mychisq))
df_ps$neglog_pval_fisher <- with(df_ps, -log10(pval_fisher))

df_ps$log10_or_kin <- with(df_ps, log10(or_kin))
df_ps$neglog_pwald_kin <- with(df_ps, -log10(pwald_kin))

# columns for the pairwise correlations, written as
# <column in df_ps> = <nice name for display>
foldx_display_names <- c(foldx_scaled = "Foldx"
                         , duet_scaled = "DUET"
                         , log10_or_mychisq = "Log(OR)"
                         , neglog_pval_fisher = "-Log(P)"
                         , log10_or_kin = "Log(OR adjusted)"
                         , neglog_pwald_kin = "-Log(P wald)"
                         , af = "AF"
                         , foldx_outcome = "foldx_outcome")

# the drug column comes last and keeps its own name
cols_to_select_foldx <- c(names(foldx_display_names), drug)
my_corr_colnames_foldx <- c(unname(foldx_display_names), drug)

# subset data to generate pairwise correlations
corr_data_foldx <- df_ps[, cols_to_select_foldx]
dim(corr_data_foldx)

#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic
#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic

# assign nice colnames (for display), showing the names before and after
length(my_corr_colnames_foldx)
colnames(corr_data_foldx)
colnames(corr_data_foldx) <- my_corr_colnames_foldx
colnames(corr_data_foldx)
#-----------------
# generate corr foldx plot
#-----------------
# keep everything before the drug column
start <- 1
end <- which(colnames(corr_data_foldx) == drug); end # should be the last column
offset <- 1
stopifnot(length(end) == 1, end > offset)

my_corr_foldx <- corr_data_foldx[seq(start, end - offset)]
head(my_corr_foldx)

#my_cols = c("#f8766d", "#00bfc4")
# deep blue :#007d85
# deep red: #ae301e

cat("Corr plot foldx:", plot_corr_foldx)
svg(plot_corr_foldx, width = 15, height = 15)

# the last column (foldx_outcome) only colours the points, it is not a panel
OutPlot_foldx <- pairs.panels(my_corr_foldx[seq_len(length(my_corr_foldx) - 1)]
                              , method = "spearman" # correlation method
                              , hist.col = "grey" ##00AFBB
                              , density = TRUE # show density plots
                              , ellipses = FALSE # show correlation ellipses
                              , stars = TRUE
                              , rug = FALSE
                              , breaks = "Sturges"
                              , show.points = TRUE
                              , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_foldx$foldx_outcome))]
                              , pch = 21
                              , jitter = TRUE
                              #, alpha = .05
                              #, points(pch = 19, col = c("#f8766d", "#00bfc4"))
                              , cex = 3
                              , cex.axis = 2.5
                              , cex.labels = 2.1
                              , cex.cor = 1
                              , smooth = FALSE
)

print(OutPlot_foldx)
dev.off()

View file

@ -1,289 +0,0 @@
#########################################################
# 1: Installing and loading required packages
#########################################################
#source("../Header_TT.R")

# install qqman only when it is missing, not on every run
if (!requireNamespace("qqman", quietly = TRUE)) {
  install.packages("qqman")
}
library(qqman)

# NOTE(review): merged_df2, ggplot2 and dplyr are not loaded in this script;
# presumably they come from combining_dfs_plotting.R -- confirm
source("combining_dfs_plotting.R")

#mcsm_data: raw file, 225, 15
#merged_df2 = 2201, 35
#merged_df3 = 205, 35 ("Can't trust non-numerical params')

#===============================================
# PLOTS: DUET vs GWAS: non-numerical
# lineage, country_code, etc
# merged_df2: 1592, 35
#===============================================

#########################
# Data for plot
#########################
df <- merged_df2
#df = merged_df2_comp

#========================
# Plot 1a: Lineage barplot
# x = lineage y = No of samples
# col = Lineage
# fill = lineage
#========================
table(df$lineage)

# subset only lineages1-4
sel_lineages <- c("lineage1"
                  , "lineage2"
                  , "lineage3"
                  , "lineage4"
                  #, "lineage5"
                  #, "lineage6"
                  #, "lineage7"
)

# uncomment as necessary
df_lin <- subset(df, subset = lineage %in% sel_lineages)
table(df_lin$lineage)

#%%%%%%%%%%%%%%%%%%%%%%%%%
# REASSIGNMENT
df <- df_lin
#%%%%%%%%%%%%%%%%%%%%%%%%%

#%%%%%%%%%%%%%%%%%%%%%%%%
# REASSIGNMENT
df2 <- df
#%%%%%%%%%%%%%%%%%%%%%%%%

# add the number of rows per country as column "n", then rename it
df2 <- df2 %>%
  add_count(country_code)
str(df2$country_code); str(df2$n)

n <- which(colnames(df2) == "n")
colnames(df2)[n] <- "count_country"

# keep countries with more than 100 rows and a non-empty code
# (both conditions now read from df2, the data frame being filtered)
table(df2$count_country > 100 & df2$country_code != "")
df3 <- subset(df2, df2$count_country > 100 & df2$country_code != "")

#%%%%%%%%%%%%%%%%%%%%%%%%
# REASSIGNMENT
df <- df3
#%%%%%%%%%%%%%%%%%%%%%%%%

# number of distinct ids
sample <- sum(table(unique(df$id))); sample
table(df$country_code)
# total number of rows that have a country code
tab <- sum(table(df$country_code)); tab

# View() needs an interactive session. The former View(t1) was removed:
# t1 is not defined anywhere in this script and stopped the run here.
if (interactive()) {
  View(table(df$country_code))
}
############## begin plot
# axis text/title sizes shared by plots 1a, 2 and 3
lineage_axis_theme <- theme(axis.text.x = element_text(size = 13
                                                       , angle = 90
                                                       , hjust = 1
                                                       , vjust = 0.4)
                            , axis.text.y = element_text(size = 15
                                                         , angle = 0
                                                         , hjust = 1
                                                         , vjust = 0)
                            , axis.title.x = element_text(size = 15)
                            , axis.title.y = element_text(size = 15))

# Plot 1a: number of samples per lineage
g <- ggplot(df, aes(x = lineage))
g + geom_bar(aes(fill = lineage)) +
  lineage_axis_theme +
  labs(title = "Lineage"
       , x = "Lineage"
       , y = "No of samples")

#========================
# Plot 2: DUET, lineage, country_code and or_mychisq
# x = country_code y = DUET
# col = Lineage
# size = or_mychisq
#========================

### begin plot
g <- ggplot(df, aes(x = country_code
                    , y = duet_scaled))
g + geom_point(aes(col = lineage
                   , size = or_mychisq)) +
  lineage_axis_theme +
  # the x axis shows country_code, so label it as such (it said "Lineage")
  labs(title = "DUET, country_code, lineage, or_mychisq"
       , x = "Country code"
       , y = "DUET (PS)")

#############
#========================
# Plot 3: DUET, lineage, or_mychisq
# x = lineage y = DUET
# col = Lineage
# size = or_mychisq
#========================

### begin plot
table(df$lineage)

# NOTE(review): this plot uses df_lin, while the table above and the other
# plots use df -- confirm that is intended
g <- ggplot(df_lin, aes(x = lineage
                        , y = duet_scaled))
g + geom_point(aes(col = lineage
                   , size = or_mychisq)) +
  lineage_axis_theme +
  labs(title = "DUET, lineage, or_mychisq"
       , x = "Lineage"
       , y = "DUET (PS)")
#========================
# Plot 4-5: Distributions
# ggridges
#========================
#==================================================
my_ats <- 15 # axis text size
my_als <- 20 # axis label size

# facet strip labels, keyed by the values of the lineage column
my_labels <- c(lineage1 = 'Lineage 1'
               , lineage2 = 'Lineage 2'
               , lineage3 = 'Lineage 3'
               , lineage4 = 'Lineage 4'
               #, lineage5 = 'Lineage 5', lineage6 = 'Lineage 6', lineage7 = 'Lineage 7'
)

# theme settings common to all three ridge plots
ridge_theme <- theme(axis.text.x = element_text(size = my_ats
                                                , angle = 90
                                                , hjust = 1
                                                , vjust = 0.4)
                     , axis.title.x = element_blank()
                     , axis.title.y = element_blank()
                     , axis.ticks.y = element_blank()
                     , plot.title = element_blank()
                     , strip.text = element_text(size = my_als)
                     , legend.text = element_text(size = my_als - 5)
                     , legend.title = element_text(size = my_als))

#========================
# Plot 4: Distribution
# x = duet_scaled
# y = country
# fill = country_code
# facet = lineage
#========================
# works neatly!
p1 <- ggplot(df, aes(x = duet_scaled
                     , y = country_code)) +
  geom_density_ridges_gradient(aes(fill = country_code)
                               , jittered_points = TRUE
                               , scale = 3
                               , size = 0.3) +
  facet_wrap(~lineage
             , scales = "free"
             , switch = 'x'
             , labeller = labeller(lineage = my_labels)) +
  coord_cartesian(xlim = c(-1, 1)) +
  #scale_fill_gradientn(colours = c("#f8766d", "white", "#00bfc4")
  #                     , name = "DUET" ) +
  ridge_theme

p1

#========================
# Plot 5: Distribution
# x = duet_scaled
# y = country_code
# fill = lineage
# facet = NONE
#========================
# no facet wrap
p2 <- ggplot(df, aes(x = duet_scaled
                     , y = country_code)) +
  geom_density_ridges_gradient(aes(fill = factor(lineage))
                               , scale = 3
                               , size = 0.3) +
  coord_cartesian(xlim = c(-1, 1)) +
  #scale_fill_gradientn(colours = c("#f8766d", "white", "#00bfc4")
  #                     , name = "DUET" ) +
  #scale_fill_continuous(colours = c("darkgreen", "pink", "orange", "brown")
  #                      , name = "lineage" ) +
  ridge_theme

p2

#===============
# lineage only
#================
# x = duet_scaled, y = duet_outcome, fill = x value, facet = lineage
#svg(plot_lineage_duet)
p3 <- ggplot(df, aes(x = duet_scaled
                     , y = duet_outcome)) +
  geom_density_ridges_gradient(aes(fill = ..x..)
                               , jittered_points = TRUE
                               , scale = 3
                               , size = 0.3) +
  facet_wrap(~lineage
             , scales = "free"
             #, switch = 'x'
             , labeller = labeller(lineage = my_labels)) +
  coord_cartesian(xlim = c(-1, 1)) +
  scale_fill_gradientn(colours = c("#f8766d", "white", "#00bfc4")
                       , name = "DUET") +
  ridge_theme +
  # unlike plots 4 and 5, the y axis text is hidden here
  theme(axis.text.y = element_blank())

print(p3)

View file

@ -1,426 +0,0 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: prediction
#=======================================================================
# working dir and loading libraries
getwd()
setwd("~/git/LSHTM_analysis/scripts/")
getwd()

# NOTE(review): the merged_* data frames and dr_muts_col are not defined in
# this script; presumably they come from the sourced file -- confirm
source("plotting/combining_dfs_plotting.R")

####################################################################
# end of loading libraries and functions
####################################################################

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Binary outcome for the models:
# 1 when mutation_info equals dr_muts_col, 0 otherwise.
# Counts are shown before and after adding each label column.

# ps
table(merged_df2$mutation_info)
merged_df2$mutation_info_labels <- as.numeric(merged_df2$mutation_info == dr_muts_col)
table(merged_df2$mutation_info_labels)

table(merged_df3$mutation_info)
merged_df3$mutation_info_labels <- as.numeric(merged_df3$mutation_info == dr_muts_col)
table(merged_df3$mutation_info_labels)

# lig
table(merged_df2_lig$mutation_info)
merged_df2_lig$mutation_info_labels <- as.numeric(merged_df2_lig$mutation_info == dr_muts_col)
table(merged_df2_lig$mutation_info_labels)

table(merged_df3_lig$mutation_info)
merged_df3_lig$mutation_info_labels <- as.numeric(merged_df3_lig$mutation_info == dr_muts_col)
table(merged_df3_lig$mutation_info_labels)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
###############################################################################
# single-predictor logistic model on merged_df2
model_ind <- glm(mutation_info_labels ~ or_mychisq
                 , data = merged_df2
                 , family = "binomial")
summary(model_ind)

nobs(model_ind)

#=============
# try loop
#=============
# print the summary of one unadjusted model per predictor
my_ivs <- c("or_mychisq", "or_kin", "pval_fisher", "af", "duet_stability_change", "duet_scaled")

for (i in my_ivs) {
  cat("===============================\n")
  cat(i)
  cat("\n===============================\n")
  # reformulate() builds mutation_info_labels ~ <i> from the column name,
  # so the summary shows the real variable name and no text is eval()-ed
  print(summary(glm(reformulate(i, response = "mutation_info_labels")
                    , data = merged_df2
                    , family = "binomial")))
}
###############################################################################
#========================================
# Logistic regression of mutation_info_labels on merged_df2 and merged_df3:
# unadjusted (one predictor at a time) and adjusted (several predictors)
#========================================

# predictors for the unadjusted models
my_ivs <- c("or_mychisq", "or_kin", "pval_fisher", "af"
            , "ligand_distance"
            , "rsa"
            , "rd_values"
            , "kd_values"
            , "duet_stability_change"
            , "duet_scaled"
            , "duet_outcome"
            , "ddg"
            , "foldx_scaled"
            , "foldx_outcome")

# column order of the rearranged (*_o) result tables
logistic_col_order <- c("var_name"
                        , "number_samples"
                        , "model"
                        , "odds_ratio"
                        , "pvalue"
                        , "pvalue_signif"
                        , "beta"
                        , "se"
                        , "zvalue"
                        , "ci_lower"
                        , "ci_upper"
                        , "data_source")

# Add the significance code column pvalue_signif, derived from pvalue.
#   res : data frame with a numeric column pvalue
add_pvalue_signif <- function(res) {
  dplyr::mutate(res
                # conditions are tried in order; exactly 0.05 is coded "."
                , pvalue_signif = dplyr::case_when(pvalue == 0.05 ~ "."
                                                   , pvalue <= 0.0001 ~ "****"
                                                   , pvalue <= 0.001 ~ "***"
                                                   , pvalue <= 0.01 ~ "**"
                                                   , pvalue < 0.05 ~ "*"
                                                   , TRUE ~ "ns"))
}

# Fit mutation_info_labels ~ <iv> and return a one-row data frame with the
# estimates of the second coefficient (the first one after the intercept).
#   iv  : name of the predictor column in dat
#   dat : data frame holding mutation_info_labels and the predictor
fit_unadjusted <- function(iv, dat) {
  print(iv)
  # formula built from the column name instead of eval(parse(text = ...))
  model <- glm(reformulate(iv, response = "mutation_info_labels")
               , data = dat
               , family = "binomial")

  # coefficient columns: 1 estimate, 2 std. error, 3 z value, 4 p-value
  est <- summary(model)$coefficients[2, ]
  ci <- exp(confint(model))[2, ]

  row <- data.frame(var_name = iv
                    , number_samples = nobs(model)
                    , beta = est[[1]]
                    , odds_ratio = exp(est[[1]])
                    , pvalue = est[[4]]
                    , se = est[[2]]
                    , zvalue = est[[3]]
                    , ci_lower = ci[["2.5 %"]]
                    , ci_upper = ci[["97.5 %"]]
                    , stringsAsFactors = FALSE)
  print(row)
  row
}

# Run fit_unadjusted() for every predictor and format the combined table.
#   ivs         : character vector of predictor names
#   dat         : data frame to fit on
#   data_source : label stored in the data_source column
run_unadjusted <- function(ivs, dat, data_source) {
  # fit all models first, bind once (no rbind inside a loop)
  res <- do.call(rbind, lapply(ivs, fit_unadjusted, dat = dat))

  res$data_source <- data_source
  res$model <- "unadjusted"

  res$odds_ratio <- round(res$odds_ratio, 2)
  res$ci_lower <- round(res$ci_lower, 2)
  res$ci_upper <- round(res$ci_upper, 2)

  add_pvalue_signif(res)
}

# Turn a fitted multi-predictor glm into a table with one row per predictor
# (intercept row removed).
#   model       : fitted glm
#   data_source : label stored in the data_source column
tidy_adjusted <- function(model, data_source) {
  ci_mod_df <- as.data.frame(exp(confint(model)))
  names(ci_mod_df) <- c("ci_lower", "ci_upper")
  ci_mod_df$ci_lower <- round(ci_mod_df$ci_lower, 2)
  ci_mod_df$ci_upper <- round(ci_mod_df$ci_upper, 2)

  estimates_df <- as.data.frame(summary(model)$coefficients)
  names(estimates_df) <- c("beta", "se", "zvalue", "pvalue")
  estimates_df$odds_ratio <- round(exp(estimates_df$beta), 2)
  estimates_df$number_samples <- nobs(model)
  estimates_df$data_source <- data_source
  estimates_df$model <- "adjusted"

  # stop instead of carrying on without a result table
  if (!identical(rownames(estimates_df), rownames(ci_mod_df))) {
    stop("FAIL: rownames of estimates and confidence intervals do not match"
         , call. = FALSE)
  }
  cat("PASS: rownames match. Preparing to merge...")
  res <- merge(estimates_df, ci_mod_df, by = "row.names", all = TRUE)
  colnames(res)[1] <- "var_name"

  # logical filter: still correct when there is no intercept row
  res <- res[res$var_name != "(Intercept)", ]

  add_pvalue_signif(res)
}

#========================================
# merged_df2: UNadjusted,loop
#========================================
ps_logistic_df2 <- run_unadjusted(my_ivs, merged_df2, "df2")

# rearranging columns
ps_logistic_df2_o <- ps_logistic_df2[logistic_col_order]

###############################################################################
#========================================
# merged_df2: adjusted
#========================================
#model_adjusted = glm(mutation_info_labels ~ or_mychisq + rsa + rd_values + kd_values +
# duet_stability_change + duet_scaled + duet_outcome + ddg + foldx_scaled + foldx_outcome
# , data = merged_df2
# , family = "binomial")

model_adjusted_df2 <- glm(mutation_info_labels ~ or_mychisq + or_kin + rd_values + kd_values +
                            ligand_distance + duet_stability_change
                          , data = merged_df2
                          , family = "binomial")
summary(model_adjusted_df2)

ps_logistic_adjusted_df2 <- tidy_adjusted(model_adjusted_df2, "df2")

# rearranging columns
ps_logistic_adjusted_df2_o <- ps_logistic_adjusted_df2[logistic_col_order]

###############################################################################
###############################################################################
#========================================
# merged_df3: UNadjusted,loop
#========================================
ps_logistic_df3 <- run_unadjusted(my_ivs, merged_df3, "df3")

# rearranging columns
ps_logistic_df3_o <- ps_logistic_df3[logistic_col_order]

#========================================
# merged_df3: adjusted
#========================================
#model_adjusted_df3 = glm(mutation_info_labels ~ or_mychisq + rsa + rd_values + kd_values +
# duet_stability_change + duet_scaled + duet_outcome + ddg + foldx_scaled + foldx_outcome
# , data = merged_df3
# , family = "binomial")

model_adjusted_df3 <- glm(mutation_info_labels ~ rd_values +
                            ligand_distance + duet_stability_change
                          , data = merged_df3
                          , family = "binomial")
summary(model_adjusted_df3)

ps_logistic_adjusted_df3 <- tidy_adjusted(model_adjusted_df3, "df3")
# rearranging columns
colnames(ps_logistic_adjusted_df3)
ps_logistic_adjusted_df3_o = ps_logistic_adjusted_df3[c("var_name"
, "number_samples"
, "model"
, "odds_ratio"
, "pvalue"
, "pvalue_signif"
, "beta"
, "se"
, "zvalue"
, "ci_lower"
, "ci_upper"
,"data_source")]
#-------------
# lm
#-------------
# Linear model of or_kin on structural/stability predictors plus the binary
# label, fitted on merged_df3. The summary is printed when run at top level.
model_lm <- lm(
  or_kin ~ rsa + rd_values + duet_stability_change + ddg + mutation_info_labels,
  data = merged_df3
)
summary(model_lm)

# Linear model of or_mychisq on the binary label alone, fitted on merged_df2.
# NOTE(review): assumes merged_df2 already has a mutation_info_labels column;
# its creation is not visible here -- confirm.
model_lm1 <- lm(or_mychisq ~ mutation_info_labels, data = merged_df2)
summary(model_lm1)

View file

@ -1,203 +0,0 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: prediction lig
#=======================================================================
# Working directory and shared data.
# The working directory is printed before and after the change. The sourced
# script is addressed relative to the new working directory.
# NOTE(review): outdir, gene, dr_muts_col and merged_df3_lig are not defined
# in this script; presumably they come from the sourced file -- confirm.
getwd()
setwd("~/git/LSHTM_analysis/scripts/")
getwd()

source("plotting/combining_dfs_plotting.R")

#=======
# output
#=======
# Destination CSV paths for the unadjusted and adjusted model tables.
lig_unadjusted <- paste0(outdir, "/results/", tolower(gene), "_unadjusted_logistic_LIG.csv")
lig_adjusted <- paste0(outdir, "/results/", tolower(gene), "_adjusted_logistic_LIG.csv")

####################################################################
# end of loading libraries and functions
####################################################################

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# lig
# Tabulate the raw categories, then derive a 1/0 outcome: 1 where
# mutation_info equals dr_muts_col, 0 otherwise.
table(merged_df3_lig$mutation_info)

merged_df3_lig$mutation_info_labels <- ifelse(
  test = merged_df3_lig$mutation_info == dr_muts_col,
  yes = 1,
  no = 0
)

table(merged_df3_lig$mutation_info_labels)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
###############################################################################
#========================================
# merged_df3_lig: UNadjusted,loop
#========================================
# Predictors tested one at a time against mutation_info_labels.
my_ivs <- c("or_mychisq", "or_kin", "pval_fisher", "af"
            #, "ligand_distance"
            , "rsa"
            , "rd_values"
            , "kd_values"
            , "ligand_affinity_change"
            , "affinity_scaled"
            , "ligand_outcome")

# One single-row data frame per predictor is stored in this list and bound
# once after the loop, rather than growing the result with rbind() each pass.
lig_logistic_rows <- vector("list", length(my_ivs))

for (k in seq_along(my_ivs)) {
  i <- my_ivs[[k]]
  print(i)

  # reformulate() builds `mutation_info_labels ~ <predictor>` from the column
  # name, replacing eval(parse(text = i)) inside the formula.
  model_lig <- glm(reformulate(i, response = "mutation_info_labels")
                   , data = merged_df3_lig
                   , family = "binomial")

  # Coefficient table computed once; columns are estimate, standard error,
  # z statistic and p-value. Row 2 is the first non-intercept term, so for a
  # predictor that expands to several terms only the first one is recorded.
  coef_table <- summary(model_lig)$coefficients

  var_name <- i
  number_samples <- nobs(model_lig)
  beta_logistic <- coef_table[2, 1]
  or_logistic <- exp(beta_logistic)
  pval_logistic <- coef_table[2, 4]
  se_logistic <- coef_table[2, 2]
  zval_logistic <- coef_table[2, 3]

  # Confidence interval for the same term, on the odds-ratio scale.
  ci_mod <- exp(confint(model_lig))[2, ]
  ci_lower_logistic <- ci_mod[["2.5 %"]]
  ci_upper_logistic <- ci_mod[["97.5 %"]]

  print(c(var_name, beta_logistic, or_logistic, pval_logistic, se_logistic, zval_logistic, ci_mod))

  df <- data.frame(var_name = var_name
                   , number_samples = number_samples
                   , beta = beta_logistic
                   , odds_ratio = or_logistic
                   , pvalue = pval_logistic
                   , se = se_logistic
                   , zvalue = zval_logistic
                   , ci_lower = ci_lower_logistic
                   , ci_upper = ci_upper_logistic
                   , stringsAsFactors = FALSE)
  print(df)

  lig_logistic_rows[[k]] <- df
}

# Stack the per-predictor rows into the final table.
lig_logistic_df3 <- do.call(rbind, lig_logistic_rows)
#--------------------
# formatting df
#--------------------
# Label every row with the dataset and the model type it came from.
# NOTE(review): the column is called model_lig here while the PS script calls
# it model; kept as-is because it is part of the written CSV -- confirm.
lig_logistic_df3$data_source <- "df3_lig"
lig_logistic_df3$model_lig <- "unadjusted"

# Odds ratio and its confidence bounds are reported to 2 decimal places.
lig_logistic_df3$odds_ratio <- round(lig_logistic_df3$odds_ratio, 2)
lig_logistic_df3$ci_lower <- round(lig_logistic_df3$ci_lower, 2)
lig_logistic_df3$ci_upper <- round(lig_logistic_df3$ci_upper, 2)

# adding pvalue_signif
lig_logistic_df3$pvalue_signif <- lig_logistic_df3$pvalue
str(lig_logistic_df3$pvalue_signif)

# Bin the p-value into a significance label; first matching condition wins.
# case_when is namespace-qualified like mutate so the step does not depend on
# dplyr being attached.
lig_logistic_df3 <- dplyr::mutate(lig_logistic_df3
                                  , pvalue_signif = dplyr::case_when(pvalue_signif == 0.05 ~ "."
                                                                     , pvalue_signif <= 0.0001 ~ '****'
                                                                     , pvalue_signif <= 0.001 ~ '***'
                                                                     , pvalue_signif <= 0.01 ~ '**'
                                                                     , pvalue_signif < 0.05 ~ '*'
                                                                     , TRUE ~ 'ns'))

# rearranging columns
lig_logistic_df3_o <- lig_logistic_df3[c("var_name"
                                         , "number_samples"
                                         , "model_lig"
                                         , "odds_ratio"
                                         , "pvalue"
                                         , "pvalue_signif"
                                         , "beta"
                                         , "se"
                                         , "zvalue"
                                         , "ci_lower"
                                         , "ci_upper"
                                         , "data_source")]

# writing file
# FALSE is spelled out: F is an ordinary variable that can be reassigned.
write.csv(lig_logistic_df3_o, lig_unadjusted, row.names = FALSE)
#========================================
# merged_df3_lig: adjusted (multivariable) model
#========================================
# Earlier, larger model kept for reference:
#model_lig_adjusted_df3 = glm(mutation_info_labels ~ or_mychisq + rsa + rd_values + kd_values +
#                   ligand_affinity_change + affinity_scaled + ligand_outcome
#                 , data = merged_df3_lig
#                 , family = "binomial")

# Logistic regression of the binary label on two predictors at once.
model_lig_adjusted_df3 <- glm(mutation_info_labels ~ rd_values + ligand_affinity_change
                              , data = merged_df3_lig
                              , family = "binomial")
summary(model_lig_adjusted_df3)

# Coefficient names of the fitted model (includes "(Intercept)").
var_names_df <- as.data.frame(names(model_lig_adjusted_df3$coefficients))
names(var_names_df) <- c("var_name")

# Confidence intervals, exponentiated so they are on the odds-ratio scale.
ci_mod <- exp(confint(model_lig_adjusted_df3))
ci_mod_df <- as.data.frame(ci_mod)
names(ci_mod_df) <- c("ci_lower", "ci_upper")
ci_mod_df$ci_lower <- round(ci_mod_df$ci_lower, 2)
ci_mod_df$ci_upper <- round(ci_mod_df$ci_upper, 2)

# Coefficient table: estimate, standard error, z statistic, p-value.
estimates_df <- as.data.frame(summary(model_lig_adjusted_df3)$coefficients)
names(estimates_df) <- c("beta", "se", "zvalue", "pvalue")
estimates_df$odds_ratio <- round(exp(estimates_df$beta), 2)

number_samples <- nobs(model_lig_adjusted_df3)
estimates_df$number_samples <- number_samples
estimates_df$data_source <- "df3_lig"
estimates_df$model_lig <- "adjusted"

# The two tables are joined on their row names (the coefficient names), so
# stop with an explicit message if they disagree instead of carrying on with
# an undefined result object.
if (!identical(rownames(estimates_df), rownames(ci_mod_df))) {
  stop("FAIL: rownames of estimates and confidence intervals do not match"
       , call. = FALSE)
}
cat("PASS: rownames match. Preparing to merge...")
lig_logistic_adjusted_df3 <- merge(estimates_df, ci_mod_df
                                   , by = "row.names", all = TRUE)

# merge() puts the join key in the first column; rename it.
colnames(lig_logistic_adjusted_df3)[1] <- c("var_name")

# Drop the intercept row. A logical mask is used because negative indexing
# with an empty which() result would drop every row.
is_intercept <- lig_logistic_adjusted_df3$var_name == "(Intercept)"
lig_logistic_adjusted_df3 <- lig_logistic_adjusted_df3[!is_intercept, ]

# adding pvalue_signif
lig_logistic_adjusted_df3$pvalue_signif <- lig_logistic_adjusted_df3$pvalue
str(lig_logistic_adjusted_df3$pvalue_signif)

# Bin the p-value into a significance label; first matching condition wins.
lig_logistic_adjusted_df3 <- dplyr::mutate(lig_logistic_adjusted_df3
                                           , pvalue_signif = dplyr::case_when(pvalue_signif == 0.05 ~ "."
                                                                              , pvalue_signif <= 0.0001 ~ '****'
                                                                              , pvalue_signif <= 0.001 ~ '***'
                                                                              , pvalue_signif <= 0.01 ~ '**'
                                                                              , pvalue_signif < 0.05 ~ '*'
                                                                              , TRUE ~ 'ns'))

# rearranging columns
colnames(lig_logistic_adjusted_df3)
lig_logistic_adjusted_df3_o <- lig_logistic_adjusted_df3[c("var_name"
                                                           , "number_samples"
                                                           , "model_lig"
                                                           , "odds_ratio"
                                                           , "pvalue"
                                                           , "pvalue_signif"
                                                           , "beta"
                                                           , "se"
                                                           , "zvalue"
                                                           , "ci_lower"
                                                           , "ci_upper"
                                                           , "data_source")]

# writing file
# FALSE is spelled out: F is an ordinary variable that can be reassigned.
write.csv(lig_logistic_adjusted_df3_o, lig_adjusted, row.names = FALSE)

View file

@ -1,207 +0,0 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: prediction_ps
#=======================================================================
# Working directory and shared data.
# The working directory is printed before and after the change. The sourced
# script is addressed relative to the new working directory.
# NOTE(review): outdir, gene, dr_muts_col and merged_df3 are not defined in
# this script; presumably they come from the sourced file -- confirm.
getwd()
setwd("~/git/LSHTM_analysis/scripts/")
getwd()

source("plotting/combining_dfs_plotting.R")

#=======
# output
#=======
# Destination CSV paths for the unadjusted and adjusted model tables.
ps_unadjusted <- paste0(outdir, "/results/", tolower(gene), "_unadjusted_logistic_PS.csv")
ps_adjusted <- paste0(outdir, "/results/", tolower(gene), "_adjusted_logistic_PS.csv")

####################################################################
# end of loading libraries and functions
####################################################################

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# ps
# Tabulate the raw categories, then derive a 1/0 outcome: 1 where
# mutation_info equals dr_muts_col, 0 otherwise.
table(merged_df3$mutation_info)

merged_df3$mutation_info_labels <- ifelse(
  test = merged_df3$mutation_info == dr_muts_col,
  yes = 1,
  no = 0
)

table(merged_df3$mutation_info_labels)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
###############################################################################
#========================================
# merged_df3: UNadjusted,loop
#========================================
# Predictors tested one at a time against mutation_info_labels.
my_ivs <- c("or_mychisq", "or_kin", "pval_fisher", "af"
            , "ligand_distance"
            , "rsa"
            , "rd_values"
            , "kd_values"
            , "duet_stability_change"
            , "duet_scaled"
            , "duet_outcome"
            , "ddg"
            , "foldx_scaled"
            , "foldx_outcome")

# One single-row data frame per predictor is stored in this list and bound
# once after the loop, rather than growing the result with rbind() each pass.
ps_logistic_rows <- vector("list", length(my_ivs))

for (k in seq_along(my_ivs)) {
  i <- my_ivs[[k]]
  print(i)

  # reformulate() builds `mutation_info_labels ~ <predictor>` from the column
  # name, replacing eval(parse(text = i)) inside the formula.
  model <- glm(reformulate(i, response = "mutation_info_labels")
               , data = merged_df3
               , family = "binomial")

  # Coefficient table computed once; columns are estimate, standard error,
  # z statistic and p-value. Row 2 is the first non-intercept term, so for a
  # predictor that expands to several terms only the first one is recorded.
  coef_table <- summary(model)$coefficients

  var_name <- i
  number_samples <- nobs(model)
  beta_logistic <- coef_table[2, 1]
  or_logistic <- exp(beta_logistic)
  pval_logistic <- coef_table[2, 4]
  se_logistic <- coef_table[2, 2]
  zval_logistic <- coef_table[2, 3]

  # Confidence interval for the same term, on the odds-ratio scale.
  ci_mod <- exp(confint(model))[2, ]
  ci_lower_logistic <- ci_mod[["2.5 %"]]
  ci_upper_logistic <- ci_mod[["97.5 %"]]

  print(c(var_name, beta_logistic, or_logistic, pval_logistic, se_logistic, zval_logistic, ci_mod))

  df <- data.frame(var_name = var_name
                   , number_samples = number_samples
                   , beta = beta_logistic
                   , odds_ratio = or_logistic
                   , pvalue = pval_logistic
                   , se = se_logistic
                   , zvalue = zval_logistic
                   , ci_lower = ci_lower_logistic
                   , ci_upper = ci_upper_logistic
                   , stringsAsFactors = FALSE)
  print(df)

  ps_logistic_rows[[k]] <- df
}

# Stack the per-predictor rows into the final table.
ps_logistic_df3 <- do.call(rbind, ps_logistic_rows)
#--------------------
# formatting df
#--------------------
# Label every row with the dataset and the model type it came from.
ps_logistic_df3$data_source <- "df3"
ps_logistic_df3$model <- "unadjusted"

# Odds ratio and its confidence bounds are reported to 2 decimal places.
ps_logistic_df3$odds_ratio <- round(ps_logistic_df3$odds_ratio, 2)
ps_logistic_df3$ci_lower <- round(ps_logistic_df3$ci_lower, 2)
ps_logistic_df3$ci_upper <- round(ps_logistic_df3$ci_upper, 2)

# adding pvalue_signif
ps_logistic_df3$pvalue_signif <- ps_logistic_df3$pvalue
str(ps_logistic_df3$pvalue_signif)

# Bin the p-value into a significance label; first matching condition wins.
# case_when is namespace-qualified like mutate so the step does not depend on
# dplyr being attached.
ps_logistic_df3 <- dplyr::mutate(ps_logistic_df3
                                 , pvalue_signif = dplyr::case_when(pvalue_signif == 0.05 ~ "."
                                                                    , pvalue_signif <= 0.0001 ~ '****'
                                                                    , pvalue_signif <= 0.001 ~ '***'
                                                                    , pvalue_signif <= 0.01 ~ '**'
                                                                    , pvalue_signif < 0.05 ~ '*'
                                                                    , TRUE ~ 'ns'))

# rearranging columns
ps_logistic_df3_o <- ps_logistic_df3[c("var_name"
                                       , "number_samples"
                                       , "model"
                                       , "odds_ratio"
                                       , "pvalue"
                                       , "pvalue_signif"
                                       , "beta"
                                       , "se"
                                       , "zvalue"
                                       , "ci_lower"
                                       , "ci_upper"
                                       , "data_source")]

# writing file
# FALSE is spelled out: F is an ordinary variable that can be reassigned.
write.csv(ps_logistic_df3_o, ps_unadjusted, row.names = FALSE)
#========================================
# merged_df3: adjusted (multivariable) model
#========================================
# Earlier, larger model kept for reference:
#model_adjusted_df3 = glm(mutation_info_labels ~ or_mychisq + rsa + rd_values + kd_values +
#                   duet_stability_change + duet_scaled + duet_outcome + ddg + foldx_scaled + foldx_outcome
#                 , data = merged_df3
#                 , family = "binomial")

# Logistic regression of the binary label on three predictors at once.
model_adjusted_df3 <- glm(mutation_info_labels ~ rd_values +
                            ligand_distance + duet_stability_change
                          , data = merged_df3
                          , family = "binomial")
summary(model_adjusted_df3)

# Coefficient names of the fitted model (includes "(Intercept)").
var_names_df <- as.data.frame(names(model_adjusted_df3$coefficients))
names(var_names_df) <- c("var_name")

# Confidence intervals, exponentiated so they are on the odds-ratio scale.
ci_mod <- exp(confint(model_adjusted_df3))
ci_mod_df <- as.data.frame(ci_mod)
names(ci_mod_df) <- c("ci_lower", "ci_upper")
ci_mod_df$ci_lower <- round(ci_mod_df$ci_lower, 2)
ci_mod_df$ci_upper <- round(ci_mod_df$ci_upper, 2)

# Coefficient table: estimate, standard error, z statistic, p-value.
estimates_df <- as.data.frame(summary(model_adjusted_df3)$coefficients)
names(estimates_df) <- c("beta", "se", "zvalue", "pvalue")
estimates_df$odds_ratio <- round(exp(estimates_df$beta), 2)

number_samples <- nobs(model_adjusted_df3)
estimates_df$number_samples <- number_samples
estimates_df$data_source <- "df3"
estimates_df$model <- "adjusted"

# The two tables are joined on their row names (the coefficient names), so
# stop with an explicit message if they disagree instead of carrying on with
# an undefined result object.
if (!identical(rownames(estimates_df), rownames(ci_mod_df))) {
  stop("FAIL: rownames of estimates and confidence intervals do not match"
       , call. = FALSE)
}
cat("PASS: rownames match. Preparing to merge...")
ps_logistic_adjusted_df3 <- merge(estimates_df, ci_mod_df
                                  , by = "row.names", all = TRUE)

# merge() puts the join key in the first column; rename it.
colnames(ps_logistic_adjusted_df3)[1] <- c("var_name")

# Drop the intercept row. A logical mask is used because negative indexing
# with an empty which() result would drop every row.
is_intercept <- ps_logistic_adjusted_df3$var_name == "(Intercept)"
ps_logistic_adjusted_df3 <- ps_logistic_adjusted_df3[!is_intercept, ]

# adding pvalue_signif
ps_logistic_adjusted_df3$pvalue_signif <- ps_logistic_adjusted_df3$pvalue
str(ps_logistic_adjusted_df3$pvalue_signif)

# Bin the p-value into a significance label; first matching condition wins.
ps_logistic_adjusted_df3 <- dplyr::mutate(ps_logistic_adjusted_df3
                                          , pvalue_signif = dplyr::case_when(pvalue_signif == 0.05 ~ "."
                                                                             , pvalue_signif <= 0.0001 ~ '****'
                                                                             , pvalue_signif <= 0.001 ~ '***'
                                                                             , pvalue_signif <= 0.01 ~ '**'
                                                                             , pvalue_signif < 0.05 ~ '*'
                                                                             , TRUE ~ 'ns'))

# rearranging columns
colnames(ps_logistic_adjusted_df3)
ps_logistic_adjusted_df3_o <- ps_logistic_adjusted_df3[c("var_name"
                                                         , "number_samples"
                                                         , "model"
                                                         , "odds_ratio"
                                                         , "pvalue"
                                                         , "pvalue_signif"
                                                         , "beta"
                                                         , "se"
                                                         , "zvalue"
                                                         , "ci_lower"
                                                         , "ci_upper"
                                                         , "data_source")]

# writing file
# FALSE is spelled out: F is an ordinary variable that can be reassigned.
write.csv(ps_logistic_adjusted_df3_o, ps_adjusted, row.names = FALSE)
###############################################################################