added boxplots and stats for other numerical params
This commit is contained in:
parent
bba3487829
commit
1708194912
2 changed files with 425 additions and 0 deletions
173
scripts/plotting/other_plots.R
Normal file
173
scripts/plotting/other_plots.R
Normal file
|
@ -0,0 +1,173 @@
|
||||||
|
#!/usr/bin/env Rscript
|
||||||
|
#########################################################
|
||||||
|
# TASK: producing boxplots for dr and other muts
|
||||||
|
|
||||||
|
#########################################################
|
||||||
|
#=======================================================================
|
||||||
|
# working dir and loading libraries
|
||||||
|
getwd()
|
||||||
|
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||||
|
getwd()
|
||||||
|
|
||||||
|
source("Header_TT.R")
|
||||||
|
#library(ggplot2)
|
||||||
|
#library(data.table)
|
||||||
|
#library(dplyr)
|
||||||
|
source("other_plots_data.R")
|
||||||
|
|
||||||
|
#=======
|
||||||
|
# output
|
||||||
|
#=======
|
||||||
|
#dr_other_plots_combined = "dr_other_combined.svg"
|
||||||
|
#plot_dr_other_plots_combined = paste0(plotdir,"/", dr_other_plots_combined)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################
|
||||||
|
# end of data extraction and cleaning for plots #
|
||||||
|
########################################################################
|
||||||
|
#my_comparisons <- list( c(dr_muts_col, other_muts_col) )
|
||||||
|
my_comparisons <- list( c("DM", "OM") )
|
||||||
|
|
||||||
|
#===========
|
||||||
|
# Plot1: PS
|
||||||
|
#===========
|
||||||
|
|
||||||
|
my_stat_ps = compare_means(param_value~mutation_info, group.by = "param_type"
|
||||||
|
, data = df_lf_ps, paired = FALSE, p.adjust.method = "BH")
|
||||||
|
|
||||||
|
y_value = "param_value"
|
||||||
|
|
||||||
|
p1 = ggplot(df_lf_ps, aes(x = mutation_info
|
||||||
|
, y = eval(parse(text=y_value)) )) +
|
||||||
|
facet_wrap(~ param_type
|
||||||
|
, nrow = 1
|
||||||
|
, scales = "free_y") +
|
||||||
|
geom_boxplot(fill = "white", outlier.colour = NA
|
||||||
|
# position = position_dodge(width = 0.9)
|
||||||
|
, width = 0.5) +
|
||||||
|
geom_point(position = position_jitterdodge(dodge.width=0.01)
|
||||||
|
, show.legend = FALSE
|
||||||
|
, aes(colour = factor(duet_outcome))) +
|
||||||
|
theme(axis.text.x = element_text(size = 15)
|
||||||
|
, axis.text.y = element_text(size = 15
|
||||||
|
, angle = 0
|
||||||
|
, hjust = 1
|
||||||
|
, vjust = 0)
|
||||||
|
, axis.title.x = element_text(size = 15)
|
||||||
|
, axis.title.y = element_text(size = 15)
|
||||||
|
, plot.title = element_text(size = 20, hjust = 0.5)
|
||||||
|
, strip.text.x = element_text(size = 15, colour = "black")
|
||||||
|
, legend.title = element_text(color = "black", size = 20)
|
||||||
|
, legend.text = element_text(size = 15)
|
||||||
|
, legend.direction = "vertical") +
|
||||||
|
labs(title = "DUET"
|
||||||
|
, x = ""
|
||||||
|
, y = "")+
|
||||||
|
stat_compare_means(comparisons = my_comparisons
|
||||||
|
, method = "wilcox.test"
|
||||||
|
, paired = FALSE
|
||||||
|
#, label = "p.format")
|
||||||
|
, label = "p.signif")
|
||||||
|
p1
|
||||||
|
|
||||||
|
#===========
|
||||||
|
# Plot 2: Foldx
|
||||||
|
#===========
|
||||||
|
my_stat_foldx = compare_means(param_value~mutation_info, group.by = "param_type"
|
||||||
|
, data = df_lf_foldx, paired = FALSE, p.adjust.method = "BH")
|
||||||
|
|
||||||
|
y_value = "param_value"
|
||||||
|
|
||||||
|
p2 = ggplot(df_lf_foldx, aes(x = mutation_info
|
||||||
|
, y = eval(parse(text=y_value)) )) +
|
||||||
|
facet_wrap(~ param_type
|
||||||
|
, nrow = 1
|
||||||
|
, scales = "free_y") +
|
||||||
|
geom_boxplot(fill = "white", outlier.colour = NA
|
||||||
|
# position = position_dodge(width = 0.9)
|
||||||
|
, width = 0.5) +
|
||||||
|
geom_point(position = position_jitterdodge(dodge.width=0.01)
|
||||||
|
, show.legend = FALSE
|
||||||
|
, aes(colour = factor(foldx_outcome))) +
|
||||||
|
theme(axis.text.x = element_text(size = 15)
|
||||||
|
, axis.text.y = element_text(size = 15
|
||||||
|
, angle = 0
|
||||||
|
, hjust = 1
|
||||||
|
, vjust = 0)
|
||||||
|
, axis.title.x = element_text(size = 15)
|
||||||
|
, axis.title.y = element_text(size = 15)
|
||||||
|
, plot.title = element_text(size = 20, hjust = 0.5)
|
||||||
|
, strip.text.x = element_text(size = 15, colour = "black")
|
||||||
|
, legend.title = element_text(color = "black", size = 20)
|
||||||
|
, legend.text = element_text(size = 15)
|
||||||
|
, legend.direction = "vertical") +
|
||||||
|
labs(title = "Foldx"
|
||||||
|
, x = ""
|
||||||
|
, y = "")+
|
||||||
|
stat_compare_means(comparisons = my_comparisons
|
||||||
|
, method = "wilcox.test"
|
||||||
|
, paired = FALSE
|
||||||
|
#, label = "p.format")
|
||||||
|
, label = "p.signif")
|
||||||
|
p2
|
||||||
|
|
||||||
|
|
||||||
|
#===========
|
||||||
|
# Plot 3: LIG
|
||||||
|
#===========
|
||||||
|
|
||||||
|
my_stat_lig = compare_means(param_value~mutation_info, group.by = "param_type"
|
||||||
|
, data = df_lf_lig, paired = FALSE, p.adjust.method = "BH")
|
||||||
|
|
||||||
|
y_value = "param_value"
|
||||||
|
|
||||||
|
p3 = ggplot(df_lf_lig, aes(x = mutation_info
|
||||||
|
, y = eval(parse(text=y_value)) )) +
|
||||||
|
facet_wrap(~ param_type
|
||||||
|
, nrow = 1
|
||||||
|
, scales = "free_y") +
|
||||||
|
geom_boxplot(fill = "white", outlier.colour = NA
|
||||||
|
# position = position_dodge(width = 0.9)
|
||||||
|
, width = 0.5) +
|
||||||
|
geom_point(position = position_jitterdodge(dodge.width=0.01)
|
||||||
|
, show.legend = FALSE
|
||||||
|
, aes(colour = factor(ligand_outcome))) +
|
||||||
|
theme(axis.text.x = element_text(size = 15)
|
||||||
|
, axis.text.y = element_text(size = 15
|
||||||
|
, angle = 0
|
||||||
|
, hjust = 1
|
||||||
|
, vjust = 0)
|
||||||
|
, axis.title.x = element_text(size = 15)
|
||||||
|
, axis.title.y = element_text(size = 15)
|
||||||
|
, plot.title = element_text(size = 20, hjust = 0.5)
|
||||||
|
, strip.text.x = element_text(size = 15, colour = "black")
|
||||||
|
, legend.title = element_text(color = "black", size = 20)
|
||||||
|
, legend.text = element_text(size = 15)
|
||||||
|
, legend.direction = "vertical") +
|
||||||
|
labs(title = "Ligand Affinity"
|
||||||
|
, x = ""
|
||||||
|
, y = "")+
|
||||||
|
stat_compare_means(comparisons = my_comparisons
|
||||||
|
, method = "wilcox.test"
|
||||||
|
, paired = FALSE
|
||||||
|
#, label = "p.format")
|
||||||
|
, label = "p.signif")
|
||||||
|
|
||||||
|
p3
|
||||||
|
|
||||||
|
#===========================
|
||||||
|
# combine
|
||||||
|
#===========================
|
||||||
|
#svg(plot_or_combined, width = 32, height = 12)
|
||||||
|
|
||||||
|
theme_set(theme_gray()) # to preserve default theme
|
||||||
|
|
||||||
|
printFile = cowplot::plot_grid(plot_grid(p1, p2, p3
|
||||||
|
, nrow = 3
|
||||||
|
, align = 'h'
|
||||||
|
, labels = c("", "", "")
|
||||||
|
, label_size = 20))
|
||||||
|
print(printFile)
|
||||||
|
dev.off()
|
||||||
|
|
252
scripts/plotting/other_plots_data.R
Normal file
252
scripts/plotting/other_plots_data.R
Normal file
|
@ -0,0 +1,252 @@
|
||||||
|
#!/usr/bin/env Rscript
|
||||||
|
#########################################################
|
||||||
|
# TASK: producing boxplots for dr and other muts
|
||||||
|
|
||||||
|
#########################################################
|
||||||
|
#=======================================================================
|
||||||
|
# working dir and loading libraries
|
||||||
|
getwd()
|
||||||
|
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||||
|
getwd()
|
||||||
|
|
||||||
|
#source("Header_TT.R")
|
||||||
|
library(ggplot2)
|
||||||
|
library(data.table)
|
||||||
|
library(dplyr)
|
||||||
|
source("combining_dfs_plotting.R")
|
||||||
|
|
||||||
|
#=======
|
||||||
|
# output
|
||||||
|
#=======
|
||||||
|
#lineage_dist_combined = "lineage_dist_combined_PS.svg"
|
||||||
|
#plot_lineage_dist_combined = paste0(plotdir,"/", lineage_dist_combined)
|
||||||
|
|
||||||
|
|
||||||
|
rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
|
||||||
|
, merged_df3_comp, merged_df3_comp_lig
|
||||||
|
, my_df_u, my_df_u_lig)
|
||||||
|
#========================================================================
|
||||||
|
#%%%%%%%%%%%%%%%%%%%
|
||||||
|
# REASSIGNMENT: PS
|
||||||
|
#%%%%%%%%%%%%%%%%%%%%
|
||||||
|
df_ps = merged_df3
|
||||||
|
|
||||||
|
# name tidying
|
||||||
|
df_ps$mutation_info = as.factor(df_ps$mutation_info)
|
||||||
|
df_ps$duet_outcome = as.factor(df_ps$duet_outcome)
|
||||||
|
df_ps$foldx_outcome = as.factor(df_ps$foldx_outcome)
|
||||||
|
df_ps$ligand_outcome = as.factor(df_ps$ligand_outcome)
|
||||||
|
|
||||||
|
# check
|
||||||
|
table(df_ps$mutation_info)
|
||||||
|
|
||||||
|
#%%%%%%%%%%%%%%%%%%%
|
||||||
|
# REASSIGNMENT: LIG
|
||||||
|
#%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
df_lig = merged_df3_lig
|
||||||
|
|
||||||
|
# name tidying
|
||||||
|
df_lig$mutation_info = as.factor(df_lig$mutation_info)
|
||||||
|
df_lig$duet_outcome = as.factor(df_lig$duet_outcome)
|
||||||
|
df_lig$ligand_outcome = as.factor(df_lig$ligand_outcome)
|
||||||
|
|
||||||
|
# check
|
||||||
|
table(df_lig$mutation_info)
|
||||||
|
|
||||||
|
#========================================================================
|
||||||
|
n = which(colnames(df_ps) == "ddg"); n # 10
|
||||||
|
|
||||||
|
my_min = min(df_ps[,n]); my_min
|
||||||
|
my_max = max(df_ps[,n]); my_max
|
||||||
|
|
||||||
|
df_ps$foldx_scaled = ifelse(df_ps[,n] < 0
|
||||||
|
, df_ps[,n]/abs(my_min)
|
||||||
|
, df_ps[,n]/my_max) #335, 15
|
||||||
|
#sanity check
|
||||||
|
my_min = min(df_ps$foldx_scaled); my_min
|
||||||
|
my_max = max(df_ps$foldx_scaled); my_max
|
||||||
|
|
||||||
|
# assign foldx
|
||||||
|
#ddg<0 = "Stabilising" (-ve)
|
||||||
|
c1 = table(df_ps$ddg < 0)
|
||||||
|
df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising")
|
||||||
|
c2 = table(df_ps$ddg < 0)
|
||||||
|
|
||||||
|
if ( all(c1 == c2) ){
|
||||||
|
cat("PASS: foldx outcome successfully created")
|
||||||
|
}else{
|
||||||
|
cat("FAIL: foldx outcome could not be created. Aborting!")
|
||||||
|
exit()
|
||||||
|
}
|
||||||
|
|
||||||
|
#===========
|
||||||
|
# Data: ps
|
||||||
|
#===========
|
||||||
|
# keep similar dtypes cols together
|
||||||
|
cols_to_select_ps = c("mutationinformation", "mutation", "position", "mutation_info"
|
||||||
|
, "duet_outcome"
|
||||||
|
|
||||||
|
, "duet_scaled"
|
||||||
|
, "ligand_distance"
|
||||||
|
, "asa"
|
||||||
|
, "rsa"
|
||||||
|
, "rd_values"
|
||||||
|
, "kd_values")
|
||||||
|
|
||||||
|
df_wf_ps = df_ps[, cols_to_select_ps]
|
||||||
|
|
||||||
|
pivot_cols_ps = cols_to_select_ps[1:5]; pivot_cols_ps
|
||||||
|
|
||||||
|
expected_rows_lf_ps = nrow(df_wf_ps) * (length(df_wf_ps) - length(pivot_cols_ps))
|
||||||
|
expected_rows_lf_ps
|
||||||
|
|
||||||
|
# LF data: duet
|
||||||
|
df_lf_ps = gather(df_wf_ps, param_type, param_value, duet_scaled:kd_values, factor_key=TRUE)
|
||||||
|
|
||||||
|
if (nrow(df_lf_ps) == expected_rows_lf_ps){
|
||||||
|
cat("PASS: long format data created for duet")
|
||||||
|
}else{
|
||||||
|
cat("FAIL: long format data could not be created for duet")
|
||||||
|
exit()
|
||||||
|
}
|
||||||
|
|
||||||
|
str(df_wf_ps)
|
||||||
|
str(df_lf_ps)
|
||||||
|
|
||||||
|
# assign pretty labels: param_type
|
||||||
|
levels(df_lf_ps$param_type); table(df_lf_ps$param_type)
|
||||||
|
|
||||||
|
levels(df_lf_ps$param_type)[levels(df_lf_ps$param_type)=="duet_scaled"] <- "DUET"
|
||||||
|
levels(df_lf_ps$param_type)[levels(df_lf_ps$param_type)=="ligand_distance"] <- "Ligand Distance"
|
||||||
|
levels(df_lf_ps$param_type)[levels(df_lf_ps$param_type)=="asa"] <- "ASA"
|
||||||
|
levels(df_lf_ps$param_type)[levels(df_lf_ps$param_type)=="rsa"] <- "RSA"
|
||||||
|
levels(df_lf_ps$param_type)[levels(df_lf_ps$param_type)=="rd_values"] <- "RD"
|
||||||
|
levels(df_lf_ps$param_type)[levels(df_lf_ps$param_type)=="kd_values"] <- "KD"
|
||||||
|
# check
|
||||||
|
levels(df_lf_ps$param_type); table(df_lf_ps$param_type)
|
||||||
|
|
||||||
|
# assign pretty labels: mutation_info
|
||||||
|
levels(df_lf_ps$mutation_info); table(df_lf_ps$mutation_info)
|
||||||
|
sum(table(df_lf_ps$mutation_info)) == nrow(df_lf_ps)
|
||||||
|
|
||||||
|
levels(df_lf_ps$mutation_info)[levels(df_lf_ps$mutation_info)==dr_muts_col] <- "DM"
|
||||||
|
levels(df_lf_ps$mutation_info)[levels(df_lf_ps$mutation_info)==other_muts_col] <- "OM"
|
||||||
|
# check
|
||||||
|
levels(df_lf_ps$mutation_info); table(df_lf_ps$mutation_info)
|
||||||
|
|
||||||
|
############################################################################
|
||||||
|
#===========
|
||||||
|
# Data: foldx
|
||||||
|
#===========
|
||||||
|
# keep similar dtypes cols together
|
||||||
|
cols_to_select_foldx = c("mutationinformation", "mutation", "position", "mutation_info"
|
||||||
|
, "foldx_outcome"
|
||||||
|
|
||||||
|
, "foldx_scaled"
|
||||||
|
, "ligand_distance"
|
||||||
|
, "asa"
|
||||||
|
, "rsa"
|
||||||
|
, "rd_values"
|
||||||
|
, "kd_values")
|
||||||
|
|
||||||
|
|
||||||
|
df_wf_foldx = df_ps[, cols_to_select_foldx]
|
||||||
|
|
||||||
|
pivot_cols_foldx = cols_to_select_foldx[1:5]; pivot_cols_foldx
|
||||||
|
|
||||||
|
expected_rows_lf_foldx = nrow(df_wf_foldx) * (length(df_wf_foldx) - length(pivot_cols_foldx))
|
||||||
|
expected_rows_lf_foldx
|
||||||
|
|
||||||
|
# LF data: foldx
|
||||||
|
df_lf_foldx = gather(df_wf_foldx, param_type, param_value, foldx_scaled:kd_values, factor_key=TRUE)
|
||||||
|
|
||||||
|
if (nrow(df_lf_foldx) == expected_rows_lf_foldx){
|
||||||
|
cat("PASS: long format data created for foldx")
|
||||||
|
}else{
|
||||||
|
cat("FAIL: long format data could not be created for foldx")
|
||||||
|
exit()
|
||||||
|
}
|
||||||
|
|
||||||
|
# assign pretty labels: param type
|
||||||
|
levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type)
|
||||||
|
|
||||||
|
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="foldx_scaled"] <- "Foldx"
|
||||||
|
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="ligand_distance"] <- "Ligand Distance"
|
||||||
|
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="asa"] <- "ASA"
|
||||||
|
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rsa"] <- "RSA"
|
||||||
|
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rd_values"] <- "RD"
|
||||||
|
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="kd_values"] <- "KD"
|
||||||
|
# check
|
||||||
|
levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type)
|
||||||
|
|
||||||
|
# assign pretty labels: mutation_info
|
||||||
|
levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info)
|
||||||
|
sum(table(df_lf_foldx$mutation_info)) == nrow(df_lf_foldx)
|
||||||
|
|
||||||
|
levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==dr_muts_col] <- "DM"
|
||||||
|
levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==other_muts_col] <- "OM"
|
||||||
|
# check
|
||||||
|
levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info)
|
||||||
|
|
||||||
|
############################################################################
|
||||||
|
#===========
|
||||||
|
# LF data: LIG
|
||||||
|
#===========
|
||||||
|
# keep similar dtypes cols together
|
||||||
|
cols_to_select_lig = c("mutationinformation", "mutation", "position", "mutation_info"
|
||||||
|
, "ligand_outcome"
|
||||||
|
|
||||||
|
, "affinity_scaled"
|
||||||
|
, "ligand_distance"
|
||||||
|
, "asa"
|
||||||
|
, "rsa"
|
||||||
|
, "rd_values"
|
||||||
|
, "kd_values")
|
||||||
|
|
||||||
|
df_wf_lig = df_lig[, cols_to_select_lig]
|
||||||
|
|
||||||
|
pivot_cols_lig = cols_to_select_lig[1:5]; pivot_cols_lig
|
||||||
|
|
||||||
|
expected_rows_lf_lig = nrow(df_wf_lig) * (length(df_wf_lig) - length(pivot_cols_lig))
|
||||||
|
expected_rows_lf_lig
|
||||||
|
|
||||||
|
# LF data: foldx
|
||||||
|
df_lf_lig = gather(df_wf_lig, param_type, param_value, affinity_scaled:kd_values, factor_key=TRUE)
|
||||||
|
|
||||||
|
if (nrow(df_lf_lig) == expected_rows_lf_lig){
|
||||||
|
cat("PASS: long format data created for foldx")
|
||||||
|
}else{
|
||||||
|
cat("FAIL: long format data could not be created for foldx")
|
||||||
|
exit()
|
||||||
|
}
|
||||||
|
|
||||||
|
# assign pretty labels: param_type
|
||||||
|
levels(df_lf_lig$param_type); table(df_lf_lig$param_type)
|
||||||
|
|
||||||
|
levels(df_lf_lig$param_type)[levels(df_lf_lig$param_type)=="affinity_scaled"] <- "Ligand Affinity"
|
||||||
|
levels(df_lf_lig$param_type)[levels(df_lf_lig$param_type)=="ligand_distance"] <- "Ligand Distance"
|
||||||
|
levels(df_lf_lig$param_type)[levels(df_lf_lig$param_type)=="asa"] <- "ASA"
|
||||||
|
levels(df_lf_lig$param_type)[levels(df_lf_lig$param_type)=="rsa"] <- "RSA"
|
||||||
|
levels(df_lf_lig$param_type)[levels(df_lf_lig$param_type)=="rd_values"] <- "RD"
|
||||||
|
levels(df_lf_lig$param_type)[levels(df_lf_lig$param_type)=="kd_values"] <- "KD"
|
||||||
|
#check
|
||||||
|
levels(df_lf_lig$param_type); table(df_lf_lig$param_type)
|
||||||
|
|
||||||
|
# assign pretty labels: mutation_info
|
||||||
|
levels(df_lf_lig$mutation_info); table(df_lf_lig$mutation_info)
|
||||||
|
sum(table(df_lf_lig$mutation_info)) == nrow(df_lf_lig)
|
||||||
|
|
||||||
|
levels(df_lf_lig$mutation_info)[levels(df_lf_lig$mutation_info)==dr_muts_col] <- "DM"
|
||||||
|
levels(df_lf_lig$mutation_info)[levels(df_lf_lig$mutation_info)==other_muts_col] <- "OM"
|
||||||
|
# check
|
||||||
|
levels(df_lf_lig$mutation_info); table(df_lf_lig$mutation_info)
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# clear excess variables
|
||||||
|
rm(cols_to_select_ps, cols_to_select_foldx, cols_to_select_lig
|
||||||
|
, pivot_cols_ps, pivot_cols_foldx, pivot_cols_lig
|
||||||
|
, expected_rows_lf_ps, expected_rows_lf_foldx, expected_rows_lf_lig
|
||||||
|
, my_max, my_min, na_count, na_count_df2, na_count_df3, dup_muts_nu
|
||||||
|
, c1, c2, n)
|
Loading…
Add table
Add a link
Reference in a new issue