saving other_plots.R
This commit is contained in:
parent
c9040cad21
commit
14182df12f
5 changed files with 227 additions and 241 deletions
|
@ -319,8 +319,7 @@ all.equal(foo, bar)
|
||||||
# clear variables
|
# clear variables
|
||||||
rm(foo, bar, gene_metadata
|
rm(foo, bar, gene_metadata
|
||||||
, in_filename_params, infile_params, merging_cols
|
, in_filename_params, infile_params, merging_cols
|
||||||
, in_filename_gene_metadata, infile_gene_metadata
|
, in_filename_gene_metadata, infile_gene_metadata)
|
||||||
, merged_df2v2, merged_df2v3)
|
|
||||||
#*************************
|
#*************************
|
||||||
#####################################################################
|
#####################################################################
|
||||||
# Combining: LIG
|
# Combining: LIG
|
||||||
|
|
|
@ -64,3 +64,13 @@ table(snpsBYpos_df$snpsBYpos)
|
||||||
# with dups
|
# with dups
|
||||||
1 2 3 4 5 6 7
|
1 2 3 4 5 6 7
|
||||||
39 30 25 23 20 6 2
|
39 30 25 23 20 6 2
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#======================
|
||||||
|
# dr and other muts count = PS
|
||||||
|
#======================
|
||||||
|
dr_mutations_pyrazinamide : 227
|
||||||
|
other_mutations_pyrazinamide : 197
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -18,19 +18,29 @@ source("other_plots_data.R")
|
||||||
#=======
|
#=======
|
||||||
# output
|
# output
|
||||||
#=======
|
#=======
|
||||||
dr_other_plots_combined = "dr_other_muts.svg"
|
dr_other_combined = "dr_other_muts.svg"
|
||||||
plot_dr_other_plots_combined = paste0(plotdir,"/", dr_other_plots_combined)
|
plot_dr_other_combined = paste0(plotdir,"/", dr_other_combined)
|
||||||
|
|
||||||
|
dr_other_foldx = "dr_other_muts_foldx.svg"
|
||||||
|
plot_dr_other_foldx = paste0(plotdir,"/", dr_other_foldx)
|
||||||
|
|
||||||
|
point_legend = "point_legend.svg"
|
||||||
|
plot_point_legend = paste0(plotdir,"/", point_legend)
|
||||||
########################################################################
|
########################################################################
|
||||||
# end of data extraction and cleaning for plots #
|
# end of data extraction and cleaning for plots #
|
||||||
########################################################################
|
########################################################################
|
||||||
#my_comparisons <- list( c(dr_muts_col, other_muts_col) )
|
#my_comparisons <- list( c(dr_muts_col, other_muts_col) )
|
||||||
my_comparisons <- list( c("DM", "OM") )
|
my_comparisons <- list( c("DM", "OM") )
|
||||||
|
|
||||||
|
|
||||||
|
my_ats = 15# axis text size
|
||||||
|
my_als = 20 # axis label size
|
||||||
|
my_fls = 20 # facet label size
|
||||||
|
my_pts = 22 # plot title size
|
||||||
|
|
||||||
#===========
|
#===========
|
||||||
# Plot1: PS
|
# Plot1: PS
|
||||||
#===========
|
#===========
|
||||||
|
|
||||||
my_stat_ps = compare_means(param_value~mutation_info, group.by = "param_type"
|
my_stat_ps = compare_means(param_value~mutation_info, group.by = "param_type"
|
||||||
, data = df_lf_ps, paired = FALSE, p.adjust.method = "BH")
|
, data = df_lf_ps, paired = FALSE, p.adjust.method = "BH")
|
||||||
|
|
||||||
|
@ -42,22 +52,23 @@ p1 = ggplot(df_lf_ps, aes(x = mutation_info
|
||||||
, nrow = 1
|
, nrow = 1
|
||||||
, scales = "free_y") +
|
, scales = "free_y") +
|
||||||
geom_boxplot(fill = "white", outlier.colour = NA
|
geom_boxplot(fill = "white", outlier.colour = NA
|
||||||
# position = position_dodge(width = 0.9)
|
#, position = position_dodge(width = 0.9)
|
||||||
, width = 0.5) +
|
, width = 0.2) +
|
||||||
geom_point(position = position_jitterdodge(dodge.width=0.01)
|
geom_point(position = position_jitterdodge(dodge.width=0.01)
|
||||||
|
, alpha = 0.5
|
||||||
, show.legend = FALSE
|
, show.legend = FALSE
|
||||||
, aes(colour = factor(duet_outcome))) +
|
, aes(colour = factor(duet_outcome))) +
|
||||||
theme(axis.text.x = element_text(size = 15)
|
theme(axis.text.x = element_text(size = my_ats)
|
||||||
, axis.text.y = element_text(size = 15
|
, axis.text.y = element_text(size = my_ats
|
||||||
, angle = 0
|
, angle = 0
|
||||||
, hjust = 1
|
, hjust = 1
|
||||||
, vjust = 0)
|
, vjust = 0)
|
||||||
, axis.title.x = element_text(size = 15)
|
, axis.title.x = element_text(size = my_ats)
|
||||||
, axis.title.y = element_text(size = 15)
|
, axis.title.y = element_text(size = my_ats)
|
||||||
, plot.title = element_text(size = 20, hjust = 0.5)
|
, plot.title = element_text(size = my_pts , hjust = 0.5)
|
||||||
, strip.text.x = element_text(size = 15, colour = "black")
|
, strip.text.x = element_text(size = my_fls, colour = "black")
|
||||||
, legend.title = element_text(color = "black", size = 20)
|
, legend.title = element_text(color = "black", size = my_als)
|
||||||
, legend.text = element_text(size = 15)
|
, legend.text = element_text(size = my_ats)
|
||||||
, legend.direction = "vertical") +
|
, legend.direction = "vertical") +
|
||||||
labs(title = "DUET"
|
labs(title = "DUET"
|
||||||
, x = ""
|
, x = ""
|
||||||
|
@ -68,51 +79,9 @@ p1 = ggplot(df_lf_ps, aes(x = mutation_info
|
||||||
#, label = "p.format")
|
#, label = "p.format")
|
||||||
, label = "p.signif")
|
, label = "p.signif")
|
||||||
p1
|
p1
|
||||||
|
#===============================================================
|
||||||
#===========
|
#===========
|
||||||
# Plot 2: Foldx
|
# Plot 2: LIG
|
||||||
#===========
|
|
||||||
my_stat_foldx = compare_means(param_value~mutation_info, group.by = "param_type"
|
|
||||||
, data = df_lf_foldx, paired = FALSE, p.adjust.method = "BH")
|
|
||||||
|
|
||||||
y_value = "param_value"
|
|
||||||
|
|
||||||
p2 = ggplot(df_lf_foldx, aes(x = mutation_info
|
|
||||||
, y = eval(parse(text=y_value)) )) +
|
|
||||||
facet_wrap(~ param_type
|
|
||||||
, nrow = 1
|
|
||||||
, scales = "free_y") +
|
|
||||||
geom_boxplot(fill = "white", outlier.colour = NA
|
|
||||||
# position = position_dodge(width = 0.9)
|
|
||||||
, width = 0.5) +
|
|
||||||
geom_point(position = position_jitterdodge(dodge.width=0.01)
|
|
||||||
, show.legend = FALSE
|
|
||||||
, aes(colour = factor(foldx_outcome))) +
|
|
||||||
theme(axis.text.x = element_text(size = 15)
|
|
||||||
, axis.text.y = element_text(size = 15
|
|
||||||
, angle = 0
|
|
||||||
, hjust = 1
|
|
||||||
, vjust = 0)
|
|
||||||
, axis.title.x = element_text(size = 15)
|
|
||||||
, axis.title.y = element_text(size = 15)
|
|
||||||
, plot.title = element_text(size = 20, hjust = 0.5)
|
|
||||||
, strip.text.x = element_text(size = 15, colour = "black")
|
|
||||||
, legend.title = element_text(color = "black", size = 20)
|
|
||||||
, legend.text = element_text(size = 15)
|
|
||||||
, legend.direction = "vertical") +
|
|
||||||
labs(title = "Foldx"
|
|
||||||
, x = ""
|
|
||||||
, y = "")+
|
|
||||||
stat_compare_means(comparisons = my_comparisons
|
|
||||||
, method = "wilcox.test"
|
|
||||||
, paired = FALSE
|
|
||||||
#, label = "p.format")
|
|
||||||
, label = "p.signif")
|
|
||||||
p2
|
|
||||||
|
|
||||||
|
|
||||||
#===========
|
|
||||||
# Plot 3: LIG
|
|
||||||
#===========
|
#===========
|
||||||
|
|
||||||
my_stat_lig = compare_means(param_value~mutation_info, group.by = "param_type"
|
my_stat_lig = compare_means(param_value~mutation_info, group.by = "param_type"
|
||||||
|
@ -120,28 +89,29 @@ my_stat_lig = compare_means(param_value~mutation_info, group.by = "param_type"
|
||||||
|
|
||||||
y_value = "param_value"
|
y_value = "param_value"
|
||||||
|
|
||||||
p3 = ggplot(df_lf_lig, aes(x = mutation_info
|
p2 = ggplot(df_lf_lig, aes(x = mutation_info
|
||||||
, y = eval(parse(text=y_value)) )) +
|
, y = eval(parse(text=y_value)) )) +
|
||||||
facet_wrap(~ param_type
|
facet_wrap(~ param_type
|
||||||
, nrow = 1
|
, nrow = 1
|
||||||
, scales = "free_y") +
|
, scales = "free_y") +
|
||||||
geom_boxplot(fill = "white", outlier.colour = NA
|
geom_boxplot(fill = "white", outlier.colour = NA
|
||||||
# position = position_dodge(width = 0.9)
|
#, position = position_dodge(width = 0.9)
|
||||||
, width = 0.5) +
|
, width = 0.2) +
|
||||||
geom_point(position = position_jitterdodge(dodge.width=0.01)
|
geom_point(position = position_jitterdodge(dodge.width=0.01)
|
||||||
|
, alpha = 0.5
|
||||||
, show.legend = FALSE
|
, show.legend = FALSE
|
||||||
, aes(colour = factor(ligand_outcome))) +
|
, aes(colour = factor(ligand_outcome))) +
|
||||||
theme(axis.text.x = element_text(size = 15)
|
theme(axis.text.x = element_text(size = my_ats)
|
||||||
, axis.text.y = element_text(size = 15
|
, axis.text.y = element_text(size = my_ats
|
||||||
, angle = 0
|
, angle = 0
|
||||||
, hjust = 1
|
, hjust = 1
|
||||||
, vjust = 0)
|
, vjust = 0)
|
||||||
, axis.title.x = element_text(size = 15)
|
, axis.title.x = element_text(size = my_ats)
|
||||||
, axis.title.y = element_text(size = 15)
|
, axis.title.y = element_text(size = my_ats)
|
||||||
, plot.title = element_text(size = 20, hjust = 0.5)
|
, plot.title = element_text(size = my_pts , hjust = 0.5)
|
||||||
, strip.text.x = element_text(size = 15, colour = "black")
|
, strip.text.x = element_text(size = my_fls, colour = "black")
|
||||||
, legend.title = element_text(color = "black", size = 20)
|
, legend.title = element_text(color = "black", size = my_als)
|
||||||
, legend.text = element_text(size = 15)
|
, legend.text = element_text(size = my_ats)
|
||||||
, legend.direction = "vertical") +
|
, legend.direction = "vertical") +
|
||||||
labs(title = "Ligand Affinity"
|
labs(title = "Ligand Affinity"
|
||||||
, x = ""
|
, x = ""
|
||||||
|
@ -152,20 +122,108 @@ p3 = ggplot(df_lf_lig, aes(x = mutation_info
|
||||||
#, label = "p.format")
|
#, label = "p.format")
|
||||||
, label = "p.signif")
|
, label = "p.signif")
|
||||||
|
|
||||||
p3
|
p2
|
||||||
|
|
||||||
|
#====================================================================
|
||||||
|
#===========
|
||||||
|
# Plot 3: Foldx
|
||||||
|
#===========
|
||||||
|
my_stat_foldx = compare_means(param_value~mutation_info, group.by = "param_type"
|
||||||
|
, data = df_lf_foldx, paired = FALSE, p.adjust.method = "BH")
|
||||||
|
|
||||||
|
y_value = "param_value"
|
||||||
|
|
||||||
|
p3 = ggplot(df_lf_foldx, aes(x = mutation_info
|
||||||
|
, y = eval(parse(text=y_value)) )) +
|
||||||
|
facet_wrap(~ param_type
|
||||||
|
, nrow = 1
|
||||||
|
, scales = "free_y") +
|
||||||
|
geom_boxplot(fill = "white", outlier.colour = NA
|
||||||
|
#, position = position_dodge(width = 0.2)
|
||||||
|
, width = 0.5) +
|
||||||
|
geom_point(position = position_jitterdodge(dodge.width=0.01)
|
||||||
|
, alpha = 0.5
|
||||||
|
, show.legend = FALSE
|
||||||
|
, aes(colour = factor(foldx_outcome))) +
|
||||||
|
theme(axis.text.x = element_text(size = my_ats)
|
||||||
|
, axis.text.y = element_text(size = my_ats
|
||||||
|
, angle = 0
|
||||||
|
, hjust = 1
|
||||||
|
, vjust = 0)
|
||||||
|
, axis.title.x = element_text(size = my_ats)
|
||||||
|
, axis.title.y = element_text(size = my_ats)
|
||||||
|
, plot.title = element_text(size = my_pts , hjust = 0.5)
|
||||||
|
, strip.text.x = element_text(size = my_fls, colour = "black")
|
||||||
|
#, legend.title = element_text(color = "black", size = my_als)
|
||||||
|
, legend.text = element_text(size = my_ats)
|
||||||
|
, legend.direction = "vertical"
|
||||||
|
, legend.title = element_blank()) +
|
||||||
|
labs(title = ""
|
||||||
|
, x = ""
|
||||||
|
, y = "") +
|
||||||
|
stat_compare_means(comparisons = my_comparisons
|
||||||
|
, method = "wilcox.test"
|
||||||
|
, paired = FALSE
|
||||||
|
#, label = "p.format")
|
||||||
|
, label = "p.signif")
|
||||||
|
|
||||||
|
p3
|
||||||
|
|
||||||
|
#====================================================================
|
||||||
|
#===========
|
||||||
|
# legend
|
||||||
|
#===========
|
||||||
|
|
||||||
|
legend = ggplot(df_lf_foldx, aes(x = mutation_info
|
||||||
|
, y = eval(parse(text=y_value)) )) +
|
||||||
|
facet_wrap(~ param_type
|
||||||
|
, nrow = 1
|
||||||
|
, scales = "free_y") +
|
||||||
|
geom_boxplot(fill = "white", outlier.colour = NA
|
||||||
|
#, position = position_dodge(width = 0.2)
|
||||||
|
, width = 0.5) +
|
||||||
|
geom_point(position = position_jitterdodge(dodge.width=0.01)
|
||||||
|
, aes(colour = factor(foldx_outcome))) +
|
||||||
|
theme(legend.text = element_text(size = 25)
|
||||||
|
, legend.direction = "vertical"
|
||||||
|
, legend.title = element_blank())+
|
||||||
|
|
||||||
|
guides(color = guide_legend(override.aes = list(size = 6)))
|
||||||
|
|
||||||
|
legend
|
||||||
|
|
||||||
#===========================
|
#===========================
|
||||||
# combine
|
# combine
|
||||||
#===========================
|
#===========================
|
||||||
#svg(plot_or_combined, width = 32, height = 12)
|
#---------
|
||||||
svg("test.svg", width = 25, height = 12)
|
# plot 1
|
||||||
#theme_set(theme_gray()) # to preserve default theme
|
#---------
|
||||||
|
cat("Output plot:",plot_dr_other_combined)
|
||||||
|
svg(plot_dr_other_combined, width = 24, height = 12)
|
||||||
|
|
||||||
printFile = cowplot::plot_grid(plot_grid(p1, p2, p3
|
#theme_set(theme_gray()) # to preserve default theme
|
||||||
, nrow = 3
|
OutPlot = cowplot::plot_grid(p1, p2
|
||||||
, align = 'h'
|
, nrow = 2
|
||||||
, labels = c("", "", "")
|
, align = "hv"
|
||||||
, label_size = 20))
|
, axis = "b"
|
||||||
print(printFile)
|
, labels = c("", "", "")
|
||||||
|
, label_size = my_als)
|
||||||
|
print(OutPlot)
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
#---------
|
||||||
|
# plot 2
|
||||||
|
#---------
|
||||||
|
svg(plot_dr_other_foldx , width = 6, height = 7)
|
||||||
|
OutPlot2 = p3
|
||||||
|
print(OutPlot2)
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
#---------
|
||||||
|
# plot 3: legend
|
||||||
|
#---------
|
||||||
|
svg(plot_point_legend, width = 6, height = 7)
|
||||||
|
OutPlot3 = legend
|
||||||
|
print(OutPlot3)
|
||||||
|
dev.off()
|
||||||
|
|
|
@ -33,7 +33,7 @@ my_max = max(df_ps[,n]); my_max
|
||||||
df_ps$foldx_scaled = ifelse(df_ps[,n] < 0
|
df_ps$foldx_scaled = ifelse(df_ps[,n] < 0
|
||||||
, df_ps[,n]/abs(my_min)
|
, df_ps[,n]/abs(my_min)
|
||||||
, df_ps[,n]/my_max) #335, 15
|
, df_ps[,n]/my_max) #335, 15
|
||||||
#sanity check
|
# sanity check
|
||||||
my_min = min(df_ps$foldx_scaled); my_min
|
my_min = min(df_ps$foldx_scaled); my_min
|
||||||
my_max = max(df_ps$foldx_scaled); my_max
|
my_max = max(df_ps$foldx_scaled); my_max
|
||||||
|
|
||||||
|
@ -50,7 +50,6 @@ if ( all(c1 == c2) ){
|
||||||
exit()
|
exit()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# name tidying
|
# name tidying
|
||||||
df_ps$mutation_info = as.factor(df_ps$mutation_info)
|
df_ps$mutation_info = as.factor(df_ps$mutation_info)
|
||||||
df_ps$duet_outcome = as.factor(df_ps$duet_outcome)
|
df_ps$duet_outcome = as.factor(df_ps$duet_outcome)
|
||||||
|
@ -60,6 +59,21 @@ df_ps$ligand_outcome = as.factor(df_ps$ligand_outcome)
|
||||||
# check
|
# check
|
||||||
table(df_ps$mutation_info)
|
table(df_ps$mutation_info)
|
||||||
|
|
||||||
|
# further checks to make sure dr and other muts are indeed unique
|
||||||
|
dr_muts = df_ps[df_ps$mutation_info == dr_muts_col,]
|
||||||
|
dr_muts_names = unique(dr_muts$mutation)
|
||||||
|
|
||||||
|
other_muts = df_ps[df_ps$mutation_info == other_muts_col,]
|
||||||
|
other_muts_names = unique(other_muts$mutation)
|
||||||
|
|
||||||
|
if ( table(dr_muts_names%in%other_muts_names)[[1]] == length(dr_muts_names) &&
|
||||||
|
table(other_muts_names%in%dr_muts_names)[[1]] == length(other_muts_names) ){
|
||||||
|
cat("PASS: dr and other muts are indeed unique")
|
||||||
|
}else{
|
||||||
|
cat("FAIL: dr adn others muts are NOT unique!")
|
||||||
|
quit()
|
||||||
|
}
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%
|
#%%%%%%%%%%%%%%%%%%%
|
||||||
# REASSIGNMENT: LIG
|
# REASSIGNMENT: LIG
|
||||||
#%%%%%%%%%%%%%%%%%%%%
|
#%%%%%%%%%%%%%%%%%%%%
|
||||||
|
@ -133,78 +147,25 @@ levels(df_lf_ps$mutation_info)[levels(df_lf_ps$mutation_info)==other_muts_col] <
|
||||||
levels(df_lf_ps$mutation_info); table(df_lf_ps$mutation_info)
|
levels(df_lf_ps$mutation_info); table(df_lf_ps$mutation_info)
|
||||||
|
|
||||||
############################################################################
|
############################################################################
|
||||||
#===========
|
|
||||||
# Data: foldx
|
|
||||||
#===========
|
|
||||||
# keep similar dtypes cols together
|
|
||||||
cols_to_select_foldx = c("mutationinformation", "mutation", "position", "mutation_info"
|
|
||||||
, "foldx_outcome"
|
|
||||||
|
|
||||||
, "foldx_scaled"
|
|
||||||
, "ligand_distance"
|
|
||||||
, "asa"
|
|
||||||
, "rsa"
|
|
||||||
, "rd_values"
|
|
||||||
, "kd_values")
|
|
||||||
|
|
||||||
|
|
||||||
df_wf_foldx = df_ps[, cols_to_select_foldx]
|
|
||||||
|
|
||||||
pivot_cols_foldx = cols_to_select_foldx[1:5]; pivot_cols_foldx
|
|
||||||
|
|
||||||
expected_rows_lf_foldx = nrow(df_wf_foldx) * (length(df_wf_foldx) - length(pivot_cols_foldx))
|
|
||||||
expected_rows_lf_foldx
|
|
||||||
|
|
||||||
# LF data: foldx
|
|
||||||
df_lf_foldx = gather(df_wf_foldx, param_type, param_value, foldx_scaled:kd_values, factor_key=TRUE)
|
|
||||||
|
|
||||||
if (nrow(df_lf_foldx) == expected_rows_lf_foldx){
|
|
||||||
cat("PASS: long format data created for foldx")
|
|
||||||
}else{
|
|
||||||
cat("FAIL: long format data could not be created for foldx")
|
|
||||||
exit()
|
|
||||||
}
|
|
||||||
|
|
||||||
# assign pretty labels: param type
|
|
||||||
levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type)
|
|
||||||
|
|
||||||
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="foldx_scaled"] <- "Foldx"
|
|
||||||
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="ligand_distance"] <- "Ligand Distance"
|
|
||||||
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="asa"] <- "ASA"
|
|
||||||
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rsa"] <- "RSA"
|
|
||||||
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rd_values"] <- "RD"
|
|
||||||
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="kd_values"] <- "KD"
|
|
||||||
# check
|
|
||||||
levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type)
|
|
||||||
|
|
||||||
# assign pretty labels: mutation_info
|
|
||||||
levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info)
|
|
||||||
sum(table(df_lf_foldx$mutation_info)) == nrow(df_lf_foldx)
|
|
||||||
|
|
||||||
levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==dr_muts_col] <- "DM"
|
|
||||||
levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==other_muts_col] <- "OM"
|
|
||||||
# check
|
|
||||||
levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info)
|
|
||||||
|
|
||||||
############################################################################
|
|
||||||
#===========
|
#===========
|
||||||
# LF data: LIG
|
# LF data: LIG
|
||||||
#===========
|
#===========
|
||||||
# keep similar dtypes cols together
|
# keep similar dtypes cols together
|
||||||
cols_to_select_lig = c("mutationinformation", "mutation", "position", "mutation_info"
|
cols_to_select_lig = c("mutationinformation", "mutation", "position", "mutation_info"
|
||||||
, "ligand_outcome"
|
, "ligand_outcome"
|
||||||
|
|
||||||
, "affinity_scaled"
|
, "affinity_scaled"
|
||||||
, "ligand_distance"
|
, "ligand_distance"
|
||||||
, "asa"
|
, "asa"
|
||||||
, "rsa"
|
, "rsa"
|
||||||
, "rd_values"
|
, "rd_values"
|
||||||
, "kd_values")
|
, "kd_values")
|
||||||
|
|
||||||
df_wf_lig = df_lig[, cols_to_select_lig]
|
df_wf_lig = df_lig[, cols_to_select_lig]
|
||||||
|
|
||||||
pivot_cols_lig = cols_to_select_lig[1:5]; pivot_cols_lig
|
pivot_cols_lig = cols_to_select_lig[1:5]; pivot_cols_lig
|
||||||
|
|
||||||
expected_rows_lf_lig = nrow(df_wf_lig) * (length(df_wf_lig) - length(pivot_cols_lig))
|
expected_rows_lf_lig = nrow(df_wf_lig) * (length(df_wf_lig) - length(pivot_cols_lig))
|
||||||
expected_rows_lf_lig
|
expected_rows_lf_lig
|
||||||
|
|
||||||
|
@ -239,7 +200,61 @@ levels(df_lf_lig$mutation_info)[levels(df_lf_lig$mutation_info)==other_muts_col]
|
||||||
# check
|
# check
|
||||||
levels(df_lf_lig$mutation_info); table(df_lf_lig$mutation_info)
|
levels(df_lf_lig$mutation_info); table(df_lf_lig$mutation_info)
|
||||||
|
|
||||||
###############################################################################
|
#############################################################################
|
||||||
|
#===========
|
||||||
|
# Data: foldx
|
||||||
|
#===========
|
||||||
|
# keep similar dtypes cols together
|
||||||
|
cols_to_select_foldx = c("mutationinformation", "mutation", "position", "mutation_info"
|
||||||
|
, "foldx_outcome"
|
||||||
|
|
||||||
|
, "foldx_scaled")
|
||||||
|
#, "ligand_distance"
|
||||||
|
#, "asa"
|
||||||
|
#, "rsa"
|
||||||
|
#, "rd_values"
|
||||||
|
#, "kd_values")
|
||||||
|
|
||||||
|
|
||||||
|
df_wf_foldx = df_ps[, cols_to_select_foldx]
|
||||||
|
|
||||||
|
pivot_cols_foldx = cols_to_select_foldx[1:5]; pivot_cols_foldx
|
||||||
|
|
||||||
|
expected_rows_lf_foldx = nrow(df_wf_foldx) * (length(df_wf_foldx) - length(pivot_cols_foldx))
|
||||||
|
expected_rows_lf_foldx
|
||||||
|
|
||||||
|
# LF data: foldx
|
||||||
|
df_lf_foldx = gather(df_wf_foldx, param_type, param_value, foldx_scaled, factor_key=TRUE)
|
||||||
|
|
||||||
|
if (nrow(df_lf_foldx) == expected_rows_lf_foldx){
|
||||||
|
cat("PASS: long format data created for foldx")
|
||||||
|
}else{
|
||||||
|
cat("FAIL: long format data could not be created for foldx")
|
||||||
|
exit()
|
||||||
|
}
|
||||||
|
|
||||||
|
# assign pretty labels: param type
|
||||||
|
levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type)
|
||||||
|
|
||||||
|
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="foldx_scaled"] <- "Foldx"
|
||||||
|
#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="ligand_distance"] <- "Ligand Distance"
|
||||||
|
#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="asa"] <- "ASA"
|
||||||
|
#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rsa"] <- "RSA"
|
||||||
|
#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rd_values"] <- "RD"
|
||||||
|
#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="kd_values"] <- "KD"
|
||||||
|
# check
|
||||||
|
levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type)
|
||||||
|
|
||||||
|
# assign pretty labels: mutation_info
|
||||||
|
levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info)
|
||||||
|
sum(table(df_lf_foldx$mutation_info)) == nrow(df_lf_foldx)
|
||||||
|
|
||||||
|
levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==dr_muts_col] <- "DM"
|
||||||
|
levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==other_muts_col] <- "OM"
|
||||||
|
# check
|
||||||
|
levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info)
|
||||||
|
|
||||||
|
############################################################################
|
||||||
|
|
||||||
# clear excess variables
|
# clear excess variables
|
||||||
rm(cols_to_select_ps, cols_to_select_foldx, cols_to_select_lig
|
rm(cols_to_select_ps, cols_to_select_foldx, cols_to_select_lig
|
||||||
|
|
|
@ -1,96 +0,0 @@
|
||||||
setwd("/home/tanu/git/LSHTM_analysis/scripts/plotting")
|
|
||||||
|
|
||||||
source("combining_dfs_plotting.R")
|
|
||||||
|
|
||||||
table(merged_df3$mutation_info)
|
|
||||||
|
|
||||||
# assign foldx
|
|
||||||
#ddg<0 = "Stabilising" (-ve)
|
|
||||||
table(merged_df3$ddg < 0)
|
|
||||||
merged_df3$foldx_outcome = ifelse(merged_df3$ddg < 0, "Stabilising", "Destabilising")
|
|
||||||
|
|
||||||
#===========
|
|
||||||
# PS data
|
|
||||||
#===========
|
|
||||||
dr_muts = merged_df3[merged_df3$mutation_info == "dr_mutations_pyrazinamide",]
|
|
||||||
other_muts = merged_df3[merged_df3$mutation_info == "other_mutations_pyrazinamide",]
|
|
||||||
|
|
||||||
par(mfrow = c(1,1))
|
|
||||||
par(mfrow = c(2,6))
|
|
||||||
|
|
||||||
# mcsm duet
|
|
||||||
boxplot(dr_muts$duet_scaled, other_muts$duet_scaled, main = "DUET"
|
|
||||||
#, col = factor(merged_df3$duet_outcome)
|
|
||||||
)
|
|
||||||
wilcox.test(dr_muts$duet_scaled, other_muts$duet_scaled, paired = F)
|
|
||||||
|
|
||||||
# foldx ddg
|
|
||||||
boxplot(dr_muts$ddg, other_muts$ddg, main = "Foldx")
|
|
||||||
wilcox.test(dr_muts$ddg, other_muts$ddg, paired = F)
|
|
||||||
|
|
||||||
# rd
|
|
||||||
boxplot(dr_muts$rd_values, other_muts$rd_values, main = "RD")
|
|
||||||
wilcox.test(dr_muts$rd_values, other_muts$rd_values)
|
|
||||||
|
|
||||||
# kd
|
|
||||||
boxplot(dr_muts$kd_values, other_muts$kd_values, main = "KD")
|
|
||||||
wilcox.test(dr_muts$kd_values, other_muts$kd_values)
|
|
||||||
|
|
||||||
# asa
|
|
||||||
boxplot(dr_muts$asa, other_muts$asa, main = "ASA")
|
|
||||||
wilcox.test(dr_muts$asa, other_muts$asa)
|
|
||||||
|
|
||||||
# rsa
|
|
||||||
boxplot(dr_muts$rsa, other_muts$rsa, main = "RSA")
|
|
||||||
wilcox.test(dr_muts$rsa, other_muts$rsa)
|
|
||||||
|
|
||||||
#===================================================================
|
|
||||||
#==========
|
|
||||||
# LIG data
|
|
||||||
#==========
|
|
||||||
dr_muts_lig = merged_df3_lig[merged_df3_lig$mutation_info == "dr_mutations_pyrazinamide",]
|
|
||||||
other_muts_lig = merged_df3_lig[merged_df3_lig$mutation_info == "other_mutations_pyrazinamide",]
|
|
||||||
|
|
||||||
# mcsm ligand affinity
|
|
||||||
boxplot(dr_muts_lig$duet_scaled, other_muts_lig$duet_scaled, main = "Ligand affinity")
|
|
||||||
wilcox.test(dr_muts_lig$duet_scaled, other_muts_lig$duet_scaled, paired = F)
|
|
||||||
|
|
||||||
# rd
|
|
||||||
boxplot(dr_muts_lig$rd_values, other_muts_lig$rd_values, main = "RD")
|
|
||||||
wilcox.test(dr_muts_lig$rd_values, other_muts_lig$rd_values)
|
|
||||||
|
|
||||||
# kd
|
|
||||||
boxplot(dr_muts_lig$kd_values, other_muts_lig$kd_values, main = "KD")
|
|
||||||
wilcox.test(dr_muts_lig$kd_values, other_muts_lig$kd_values)
|
|
||||||
|
|
||||||
# asa
|
|
||||||
boxplot(dr_muts_lig$asa, other_muts_lig$asa, main = "ASA")
|
|
||||||
wilcox.test(dr_muts_lig$asa, other_muts_lig$asa)
|
|
||||||
|
|
||||||
# rsa
|
|
||||||
boxplot(dr_muts_lig$rsa, other_muts_lig$rsa, main = "RSA")
|
|
||||||
wilcox.test(dr_muts_lig$rsa, other_muts_lig$rsa)
|
|
||||||
|
|
||||||
# checking agreement b/w mcsm and foldx
|
|
||||||
cols_to_select = c("mutationinformation"
|
|
||||||
, "mutation_info"
|
|
||||||
, "duet_scaled"
|
|
||||||
, "ddg"
|
|
||||||
, "duet_outcome"
|
|
||||||
, "foldx_outcome")
|
|
||||||
|
|
||||||
merged_df3_short = select(merged_df3, cols_to_select)
|
|
||||||
|
|
||||||
mcsm_foldx = merged_df3_short[which(merged_df3_short$duet_outcome != merged_df3_short$foldx_outcome),]
|
|
||||||
|
|
||||||
mcsm_foldx$sign_comp = ifelse(sign(mcsm_foldx$duet_scaled)==sign(mcsm_foldx$ddg), "PASS", "FAIL")
|
|
||||||
table(mcsm_foldx$sign_comp)
|
|
||||||
|
|
||||||
# another way of checking
|
|
||||||
merged_df3$sign_comp = ifelse(sign(merged_df3$duet_scaled)==sign(merged_df3$ddg), "PASS", "FAIL")
|
|
||||||
table(merged_df3$sign_comp)
|
|
||||||
|
|
||||||
disagreement = table(merged_df3$sign_comp)[2]/nrow(merged_df3)*100
|
|
||||||
agreement = 100 - disagreement
|
|
||||||
|
|
||||||
cat("There is", agreement, "% between mcsm and foldx predictions")
|
|
Loading…
Add table
Add a link
Reference in a new issue