saving other_plots.R

This commit is contained in:
Tanushree Tunstall 2020-09-10 17:53:49 +01:00
parent 5102bbea1b
commit 6934faca10
5 changed files with 227 additions and 241 deletions

View file

@ -319,8 +319,7 @@ all.equal(foo, bar)
# clear variables
rm(foo, bar, gene_metadata
, in_filename_params, infile_params, merging_cols
, in_filename_gene_metadata, infile_gene_metadata
, merged_df2v2, merged_df2v3)
, in_filename_gene_metadata, infile_gene_metadata)
#*************************
#####################################################################
# Combining: LIG

View file

@ -64,3 +64,13 @@ table(snpsBYpos_df$snpsBYpos)
# with dups
1 2 3 4 5 6 7
39 30 25 23 20 6 2
#======================
# dr and other muts count = PS
#======================
dr_mutations_pyrazinamide : 227
other_mutations_pyrazinamide : 197

View file

@ -18,19 +18,29 @@ source("other_plots_data.R")
#=======
# output
#=======
dr_other_plots_combined = "dr_other_muts.svg"
plot_dr_other_plots_combined = paste0(plotdir,"/", dr_other_plots_combined)
dr_other_combined = "dr_other_muts.svg"
plot_dr_other_combined = paste0(plotdir,"/", dr_other_combined)
dr_other_foldx = "dr_other_muts_foldx.svg"
plot_dr_other_foldx = paste0(plotdir,"/", dr_other_foldx)
point_legend = "point_legend.svg"
plot_point_legend = paste0(plotdir,"/", point_legend)
########################################################################
# end of data extraction and cleaning for plots #
########################################################################
#my_comparisons <- list( c(dr_muts_col, other_muts_col) )
my_comparisons <- list( c("DM", "OM") )
my_ats = 15# axis text size
my_als = 20 # axis label size
my_fls = 20 # facet label size
my_pts = 22 # plot title size
#===========
# Plot1: PS
#===========
my_stat_ps = compare_means(param_value~mutation_info, group.by = "param_type"
, data = df_lf_ps, paired = FALSE, p.adjust.method = "BH")
@ -42,22 +52,23 @@ p1 = ggplot(df_lf_ps, aes(x = mutation_info
, nrow = 1
, scales = "free_y") +
geom_boxplot(fill = "white", outlier.colour = NA
# position = position_dodge(width = 0.9)
, width = 0.5) +
#, position = position_dodge(width = 0.9)
, width = 0.2) +
geom_point(position = position_jitterdodge(dodge.width=0.01)
, alpha = 0.5
, show.legend = FALSE
, aes(colour = factor(duet_outcome))) +
theme(axis.text.x = element_text(size = 15)
, axis.text.y = element_text(size = 15
theme(axis.text.x = element_text(size = my_ats)
, axis.text.y = element_text(size = my_ats
, angle = 0
, hjust = 1
, vjust = 0)
, axis.title.x = element_text(size = 15)
, axis.title.y = element_text(size = 15)
, plot.title = element_text(size = 20, hjust = 0.5)
, strip.text.x = element_text(size = 15, colour = "black")
, legend.title = element_text(color = "black", size = 20)
, legend.text = element_text(size = 15)
, axis.title.x = element_text(size = my_ats)
, axis.title.y = element_text(size = my_ats)
, plot.title = element_text(size = my_pts , hjust = 0.5)
, strip.text.x = element_text(size = my_fls, colour = "black")
, legend.title = element_text(color = "black", size = my_als)
, legend.text = element_text(size = my_ats)
, legend.direction = "vertical") +
labs(title = "DUET"
, x = ""
@ -68,51 +79,9 @@ p1 = ggplot(df_lf_ps, aes(x = mutation_info
#, label = "p.format")
, label = "p.signif")
p1
#===============================================================
#===========
# Plot 2: Foldx
#===========
my_stat_foldx = compare_means(param_value~mutation_info, group.by = "param_type"
, data = df_lf_foldx, paired = FALSE, p.adjust.method = "BH")
y_value = "param_value"
p2 = ggplot(df_lf_foldx, aes(x = mutation_info
, y = eval(parse(text=y_value)) )) +
facet_wrap(~ param_type
, nrow = 1
, scales = "free_y") +
geom_boxplot(fill = "white", outlier.colour = NA
# position = position_dodge(width = 0.9)
, width = 0.5) +
geom_point(position = position_jitterdodge(dodge.width=0.01)
, show.legend = FALSE
, aes(colour = factor(foldx_outcome))) +
theme(axis.text.x = element_text(size = 15)
, axis.text.y = element_text(size = 15
, angle = 0
, hjust = 1
, vjust = 0)
, axis.title.x = element_text(size = 15)
, axis.title.y = element_text(size = 15)
, plot.title = element_text(size = 20, hjust = 0.5)
, strip.text.x = element_text(size = 15, colour = "black")
, legend.title = element_text(color = "black", size = 20)
, legend.text = element_text(size = 15)
, legend.direction = "vertical") +
labs(title = "Foldx"
, x = ""
, y = "")+
stat_compare_means(comparisons = my_comparisons
, method = "wilcox.test"
, paired = FALSE
#, label = "p.format")
, label = "p.signif")
p2
#===========
# Plot 3: LIG
# Plot 2: LIG
#===========
my_stat_lig = compare_means(param_value~mutation_info, group.by = "param_type"
@ -120,28 +89,29 @@ my_stat_lig = compare_means(param_value~mutation_info, group.by = "param_type"
y_value = "param_value"
p3 = ggplot(df_lf_lig, aes(x = mutation_info
p2 = ggplot(df_lf_lig, aes(x = mutation_info
, y = eval(parse(text=y_value)) )) +
facet_wrap(~ param_type
, nrow = 1
, scales = "free_y") +
geom_boxplot(fill = "white", outlier.colour = NA
# position = position_dodge(width = 0.9)
, width = 0.5) +
#, position = position_dodge(width = 0.9)
, width = 0.2) +
geom_point(position = position_jitterdodge(dodge.width=0.01)
, alpha = 0.5
, show.legend = FALSE
, aes(colour = factor(ligand_outcome))) +
theme(axis.text.x = element_text(size = 15)
, axis.text.y = element_text(size = 15
theme(axis.text.x = element_text(size = my_ats)
, axis.text.y = element_text(size = my_ats
, angle = 0
, hjust = 1
, vjust = 0)
, axis.title.x = element_text(size = 15)
, axis.title.y = element_text(size = 15)
, plot.title = element_text(size = 20, hjust = 0.5)
, strip.text.x = element_text(size = 15, colour = "black")
, legend.title = element_text(color = "black", size = 20)
, legend.text = element_text(size = 15)
, axis.title.x = element_text(size = my_ats)
, axis.title.y = element_text(size = my_ats)
, plot.title = element_text(size = my_pts , hjust = 0.5)
, strip.text.x = element_text(size = my_fls, colour = "black")
, legend.title = element_text(color = "black", size = my_als)
, legend.text = element_text(size = my_ats)
, legend.direction = "vertical") +
labs(title = "Ligand Affinity"
, x = ""
@ -152,20 +122,108 @@ p3 = ggplot(df_lf_lig, aes(x = mutation_info
#, label = "p.format")
, label = "p.signif")
p2
#====================================================================
#===========
# Plot 3: Foldx
#===========
my_stat_foldx = compare_means(param_value~mutation_info, group.by = "param_type"
, data = df_lf_foldx, paired = FALSE, p.adjust.method = "BH")
y_value = "param_value"
p3 = ggplot(df_lf_foldx, aes(x = mutation_info
, y = eval(parse(text=y_value)) )) +
facet_wrap(~ param_type
, nrow = 1
, scales = "free_y") +
geom_boxplot(fill = "white", outlier.colour = NA
#, position = position_dodge(width = 0.2)
, width = 0.5) +
geom_point(position = position_jitterdodge(dodge.width=0.01)
, alpha = 0.5
, show.legend = FALSE
, aes(colour = factor(foldx_outcome))) +
theme(axis.text.x = element_text(size = my_ats)
, axis.text.y = element_text(size = my_ats
, angle = 0
, hjust = 1
, vjust = 0)
, axis.title.x = element_text(size = my_ats)
, axis.title.y = element_text(size = my_ats)
, plot.title = element_text(size = my_pts , hjust = 0.5)
, strip.text.x = element_text(size = my_fls, colour = "black")
#, legend.title = element_text(color = "black", size = my_als)
, legend.text = element_text(size = my_ats)
, legend.direction = "vertical"
, legend.title = element_blank()) +
labs(title = ""
, x = ""
, y = "") +
stat_compare_means(comparisons = my_comparisons
, method = "wilcox.test"
, paired = FALSE
#, label = "p.format")
, label = "p.signif")
p3
#====================================================================
#===========
# legend
#===========
legend = ggplot(df_lf_foldx, aes(x = mutation_info
, y = eval(parse(text=y_value)) )) +
facet_wrap(~ param_type
, nrow = 1
, scales = "free_y") +
geom_boxplot(fill = "white", outlier.colour = NA
#, position = position_dodge(width = 0.2)
, width = 0.5) +
geom_point(position = position_jitterdodge(dodge.width=0.01)
, aes(colour = factor(foldx_outcome))) +
theme(legend.text = element_text(size = 25)
, legend.direction = "vertical"
, legend.title = element_blank())+
guides(color = guide_legend(override.aes = list(size = 6)))
legend
#===========================
# combine
#===========================
#svg(plot_or_combined, width = 32, height = 12)
svg("test.svg", width = 25, height = 12)
#theme_set(theme_gray()) # to preserve default theme
#---------
# plot 1
#---------
cat("Output plot:",plot_dr_other_combined)
svg(plot_dr_other_combined, width = 24, height = 12)
printFile = cowplot::plot_grid(plot_grid(p1, p2, p3
, nrow = 3
, align = 'h'
#theme_set(theme_gray()) # to preserve default theme
OutPlot = cowplot::plot_grid(p1, p2
, nrow = 2
, align = "hv"
, axis = "b"
, labels = c("", "", "")
, label_size = 20))
print(printFile)
, label_size = my_als)
print(OutPlot)
dev.off()
#---------
# plot 2
#---------
svg(plot_dr_other_foldx , width = 6, height = 7)
OutPlot2 = p3
print(OutPlot2)
dev.off()
#---------
# plot 3: legend
#---------
svg(plot_point_legend, width = 6, height = 7)
OutPlot3 = legend
print(OutPlot3)
dev.off()

View file

@ -50,7 +50,6 @@ if ( all(c1 == c2) ){
exit()
}
# name tidying
df_ps$mutation_info = as.factor(df_ps$mutation_info)
df_ps$duet_outcome = as.factor(df_ps$duet_outcome)
@ -60,6 +59,21 @@ df_ps$ligand_outcome = as.factor(df_ps$ligand_outcome)
# check
table(df_ps$mutation_info)
# further checks to make sure dr and other muts are indeed unique
dr_muts = df_ps[df_ps$mutation_info == dr_muts_col,]
dr_muts_names = unique(dr_muts$mutation)
other_muts = df_ps[df_ps$mutation_info == other_muts_col,]
other_muts_names = unique(other_muts$mutation)
if ( table(dr_muts_names%in%other_muts_names)[[1]] == length(dr_muts_names) &&
table(other_muts_names%in%dr_muts_names)[[1]] == length(other_muts_names) ){
cat("PASS: dr and other muts are indeed unique")
}else{
cat("FAIL: dr adn others muts are NOT unique!")
quit()
}
#%%%%%%%%%%%%%%%%%%%
# REASSIGNMENT: LIG
#%%%%%%%%%%%%%%%%%%%%
@ -133,60 +147,7 @@ levels(df_lf_ps$mutation_info)[levels(df_lf_ps$mutation_info)==other_muts_col] <
levels(df_lf_ps$mutation_info); table(df_lf_ps$mutation_info)
############################################################################
#===========
# Data: foldx
#===========
# keep similar dtypes cols together
cols_to_select_foldx = c("mutationinformation", "mutation", "position", "mutation_info"
, "foldx_outcome"
, "foldx_scaled"
, "ligand_distance"
, "asa"
, "rsa"
, "rd_values"
, "kd_values")
df_wf_foldx = df_ps[, cols_to_select_foldx]
pivot_cols_foldx = cols_to_select_foldx[1:5]; pivot_cols_foldx
expected_rows_lf_foldx = nrow(df_wf_foldx) * (length(df_wf_foldx) - length(pivot_cols_foldx))
expected_rows_lf_foldx
# LF data: foldx
df_lf_foldx = gather(df_wf_foldx, param_type, param_value, foldx_scaled:kd_values, factor_key=TRUE)
if (nrow(df_lf_foldx) == expected_rows_lf_foldx){
cat("PASS: long format data created for foldx")
}else{
cat("FAIL: long format data could not be created for foldx")
exit()
}
# assign pretty labels: param type
levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type)
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="foldx_scaled"] <- "Foldx"
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="ligand_distance"] <- "Ligand Distance"
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="asa"] <- "ASA"
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rsa"] <- "RSA"
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rd_values"] <- "RD"
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="kd_values"] <- "KD"
# check
levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type)
# assign pretty labels: mutation_info
levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info)
sum(table(df_lf_foldx$mutation_info)) == nrow(df_lf_foldx)
levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==dr_muts_col] <- "DM"
levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==other_muts_col] <- "OM"
# check
levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info)
############################################################################
#===========
# LF data: LIG
#===========
@ -239,7 +200,61 @@ levels(df_lf_lig$mutation_info)[levels(df_lf_lig$mutation_info)==other_muts_col]
# check
levels(df_lf_lig$mutation_info); table(df_lf_lig$mutation_info)
###############################################################################
#############################################################################
#===========
# Data: foldx
#===========
# keep similar dtypes cols together
cols_to_select_foldx = c("mutationinformation", "mutation", "position", "mutation_info"
, "foldx_outcome"
, "foldx_scaled")
#, "ligand_distance"
#, "asa"
#, "rsa"
#, "rd_values"
#, "kd_values")
df_wf_foldx = df_ps[, cols_to_select_foldx]
pivot_cols_foldx = cols_to_select_foldx[1:5]; pivot_cols_foldx
expected_rows_lf_foldx = nrow(df_wf_foldx) * (length(df_wf_foldx) - length(pivot_cols_foldx))
expected_rows_lf_foldx
# LF data: foldx
df_lf_foldx = gather(df_wf_foldx, param_type, param_value, foldx_scaled, factor_key=TRUE)
if (nrow(df_lf_foldx) == expected_rows_lf_foldx){
cat("PASS: long format data created for foldx")
}else{
cat("FAIL: long format data could not be created for foldx")
exit()
}
# assign pretty labels: param type
levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type)
levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="foldx_scaled"] <- "Foldx"
#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="ligand_distance"] <- "Ligand Distance"
#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="asa"] <- "ASA"
#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rsa"] <- "RSA"
#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rd_values"] <- "RD"
#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="kd_values"] <- "KD"
# check
levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type)
# assign pretty labels: mutation_info
levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info)
sum(table(df_lf_foldx$mutation_info)) == nrow(df_lf_foldx)
levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==dr_muts_col] <- "DM"
levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==other_muts_col] <- "OM"
# check
levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info)
############################################################################
# clear excess variables
rm(cols_to_select_ps, cols_to_select_foldx, cols_to_select_lig

View file

@ -1,96 +0,0 @@
setwd("/home/tanu/git/LSHTM_analysis/scripts/plotting")
source("combining_dfs_plotting.R")
table(merged_df3$mutation_info)
# assign foldx
#ddg<0 = "Stabilising" (-ve)
table(merged_df3$ddg < 0)
merged_df3$foldx_outcome = ifelse(merged_df3$ddg < 0, "Stabilising", "Destabilising")
#===========
# PS data
#===========
dr_muts = merged_df3[merged_df3$mutation_info == "dr_mutations_pyrazinamide",]
other_muts = merged_df3[merged_df3$mutation_info == "other_mutations_pyrazinamide",]
par(mfrow = c(1,1))
par(mfrow = c(2,6))
# mcsm duet
boxplot(dr_muts$duet_scaled, other_muts$duet_scaled, main = "DUET"
#, col = factor(merged_df3$duet_outcome)
)
wilcox.test(dr_muts$duet_scaled, other_muts$duet_scaled, paired = F)
# foldx ddg
boxplot(dr_muts$ddg, other_muts$ddg, main = "Foldx")
wilcox.test(dr_muts$ddg, other_muts$ddg, paired = F)
# rd
boxplot(dr_muts$rd_values, other_muts$rd_values, main = "RD")
wilcox.test(dr_muts$rd_values, other_muts$rd_values)
# kd
boxplot(dr_muts$kd_values, other_muts$kd_values, main = "KD")
wilcox.test(dr_muts$kd_values, other_muts$kd_values)
# asa
boxplot(dr_muts$asa, other_muts$asa, main = "ASA")
wilcox.test(dr_muts$asa, other_muts$asa)
# rsa
boxplot(dr_muts$rsa, other_muts$rsa, main = "RSA")
wilcox.test(dr_muts$rsa, other_muts$rsa)
#===================================================================
#==========
# LIG data
#==========
dr_muts_lig = merged_df3_lig[merged_df3_lig$mutation_info == "dr_mutations_pyrazinamide",]
other_muts_lig = merged_df3_lig[merged_df3_lig$mutation_info == "other_mutations_pyrazinamide",]
# mcsm ligand affinity
boxplot(dr_muts_lig$duet_scaled, other_muts_lig$duet_scaled, main = "Ligand affinity")
wilcox.test(dr_muts_lig$duet_scaled, other_muts_lig$duet_scaled, paired = F)
# rd
boxplot(dr_muts_lig$rd_values, other_muts_lig$rd_values, main = "RD")
wilcox.test(dr_muts_lig$rd_values, other_muts_lig$rd_values)
# kd
boxplot(dr_muts_lig$kd_values, other_muts_lig$kd_values, main = "KD")
wilcox.test(dr_muts_lig$kd_values, other_muts_lig$kd_values)
# asa
boxplot(dr_muts_lig$asa, other_muts_lig$asa, main = "ASA")
wilcox.test(dr_muts_lig$asa, other_muts_lig$asa)
# rsa
boxplot(dr_muts_lig$rsa, other_muts_lig$rsa, main = "RSA")
wilcox.test(dr_muts_lig$rsa, other_muts_lig$rsa)
# checking agreement b/w mcsm and foldx
cols_to_select = c("mutationinformation"
, "mutation_info"
, "duet_scaled"
, "ddg"
, "duet_outcome"
, "foldx_outcome")
merged_df3_short = select(merged_df3, cols_to_select)
mcsm_foldx = merged_df3_short[which(merged_df3_short$duet_outcome != merged_df3_short$foldx_outcome),]
mcsm_foldx$sign_comp = ifelse(sign(mcsm_foldx$duet_scaled)==sign(mcsm_foldx$ddg), "PASS", "FAIL")
table(mcsm_foldx$sign_comp)
# another way of checking
merged_df3$sign_comp = ifelse(sign(merged_df3$duet_scaled)==sign(merged_df3$ddg), "PASS", "FAIL")
table(merged_df3$sign_comp)
disagreement = table(merged_df3$sign_comp)[2]/nrow(merged_df3)*100
agreement = 100 - disagreement
cat("There is", agreement, "% between mcsm and foldx predictions")