# scripts/plotting/test.R
#
# Exploratory comparison of "dr" versus "other" pyrazinamide mutations.
# For each metric the script draws a side-by-side boxplot of the two groups
# (first box = dr mutations, second box = other mutations) and runs an
# unpaired two-sample Wilcoxon test, first on merged_df3 (PS data) and then
# on merged_df3_lig (LIG data). It ends by comparing the signs of the mCSM
# DUET score and the FoldX ddg score.
#
# merged_df3 and merged_df3_lig are not created in this file; they are
# expected to exist after sourcing combining_dfs_plotting.R.

# NOTE(review): absolute, machine-specific path. It is kept because the
# relative source() call below depends on the working directory.
setwd("/home/tanu/git/LSHTM_analysis/scripts/plotting")

source("combining_dfs_plotting.R")

table(merged_df3$mutation_info)

# assign foldx outcome
# ddg < 0 = "Stabilising" (-ve); everything else = "Destabilising".
# Rows with a missing ddg get an NA outcome.
table(merged_df3$ddg < 0)
merged_df3$foldx_outcome = ifelse(merged_df3$ddg < 0
                                  , "Stabilising"
                                  , "Destabilising")

#===========
# PS data
#===========
dr_muts = merged_df3[merged_df3$mutation_info == "dr_mutations_pyrazinamide", ]
other_muts = merged_df3[merged_df3$mutation_info == "other_mutations_pyrazinamide", ]

# 2 x 6 grid: holds the 6 PS panels and the 5 LIG panels drawn below
par(mfrow = c(2, 6))

# mcsm duet
boxplot(dr_muts$duet_scaled, other_muts$duet_scaled, main = "DUET"
        #, col = factor(merged_df3$duet_outcome)
        )
wilcox.test(dr_muts$duet_scaled, other_muts$duet_scaled, paired = FALSE)

# foldx ddg
boxplot(dr_muts$ddg, other_muts$ddg, main = "Foldx")
wilcox.test(dr_muts$ddg, other_muts$ddg, paired = FALSE)

# rd
boxplot(dr_muts$rd_values, other_muts$rd_values, main = "RD")
wilcox.test(dr_muts$rd_values, other_muts$rd_values, paired = FALSE)

# kd
boxplot(dr_muts$kd_values, other_muts$kd_values, main = "KD")
wilcox.test(dr_muts$kd_values, other_muts$kd_values, paired = FALSE)

# asa
boxplot(dr_muts$asa, other_muts$asa, main = "ASA")
wilcox.test(dr_muts$asa, other_muts$asa, paired = FALSE)

# rsa
boxplot(dr_muts$rsa, other_muts$rsa, main = "RSA")
wilcox.test(dr_muts$rsa, other_muts$rsa, paired = FALSE)

#===================================================================
#==========
# LIG data
#==========
dr_muts_lig = merged_df3_lig[merged_df3_lig$mutation_info == "dr_mutations_pyrazinamide", ]
other_muts_lig = merged_df3_lig[merged_df3_lig$mutation_info == "other_mutations_pyrazinamide", ]

# mcsm ligand affinity
# NOTE(review): the panel is titled "Ligand affinity" but reads the
# duet_scaled column -- confirm that this is the intended column.
boxplot(dr_muts_lig$duet_scaled, other_muts_lig$duet_scaled
        , main = "Ligand affinity")
wilcox.test(dr_muts_lig$duet_scaled, other_muts_lig$duet_scaled, paired = FALSE)

# rd
boxplot(dr_muts_lig$rd_values, other_muts_lig$rd_values, main = "RD")
wilcox.test(dr_muts_lig$rd_values, other_muts_lig$rd_values, paired = FALSE)

# kd
boxplot(dr_muts_lig$kd_values, other_muts_lig$kd_values, main = "KD")
wilcox.test(dr_muts_lig$kd_values, other_muts_lig$kd_values, paired = FALSE)

# asa
boxplot(dr_muts_lig$asa, other_muts_lig$asa, main = "ASA")
wilcox.test(dr_muts_lig$asa, other_muts_lig$asa, paired = FALSE)

# rsa
boxplot(dr_muts_lig$rsa, other_muts_lig$rsa, main = "RSA")
wilcox.test(dr_muts_lig$rsa, other_muts_lig$rsa, paired = FALSE)

#===================================================================
# checking agreement b/w mcsm and foldx
cols_to_select = c("mutationinformation"
                   , "mutation_info"
                   , "duet_scaled"
                   , "ddg"
                   , "duet_outcome"
                   , "foldx_outcome")

# all_of() marks cols_to_select as an external character vector of column
# names, so it cannot be confused with a column of that name.
merged_df3_short = select(merged_df3, all_of(cols_to_select))

# rows where the two outcome labels differ (which() drops NA comparisons)
mcsm_foldx = merged_df3_short[which(merged_df3_short$duet_outcome != merged_df3_short$foldx_outcome), ]

# "PASS" = both scores have the same sign, "FAIL" = the signs differ
mcsm_foldx$sign_comp = ifelse(sign(mcsm_foldx$duet_scaled) == sign(mcsm_foldx$ddg)
                              , "PASS"
                              , "FAIL")
table(mcsm_foldx$sign_comp)

# another way of checking: the same sign comparison on every row
merged_df3$sign_comp = ifelse(sign(merged_df3$duet_scaled) == sign(merged_df3$ddg)
                              , "PASS"
                              , "FAIL")
table(merged_df3$sign_comp)

# Count "PASS" explicitly instead of taking element [2] of the table: the
# positional index is only "PASS" when both levels occur, and is NA
# otherwise. Rows whose comparison is NA are not counted as "PASS" but
# remain in the nrow() denominator.
# NOTE(review): the share of same-sign rows is reported as *disagreement*,
# presumably because the two scales use opposite sign conventions --
# confirm against the definition of duet_outcome.
n_same_sign = sum(merged_df3$sign_comp == "PASS", na.rm = TRUE)
disagreement = n_same_sign / nrow(merged_df3) * 100
agreement = 100 - disagreement

cat("There is", agreement, "% agreement between mcsm and foldx predictions\n")