Combining dfs for PS and lig in one

2020-09-07 14:05:46 +01:00 · 2020-09-07 14:05:46 +01:00 · 739e9eadf8
commit 739e9eadf8
parent 93e19e3186
6 changed files with 464 additions and 621 deletions
--- a/scripts/plotting/opp_mcsm_muts.R
+++ b/scripts/plotting/opp_mcsm_muts.R
@@ -0,0 +1,95 @@
+#!/usr/bin/env Rscript
+#########################################################
+# TASK: Write out muts that have opposite effects on
+# protomer stability and ligand stability
+#########################################################
+# Set the working directory and load the shared plotting data
+
+getwd()
+setwd("~/git/LSHTM_analysis/scripts/plotting/")
+getwd()
+
+source("plotting_data.R")
+
+# sourcing plotting_data.R should provide the directories/variables echoed below and:
+# my_df
+# my_df_u
+# my_df_u_lig
+# dup_muts
+
+cat(paste0("Directories imported:",
+           "\ndatadir:", datadir,
+           "\nindir:", indir,
+           "\noutdir:", outdir,
+           "\nplotdir:", plotdir))
+
+cat(paste0("Variables imported:",
+           "\ndrug:", drug,
+           "\ngene:", gene,
+           "\ngene_match:", gene_match,
+           "\nLength of upos:", length(upos),
+           "\nAngstrom symbol:", angstroms_symbol))
+
+# drop imported objects that are not used in the rest of this script
+rm(list = c("my_df", "upos", "dup_muts"))
+#========================================================
+#===========
+# input
+#===========
+# my_df_u: data frame expected from sourcing plotting_data.R
+
+#===========
+# output
+#===========
+# CSV of mutations with opposite effects; file name is prefixed with the lower-cased gene
+out_filename_opp_muts <- paste0(tolower(gene), "_muts_opp_effects.csv")
+outfile_opp_muts <- paste(outdir, out_filename_opp_muts, sep = "/")
+
+#%%===============================================================
+
+# DUET outcome counts and their total (auto-printed); American spelling, if needed:
+table(my_df_u$duet_outcome); sum(table(my_df_u$duet_outcome) )
+#my_df_u$duet_outcome[my_df_u$duet_outcome=="Stabilising"] <- "Stabilizing"
+#my_df_u$duet_outcome[my_df_u$duet_outcome=="Destabilising"] <- "Destabilizing"
+
+# Ligand outcome counts and their total (auto-printed); American spelling, if needed:
+table(my_df_u$ligand_outcome); sum(table(my_df_u$ligand_outcome) )
+#my_df_u$ligand_outcome[my_df_u$ligand_outcome=="Stabilising"] <- "Stabilizing"
+#my_df_u$ligand_outcome[my_df_u$ligand_outcome=="Destabilising"] <- "Destabilizing"
+
+# muts with opposing effects on protomer and ligand stability (outcome labels differ)
+table(my_df_u$duet_outcome != my_df_u$ligand_outcome)
+dl_i <- which(my_df_u$duet_outcome != my_df_u$ligand_outcome)
+ld_i <- which(my_df_u$ligand_outcome != my_df_u$duet_outcome)
+changes <- my_df_u[dl_i, ]
+
+# sanity check: the row count is compared against sum() of the comparison, not
+# table(...)[[2]], which is out of bounds when only one of TRUE/FALSE occurs
+# (e.g. no opposing muts); && supplies the single scalar that if() requires.
+cat("Identifying muts with opposite stability effects")
+if (nrow(changes) == sum(my_df_u$duet_outcome != my_df_u$ligand_outcome, na.rm = TRUE) &&
+    identical(dl_i, ld_i)) {
+  cat("PASS: muts with opposite effects on stability and affinity correctly identified"
+      , "\nNo. of such muts: ", nrow(changes))
+} else {
+  cat("FAIL: unsuccessful in extracting muts with changed stability effects")
+}
+
+#==========================
+# write file: changed muts
+#==========================
+# row names go out as the first CSV column (write.csv default)
+write.csv(x = changes, file = outfile_opp_muts)
+
+cat("Finished writing file for muts with opp effects:",
+    "\nFilename: ", outfile_opp_muts,
+    "\nDim:", dim(changes))
+
+# remove the objects that were only needed to produce this file
+rm(out_filename_opp_muts, outfile_opp_muts, changes, dl_i, ld_i)
+
+# count NAs in each column; vapply() pins the result to one integer per column
+na_count <- vapply(my_df_u, function(y) sum(is.na(y)), integer(1)); na_count
+df_ncols <- ncol(my_df_u)
+
+#===================================== end of script