LSHTM_analysis/scripts/plotting/opp_mcsm_muts.R

96 lines
3.1 KiB
R

#!/usr/bin/env Rscript
#########################################################
# TASK: Write out the mutations (muts) that have opposite effects on
# protomer stability and ligand affinity
#########################################################
# Working directory and data import.
# The working directory is shown before and after the change so the log
# records where plotting_data.R is sourced from.
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting/")
getwd()

# plotting_data.R should return the following dfs, directories and variables:
#   my_df
#   my_df_u
#   my_df_u_lig
#   dup_muts
source("plotting_data.R")

# Report the directories made available by the sourced script.
cat(paste0(
  "Directories imported:",
  "\ndatadir:", datadir,
  "\nindir:", indir,
  "\noutdir:", outdir,
  "\nplotdir:", plotdir
))

# Report the variables made available by the sourced script.
cat(paste0(
  "Variables imported:",
  "\ndrug:", drug,
  "\ngene:", gene,
  "\ngene_match:", gene_match,
  "\nLength of upos:", length(upos),
  "\nAngstrom symbol:", angstroms_symbol
))

# Drop the imported objects this script does not use.
rm(list = c("my_df", "upos", "dup_muts"))
#========================================================
#===========
# input
#===========
#in_file1: output of plotting_data.R
# my_df_u
#===========
# output
#===========
# Output file for the mutations with opposite effects: the file name is the
# lower-cased gene name plus a fixed suffix, placed under outdir.
out_filename_opp_muts <- paste0(tolower(gene), "_muts_opp_effects.csv")
outfile_opp_muts <- paste(outdir, out_filename_opp_muts, sep = "/")
#%%===============================================================
# Spelling correction 1: DUET outcome labels, in case American spelling is
# needed. The counts per label and their total are printed for inspection;
# the recoding itself is left commented out.
table(my_df_u$duet_outcome); sum(table(my_df_u$duet_outcome))
#my_df_u$duet_outcome[my_df_u$duet_outcome=="Stabilising"] <- "Stabilizing"
#my_df_u$duet_outcome[my_df_u$duet_outcome=="Destabilising"] <- "Destabilizing"

# Spelling correction 2: ligand outcome labels, in case American spelling is
# needed. Same inspection output as above; recoding left commented out.
table(my_df_u$ligand_outcome); sum(table(my_df_u$ligand_outcome))
#my_df_u$ligand_outcome[my_df_u$ligand_outcome=="Stabilising"] <- "Stabilizing"
#my_df_u$ligand_outcome[my_df_u$ligand_outcome=="Destabilising"] <- "Destabilizing"

# Muts with opposing effects on protomer and ligand stability: rows whose
# DUET outcome label differs from the ligand outcome label. which() drops
# rows where the comparison is NA.
table(my_df_u$duet_outcome != my_df_u$ligand_outcome)
changes <- my_df_u[which(my_df_u$duet_outcome != my_df_u$ligand_outcome), ]

# Sanity check: redundant by construction, kept as a cheap guard. The row
# indices are computed with the comparison written in both directions.
dl_i <- which(my_df_u$duet_outcome != my_df_u$ligand_outcome)
ld_i <- which(my_df_u$ligand_outcome != my_df_u$duet_outcome)

cat("Identifying muts with opposite stability effects")

# length(dl_i) is the number of TRUE comparisons. It replaces
# table(...)[[2]], which fails with "subscript out of bounds" whenever the
# comparison has a single level (e.g. no mutation has opposite effects).
# && is used because the condition of if() must be a single logical value.
if (nrow(changes) == length(dl_i) && identical(dl_i, ld_i)) {
  cat("PASS: muts with opposite effects on stability and affinity correctly identified"
      , "\nNo. of such muts: ", nrow(changes))
} else {
  cat("FAIL: unsuccessful in extracting muts with changed stability effects")
}
#==========================
# write file: changed muts
#==========================
# Write the mutations with opposite effects to the output CSV.
write.csv(x = changes, file = outfile_opp_muts)

# Log the destination and the dimensions of what was written.
cat(
  "Finished writing file for muts with opp effects:",
  "\nFilename: ", outfile_opp_muts,
  "\nDim:", dim(changes)
)

# Clear the variables that are no longer needed.
rm(list = c(
  "out_filename_opp_muts", "outfile_opp_muts",
  "changes", "dl_i", "ld_i"
))
# Count NA values in each column of my_df_u and print the counts.
# vapply() pins the result to one integer per column, so the return type does
# not depend on the input (sapply() returns an empty list for a data frame
# with no columns). sum(is.na(y)) gives the same count as the former
# sum(length(which(is.na(y)))).
na_count <- vapply(my_df_u, function(y) sum(is.na(y)), integer(1))
na_count

# Number of columns in my_df_u.
df_ncols <- ncol(my_df_u)
#===================================== end of script