# LSHTM_analysis/scripts/plotting/opp_mcsm_muts.R

#!/usr/bin/env Rscript
#########################################################
# TASK: Write out mutations (muts) that have opposite effects on
# protomer stability and ligand affinity
#########################################################
# working dir and loading libraries

# Show the starting directory (a top-level getwd() is echoed when the script is
# run with Rscript or interactively), move to the plotting scripts directory,
# then show the directory again to confirm the change.
getwd()
# NOTE(review): hard-coded, user-specific path; the relative source() call
# below only resolves because of this setwd().
setwd("~/git/LSHTM_analysis/scripts/plotting/")
getwd()

# Run plotting_data.R in this session. The objects used further down
# (e.g. my_df_u, gene, outdir) are expected to be created by it.
source("plotting_data.R")

# should return the following dfs, directories and variables
# my_df
# my_df_u
# my_df_u_lig
# dup_muts

# Report the directories and variables expected from plotting_data.R.
# Each message ends with "\n": cat() does not add a newline itself, so without
# it the second message would start on the same line as the plotdir value.
cat(paste0("Directories imported:"
           , "\ndatadir:", datadir
           , "\nindir:", indir
           , "\noutdir:", outdir
           , "\nplotdir:", plotdir
           , "\n"))

cat(paste0("Variables imported:"
           , "\ndrug:", drug
           , "\ngene:", gene
           , "\ngene_match:", gene_match
           , "\nLength of upos:", length(upos)
           , "\nAngstrom symbol:", angstroms_symbol
           , "\n"))

# Drop the imported objects that this script does not use
rm(list = c("my_df", "upos", "dup_muts"))
#========================================================
#===========
# input
#===========
#in_file1: output of plotting_data.R
# my_df_u

#===========
# output
#===========
# mutations with opposite effects
# File name is "<gene, lower-cased>_muts_opp_effects.csv", placed under outdir.
# file.path() joins the directory and file name instead of pasting "/" by hand.
out_filename_opp_muts <- paste0(tolower(gene), "_muts_opp_effects.csv")
outfile_opp_muts <- file.path(outdir, out_filename_opp_muts)

#%%===============================================================

# Spelling check 1 (DUET): tabulate the outcome labels and their total count.
# Uncomment the two assignments below if American spelling is needed.
table(my_df_u$duet_outcome)
sum(table(my_df_u$duet_outcome))
#my_df_u$duet_outcome[my_df_u$duet_outcome=="Stabilising"] <- "Stabilizing"
#my_df_u$duet_outcome[my_df_u$duet_outcome=="Destabilising"] <- "Destabilizing"


# Spelling check 2 (ligand): same tabulation for the ligand outcome labels.
# Uncomment the two assignments below if American spelling is needed.
table(my_df_u$ligand_outcome)
sum(table(my_df_u$ligand_outcome))
#my_df_u$ligand_outcome[my_df_u$ligand_outcome=="Stabilising"] <- "Stabilizing"
#my_df_u$ligand_outcome[my_df_u$ligand_outcome=="Destabilising"] <- "Destabilizing"


# muts with opposing effects on protomer and ligand stability
# Compare the two outcome columns as plain strings: `!=` on two factors whose
# level sets differ raises an error, while character comparison works for both
# factor and character columns and gives the same result otherwise.
duet_chr <- as.character(my_df_u$duet_outcome)
ligand_chr <- as.character(my_df_u$ligand_outcome)
table(duet_chr != ligand_chr)

# Row indices where the two outcomes differ, computed in both directions.
# which() drops NA comparisons, so rows with a missing outcome are not selected.
# sanity check: redundant, but uber cautious!
dl_i <- which(duet_chr != ligand_chr)
ld_i <- which(ligand_chr != duet_chr)
changes <- my_df_u[dl_i, ]

cat("Identifying muts with opposite stability effects\n")
# length(dl_i) is the number of TRUE comparisons. The previous table(...)[[2]]
# lookup failed with "subscript out of bounds" whenever the table had a single
# level (no differing rows, or only differing rows). `&&` is the scalar
# operator intended for an if() condition.
if (nrow(changes) == length(dl_i) && identical(dl_i, ld_i)) {
  cat("PASS: muts with opposite effects on stability and affinity correctly identified"
      , "\nNo. of such muts: ", nrow(changes), "\n")
} else {
  cat("FAIL: unsuccessful in extracting muts with changed stability effects\n")
}

# temporary helpers only; changes, dl_i and ld_i are kept for the steps below
rm(duet_chr, ligand_chr)

#==========================
# write file: changed muts
#==========================
# Write the selected rows to CSV. write.csv() keeps its defaults here, so the
# row names are written as the first column of the file.
write.csv(changes, file = outfile_opp_muts)

# Trailing "\n" added: cat() does not end the line itself, so whatever is
# printed next would otherwise continue on the "Dim:" line.
cat("Finished writing file for muts with opp effects:"
    , "\nFilename: ", outfile_opp_muts
    , "\nDim:", dim(changes)
    , "\n")

# clear variables
rm(out_filename_opp_muts, outfile_opp_muts)
rm(changes, dl_i, ld_i)

# Count the NA values in each column of my_df_u and print the counts.
# vapply() always returns an integer vector with one entry per column, whereas
# sapply() would return an empty list for a data frame with no columns.
# sum(is.na(y)) gives the same count as sum(length(which(is.na(y)))).
na_count <- vapply(my_df_u, function(y) sum(is.na(y)), integer(1))
na_count

# Number of columns in my_df_u
df_ncols <- ncol(my_df_u)

#===================================== end of script