#!/usr/bin/env Rscript
#########################################################
# TASK: To write muts with opposite effects on
# protomer and ligand stability
# (i.e. rows where duet_outcome differs from ligand_outcome)
#########################################################

# working dir and loading libraries
# NOTE(review): the working directory is hard-coded because
# plotting_data.R is sourced by relative path just below; the script
# therefore only runs from a checkout at this location.
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting/")
getwd()

source("plotting_data.R")
# should return the following dfs, directories and variables
# my_df
# my_df_u
# my_df_u_lig
# dup_muts

# Report what was imported. Each message ends with a newline so that
# consecutive messages do not run together on the console.
cat(paste0("Directories imported:"
           , "\ndatadir:", datadir
           , "\nindir:", indir
           , "\noutdir:", outdir
           , "\nplotdir:", plotdir
           , "\n"))

cat(paste0("Variables imported:"
           , "\ndrug:", drug
           , "\ngene:", gene
           , "\ngene_match:", gene_match
           , "\nLength of upos:", length(upos)
           , "\nAngstrom symbol:", angstroms_symbol
           , "\n"))

# clear excess variables (not used in the rest of this script)
rm(my_df, upos, dup_muts)

#========================================================
#===========
# input
#===========
# in_file1: output of plotting_data.R
# my_df_u

#===========
# output
#===========
# mutations with opposite effects
out_filename_opp_muts <- paste0(tolower(gene), "_muts_opp_effects.csv")
# file.path() joins with "/" and so yields the same path as pasting by hand
outfile_opp_muts <- file.path(outdir, out_filename_opp_muts)

#%%===============================================================
# spelling correction 1: DUET, in case American spelling is needed
# (printed for inspection; the corrections themselves are left disabled)
table(my_df_u$duet_outcome)
sum(table(my_df_u$duet_outcome))
#my_df_u$duet_outcome[my_df_u$duet_outcome=="Stabilising"] <- "Stabilizing"
#my_df_u$duet_outcome[my_df_u$duet_outcome=="Destabilising"] <- "Destabilizing"

# spelling correction 2: Ligand, in case American spelling is needed
table(my_df_u$ligand_outcome)
sum(table(my_df_u$ligand_outcome))
#my_df_u$ligand_outcome[my_df_u$ligand_outcome=="Stabilising"] <- "Stabilizing"
#my_df_u$ligand_outcome[my_df_u$ligand_outcome=="Destabilising"] <- "Destabilizing"

#==========================
# muts with opposing effects on protomer and ligand stability
#==========================
# Compare as character: `!=` on two factors errors when their level sets
# differ, while for character columns as.character() is a no-op.
duet_chr <- as.character(my_df_u$duet_outcome)
ligand_chr <- as.character(my_df_u$ligand_outcome)

# TRUE where the two outcomes differ; NA where either outcome is missing
opp_mask <- duet_chr != ligand_chr
table(opp_mask, deparse.level = 0)

# which() drops NA, so rows with a missing outcome are never selected
dl_i <- which(opp_mask)
ld_i <- which(ligand_chr != duet_chr)
changes <- my_df_u[dl_i, ]

# Expected number of differing rows. sum() is used instead of
# table(...)[[2]] because the table has a single cell (and [[2]] is out of
# bounds) whenever no mutation, or every mutation, differs.
n_opp <- sum(opp_mask, na.rm = TRUE)

# sanity check: redundant, but uber cautious!
cat("Identifying muts with opposite stability effects\n")
if (nrow(changes) == n_opp && identical(dl_i, ld_i)) {
  cat("PASS: muts with opposite effects on stability and affinity correctly identified"
      , "\nNo. of such muts: ", nrow(changes), "\n")
} else {
  # do not write an output file that failed its own consistency check
  stop("FAIL: unsuccessful in extracting muts with changed stability effects"
       , call. = FALSE)
}

#==========================
# write file: changed muts
#==========================
# row names are written as the first column (write.csv default), as before
write.csv(changes, outfile_opp_muts)

cat("Finished writing file for muts with opp effects:"
    , "\nFilename: ", outfile_opp_muts
    , "\nDim:", dim(changes), "\n")

# clear variables
rm(out_filename_opp_muts, outfile_opp_muts)
rm(changes, dl_i, ld_i, duet_chr, ligand_chr, opp_mask, n_opp)

# count na in each column
# vapply() guarantees an integer vector even when my_df_u has no columns
na_count <- vapply(my_df_u, function(y) sum(is.na(y)), integer(1))
na_count
df_ncols <- ncol(my_df_u)

#===================================== end of script