#!/usr/bin/env Rscript ######################################################### # TASK: Data for selective pressure (dnds calcs) # Output:2 csv #======================================================================= # working dir and loading libraries getwd() setwd("~/git/LSHTM_analysis/scripts/") getwd() source("plotting/combining_dfs_plotting.R") #========================= #======= # output #======= select_pressure_muts_all = paste0(outdir,"/", tolower(gene), "_dnds_all_muts.csv") select_pressure_muts_aa_site = paste0(outdir,"/", tolower(gene), "_dnds_active_site_muts.csv") #======================================================================= #---------- # df1 #---------- colnames(merged_df2) cols_to_select = c("id", "sample" , "mutation" , "mutationinformation" , "wild_type" , "position" , "mutant_type") dnds_df_all = merged_df2[, cols_to_select] #----------------------- # df2: active site only #----------------------- str(dnds_df_all) is.numeric(dnds_df_all$position) active_site_positions = c(8, 13, 49, 51, 57, 68, 71, 96, 103, 133, 134, 137, 138) aa_site_i = which(dnds_df_all$position%in%active_site_positions) dnds_df_aa_site = dnds_df_all[dnds_df_all$position%in%active_site_positions,] if (all(rownames(dnds_df_aa_site) == aa_site_i) ){ cat("PASS: active site positions subsetted correctly") }else{ cat("FAIL: active site positions mismatch") quit() } table(dnds_df_aa_site$position) #======================================================================= #============== # write output #============= #---------- # df1 #---------- cat("Filename:", select_pressure_muts_all) write.csv(dnds_df_all, select_pressure_muts_all, row.names = FALSE) #---------- # df1 #---------- cat("Filename:", select_pressure_muts_aa_site) write.csv(dnds_df_aa_site, select_pressure_muts_aa_site, row.names = FALSE)