From 10e1baee8257d7f360113829e6f686387b2b0021 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 6 Oct 2020 19:11:34 +0100 Subject: [PATCH] script to subset data for dnds cals --- scripts/data_selective_pressure.R | 70 +++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 scripts/data_selective_pressure.R diff --git a/scripts/data_selective_pressure.R b/scripts/data_selective_pressure.R new file mode 100644 index 0000000..95730a8 --- /dev/null +++ b/scripts/data_selective_pressure.R @@ -0,0 +1,70 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Data for selective pressure (dnds calcs) + +# Output:2 csv +#======================================================================= +# working dir and loading libraries +getwd() +setwd("~/git/LSHTM_analysis/scripts/") +getwd() + +source("plotting/combining_dfs_plotting.R") + +#========================= +#======= +# output +#======= +select_pressure_muts_all = paste0(outdir,"/", tolower(gene), "_dnds_all_muts.csv") + +select_pressure_muts_aa_site = paste0(outdir,"/", tolower(gene), "_dnds_active_site_muts.csv") +#======================================================================= +#---------- +# df1 +#---------- +colnames(merged_df2) +cols_to_select = c("id", "sample" + , "mutation" + , "mutationinformation" + , "wild_type" + , "position" + , "mutant_type") + +dnds_df_all = merged_df2[, cols_to_select] + +#----------------------- +# df2: active site only +#----------------------- +str(dnds_df_all) +is.numeric(dnds_df_all$position) + +active_site_positions = c(8, 13, 49, 51, 57, 68, 71, 96, 103, 133, 134, 137, 138) + +aa_site_i = which(dnds_df_all$position%in%active_site_positions) +dnds_df_aa_site = dnds_df_all[dnds_df_all$position%in%active_site_positions,] + +if (all(rownames(dnds_df_aa_site) == aa_site_i) ){ + cat("PASS: active site positions subsetted correctly") +}else{ + cat("FAIL: active site positions mismatch") + quit() +} + +table(dnds_df_aa_site$position) + +#======================================================================= +#============== +# write output +#============= + +#---------- +# df1 +#---------- +cat("Filename:", select_pressure_muts_all) +write.csv(dnds_df_all, select_pressure_muts_all, row.names = FALSE) + +#---------- +# df1 +#---------- +cat("Filename:", select_pressure_muts_aa_site) +write.csv(dnds_df_aa_site, select_pressure_muts_aa_site, row.names = FALSE)