LSHTM_analysis/scripts/data_selective_pressure.R

76 lines
2 KiB
R

#!/usr/bin/env Rscript
#########################################################
# TASK: Data for selective pressure (dnds calcs)
# Output:2 csv
#=======================================================================
# working dir and loading libraries
getwd()
setwd("~/git/LSHTM_analysis/scripts/")
getwd()
source("plotting/combining_dfs_plotting.R")
#=========================
#=======
# output
#=======
select_pressure_muts_all = paste0(outdir,"/", tolower(gene), "_dnds_all_muts.csv")
select_pressure_muts_aa_site = paste0(outdir,"/", tolower(gene), "_dnds_active_site_muts.csv")
#=======================================================================
#----------
# df1
#----------
colnames(merged_df2)
cols_to_select = c("id", "sample"
, "mutation"
, "mutationinformation"
, "wild_type"
, "position"
, "mutant_type"
, "af"
, "or_mychisq"
, "or_kin")
dnds_df_all = merged_df2[, cols_to_select]
#-----------------------
# df2: active site only
#-----------------------
str(dnds_df_all)
is.numeric(dnds_df_all$position)
# !!!!BEWARE HARDCODED value for <gene> !!!
active_site_positions = c(8, 13, 49, 51, 57, 68, 71, 96, 103, 133, 134, 137, 138)
aa_site_i = which(dnds_df_all$position%in%active_site_positions)
dnds_df_aa_site = dnds_df_all[dnds_df_all$position%in%active_site_positions,]
if (all(rownames(dnds_df_aa_site) == aa_site_i) ){
cat("PASS: active site positions subsetted correctly")
}else{
cat("FAIL: active site positions mismatch")
quit()
}
table(dnds_df_aa_site$position)
#=======================================================================
#==============
# write output
#=============
#----------
# df1
#----------
cat("Filename:", select_pressure_muts_all)
write.csv(dnds_df_all, select_pressure_muts_all, row.names = FALSE)
#----------
# df1
#----------
cat("Filename:", select_pressure_muts_aa_site)
write.csv(dnds_df_aa_site, select_pressure_muts_aa_site, row.names = FALSE)
colnames(dnds_df_all)