76 lines
2 KiB
R
76 lines
2 KiB
R
#!/usr/bin/env Rscript
|
|
#########################################################
|
|
# TASK: Data for selective pressure (dnds calcs)
|
|
|
|
# Output:2 csv
|
|
#=======================================================================
|
|
# working dir and loading libraries
|
|
getwd()
|
|
setwd("~/git/LSHTM_analysis/scripts/")
|
|
getwd()
|
|
|
|
source("plotting/combining_dfs_plotting.R")
|
|
|
|
#=========================
|
|
#=======
|
|
# output
|
|
#=======
|
|
select_pressure_muts_all = paste0(outdir,"/", tolower(gene), "_dnds_all_muts.csv")
|
|
|
|
select_pressure_muts_aa_site = paste0(outdir,"/", tolower(gene), "_dnds_active_site_muts.csv")
|
|
#=======================================================================
|
|
#----------
|
|
# df1
|
|
#----------
|
|
colnames(merged_df2)
|
|
cols_to_select = c("id", "sample"
|
|
, "mutation"
|
|
, "mutationinformation"
|
|
, "wild_type"
|
|
, "position"
|
|
, "mutant_type"
|
|
, "af"
|
|
, "or_mychisq"
|
|
, "or_kin")
|
|
|
|
dnds_df_all = merged_df2[, cols_to_select]
|
|
|
|
#-----------------------
|
|
# df2: active site only
|
|
#-----------------------
|
|
str(dnds_df_all)
|
|
is.numeric(dnds_df_all$position)
|
|
|
|
# !!!!BEWARE HARDCODED value for <gene> !!!
|
|
active_site_positions = c(8, 13, 49, 51, 57, 68, 71, 96, 103, 133, 134, 137, 138)
|
|
|
|
aa_site_i = which(dnds_df_all$position%in%active_site_positions)
|
|
dnds_df_aa_site = dnds_df_all[dnds_df_all$position%in%active_site_positions,]
|
|
|
|
if (all(rownames(dnds_df_aa_site) == aa_site_i) ){
|
|
cat("PASS: active site positions subsetted correctly")
|
|
}else{
|
|
cat("FAIL: active site positions mismatch")
|
|
quit()
|
|
}
|
|
|
|
table(dnds_df_aa_site$position)
|
|
|
|
#=======================================================================
|
|
#==============
|
|
# write output
|
|
#=============
|
|
|
|
#----------
|
|
# df1
|
|
#----------
|
|
cat("Filename:", select_pressure_muts_all)
|
|
write.csv(dnds_df_all, select_pressure_muts_all, row.names = FALSE)
|
|
|
|
#----------
|
|
# df1
|
|
#----------
|
|
cat("Filename:", select_pressure_muts_aa_site)
|
|
write.csv(dnds_df_aa_site, select_pressure_muts_aa_site, row.names = FALSE)
|
|
|
|
colnames(dnds_df_all)
|