script to subset data for dnds cals
This commit is contained in:
parent
4285bbd59f
commit
be50636b15
1 changed files with 70 additions and 0 deletions
70
scripts/data_selective_pressure.R
Normal file
70
scripts/data_selective_pressure.R
Normal file
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: Data for selective pressure (dnds calcs)
|
||||
|
||||
# Output:2 csv
|
||||
#=======================================================================
|
||||
# working dir and loading libraries
|
||||
getwd()
|
||||
setwd("~/git/LSHTM_analysis/scripts/")
|
||||
getwd()
|
||||
|
||||
source("plotting/combining_dfs_plotting.R")
|
||||
|
||||
#=========================
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
select_pressure_muts_all = paste0(outdir,"/", tolower(gene), "_dnds_all_muts.csv")
|
||||
|
||||
select_pressure_muts_aa_site = paste0(outdir,"/", tolower(gene), "_dnds_active_site_muts.csv")
|
||||
#=======================================================================
|
||||
#----------
|
||||
# df1
|
||||
#----------
|
||||
colnames(merged_df2)
|
||||
cols_to_select = c("id", "sample"
|
||||
, "mutation"
|
||||
, "mutationinformation"
|
||||
, "wild_type"
|
||||
, "position"
|
||||
, "mutant_type")
|
||||
|
||||
dnds_df_all = merged_df2[, cols_to_select]
|
||||
|
||||
#-----------------------
|
||||
# df2: active site only
|
||||
#-----------------------
|
||||
str(dnds_df_all)
|
||||
is.numeric(dnds_df_all$position)
|
||||
|
||||
active_site_positions = c(8, 13, 49, 51, 57, 68, 71, 96, 103, 133, 134, 137, 138)
|
||||
|
||||
aa_site_i = which(dnds_df_all$position%in%active_site_positions)
|
||||
dnds_df_aa_site = dnds_df_all[dnds_df_all$position%in%active_site_positions,]
|
||||
|
||||
if (all(rownames(dnds_df_aa_site) == aa_site_i) ){
|
||||
cat("PASS: active site positions subsetted correctly")
|
||||
}else{
|
||||
cat("FAIL: active site positions mismatch")
|
||||
quit()
|
||||
}
|
||||
|
||||
table(dnds_df_aa_site$position)
|
||||
|
||||
#=======================================================================
|
||||
#==============
|
||||
# write output
|
||||
#=============
|
||||
|
||||
#----------
|
||||
# df1
|
||||
#----------
|
||||
cat("Filename:", select_pressure_muts_all)
|
||||
write.csv(dnds_df_all, select_pressure_muts_all, row.names = FALSE)
|
||||
|
||||
#----------
|
||||
# df1
|
||||
#----------
|
||||
cat("Filename:", select_pressure_muts_aa_site)
|
||||
write.csv(dnds_df_aa_site, select_pressure_muts_aa_site, row.names = FALSE)
|
Loading…
Add table
Add a link
Reference in a new issue