updated extreme_muts.R with number of budding hotspots and mult muts numbers

This commit is contained in:
Tanushree Tunstall 2020-09-23 11:02:13 +01:00
parent f7280ceada
commit 5579e9527b

View file

@ -124,3 +124,40 @@ hs_foldx
#===============
# Mutations lying within 5 Angstroms of the ligand.
# which() is used so that rows whose ligand_distance is NA are dropped:
# indexing a data frame with a logical vector containing NA yields all-NA
# rows, which would silently inflate the count printed below.
aa_muts = merged_df3[which(merged_df3$ligand_distance < 5), ]

# Human-readable label for the cutoff (angstroms_symbol is defined elsewhere)
aa_dist = paste0(5, angstroms_symbol)

# NOTE(review): nrow(aa_muts) counts rows of merged_df3 (presumably one row
# per mutation), not distinct residue positions -- confirm the label wording.
cat("No. of active site residues within", aa_dist, ":", nrow(aa_muts))
#====================
# budding hotspots
#====================
# Count how many rows of merged_df3 (mutations) fall on each position.
# tally() and count() on a grouped data frame both return one row per group
# with the group size in column n; both are computed so that one can be
# checked against the other.
foo = merged_df3 %>% group_by(position) %>% tally()
bar = merged_df3 %>% group_by(position) %>% count()

# sanity check: the two methods must give the same distribution of counts
all(table(foo$n) == table(bar$n))

# distribution of sites by number of mutations per site
table(foo$n)

# Derive both figures by value from the per-position counts rather than by
# cell position in table(foo$n): positional lookup ([[1]], [[2]]) is only
# right when counts of exactly 1 and exactly 2 are both present, and the
# ">2" figure must exclude the 1-mutation AND the 2-mutation sites.
n_budding_sites = sum(foo$n == 2)
n_mult_muts_sites = sum(foo$n > 2)

cat("No of budding hotspots (sites with 2 mutations):", n_budding_sites
    , "\nNo. of sites with mutiple (>2) mutations:", n_mult_muts_sites)
# Alternative approach: annotate every row with the size of its position group.
# setDT() converts merged_df3 to a data.table by reference; `:=` then adds the
# pos_count column in place (.N = number of rows sharing that row's position).
setDT(merged_df3)
merged_df3[, pos_count := .N, by = position]

# Row-weighted distribution: a position with k rows contributes k entries
# here, so these totals are cumulative over rows rather than per site.
table(merged_df3[["pos_count"]])

# Collapse back to one row per position. pos_count is constant within a
# position, so its mean is just that position's row count.
# NOTE(review): presumably this reproduces the per-position tally computed
# earlier in the script (foo) -- verify.
snpsBYpos_df <- summarise(
  group_by(merged_df3, position),
  snpsBYpos = mean(pos_count)
)