# Excerpt of scripts/plotting/extreme_muts.R
# (from diff aaceb15..52e50a9, hunk @@ -124,3 +124,40 @@, section context: hs_foldx)
#
# Depends on names defined earlier in the script: merged_df3, angstroms_symbol;
# and on dplyr (%>%, group_by, tally, count, summarize) and data.table (setDT, :=).

#===============
# Rows of merged_df3 whose ligand_distance is below 5.
# which() drops rows where ligand_distance is NA; plain logical indexing would
# return an all-NA row for each of them and inflate nrow(aa_muts).
aa_muts = merged_df3[which(merged_df3$ligand_distance < 5), ]

# Label used in the message below, e.g. "5" followed by the angstrom symbol.
aa_dist = paste0(5, angstroms_symbol)

# NOTE(review): nrow(aa_muts) counts rows of merged_df3. Positions can repeat
# (see the per-position tally below), so this may be the number of mutations
# rather than distinct residues -- confirm which one is intended.
cat("No. of active site residues within", aa_dist, ":", nrow(aa_muts), "\n")

#====================
# budding hotspots
#====================

# Method: count the rows (mutations) per position.
# foo is the table used below; bar is the same count via count(), kept only
# as a cross-check.
foo = merged_df3 %>% group_by(position) %>% tally()
bar = merged_df3 %>% group_by(position) %>% count()

# sanity check: both ways of counting must agree; stop if they do not
stopifnot(all(table(foo$n) == table(bar$n)))

# distribution: how many positions have 1, 2, 3, ... mutations
table(foo$n)

# Count directly on foo$n rather than indexing table(foo$n) by slot number:
# slot [[2]] is only "sites with 2 mutations" when a count of 1 also exists,
# and the previous formula total - (t1 - t2) had the wrong sign
# (it should have been total - (t1 + t2)).
n_budding_sites = sum(foo$n == 2)
n_mult_muts_sites = sum(foo$n > 2)

cat("No of budding hotspots (sites with 2 mutations):", n_budding_sites
    , "\nNo. of sites with mutiple (>2) mutations:", n_mult_muts_sites, "\n")

# another way: attach the per-position count to every row.
# setDT() converts merged_df3 to a data.table by reference, so merged_df3
# stays a data.table (with the new pos_count column) for the rest of the script.
setDT(merged_df3)[, pos_count := .N, by = .(position)]

# this is cumulative: every row carries its position's count, so a position
# with k mutations contributes k rows here (mutations are counted, not sites)
table(merged_df3$pos_count)

# collapse back to one row per position; pos_count is constant within a
# position, so mean(pos_count) is that position's count (same values as foo$n)
snpsBYpos_df <- merged_df3 %>%
  group_by(position) %>%
  summarize(snpsBYpos = mean(pos_count))