From 48050752db0db8aaf2ecfca6a0c7ccbd92671ac8 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Wed, 23 Sep 2020 19:23:34 +0100 Subject: [PATCH] added more analysis in extreme_muts.R to be tidied later --- scripts/plotting/extreme_muts.R | 100 +++++++++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/scripts/plotting/extreme_muts.R b/scripts/plotting/extreme_muts.R index 053fde9..12ffcab 100644 --- a/scripts/plotting/extreme_muts.R +++ b/scripts/plotting/extreme_muts.R @@ -139,15 +139,45 @@ bar = merged_df3 %>% group_by(position) %>% count() all(table(foo$n) == table(bar$n)) table(foo$n) - n_budding_sites = table(foo$n)[[2]] n_mult_muts_sites = sum(table(foo$n)) - (table(foo$n)[[1]] - table(foo$n)[[2]]) cat("No of budding hotspots (sites with 2 mutations):", n_budding_sites , "\nNo. of sites with mutiple (>2) mutations:", n_mult_muts_sites) +#==================== +# budding hotspots: dr muts +#==================== +merged_df3_dr = merged_df3[merged_df3$mutation_info == dr_muts_col,] + +bar = merged_df3_dr %>% group_by(position) %>% count() +table(bar$n) + +n_budding_sites_dr = table(bar$n)[[2]] +n_mult_muts_sites_dr = sum(table(bar$n)) - (table(bar$n)[[1]] - table(bar$n)[[2]]) + +cat("No of budding hotspots (sites with 2 mutations) for dr muts:", n_budding_sites_dr + , "\nNo. of sites with mutiple (>2) mutations for dr muts:", n_mult_muts_sites_dr) + #========================================================================== +#============================== +# wild_pos count: This tells you +# how many muts associated with each +# wild-type postion: handy! +#============================== +wild_pos_count_filename = paste0(outdir, "/" +, tolower(gene), "_wild_pos_count.csv") + + +head(merged_df3$position) +# order by position +merged_df3_s = merged_df3[order(merged_df3$position),] +head(merged_df3_s$position) + +wild_pos_count = as.data.frame(table(merged_df3_s$wild_pos)) +write.csv(wild_pos_count, wild_pos_count_filename) + #============================== # agreement of foldx and DUET #============================== @@ -166,3 +196,71 @@ agreement = 100 - disagreement cat("There is", agreement, "% between mcsm and foldx predictions") ############################################################################## + +# plots + +plot(density(merged_df3$duet_stability_change)) +plot(density(merged_df3$ddg)) + +plot(density(merged_df3$duet_scaled)) +plot(density(merged_df3$foldx_scaled)) + +hist(merged_df3$duet_scaled) +hist(merged_df3$foldx_scaled) + + +#======================================== +#layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE)) +par(mfrow=c(3,1)) + +#---------- +# OR +#---------- +# raw +plot(density(merged_df3$or_mychisq, na.rm = T)) +hist(merged_df3$or_mychisq) + +# log10 +plot(density(log10(merged_df3$or_mychisq), na.rm = T)) +hist(log10(merged_df3$or_mychisq)) + + + + +#---------- +# adjusted OR +#---------- +# raw +plot(density(merged_df3$or_kin, na.rm = T)) +hist(merged_df3$or_kin) + +# log +plot(density(log10(merged_df3$or_kin), na.rm = T)) +hist(log10(merged_df3$or_kin)) + + +# overlay +#par(yaxs="i",las=1) +d = density(log10(merged_df3$or_mychisq), na.rm = T) +ylim = max(d$y); ylim + +hist(log10(merged_df3$or_mychisq) + , prob = TRUE + , col = "grey" + , border= "black" + , ylim = c(0, ylim) + , xlab = "Adjusted OR (Log10)" + , main = "Adjusted OR") + +box(bty="l") + +lines(density(log10(merged_df3$or_mychisq) + , na.rm = T + , bw = "nrd0") # default + , col = "black" + , lwd = 2) + +#grid(nx=NA,ny=NULL,lty=1,lwd=1,col="gray") + +#=============================== +par(mfrow=c(2,2))