added more analysis in extreme_muts.R to be tidied later
This commit is contained in:
parent
5f10ad8075
commit
a0755eeab6
1 changed files with 99 additions and 1 deletions
|
@ -139,15 +139,45 @@ bar = merged_df3 %>% group_by(position) %>% count()
|
||||||
all(table(foo$n) == table(bar$n))
|
all(table(foo$n) == table(bar$n))
|
||||||
table(foo$n)
|
table(foo$n)
|
||||||
|
|
||||||
|
|
||||||
n_budding_sites = table(foo$n)[[2]]
|
n_budding_sites = table(foo$n)[[2]]
|
||||||
n_mult_muts_sites = sum(table(foo$n)) - (table(foo$n)[[1]] - table(foo$n)[[2]])
|
n_mult_muts_sites = sum(table(foo$n)) - (table(foo$n)[[1]] - table(foo$n)[[2]])
|
||||||
|
|
||||||
cat("No of budding hotspots (sites with 2 mutations):", n_budding_sites
|
cat("No of budding hotspots (sites with 2 mutations):", n_budding_sites
|
||||||
, "\nNo. of sites with mutiple (>2) mutations:", n_mult_muts_sites)
|
, "\nNo. of sites with mutiple (>2) mutations:", n_mult_muts_sites)
|
||||||
|
|
||||||
|
#====================
|
||||||
|
# budding hotspots: dr muts
|
||||||
|
#====================
|
||||||
|
merged_df3_dr = merged_df3[merged_df3$mutation_info == dr_muts_col,]
|
||||||
|
|
||||||
|
bar = merged_df3_dr %>% group_by(position) %>% count()
|
||||||
|
table(bar$n)
|
||||||
|
|
||||||
|
n_budding_sites_dr = table(bar$n)[[2]]
|
||||||
|
n_mult_muts_sites_dr = sum(table(bar$n)) - (table(bar$n)[[1]] - table(bar$n)[[2]])
|
||||||
|
|
||||||
|
cat("No of budding hotspots (sites with 2 mutations) for dr muts:", n_budding_sites_dr
|
||||||
|
, "\nNo. of sites with mutiple (>2) mutations for dr muts:", n_mult_muts_sites_dr)
|
||||||
|
|
||||||
#==========================================================================
|
#==========================================================================
|
||||||
|
|
||||||
|
#==============================
|
||||||
|
# wild_pos count: This tells you
|
||||||
|
# how many muts associated with each
|
||||||
|
# wild-type postion: handy!
|
||||||
|
#==============================
|
||||||
|
wild_pos_count_filename = paste0(outdir, "/"
|
||||||
|
, tolower(gene), "_wild_pos_count.csv")
|
||||||
|
|
||||||
|
|
||||||
|
head(merged_df3$position)
|
||||||
|
# order by position
|
||||||
|
merged_df3_s = merged_df3[order(merged_df3$position),]
|
||||||
|
head(merged_df3_s$position)
|
||||||
|
|
||||||
|
wild_pos_count = as.data.frame(table(merged_df3_s$wild_pos))
|
||||||
|
write.csv(wild_pos_count, wild_pos_count_filename)
|
||||||
|
|
||||||
#==============================
|
#==============================
|
||||||
# agreement of foldx and DUET
|
# agreement of foldx and DUET
|
||||||
#==============================
|
#==============================
|
||||||
|
@ -166,3 +196,71 @@ agreement = 100 - disagreement
|
||||||
cat("There is", agreement, "% between mcsm and foldx predictions")
|
cat("There is", agreement, "% between mcsm and foldx predictions")
|
||||||
|
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
|
||||||
|
# plots
|
||||||
|
|
||||||
|
plot(density(merged_df3$duet_stability_change))
|
||||||
|
plot(density(merged_df3$ddg))
|
||||||
|
|
||||||
|
plot(density(merged_df3$duet_scaled))
|
||||||
|
plot(density(merged_df3$foldx_scaled))
|
||||||
|
|
||||||
|
hist(merged_df3$duet_scaled)
|
||||||
|
hist(merged_df3$foldx_scaled)
|
||||||
|
|
||||||
|
|
||||||
|
#========================================
|
||||||
|
#layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
|
||||||
|
par(mfrow=c(3,1))
|
||||||
|
|
||||||
|
#----------
|
||||||
|
# OR
|
||||||
|
#----------
|
||||||
|
# raw
|
||||||
|
plot(density(merged_df3$or_mychisq, na.rm = T))
|
||||||
|
hist(merged_df3$or_mychisq)
|
||||||
|
|
||||||
|
# log10
|
||||||
|
plot(density(log10(merged_df3$or_mychisq), na.rm = T))
|
||||||
|
hist(log10(merged_df3$or_mychisq))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#----------
|
||||||
|
# adjusted OR
|
||||||
|
#----------
|
||||||
|
# raw
|
||||||
|
plot(density(merged_df3$or_kin, na.rm = T))
|
||||||
|
hist(merged_df3$or_kin)
|
||||||
|
|
||||||
|
# log
|
||||||
|
plot(density(log10(merged_df3$or_kin), na.rm = T))
|
||||||
|
hist(log10(merged_df3$or_kin))
|
||||||
|
|
||||||
|
|
||||||
|
# overlay
|
||||||
|
#par(yaxs="i",las=1)
|
||||||
|
d = density(log10(merged_df3$or_mychisq), na.rm = T)
|
||||||
|
ylim = max(d$y); ylim
|
||||||
|
|
||||||
|
hist(log10(merged_df3$or_mychisq)
|
||||||
|
, prob = TRUE
|
||||||
|
, col = "grey"
|
||||||
|
, border= "black"
|
||||||
|
, ylim = c(0, ylim)
|
||||||
|
, xlab = "Adjusted OR (Log10)"
|
||||||
|
, main = "Adjusted OR")
|
||||||
|
|
||||||
|
box(bty="l")
|
||||||
|
|
||||||
|
lines(density(log10(merged_df3$or_mychisq)
|
||||||
|
, na.rm = T
|
||||||
|
, bw = "nrd0") # default
|
||||||
|
, col = "black"
|
||||||
|
, lwd = 2)
|
||||||
|
|
||||||
|
#grid(nx=NA,ny=NULL,lty=1,lwd=1,col="gray")
|
||||||
|
|
||||||
|
#===============================
|
||||||
|
par(mfrow=c(2,2))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue