added foldx scaled and foldx outcome to plotting_data.R
This commit is contained in:
parent
5579e9527b
commit
6d08b646fc
3 changed files with 48 additions and 17 deletions
|
@ -118,7 +118,6 @@ df_s_foldx = df[df$foldx_outcome == "Stabilising",]
|
||||||
hs_foldx = df_s_foldx[df_s_foldx$ddg == min(df_s_foldx$ddg), ]
|
hs_foldx = df_s_foldx[df_s_foldx$ddg == min(df_s_foldx$ddg), ]
|
||||||
hs_foldx
|
hs_foldx
|
||||||
|
|
||||||
|
|
||||||
#===============
|
#===============
|
||||||
# active site muts
|
# active site muts
|
||||||
#===============
|
#===============
|
||||||
|
@ -132,8 +131,6 @@ cat("No. of active site residues within", aa_dist, ":", nrow(aa_muts))
|
||||||
#====================
|
#====================
|
||||||
# budding hotspots
|
# budding hotspots
|
||||||
#====================
|
#====================
|
||||||
|
|
||||||
# Method
|
|
||||||
# this is what you want
|
# this is what you want
|
||||||
foo = merged_df3 %>% group_by(position) %>% tally()
|
foo = merged_df3 %>% group_by(position) %>% tally()
|
||||||
bar = merged_df3 %>% group_by(position) %>% count()
|
bar = merged_df3 %>% group_by(position) %>% count()
|
||||||
|
@ -149,15 +146,4 @@ n_mult_muts_sites = sum(table(foo$n)) - (table(foo$n)[[1]] - table(foo$n)[[2]])
|
||||||
cat("No of budding hotspots (sites with 2 mutations):", n_budding_sites
|
cat("No of budding hotspots (sites with 2 mutations):", n_budding_sites
|
||||||
, "\nNo. of sites with mutiple (>2) mutations:", n_mult_muts_sites)
|
, "\nNo. of sites with mutiple (>2) mutations:", n_mult_muts_sites)
|
||||||
|
|
||||||
# another way
|
#==========================================================================
|
||||||
setDT(merged_df3)[, pos_count := .N, by = .(position)]
|
|
||||||
|
|
||||||
# this is cumulative
|
|
||||||
table(merged_df3$pos_count)
|
|
||||||
|
|
||||||
# use group by on this: same as the
|
|
||||||
snpsBYpos_df <- merged_df3 %>%
|
|
||||||
group_by(position) %>%
|
|
||||||
summarize(snpsBYpos = mean(pos_count))
|
|
||||||
|
|
||||||
|
|
|
@ -59,7 +59,6 @@ if (my_min == -1 && my_max == 1){
|
||||||
cat("FAIL: could not scale foldx ddg values"
|
cat("FAIL: could not scale foldx ddg values"
|
||||||
, "Aborting!")
|
, "Aborting!")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#================================
|
#================================
|
||||||
# adding foldx outcome category
|
# adding foldx outcome category
|
||||||
|
@ -76,7 +75,7 @@ if ( all(c1 == c2) ){
|
||||||
cat("FAIL: foldx outcome could not be created. Aborting!")
|
cat("FAIL: foldx outcome could not be created. Aborting!")
|
||||||
exit()
|
exit()
|
||||||
}
|
}
|
||||||
|
#=======================================================================
|
||||||
# name tidying
|
# name tidying
|
||||||
df_ps$mutation_info = as.factor(df_ps$mutation_info)
|
df_ps$mutation_info = as.factor(df_ps$mutation_info)
|
||||||
df_ps$duet_outcome = as.factor(df_ps$duet_outcome)
|
df_ps$duet_outcome = as.factor(df_ps$duet_outcome)
|
||||||
|
|
|
@ -63,6 +63,52 @@ my_df = read.csv(infile_params, header = T)
|
||||||
|
|
||||||
cat("\nInput dimensions:", dim(my_df))
|
cat("\nInput dimensions:", dim(my_df))
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# add foldx outcome category
|
||||||
|
# and foldx scaled values
|
||||||
|
|
||||||
|
# This ensures these variables are always available
|
||||||
|
# when calling for plots
|
||||||
|
###########################
|
||||||
|
|
||||||
|
#------------------------------
|
||||||
|
# adding foldx scaled values
|
||||||
|
# scale data b/w -1 and 1
|
||||||
|
#------------------------------
|
||||||
|
# Column index of the raw foldx ddg values. Fail early if the column is
# missing or duplicated, since everything below depends on it.
n = which(colnames(my_df) == "ddg"); n
if (length(n) != 1){
  stop("FAIL: expected exactly one 'ddg' column in my_df, found ", length(n)
       , call. = FALSE)
}

# Extremes of the raw values; these are the scaling denominators.
my_min = min(my_df[,n]); my_min
my_max = max(my_df[,n]); my_max

# Negative values are divided by |min| and all others by max, so the most
# negative value maps to -1 and the most positive one maps to 1.
my_df$foldx_scaled = ifelse(my_df[,n] < 0
                            , my_df[,n]/abs(my_min)
                            , my_df[,n]/my_max)

# sanity check
# NOTE: my_min/my_max are deliberately overwritten here and now hold the
# extremes of the SCALED values, not of the raw ddg values.
my_min = min(my_df$foldx_scaled); my_min
my_max = max(my_df$foldx_scaled); my_max

# Tolerance-based comparison instead of == on doubles; isTRUE() also makes an
# NA/NaN result (e.g. NA in ddg, or max == 0) take the FAIL branch rather than
# crash the if() with an unrelated error.
if (isTRUE(all.equal(my_min, -1)) && isTRUE(all.equal(my_max, 1))){
  cat("PASS: foldx ddg successfully scaled b/w -1 and 1"
      , "\nProceeding with assigning foldx outcome category")
}else{
  # Actually halt, so later steps never run on badly scaled values.
  stop("FAIL: could not scale foldx ddg values. Aborting!", call. = FALSE)
}
|
||||||
|
|
||||||
|
#------------------------------
|
||||||
|
# adding foldx outcome category
|
||||||
|
# ddg<0 = "Stabilising" (-ve)
|
||||||
|
#------------------------------
|
||||||
|
# Counts of stabilising (TRUE) vs. other (FALSE) rows, taken from the raw
# ddg values before the new column exists.
c1 = table(my_df$ddg < 0)

# ddg < 0 is labelled "Stabilising"; everything else (including ddg == 0)
# is labelled "Destabilising".
my_df$foldx_outcome = ifelse(my_df$ddg < 0, "Stabilising", "Destabilising")

# The same counts, this time derived from the newly created column, so the
# comparison below really checks the assignment.
c2 = table(my_df$foldx_outcome == "Stabilising")

# Compare the bare counts; identical() returns a single TRUE/FALSE even when
# the two tables differ in length.
if ( identical(as.vector(c1), as.vector(c2)) ){
  cat("PASS: foldx outcome successfully created")
}else{
  # stop() is the base R way to abort with a message; exit() is not defined.
  stop("FAIL: foldx outcome could not be created. Aborting!", call. = FALSE)
}
|
||||||
|
|
||||||
###########################
|
###########################
|
||||||
# extract unique mutation entries
|
# extract unique mutation entries
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue