From 4398c049ca98ef015c422edee370ff5cb9157718 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall
Date: Wed, 23 Sep 2020 11:12:41 +0100
Subject: [PATCH] added foldx scaled and foldx outcome to plotting_data.R

---
Review notes (text between the three-dash line and the first "diff --git"
is not part of the commit):

 * plotting_data.R, added lines only; the number of added lines is still 46,
   so the hunk header and the diffstat below are unchanged:
   - c2 is now computed from the new foldx_outcome column. It used to repeat
     the expression used for c1, so the check could never fail.
   - exit() is not an R function; both FAIL branches now call stop(), which
     also makes the scaling check abort as its message says.
   - the scaled range is checked with isTRUE(all.equal(...)), so NA values in
     ddg lead to the FAIL message instead of an error from if().
 * Line breaks were rebuilt from the hunk line counts. Blank-line placement
   and indentation of context lines are assumptions - verify against the
   repository before applying (git apply --ignore-whitespace may be needed).
 * The post-image blob id on the plotting_data.R "index" line no longer
   matches the edited content.

 scripts/plotting/extreme_muts.R     | 16 +---------
 scripts/plotting/other_plots_data.R |  3 +-
 scripts/plotting/plotting_data.R    | 46 +++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/scripts/plotting/extreme_muts.R b/scripts/plotting/extreme_muts.R
index 52e50a9..65943aa 100644
--- a/scripts/plotting/extreme_muts.R
+++ b/scripts/plotting/extreme_muts.R
@@ -118,7 +118,6 @@ df_s_foldx = df[df$foldx_outcome == "Stabilising",]
 hs_foldx = df_s_foldx[df_s_foldx$ddg == min(df_s_foldx$ddg), ]
 hs_foldx
 
-
 #===============
 # active site muts
 #===============
@@ -132,8 +131,6 @@ cat("No. of active site residues within", aa_dist, ":", nrow(aa_muts))
 #====================
 # budding hotspots
 #====================
-
-# Method
 # this is what you want
 foo = merged_df3 %>% group_by(position) %>% tally()
 bar = merged_df3 %>% group_by(position) %>% count()
@@ -149,15 +146,4 @@ n_mult_muts_sites = sum(table(foo$n)) - (table(foo$n)[[1]] - table(foo$n)[[2]])
 cat("No of budding hotspots (sites with 2 mutations):", n_budding_sites
     , "\nNo. of sites with mutiple (>2) mutations:", n_mult_muts_sites)
 
-# another way
-setDT(merged_df3)[, pos_count := .N, by = .(position)]
-
-# this is cummulative
-table(merged_df3$pos_count)
-
-# use group by on this: same as the
-snpsBYpos_df <- merged_df3 %>%
-  group_by(position) %>%
-  summarize(snpsBYpos = mean(pos_count))
-
-
+#==========================================================================
\ No newline at end of file
diff --git a/scripts/plotting/other_plots_data.R b/scripts/plotting/other_plots_data.R
index c89a010..df5c1e3 100644
--- a/scripts/plotting/other_plots_data.R
+++ b/scripts/plotting/other_plots_data.R
@@ -59,7 +59,6 @@ if (my_min == -1 && my_max == 1){
   cat("FAIL: could not scale foldx ddg values"
       , "Aborting!")
 }
-
 
 #================================
 # adding foldx outcome category
@@ -76,7 +75,7 @@ if ( all(c1 == c2) ){
   cat("FAIL: foldx outcome could not be created. Aborting!")
   exit()
 }
-
+#=======================================================================
 # name tidying
 df_ps$mutation_info = as.factor(df_ps$mutation_info)
 df_ps$duet_outcome = as.factor(df_ps$duet_outcome)
diff --git a/scripts/plotting/plotting_data.R b/scripts/plotting/plotting_data.R
index 6f6c3ec..0c15a78 100755
--- a/scripts/plotting/plotting_data.R
+++ b/scripts/plotting/plotting_data.R
@@ -63,6 +63,52 @@ my_df = read.csv(infile_params, header = T)
 
 cat("\nInput dimensions:", dim(my_df))
 
+###########################
+# add foldx outcome category
+# and foldx scaled values
+
+# Adding them here means these variables are always available
+# when calling for plots
+###########################
+
+#------------------------------
+# adding foldx scaled values
+# scale data b/w -1 and 1
+#------------------------------
+n = which(colnames(my_df) == "ddg"); n
+
+my_min = min(my_df[,n]); my_min
+my_max = max(my_df[,n]); my_max
+
+my_df$foldx_scaled = ifelse(my_df[,n] < 0
+                            , my_df[,n]/abs(my_min)
+                            , my_df[,n]/my_max)
+# sanity check: scaled values must span -1 to 1, otherwise halt the script
+my_min = min(my_df$foldx_scaled); my_min
+my_max = max(my_df$foldx_scaled); my_max
+
+if (isTRUE(all.equal(c(my_min, my_max), c(-1, 1)))){
+  cat("PASS: foldx ddg successfully scaled b/w -1 and 1"
+      , "\nProceeding with assigning foldx outcome category")
+}else{
+  stop("FAIL: could not scale foldx ddg values"
+       , " Aborting!")
+}
+
+#------------------------------
+# adding foldx outcome category
+# ddg<0 = "Stabilising" (-ve)
+#------------------------------
+c1 = table(my_df$ddg < 0)
+my_df$foldx_outcome = ifelse(my_df$ddg < 0, "Stabilising", "Destabilising")
+c2 = table(my_df$foldx_outcome == "Stabilising")
+
+if ( all(c1 == c2) ){
+  cat("PASS: foldx outcome successfully created")
+}else{
+  # stop() raises an R error and halts the script
+  stop("FAIL: foldx outcome could not be created. Aborting!")
+}
 ###########################
 # extract unique mutation entries