From a14fc4dc33b8064a2fb1891f7110f83f772e0d6f Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall
Date: Fri, 11 Sep 2020 16:07:23 +0100
Subject: [PATCH] added extreme_muts.R

---
 scripts/plotting/extreme_muts.R  | 108 +++++++++++++++++++++++++++++++
 scripts/plotting/output_tables.R |   4 ++
 2 files changed, 112 insertions(+)
 create mode 100644 scripts/plotting/extreme_muts.R

diff --git a/scripts/plotting/extreme_muts.R b/scripts/plotting/extreme_muts.R
new file mode 100644
index 0000000..b29b34c
--- /dev/null
+++ b/scripts/plotting/extreme_muts.R
@@ -0,0 +1,108 @@
+#!/usr/bin/env Rscript
+#########################################################
+# TASK: find the extreme mutations (most frequent, highest OR,
+# most (de)stabilising, closest to ligand) in the all-mutations table
+#########################################################
+#=======================================================================
+# working dir and loading libraries
+getwd()
+setwd("~/git/LSHTM_analysis/scripts/plotting")
+getwd()
+
+#source("Header_TT.R")
+library(ggplot2)
+library(data.table)
+library(dplyr)
+
+#=========
+# Input
+#=========
+#source("combining_dfs_plotting.R")
+
+# FIXME: add a separate script to add foldx values and others
+source("output_tables.R")
+rm(df, merged_df3_short, df_output)
+
+#===============================================================
+df_comp = df_ordered[!is.na(df_ordered$af),]
+
+#%%%%%%%%%%%%%%%%%%%%%
+# REASSIGNMENT
+df = df_comp
+#%%%%%%%%%%%%%%%%%%%%%
+
+cols_all_muts_table = c("mutationinformation"
+                        , "mutation_info"
+                        , "af"
+                        , "af_percent"
+
+                        , "or_mychisq"
+
+                        , "pval_fisher"
+                        , "or_kin"
+
+                        , "pwald_kin"
+                        , "duet_stability_change"
+                        , "duet_outcome"
+                        , "ligand_distance"
+                        , "ligand_affinity_change"
+                        , "ligand_outcome"
+                        , "ddg"
+                        , "foldx_outcome"
+                        , "asa"
+                        , "rsa"
+                        , "kd_values"
+                        , "rd_values")
+
+df = df[,cols_all_muts_table]
+#===============================================================
+# NOTE: which() and na.rm = TRUE stop NA values from producing all-NA rows
+# Most Frequent mutation
+
+mf = df[which(df$af_percent == max(df$af_percent, na.rm = TRUE)), ]
+mf
+
+# highest OR
+hor = df[which(df$or_mychisq == max(df$or_mychisq, na.rm = TRUE)), ]
+hor
+
+
+# Most Destabilising for protein stability (DUET)
+df_d = df[which(df$duet_outcome == "Destabilising"),]
+
+hd_duet = df_d[which(df_d$duet_stability_change == min(df_d$duet_stability_change, na.rm = TRUE)), ]
+hd_duet
+
+# Most Stabilising for protein stability (DUET)
+df_s = df[which(df$duet_outcome == "Stabilising"),]
+hs_duet = df_s[which(df_s$duet_stability_change == max(df_s$duet_stability_change, na.rm = TRUE)), ]
+hs_duet
+
+# Closest Destabilising for protein stability
+close_d = df_d[order(df_d$ligand_distance, df_d$duet_stability_change),]
+
+# Closest Stabilising for protein stability
+close_s = df_s[order(df_s$ligand_distance, df_s$duet_stability_change),]
+
+
+#===============
+# ligand affinity: filtered (only mutations with ligand_distance < 10)
+#================
+df_lig = df[which(df$ligand_distance < 10),]
+
+df_d_lig = df_lig[which(df_lig$ligand_outcome == "Destabilising"),]
+hd_lig = df_d_lig[which(df_d_lig$ligand_affinity_change == min(df_d_lig$ligand_affinity_change, na.rm = TRUE)), ]
+hd_lig
+
+# Stabilising subset is taken from the distance-filtered df_lig, like df_d_lig
+df_s_lig = df_lig[which(df_lig$ligand_outcome == "Stabilising"),]
+hs_lig = df_s_lig[which(df_s_lig$ligand_affinity_change == max(df_s_lig$ligand_affinity_change, na.rm = TRUE)), ]
+hs_lig
+
+
+# Closest Destabilising for ligand affinity
+close_d_lig = df_d_lig[order(df_d_lig$ligand_distance, df_d_lig$ligand_affinity_change),]
+
+# Closest Stabilising for ligand affinity
+close_s_lig = df_s_lig[order(df_s_lig$ligand_distance, df_s_lig$ligand_affinity_change),]
+
diff --git a/scripts/plotting/output_tables.R b/scripts/plotting/output_tables.R
index 9eb9b7d..0a15176 100644
--- a/scripts/plotting/output_tables.R
+++ b/scripts/plotting/output_tables.R
@@ -253,3 +253,7 @@ cat("\nOutput table (csv) written:", outfile_all_muts_table
 
 # clear excess variables
 
+rm(check_colnames, c1, c2, cols_all_muts_table
+   , cols_mut_landscape, all_muts_table, mut_landscape_data, my_max, my_min, my_pretty_colnames
+   , na_count, na_count_df2, na_count_df3, outfile_all_muts_table, outfile_mut_landscape_data)
+