From 4e6f10d1ba296d4ee0726dc41e6ec726091cc69f Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Sun, 7 Aug 2022 11:21:25 +0100 Subject: [PATCH] added outcome col to dm_om data --- scripts/functions/dm_om_data.R | 55 ++++++++++++++++--- scripts/functions/lf_bp2.R | 2 +- scripts/plotting/get_plotting_dfs.R | 2 +- .../plotting/plotting_thesis/dm_om_plots.R | 15 +++++ 4 files changed, 65 insertions(+), 9 deletions(-) diff --git a/scripts/functions/dm_om_data.R b/scripts/functions/dm_om_data.R index 29389d5..1a3a526 100644 --- a/scripts/functions/dm_om_data.R +++ b/scripts/functions/dm_om_data.R @@ -10,9 +10,10 @@ # DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname dm_om_wf_lf_data <- function(df - , gene_name = gene # from globals + , gene # from globals , colnames_to_extract - , ligand_dist_colname = LigDist_colname # from globals + #, ligand_dist_colname = LigDist_colname # from globals + #, LigDist_colname # from globals used #, ppi2Dist_colname #from globals used #, naDist_colname #from globals used , dr_muts = dr_muts_col # from globals @@ -52,11 +53,11 @@ dm_om_wf_lf_data <- function(df ) # additional dfs - if (tolower(gene_name)%in%geneL_normal){ + if (tolower(gene)%in%geneL_normal){ wf_lf_dataL = common_dfsL } - if (tolower(gene_name)%in%geneL_na){ + if (tolower(gene)%in%geneL_na){ additional_dfL = list( wf_mcsm_na = data.frame() , lf_mcsm_na = data.frame() @@ -64,7 +65,7 @@ dm_om_wf_lf_data <- function(df wf_lf_dataL = c(common_dfsL, additional_dfL) } - if (tolower(gene_name)%in%geneL_ppi2){ + if (tolower(gene)%in%geneL_ppi2){ additional_dfL = list( wf_mcsm_ppi2 = data.frame() , lf_mcsm_ppi2 = data.frame() @@ -248,6 +249,10 @@ if (nrow(lf_duet) == expected_rows_lf){ quit() } +# NEW columns [outcome and outcome colname] +lf_duet$outcome_colname = "duet_outcome" +lf_duet$outcome = lf_duet$duet_outcome + # Assign them to the output list wf_lf_dataL[['wf_duet']] = wf_duet wf_lf_dataL[['lf_duet']] = lf_duet @@ -278,6 +283,10 @@ if (nrow(lf_foldx) == expected_rows_lf){ quit() } +# NEW column +lf_foldx$outcome_colname = "foldx_outcome" +lf_foldx$outcome = lf_foldx$foldx_outcome + # Assign them to the output list wf_lf_dataL[['wf_foldx']] = wf_foldx wf_lf_dataL[['lf_foldx']] = lf_foldx @@ -308,6 +317,10 @@ if (nrow(lf_deepddg) == expected_rows_lf){ quit() } +# NEW columns [outcome and outcome colname] +lf_deepddg$outcome_colname = "deepddg_outcome" +lf_deepddg$outcome = lf_deepddg$deepddg_outcome + # Assign them to the output list wf_lf_dataL[['wf_deepddg']] = wf_deepddg wf_lf_dataL[['lf_deepddg']] = lf_deepddg @@ -337,6 +350,10 @@ if (nrow(lf_dynamut2) == expected_rows_lf){ quit() } +# NEW columns [outcome and outcome colname] +lf_dynamut2$outcome_colname = "ddg_dynamut2_outcome" +lf_dynamut2$outcome = lf_dynamut2$ddg_dynamut2_outcome + # Assign them to the output list wf_lf_dataL[['wf_dynamut2']] = wf_dynamut2 wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2 @@ -384,6 +401,10 @@ if (nrow(lf_consurf) == expected_rows_lf){ quit() } +# NEW columns [outcome and outcome colname] +lf_consurf$outcome_colname = "consurf_outcome" +lf_consurf$outcome = lf_consurf$consurf_outcome + # Assign them to the output list wf_lf_dataL[['wf_consurf']] = wf_consurf wf_lf_dataL[['lf_consurf']] = lf_consurf @@ -413,6 +434,10 @@ if (nrow(lf_snap2) == expected_rows_lf){ quit() } +# NEW columns [outcome and outcome colname] +lf_snap2$outcome_colname = "snap2_outcome" +lf_snap2$outcome = lf_snap2$snap2_outcome + # Assign them to the output list wf_lf_dataL[['wf_snap2']] = wf_snap2 wf_lf_dataL[['lf_snap2']] = lf_snap2 @@ -442,6 +467,10 @@ if (nrow(lf_provean) == expected_rows_lf){ quit() } +# NEW columns [outcome and outcome colname] +lf_provean$outcome_colname = "provean_outcome" +lf_provean$outcome = lf_provean$provean_outcome + # Assign them to the output list wf_lf_dataL[['wf_provean']] = wf_provean wf_lf_dataL[['lf_provean']] = lf_provean @@ -479,6 +508,10 @@ if (nrow(lf_mcsm_lig) == expected_rows_lf){ quit() } +# NEW columns [outcome and outcome colname] +lf_mcsm_lig$outcome_colname = "ligand_outcome" +lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome + # Assign them to the output list wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig @@ -487,7 +520,7 @@ wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig # mcsm-NA affinity # data filtered by cut off #==================== -if (tolower(gene_name)%in%geneL_na){ +if (tolower(gene)%in%geneL_na){ #--------------- # mCSM-NA: WF and lF #----------------- @@ -514,6 +547,10 @@ if (tolower(gene_name)%in%geneL_na){ quit() } + # NEW columns [outcome and outcome colname] + lf_mcsm_na$outcome_colname = "mcsm_na_outcome" + lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome + # Assign them to the output list wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na @@ -524,7 +561,7 @@ if (tolower(gene_name)%in%geneL_na){ # mcsm-ppi2 affinity # data filtered by cut off #======================== -if (tolower(gene_name)%in%geneL_ppi2){ +if (tolower(gene)%in%geneL_ppi2){ #----------------- # mCSM-PPI2: WF and lF #----------------- @@ -551,6 +588,10 @@ if (tolower(gene_name)%in%geneL_ppi2){ quit() } + # NEW columns [outcome and outcome colname] + lf_mcsm_ppi2$outcome_colname = "mcsm_ppi2_outcome" + lf_mcsm_ppi2$outcome = lf_mcsm_ppi2$mcsm_ppi2_outcome + # Assign them to the output list wf_lf_dataL[['wf_mcsm_ppi2']] = wf_mcsm_ppi2 wf_lf_dataL[['lf_mcsm_ppi2']] = lf_mcsm_ppi2 diff --git a/scripts/functions/lf_bp2.R b/scripts/functions/lf_bp2.R index 630f8b8..bff1d7a 100644 --- a/scripts/functions/lf_bp2.R +++ b/scripts/functions/lf_bp2.R @@ -7,7 +7,7 @@ ############################# lf_bp2 <- function(lf_df = lf_duet - , p_title = "DUET-DDG" + , p_title = "" , colour_categ = "outcome" , x_grp = "mutation_info_labels" , y_var = "param_value" diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index 2b8c6ba..cd7c0a4 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -117,7 +117,7 @@ geneL_normal = c("pnca") geneL_na = c("gid", "rpob") geneL_ppi2 = c("alr", "embb", "katg", "rpob") -all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene_name = gene) +all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene) wf_duet = all_dm_om_df[['wf_duet']] lf_duet = all_dm_om_df[['lf_duet']] diff --git a/scripts/plotting/plotting_thesis/dm_om_plots.R b/scripts/plotting/plotting_thesis/dm_om_plots.R index 937aadf..733e77f 100644 --- a/scripts/plotting/plotting_thesis/dm_om_plots.R +++ b/scripts/plotting/plotting_thesis/dm_om_plots.R @@ -1,3 +1,13 @@ +################# +# Numbers +################## +nrow(wf_mcsm_lig) +table(wf_mcsm_lig$mutation_info_labels) + +nrow(wf_mcsm_ppi2) +table(wf_mcsm_ppi2$mutation_info_labels) +################################################################ + geneL_normal = c("pnca") geneL_na = c("gid", "rpob") geneL_ppi2 = c("alr", "embb", "katg", "rpob") @@ -116,6 +126,11 @@ stability_suffix <- paste0(delta_symbol, delta_symbol, "G") lf_bp2(lf_df_stabP, p_title = paste0("Stability",stability_suffix) , violin_quantiles = c(0.5), monochrome = F) +lf_bp2(lf_duet, p_title = paste0("Stability",stability_suffix) + , violin_quantiles = c(0.5), monochrome = F) + + + lf_bp2(lf_df_consP, p_title = "Evolutionary Conservation" , violin_quantiles = c(0.5), monochrome = F)