From e03ce277b7eb1cea9f1063d6bcf65b281304a5e3 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 1 Sep 2022 12:57:38 +0100 Subject: [PATCH] checked masked cols after running --- scripts/ml/ml_functions/GetMLData.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/scripts/ml/ml_functions/GetMLData.py b/scripts/ml/ml_functions/GetMLData.py index d5eab71..6e8aaa6 100755 --- a/scripts/ml/ml_functions/GetMLData.py +++ b/scripts/ml/ml_functions/GetMLData.py @@ -481,7 +481,7 @@ def getmldata(gene, drug # mask the mcsm ligand affinity AND mcsm_na affinity columns where ligand distance > 10 my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0 - #mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask] + # Create names for masking columns mask_check_cols = ['mutationinformation', 'ligand_distance'] + cols_to_mask #--------------------------- @@ -502,24 +502,10 @@ def getmldata(gene, drug #mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask + add_cols_mask] mask_check_cols = mask_check_cols + add_cols_mask - # if gene.lower() in geneL_na_ppi2: - # #--------------------------- - # # RPOB: mask ppi2 + na + lig cols - # #--------------------------- - # mask_check = my_df_ml[['mutationinformation', - # 'ligand_distance', 'ligand_affinity_change', - # 'nca_distance','mcsm_na_affinity', - # 'mcsm_ppi2_affinity','interface_dist']] - - # GET mask data mask_check = my_df_ml[mask_check_cols] - # sanity check: check script SANITY_CHECK_mask.py if write_maskfile: - # write mask file for sanity check - #mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True) - mask_check.to_csv(outdir_ml + gene.lower() + '_mask_check.csv') ###############################################################################