From 56b71c6ca2a10f63dcf6d52fb18a3d3e1b78cfe4 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 1 Sep 2022 13:04:37 +0100 Subject: [PATCH] added avg affinity and stability cols with mask for avg affinity --- scripts/ml/ml_functions/GetMLData.py | 12 ++++--- scripts/ml/ml_functions/TEST_GetMLData.py | 44 +++++++++++++++++++++++ 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/scripts/ml/ml_functions/GetMLData.py b/scripts/ml/ml_functions/GetMLData.py index 6e8aaa6..a61b0da 100755 --- a/scripts/ml/ml_functions/GetMLData.py +++ b/scripts/ml/ml_functions/GetMLData.py @@ -436,7 +436,7 @@ def getmldata(gene, drug if gene.lower() in geneL_basic: #X_stabilityN = common_cols_stabiltyN gene_affinity_colnames = []# not needed as its the common ones - cols_to_mask = ['ligand_affinity_change'] + cols_to_mask = ['ligand_affinity_change', 'avg_lig_affinity'] cols_to_mask_ppi2 = [] cols_to_mask_na = [] @@ -444,7 +444,7 @@ def getmldata(gene, drug gene_affinity_colnames = ['mcsm_ppi2_affinity', 'interface_dist'] #X_stabilityN = common_cols_stabiltyN + geneL_ppi2_st_cols #cols_to_mask = ['ligand_affinity_change', 'mcsm_ppi2_affinity'] - cols_to_mask = ['ligand_affinity_change'] + cols_to_mask = ['ligand_affinity_change', 'avg_lig_affinity'] cols_to_mask_ppi2 = ['mcsm_ppi2_affinity'] cols_to_mask_na = [] @@ -452,7 +452,7 @@ def getmldata(gene, drug if gene.lower() in geneL_na: gene_affinity_colnames = ['mcsm_na_affinity', 'nca_distance'] #X_stabilityN = common_cols_stabiltyN + geneL_na_st_cols - cols_to_mask = ['ligand_affinity_change']#, 'mcsm_na_affinity'] + cols_to_mask = ['ligand_affinity_change', 'avg_lig_affinity'] cols_to_mask_ppi2 = [] cols_to_mask_na = ['mcsm_na_affinity'] @@ -461,7 +461,7 @@ def getmldata(gene, drug gene_affinity_colnames = ['mcsm_na_affinity','nca_distance', 'mcsm_ppi2_affinity', 'interface_dist'] #X_stabilityN = common_cols_stabiltyN + geneL_na_ppi2_st_cols #cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity', 'mcsm_ppi2_affinity'] - cols_to_mask = ['ligand_affinity_change']#, 'mcsm_na_affinity'] + cols_to_mask = ['ligand_affinity_change', 'avg_lig_affinity'] cols_to_mask_ppi2 = ['mcsm_ppi2_affinity'] cols_to_mask_na = ['mcsm_na_affinity'] @@ -530,6 +530,7 @@ def getmldata(gene, drug , 'ddg_foldx' , 'deepddg' , 'ddg_dynamut2' + , 'avg_stability' # NEW , 'contacts'] #-------- # FoldX @@ -549,7 +550,8 @@ def getmldata(gene, drug #=================== common_affinity_Fnum = ['ligand_distance' , 'ligand_affinity_change' - , 'mmcsm_lig'] + , 'mmcsm_lig' + , 'avg_lig_affinity'] # NEW # if gene.lower() in geneL_basic: # X_affinityFN = common_affinity_Fnum diff --git a/scripts/ml/ml_functions/TEST_GetMLData.py b/scripts/ml/ml_functions/TEST_GetMLData.py index 8bf23c5..040af65 100644 --- a/scripts/ml/ml_functions/TEST_GetMLData.py +++ b/scripts/ml/ml_functions/TEST_GetMLData.py @@ -5,10 +5,54 @@ Created on Thu Sep 1 12:22:27 2022 @author: tanu """ +getmldata(gene = "alr" + , drug = "cycloserine" + , data_combined_model = False + , use_or = False + , omit_all_genomic_features = False + , write_maskfile = True + , write_outfile = False) + + +getmldata(gene = "embB" + , drug = "ethambutol" + , data_combined_model = False + , use_or = False + , omit_all_genomic_features = False + , write_maskfile = True + , write_outfile = False) + + +getmldata(gene = "gid" + , drug = "streptomycin" + , data_combined_model = False + , use_or = False + , omit_all_genomic_features = False + , write_maskfile = True + , write_outfile = False) + + getmldata(gene = "katG" , drug = "isoniazid" , data_combined_model = False , use_or = False , omit_all_genomic_features = False , write_maskfile = True + , write_outfile = False) + + +getmldata(gene = "rpoB" + , drug = "rifampicin" + , data_combined_model = False + , use_or = False + , omit_all_genomic_features = False + , write_maskfile = True + , write_outfile = False) + +getmldata(gene = "pncA" + , drug = "pyrazinamide" + , data_combined_model = False + , use_or = False + , omit_all_genomic_features = False + , write_maskfile = True , write_outfile = False) \ No newline at end of file