From 8d8fc03f72e899ef16e7c1a5183a058dade36fa7 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall
Date: Mon, 20 Jun 2022 21:53:15 +0100
Subject: [PATCH] added test script to test dissected model

---
 scripts/ml/Mult_dissected_CALL.py | 98 +++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)
 create mode 100644 scripts/ml/Mult_dissected_CALL.py

diff --git a/scripts/ml/Mult_dissected_CALL.py b/scripts/ml/Mult_dissected_CALL.py
new file mode 100644
index 0000000..229ed92
--- /dev/null
+++ b/scripts/ml/Mult_dissected_CALL.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Jun 20 13:05:23 2022
+
+@author: tanu
+
+Test call of MultModelsCl_dissected: once on X (baseline models) and once on
+X[X_genomicFN] (genomics). The genomics scores are then split by row label,
+tagged by data source and row-bound into comb_df.
+
+NOTE(review): pd, MultModelsCl_dissected, X, y, X_bts, y_bts, skf_cv and
+X_genomicFN are not defined in this script; presumably they are provided
+by the calling session -- confirm before running it standalone.
+"""
+#==================
+# Baseline models
+#==================
+cm_di2 = MultModelsCl_dissected(input_df = X
+                               , target = y
+                               , var_type = 'mixed'
+                               , skf_cv = skf_cv
+                               , blind_test_input_df = X_bts
+                               , blind_test_target = y_bts
+                               , add_cm = True
+                               , add_yn = True)
+
+# Transpose first, so the regex below is applied to column labels (axis = 1).
+baseline_all2 = pd.DataFrame(cm_di2)
+baseline_all2 = baseline_all2.T
+baseline_CTBT2 = baseline_all2.filter(regex = 'test_.*|bts_.*|TN|FP|FN|TP|.*_neg|.*_pos' , axis = 1)
+
+#================
+# Stability cols
+#================
+
+
+#================
+# Affinity cols
+#================
+
+
+#================
+# Residue level
+#================
+
+
+#================
+# Genomics
+# X_genomicFN
+#================
+feature_gp_name = 'genomics'
+
+scores_mm_gn = MultModelsCl_dissected(input_df = X[X_genomicFN]
+                                     , target = y
+                                     , var_type = 'mixed'
+                                     , skf_cv = skf_cv
+                                     , blind_test_input_df = X_bts[X_genomicFN]
+                                     , blind_test_target = y_bts
+                                     , add_cm = True
+                                     , add_yn = True)
+
+# Not transposed here, so the regex below is applied to row labels (axis = 0).
+baseline_all_gn = pd.DataFrame(scores_mm_gn)
+baseline_CTBT_gn = baseline_all_gn.filter(regex = '.*_time|test_.*|bts_.*|TN|FP|FN|TP|.*_neg|.*_pos' , axis = 0)
+baseline_CTBT_gn['feature_group'] = feature_gp_name
+
+# Rows selected by the regex below are tagged 'CT_score'.
+baseline_CT = baseline_CTBT_gn.filter(regex = '.*_time|test_.*|TN|FP|FN|TP|.*_neg|.*_pos', axis = 0)
+baseline_CT = baseline_CT.reset_index()
+baseline_CT.rename(columns = {'index': 'original_index'}, inplace = True)
+baseline_CT['score_type'] = baseline_CT['original_index']
+# Strip only a leading 'test_': the previous pattern 'test_*' matched 'test'
+# followed by any number of underscores, anywhere in the label.
+baseline_CT['score_type'] = baseline_CT['score_type'].str.replace(r'^test_', '', regex = True)
+baseline_CT['data_source'] = 'CT_score'
+
+# Rows whose label contains 'bts_' are tagged 'BT_score'.
+baseline_BT = baseline_CTBT_gn.filter(regex = 'bts_', axis = 0)
+baseline_BT = baseline_BT.reset_index()
+baseline_BT.rename(columns = {'index': 'original_index'}, inplace = True)
+baseline_BT['score_type'] = baseline_BT['original_index']
+# Same prefix-only fix for 'bts_' (was 'bts_*').
+baseline_BT['score_type'] = baseline_BT['score_type'].str.replace(r'^bts_', '', regex = True)
+baseline_BT['data_source'] = 'BT_score'
+
+# row bind
+# Index.equals returns False when the two column indexes differ in length,
+# where the element-wise == comparison raises instead.
+if baseline_CT.columns.equals(baseline_BT.columns):
+    print('\nPASS:colnames match, proceeding to rowbind')
+    comb_df = pd.concat([baseline_BT,baseline_CT], axis = 0, ignore_index = True )
+else:
+    print('\nFAIL:colnames mismatch, comb_df not created')
+
+#================
+# Evolution
+#================