saving work with scripts for feature selection

This commit is contained in:
Tanushree Tunstall 2022-05-19 08:30:18 +01:00
parent a9dc3c43e5
commit fa0f5e5b39
3 changed files with 15 additions and 222 deletions

View file

@@ -207,8 +207,8 @@ X_genomicFN = ['maf'
# , 'or_fisher'
# , 'pval_fisher'
#, 'lineage'
, 'lineage_count_all'
, 'lineage_count_unique'
#, 'lineage_count_all'
#, 'lineage_count_unique'
]
#%% Construct numerical and categorical column names
@@ -256,7 +256,7 @@ all_df_wtgt = training_df[numerical_FN + categorical_FN + ['dst_mode']]
all_df_wtgt.shape
#%%================================================================
#%% Apply ML
#TODO: Apply oversampling!
#TODO: A
#%% Data
#X = all_df_wtgt[numerical_FN+categorical_FN]
@@ -272,16 +272,16 @@ X_bts_wt = blind_test_df[numerical_FN + ['dst_mode']]
# Quick check
(X['ligand_affinity_change']==0).sum() == (X['ligand_distance']>10).sum()
#%% MultClassPipeSKFCV: function call()
mm_skf_scoresD = MultClassPipeSKFCV(input_df = X
, target = y
, var_type = 'numerical'
, skf_cv = skf_cv)
# mm_skf_scoresD = MultClassPipeSKFCV(input_df = X
# , target = y
# , var_type = 'numerical'
# , skf_cv = skf_cv)
mm_skf_scores_df_all = pd.DataFrame(mm_skf_scoresD)
mm_skf_scores_df_all
mm_skf_scores_df_test = mm_skf_scores_df_all.filter(like='test_', axis=0)
mm_skf_scores_df_train = mm_skf_scores_df_all.filter(like='train_', axis=0) # helps to see if you trust the results
print(mm_skf_scores_df_train)
print(mm_skf_scores_df_test)
# mm_skf_scores_df_all = pd.DataFrame(mm_skf_scoresD)
# mm_skf_scores_df_all
# mm_skf_scores_df_test = mm_skf_scores_df_all.filter(like='test_', axis=0)
# mm_skf_scores_df_train = mm_skf_scores_df_all.filter(like='train_', axis=0) # helps to see if you trust the results
# print(mm_skf_scores_df_train)
# print(mm_skf_scores_df_test)