copy of ML dir to an FS-only version

This commit is contained in:
Tanushree Tunstall 2022-05-22 23:30:58 +01:00
parent 52cc16f3fa
commit 80e6b3af96
23 changed files with 3115 additions and 243 deletions

View file

@ -49,8 +49,7 @@ clf2.best_estimator_.named_steps['selector'].n_features_in_
# Interactive inspection of the fitted search object `clf2` (created outside
# this view). The bare expressions only display a value when run in a
# REPL/IDE cell; they have no effect when the file runs as a script.
clf2.best_estimator_ #n of best features
clf2.best_params_
clf2.best_estimator_.get_params
# The original had this call twice, the second copy missing its closing
# parenthesis, which absorbed the `clf3 = ...` line below as a keyword
# argument and left the file unparsable. One well-formed call is kept.
# NOTE(review): confirm `clf2` actually exposes get_feature_names().
clf2.get_feature_names()
clf3 = clf2.best_estimator_ #
@ -62,4 +61,37 @@ clf3._final_estimator.solver
# Keep a handle on the estimator stored in clf2.best_estimator_ and show it.
fs_bmod = clf2.best_estimator_
print(
    '\nbest model with feature selection:',
    fs_bmod,
)
#########################################################
# my data
# Three-step pipeline: 'pre' (MinMaxScaler) -> 'selector' (RFECV wrapped around
# a LogisticRegression, scored with 'matthews_corrcoef') -> 'classifier'
# (LogisticRegression). `rs` is unpacked as keyword arguments into both
# LogisticRegression instances; `rs` and `skf_cv` are defined outside this view.
#
# Each step must be its own (name, estimator) tuple separated by a comma.
# Without the comma before the 'selector' step, Python tries to *call* the
# ('pre', ...) tuple and raises "TypeError: 'tuple' object is not callable".
pipe = Pipeline([
    ('pre', MinMaxScaler())
    , ('selector', RFECV(LogisticRegression(**rs), cv = skf_cv, scoring = 'matthews_corrcoef'))
    , ('classifier', LogisticRegression(**rs))])
# Parameter grids for the grid search over `pipe`. Keys use the
# '<step name>__<parameter>' form to address parameters of a pipeline step.
search_space = [
    # Grid 1: vary only the selector's min_features_to_select.
    {'selector__min_features_to_select': [1, 2]},
    # Grid 2: set the 'classifier' step to a LogisticRegression and vary its
    # C and penalty with a fixed solver and iteration cap.
    {
        'classifier': [LogisticRegression()],
        # wider alternative for C, currently disabled: np.logspace(0, 4, 10)
        'classifier__C': [2, 2.8],
        'classifier__max_iter': [100],
        'classifier__penalty': ['l1', 'l2'],
        'classifier__solver': ['saga'],
    },
    # Further candidate grids, currently disabled:
    # {'classifier': [RandomForestClassifier(n_estimators=100)],
    #  'classifier__max_depth': [5, 10, None]},
    # {'classifier': [KNeighborsClassifier()],
    #  'classifier__n_neighbors': [3, 7, 11],
    #  'classifier__weights': ['uniform', 'distance']},
]
# Grid search over `pipe` with the grids in `search_space`, then evaluation of
# the fitted search on the blind-test split.
# NOTE(review): refit='mcc' presumably names an entry of `mcc_score_fn`
# (defined outside this view) - confirm the two agree.
clf = GridSearchCV(
    estimator=pipe,
    param_grid=search_space,
    scoring=mcc_score_fn,
    refit='mcc',
    cv=skf_cv,
    verbose=0,
)
clf.fit(X, y)

# Bare expressions: only displayed when run interactively.
clf.best_params_
clf.best_score_

# Predictions for X_bts, compared against y_bts with two metrics.
tp = clf.predict(X_bts)
print('\nMCC on Blind test:', round(matthews_corrcoef(y_bts, tp), 2))
print('\nAccuracy on Blind test:', round(accuracy_score(y_bts, tp), 2))