modified ml params and models

This commit is contained in:
Tanushree Tunstall 2022-05-19 02:35:50 +01:00
parent 3ed7840f60
commit 4dbc90ad44
6 changed files with 17 additions and 332 deletions

View file

@@ -17,16 +17,11 @@ from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from xgboost import XGBClassifier
#%% Get train-test split and scoring functions
# Split num_df_wtgt into train and test subsets:
#   - features: the columns of num_df_wtgt selected by numerical_FN
#   - target  : the 'mutation_class' column
#   - test_size = 0.33 holds out 33% of the rows for testing
#   - shuffle = True with random_state = 2 shuffles rows with a fixed seed,
#     so the same split is produced on every run
#   - stratify on 'mutation_class' so the class proportions are preserved
#     (approximately) in both the train and the test subsets
# NOTE(review): train_test_split, num_df_wtgt and numerical_FN are not defined
# in the visible lines -- presumably train_test_split comes from
# sklearn.model_selection and the other two are built earlier; confirm.
X_train, X_test, y_train, y_test = train_test_split(num_df_wtgt[numerical_FN]
, num_df_wtgt['mutation_class']
, test_size = 0.33
, random_state = 2
, shuffle = True
, stratify = num_df_wtgt['mutation_class'])
#######################################################
# Visual check of class balance: one bar plot of value counts for each of
#   1. y                          (full target)
#   2. blind_test_df['dst_mode']  (target column of the blind test set)
#   3. y_train                    (training target)
#   4. y_test                     (test target)
# Each object is converted to a one-column DataFrame via to_frame() before
# value_counts() is called, so the counts are per distinct value of that column.
# NOTE(review): y and blind_test_df are not defined in the visible lines --
# assumes both exist upstream and that y is a pandas Series; confirm.
# NOTE(review): no figure/axes is passed to plot(), so when run as a single
# cell/script the four bar charts may be drawn onto the same axes; confirm
# whether separate figures are intended.
y.to_frame().value_counts().plot(kind = 'bar')
blind_test_df['dst_mode'].to_frame().value_counts().plot(kind = 'bar')
y_train.to_frame().value_counts().plot(kind = 'bar')
y_test.to_frame().value_counts().plot(kind = 'bar')
scoring_fn = ({'accuracy' : make_scorer(accuracy_score)
, 'fscore' : make_scorer(f1_score)
, 'mcc' : make_scorer(matthews_corrcoef)