33 lines
1.2 KiB
Python
33 lines
1.2 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 15 11:09:50 2022

@author: tanu
"""

# Stratified, shuffled hold-out split of the numerical feature columns:
# test_size=0.33 reserves a third of the rows for testing, and stratifying on
# 'mutation_class' keeps the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    num_df_wtgt[numerical_FN],
    num_df_wtgt['mutation_class'],
    test_size=0.33,
    shuffle=True,
    stratify=num_df_wtgt['mutation_class'],
    # `rs` is defined elsewhere; presumably it carries the random-state
    # setting -- TODO confirm against its definition.
    **rs,
)

# Class balance of the target in each split, one bar-chart panel per split.
#
# Series.plot() draws on the current axes whenever a figure is already open,
# so two back-to-back calls paint the test bars over the train bars when the
# script is run top to bottom. Collecting both count columns in one frame and
# plotting with subplots=True gives each split its own axes instead.
# Counting on the Series directly (rather than via to_frame()) also keeps the
# tick labels as plain class values.
(
    y_train.value_counts().to_frame('train')
    .join(y_test.value_counts().rename('test'), how='outer')
    .fillna(0)  # a class absent from one split shows as a zero-height bar
    .plot(kind='bar', subplots=True)
)

# Run the multi-class CV pipeline once on the numerical features and keep the
# scores it returns. An identical call whose result was thrown away used to
# precede this one; it only repeated the same work, so it has been dropped.
# NOTE(review): if the pipeline's console output from a second run is wanted,
# call it again explicitly.
skf_cv_scores = MultClassPipelineCV(
    X_train, X_test, y_train, y_test,
    input_df=num_df_wtgt[numerical_FN],
    var_type='numerical',
)

# Pretty-print the raw scores for a quick visual check.
pp.pprint(skf_cv_scores)

# Tabulate the scores returned by the pipeline.
skf_cv_scores_df = pd.DataFrame(data=skf_cv_scores)
skf_cv_scores_df  # echoes the table only when this line is run interactively

# Split the table by row label: filter(axis=0, like=...) keeps the rows whose
# index label contains the given substring.
skf_cv_scores_df_train = skf_cv_scores_df.filter(axis=0, like='train_')
skf_cv_scores_df_test = skf_cv_scores_df.filter(axis=0, like='test_')