Modified loopity and multclass3 to take skf_cv as a parameter for cv

This commit is contained in:
Tanushree Tunstall 2022-03-17 18:17:58 +00:00
parent 97620c1bb0
commit d0c329a1d9
8 changed files with 161 additions and 127 deletions

View file

@ -5,29 +5,19 @@ Created on Tue Mar 15 11:09:50 2022
@author: tanu
"""
# NOTE(review): this span looks like a diff hunk whose +/- markers were lost,
# so statements from the old and the new revision may be interleaved here.
# Confirm against the repository before running it top to bottom.

# stratified shuffle split
# Hold out 33% of the rows as a test set, shuffling and stratifying on the
# 'mutation_class' column. `rs` is unpacked as extra keyword arguments; it is
# defined outside this view (presumably the random-state setting; verify).
X_train, X_test, y_train, y_test = train_test_split(num_df_wtgt[numerical_FN]
, num_df_wtgt['mutation_class']
, test_size = 0.33
, **rs
, shuffle = True
, stratify = num_df_wtgt['mutation_class'])
#%% Data
# Features: the numerical_FN + categorical_FN columns of all_df_wtgt
# (assumes both are lists of column names; TODO confirm).
# Target: the 'mutation_class' column.
X = all_df_wtgt[numerical_FN+categorical_FN]
y = all_df_wtgt['mutation_class']
#%% variables
# Bar plots of the value counts of the train and test targets.
y_train.to_frame().value_counts().plot(kind = 'bar')
y_test.to_frame().value_counts().plot(kind = 'bar')
# NOTE(review): the result of this call is discarded; the identical call is
# repeated further down and assigned to skf_cv_scores.
MultClassPipelineCV(X_train, X_test, y_train, y_test
, input_df = num_df_wtgt[numerical_FN]
, var_type = 'numerical')
#%% MultClassPipeSKFCV: function call()
# Runs the pipeline on the mixed (numerical + categorical) feature set.
# `skf_cv` is not defined in this view; presumably a cross-validation
# splitter created elsewhere; verify against the caller/setup code.
mm_skf_scoresD = MultClassPipeSKFCV(input_df = X
, target = y
, var_type = 'mixed'
, skf_cv = skf_cv)
# Same arguments as the earlier MultClassPipelineCV call, this time keeping
# the returned scores.
skf_cv_scores = MultClassPipelineCV(X_train, X_test, y_train, y_test
, input_df = num_df_wtgt[numerical_FN]
, var_type = 'numerical')
pp.pprint(skf_cv_scores)
# construct a df
skf_cv_scores_df = pd.DataFrame(skf_cv_scores)
# Bare expression: only displays the frame in an interactive session/cell.
skf_cv_scores_df
# Split the score rows by index label: labels containing 'test_' vs 'train_'.
skf_cv_scores_df_test = skf_cv_scores_df.filter(like='test_', axis=0)
skf_cv_scores_df_train = skf_cv_scores_df.filter(like='train_', axis=0)
# Same construction and test/train row split for the MultClassPipeSKFCV scores.
mm_skf_scores_df_all = pd.DataFrame(mm_skf_scoresD)
# Bare expression: only displays the frame in an interactive session/cell.
mm_skf_scores_df_all
mm_skf_scores_df_test = mm_skf_scores_df_all.filter(like='test_', axis=0)
mm_skf_scores_df_train = mm_skf_scores_df_all.filter(like='train_', axis=0) # helps to see if you trust the results