From ec2d5ca25b863faf887dde7b9559b5a5d19b366f Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Sat, 5 Mar 2022 15:13:26 +0000 Subject: [PATCH] saving work --- my_data6.py | 10 ++++++---- p_jr_d1.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/my_data6.py b/my_data6.py index adfe017..afc887c 100644 --- a/my_data6.py +++ b/my_data6.py @@ -152,7 +152,7 @@ X_vars11 = my_df[x_stability_cols + X_strF + X_evolF ] #%% X_vars1.shape[1] - +X_vars5.shape[1] # TODO: stratified cross validate # Train-test Split @@ -161,11 +161,13 @@ X_train, X_test, y_train, y_test = train_test_split(X_vars1, target1, test_size = 0.33, random_state = 42) -MultClassPipeline(X_train, X_test, y_train, y_test) - +t1_res = MultClassPipeline(X_train, X_test, y_train, y_test) +t1_res # TARGET3 X_train3, X_test3, y_train3, y_test3 = train_test_split(X_vars5, target3, test_size = 0.33, random_state = 42) -MultClassPipeline(X_train3, X_test3, y_train3, y_test3) +t3_res = MultClassPipeline(X_train3, X_test3, y_train3, y_test3) +t3_res +#%% \ No newline at end of file diff --git a/p_jr_d1.py b/p_jr_d1.py index da72511..602681d 100644 --- a/p_jr_d1.py +++ b/p_jr_d1.py @@ -372,4 +372,33 @@ print(pipe2.classification_report (y_test, np.argmax(predicted, axis = 1))) enc = preprocessing.OneHotEncoder() enc.fit(X_train) enc.transform(X_train).toarray() +#%% +from sklearn.metrics import mean_squared_error, make_scorer +from sklearn.model_selection import cross_validate +from sklearn.linear_model import LinearRegression +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.preprocessing import MinMaxScaler +boston = load_boston() +X_train, y_train = pd.DataFrame(boston.data, columns = boston.feature_names), boston.target + +model1 = Pipeline(steps = [ + ('pre', MinMaxScaler()), + ('reg', LinearRegression())]) + +score_fn = make_scorer(mean_squared_error) +scores = cross_validate(model1, X_train, y_train + , scoring = score_fn + , cv = 10) + +from itertools import combinations +def train(X): + return cross_validate(model1, X, y_train + , scoring = score_fn + #, return_train_score = False) + , return_estimator = True)['test_score'] + +scores = [train(X_train.loc[:,vars]) for vars in combinations(X_train.columns, 12)] +means = [score.mean() for score in scores] +means \ No newline at end of file