From 9d46613ca48dfb385a16b449ddc3981fa40d781c Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 24 Feb 2022 18:41:15 +0000 Subject: [PATCH] updated practice script with some notes --- my_datap1.py | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/my_datap1.py b/my_datap1.py index 1814950..d6b4ee8 100644 --- a/my_datap1.py +++ b/my_datap1.py @@ -119,8 +119,9 @@ X_test = my_df[my_df['or_mychisq'].isnull()] X_test = [23.9, 0.69, -0.16, 0.59 , 5, 0.5, 0.4, -1 , 0.1, 1, 1, 1] - X_test_re = np.array(X_test).reshape(3, -1) + + #################### fitted = model.predict(X_train) model.coef_ @@ -134,13 +135,8 @@ scaler = preprocessing.MinMaxScaler() scaler.fit(X_train) #We can then create a scaled training set X_train_scaled = scaler.transform(X_train) - - new_scaled = scaler.transform(X_test_re) - model.predict(new_scaled) - - ######### from sklearn.pipeline import Pipeline from sklearn.linear_model import LogisticRegression @@ -160,6 +156,20 @@ model_pipe.predict(X_test_re) # resid = y_train - fitted_vals # resid +#===== +# Logistic 1 test +# FAILS since: the test input dim and the training input dim should be the same +# i.e. if you give the model 10 features to train on, you will need +# 10 features to predict something? +# THINK!!!! 
+#===== +mod_logis = linear_model.LogisticRegression(class_weight = 'balanced') +mod_logis.fit(X_train,y_train) +X_test = [23.9] +X_test_re = np.array(X_test).reshape(1, -1) +mod_logis.predict(X_test_re) +################# + from sklearn.metrics import accuracy_score, precision_score, recall_score y_pred = model_pipe.predict(X_train) accuracy_score(y_train,y_pred) @@ -189,6 +199,14 @@ output = cross_validate(model_pipe , cv = 10, return_train_score = False) pd.DataFrame(output).mean() - 0.65527950310559 - 0.9853658536585366 - 0.6516129032258065 \ No newline at end of file +# fit_time 0.005486 +# score_time 0.002673 +# test_acc 0.601799 +# test_prec 0.976936 +# test_rec 0.603226 +# dtype: float64 + +# the three scores +# 0.65527950310559 +# 0.9853658536585366 +# 0.6516129032258065 \ No newline at end of file