updated practice script with some notes

2022-02-24 18:41:15 +00:00 · 2022-02-24 18:41:15 +00:00 · 9d46613ca4
commit 9d46613ca4
parent 67e003df8b
1 changed files with 27 additions and 9 deletions
--- a/my_datap1.py
+++ b/my_datap1.py
@ -119,8 +119,9 @@ X_test = my_df[my_df['or_mychisq'].isnull()]
 X_test = [23.9, 0.69, -0.16, 0.59
    , 5, 0.5, 0.4, -1
    , 0.1, 1, 1, 1] 
 X_test_re = np.array(X_test).reshape(3, -1)
 ####################
 fitted = model.predict(X_train)
 model.coef_
@ -134,13 +135,8 @@ scaler = preprocessing.MinMaxScaler()
 scaler.fit(X_train)
 #We can then create a scaled training set
 X_train_scaled = scaler.transform(X_train)
 new_scaled = scaler.transform(X_test_re)
 model.predict(new_scaled)
 #########
 from sklearn.pipeline import Pipeline
 from sklearn.linear_model import LogisticRegression
@ -160,6 +156,20 @@ model_pipe.predict(X_test_re)
 # resid = y_train - fitted_vals
 # resid  
 #=====
 # Logistic  1 test
 # FAILS because the input passed to predict() must have the same number
 # of features as the training data, i.e., if you give the model 10 features
 # to train on, you need the same 10 features to predict anything.
 # THINK!!!!
 #=====
 mod_logis = linear_model.LogisticRegression(class_weight = 'balanced')
 mod_logis.fit(X_train,y_train)
 X_test = [23.9] 
 X_test_re = np.array(X_test).reshape(1, -1)
 mod_logis.predict(X_test_re)
 #################
 from sklearn.metrics import accuracy_score, precision_score, recall_score
 y_pred = model_pipe.predict(X_train)
 accuracy_score(y_train,y_pred)
@ -189,6 +199,14 @@ output = cross_validate(model_pipe
                        , cv = 10, return_train_score = False)
 pd.DataFrame(output).mean()
- 0.65527950310559
+# fit_time      0.005486
- 0.9853658536585366
+# score_time    0.002673
- 0.6516129032258065
+# test_acc      0.601799
 # test_prec     0.976936
 # test_rec      0.603226
 # dtype: float64
 # the three scores from the earlier run (presumably accuracy, precision, recall)
 # 0.65527950310559
 # 0.9853658536585366
 # 0.6516129032258065