updated practice script with some notes

2022-02-24 18:41:15 +00:00 · 2022-02-24 18:41:15 +00:00 · 9d46613ca4
commit 9d46613ca4
parent 67e003df8b
1 changed file with 27 additions and 9 deletions
--- a/my_datap1.py
+++ b/my_datap1.py
@@ -119,8 +119,9 @@ X_test = my_df[my_df['or_mychisq'].isnull()]
 X_test = [23.9, 0.69, -0.16, 0.59
    , 5, 0.5, 0.4, -1
    , 0.1, 1, 1, 1] 
-
 X_test_re = np.array(X_test).reshape(3, -1)
+
+
 ####################
 fitted = model.predict(X_train)
 model.coef_
@@ -134,13 +135,8 @@ scaler = preprocessing.MinMaxScaler()
 scaler.fit(X_train)
 #We can then create a scaled training set
 X_train_scaled = scaler.transform(X_train)
-
-
 new_scaled = scaler.transform(X_test_re)
-
 model.predict(new_scaled)
-
-
 #########
 from sklearn.pipeline import Pipeline
 from sklearn.linear_model import LogisticRegression
@@ -160,6 +156,20 @@ model_pipe.predict(X_test_re)
 # resid = y_train - fitted_vals
 # resid  

+#=====
+# Logistic  1 test
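+# FAILS: the input to predict() must have the same number of features
+# (columns) as the data the model was fit on, i.e. if you give the model
+# 10 features to train on, every row you predict on needs 10 features too.
+# X_test_re below has only 1 feature. TODO: compare with X_train.shape[1].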
+#=====
+mod_logis = linear_model.LogisticRegression(class_weight = 'balanced')
+mod_logis.fit(X_train,y_train)
+X_test = [23.9] 
+X_test_re = np.array(X_test).reshape(1, -1)
+mod_logis.predict(X_test_re)
+#################
+
 from sklearn.metrics import accuracy_score, precision_score, recall_score
 y_pred = model_pipe.predict(X_train)
 accuracy_score(y_train,y_pred)
@@ -189,6 +199,14 @@ output = cross_validate(model_pipe
                        , cv = 10, return_train_score = False)

 pd.DataFrame(output).mean()
- 0.65527950310559
- 0.9853658536585366
- 0.6516129032258065
+# fit_time      0.005486
+# score_time    0.002673
+# test_acc      0.601799
+# test_prec     0.976936
+# test_rec      0.603226
+# dtype: float64
+
+# the three scores from an earlier run (presumably accuracy, precision, recall -- confirm)
+# 0.65527950310559
+# 0.9853658536585366
+# 0.6516129032258065