updated practice script with some notes

This commit is contained in:
Tanushree Tunstall 2022-02-24 18:41:15 +00:00
parent 67e003df8b
commit 9d46613ca4

View file

@ -119,8 +119,9 @@ X_test = my_df[my_df['or_mychisq'].isnull()]
X_test = [23.9, 0.69, -0.16, 0.59 X_test = [23.9, 0.69, -0.16, 0.59
, 5, 0.5, 0.4, -1 , 5, 0.5, 0.4, -1
, 0.1, 1, 1, 1] , 0.1, 1, 1, 1]
X_test_re = np.array(X_test).reshape(3, -1) X_test_re = np.array(X_test).reshape(3, -1)
#################### ####################
fitted = model.predict(X_train) fitted = model.predict(X_train)
model.coef_ model.coef_
@ -134,13 +135,8 @@ scaler = preprocessing.MinMaxScaler()
scaler.fit(X_train) scaler.fit(X_train)
#We can then create a scaled training set #We can then create a scaled training set
X_train_scaled = scaler.transform(X_train) X_train_scaled = scaler.transform(X_train)
new_scaled = scaler.transform(X_test_re) new_scaled = scaler.transform(X_test_re)
model.predict(new_scaled) model.predict(new_scaled)
######### #########
from sklearn.pipeline import Pipeline from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression from sklearn.linear_model import LogisticRegression
@ -160,6 +156,20 @@ model_pipe.predict(X_test_re)
# resid = y_train - fitted_vals # resid = y_train - fitted_vals
# resid # resid
#=====
# Logistic 1 test
# Expected to FAIL: the array passed to predict() must have the same
# number of features (columns) as the data the model was fitted on,
# i.e. if the model is trained on 10 features, every sample passed to
# predict() needs those same 10 features.
# NOTE(review): X_train presumably has 4 columns (the earlier X_test_re
# is reshaped to 3 rows of 4 values) -- confirm against the data.
#=====
# Logistic regression with balanced class weights, fitted on X_train / y_train
mod_logis = linear_model.LogisticRegression(class_weight = 'balanced')
mod_logis.fit(X_train,y_train)
# A single sample holding a single feature value
X_test = [23.9]
# reshape(1, -1) -> 2D array with one row, i.e. shape (1, 1)
X_test_re = np.array(X_test).reshape(1, -1)
# One column here vs. the number of columns used in fit() is what
# triggers the failure described above
mod_logis.predict(X_test_re)
#################
from sklearn.metrics import accuracy_score, precision_score, recall_score from sklearn.metrics import accuracy_score, precision_score, recall_score
y_pred = model_pipe.predict(X_train) y_pred = model_pipe.predict(X_train)
accuracy_score(y_train,y_pred) accuracy_score(y_train,y_pred)
@ -189,6 +199,14 @@ output = cross_validate(model_pipe
, cv = 10, return_train_score = False) , cv = 10, return_train_score = False)
pd.DataFrame(output).mean() pd.DataFrame(output).mean()
0.65527950310559 # fit_time 0.005486
0.9853658536585366 # score_time 0.002673
0.6516129032258065 # test_acc 0.601799
# test_prec 0.976936
# test_rec 0.603226
# dtype: float64
# the three scores
# 0.65527950310559
# 0.9853658536585366
# 0.6516129032258065