optimised run_7030.py to generate output from dict now that the process function and parameter dicts have been added

This commit is contained in:
Tanushree Tunstall 2022-06-24 15:40:18 +01:00
parent 7dc7e25016
commit b37a950fec
12 changed files with 180 additions and 128408 deletions

View file

@ -197,35 +197,35 @@ def MultModelsCl(input_df, target, skf_cv
# Specify multiple Classification Models
#======================================================
models = [('AdaBoost Classifier' , AdaBoostClassifier(**rs) )
# , ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True) )
# , ('Decision Tree' , DecisionTreeClassifier(**rs) )
# , ('Extra Tree' , ExtraTreeClassifier(**rs) )
# , ('Extra Trees' , ExtraTreesClassifier(**rs) )
# , ('Gradient Boosting' , GradientBoostingClassifier(**rs) )
# , ('Gaussian NB' , GaussianNB() )
# , ('Gaussian Process' , GaussianProcessClassifier(**rs) )
# , ('K-Nearest Neighbors' , KNeighborsClassifier() )
# , ('LDA' , LinearDiscriminantAnalysis() )
, ('Bagging Classifier' , BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True) )
, ('Decision Tree' , DecisionTreeClassifier(**rs) )
, ('Extra Tree' , ExtraTreeClassifier(**rs) )
, ('Extra Trees' , ExtraTreesClassifier(**rs) )
, ('Gradient Boosting' , GradientBoostingClassifier(**rs) )
, ('Gaussian NB' , GaussianNB() )
, ('Gaussian Process' , GaussianProcessClassifier(**rs) )
, ('K-Nearest Neighbors' , KNeighborsClassifier() )
, ('LDA' , LinearDiscriminantAnalysis() )
, ('Logistic Regression' , LogisticRegression(**rs) )
# , ('Logistic RegressionCV' , LogisticRegressionCV(cv = 3, **rs))
# , ('MLP' , MLPClassifier(max_iter = 500, **rs) )
# , ('Multinomial' , MultinomialNB() )
# , ('Naive Bayes' , BernoulliNB() )
# , ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
# , ('QDA' , QuadraticDiscriminantAnalysis() )
# , ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000 ) )
# , ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
# , n_estimators = 1000
# , bootstrap = True
# , oob_score = True
# , **njobs
# , **rs
# , max_features = 'auto') )
# , ('Ridge Classifier' , RidgeClassifier(**rs) )
# , ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
# , ('SVC' , SVC(**rs) )
# , ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
# , ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False) )
, ('Logistic RegressionCV' , LogisticRegressionCV(cv = 3, **rs))
, ('MLP' , MLPClassifier(max_iter = 500, **rs) )
, ('Multinomial' , MultinomialNB() )
, ('Naive Bayes' , BernoulliNB() )
, ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
, ('QDA' , QuadraticDiscriminantAnalysis() )
, ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000 ) )
, ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
, n_estimators = 1000
, bootstrap = True
, oob_score = True
, **njobs
, **rs
, max_features = 'auto') )
, ('Ridge Classifier' , RidgeClassifier(**rs) )
, ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
, ('SVC' , SVC(**rs) )
, ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
, ('XGBoost' , XGBClassifier(**rs, verbosity = 0, use_label_encoder =False) )
]
mm_skf_scoresD = {}
@ -440,10 +440,11 @@ def ProcessMultModelsCl(inputD = {}):
, '\nCV df:', len(scoresDF_CV.columns)
, '\nBT_df:', len(scoresDF_BT.columns)
, '\nmetaDF:', len(metaDF.columns))
if len(scoresDF_CV.columns) == len(scoresDF_BT.columns):
print('\nFirst proceeding to rowbind CV and BT dfs:')
expected_ncols_out = len(scoresDF_BT.columns) + len(metaDF.columns)
print('\nFinal output should have:',expected_ncols_out, 'columns' )
print('\nFinal output should have:', expected_ncols_out, 'columns' )
#-----------------
# Combine WF
@ -496,8 +497,7 @@ def ProcessMultModelsCl(inputD = {}):
sys.exit('\nFIRST IF FAILS')
else:
print('\nConcatenting dfs not possible [WF],check numbers ')
#-------------------------------------
# Combine WF+Metadata: Final output
#-------------------------------------
@ -515,11 +515,15 @@ def ProcessMultModelsCl(inputD = {}):
print('\nPASS: Combined df has expected ncols')
else:
sys.exit('\nFAIL: Length mismatch for combined_df')
print('\nAdding column: Model_name')
combDF['Model_name'] = combDF.index
print('\n========================================================='
, '\nSUCCESS: Ran multiple classifiers'
, '\n=======================================================')
#resampling_methods_wf = combined_baseline_wf[['resampling']]
#resampling_methods_wf = resampling_methods_wf.drop_duplicates()
#, '\n', resampling_methods_wf)