Added FS to MultClfs.py and modified the data for the different splits for consistency

This commit is contained in:
Tanushree Tunstall 2022-06-24 20:35:53 +01:00
parent edb7aebd6a
commit e2bc384155
12 changed files with 1585 additions and 994 deletions

View file

@@ -92,7 +92,7 @@ gene = args.gene
#==================
# other vars
#==================
tts_split = '70/30'
tts_split = '70_30'
OutFile_suffix = '7030_FS'
###############################################################################
#==================
@@ -116,7 +116,8 @@ from FS import fsgs
#==================
outdir_ml = outdir + 'ml/tts_7030/fs/'
print('\nOutput directory:', outdir_ml)
OutFileFS = outdir_ml + gene.lower() + '_FS_' + OutFile_suffix + '.json'
#OutFileFS = outdir_ml + gene.lower() + '_FS' + OutFile_suffix + '.json'
OutFileFS = outdir_ml + gene.lower() + '_FS_noOR' + OutFile_suffix + '.json'
############################################################################
@@ -153,17 +154,17 @@ models = [('AdaBoost Classifier' , AdaBoostClassifier(**rs) )
, ('Extra Tree' , ExtraTreeClassifier(**rs) )
, ('Extra Trees' , ExtraTreesClassifier(**rs) )
, ('Gradient Boosting' , GradientBoostingClassifier(**rs) )
#, ('Gaussian NB' , GaussianNB() )
#, ('Gaussian Process' , GaussianProcessClassifier(**rs) )
#, ('K-Nearest Neighbors' , KNeighborsClassifier() )
##, ('Gaussian NB' , GaussianNB() )
##, ('Gaussian Process' , GaussianProcessClassifier(**rs) )
##, ('K-Nearest Neighbors' , KNeighborsClassifier() )
, ('LDA' , LinearDiscriminantAnalysis() )
, ('Logistic Regression' , LogisticRegression(**rs) )
, ('Logistic RegressionCV' , LogisticRegressionCV(cv = 3, **rs))
#, ('MLP' , MLPClassifier(max_iter = 500, **rs) )
#, ('Multinomial' , MultinomialNB() )
#, ('Naive Bayes' , BernoulliNB() )
##, ('MLP' , MLPClassifier(max_iter = 500, **rs) )
##, ('Multinomial' , MultinomialNB() )
##, ('Naive Bayes' , BernoulliNB() )
, ('Passive Aggresive' , PassiveAggressiveClassifier(**rs, **njobs) )
#, ('QDA' , QuadraticDiscriminantAnalysis() )
##, ('QDA' , QuadraticDiscriminantAnalysis() )
, ('Random Forest' , RandomForestClassifier(**rs, n_estimators = 1000 ) )
, ('Random Forest2' , RandomForestClassifier(min_samples_leaf = 5
, n_estimators = 1000
@@ -174,10 +175,10 @@ models = [('AdaBoost Classifier' , AdaBoostClassifier(**rs) )
, max_features = 'auto') )
, ('Ridge Classifier' , RidgeClassifier(**rs) )
, ('Ridge ClassifierCV' , RidgeClassifierCV(cv = 3) )
#, ('SVC' , SVC(**rs) )
##, ('SVC' , SVC(**rs) )
, ('Stochastic GDescent' , SGDClassifier(**rs, **njobs) )
# , ('XGBoost' , XGBClassifier(**rs, **njobs, verbosity = 3
# , use_label_encoder = False) )
## , ('XGBoost' , XGBClassifier(**rs, **njobs, verbosity = 3
## , use_label_encoder = False) )
]
print('\n#####################################################################'