saving work for yesterday where uq runs were repeated
This commit is contained in:
parent
efeaf52cde
commit
4ab99dcbd2
11 changed files with 10993 additions and 10660 deletions
|
@ -147,7 +147,7 @@ def MultModelsCl(input_df, target, skf_cv
|
|||
mlp = MLPClassifier(max_iter = 500, **rs)
|
||||
dt = DecisionTreeClassifier(**rs)
|
||||
ets = ExtraTreesClassifier(**rs)
|
||||
|
||||
et = ExtraTreeClassifier(**rs)
|
||||
rf = RandomForestClassifier(**rs, n_estimators = 1000 )
|
||||
rf2 = RandomForestClassifier(
|
||||
min_samples_leaf = 5
|
||||
|
@ -169,7 +169,6 @@ def MultModelsCl(input_df, target, skf_cv
|
|||
|
||||
abc = AdaBoostClassifier(**rs)
|
||||
bc = BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True)
|
||||
et = ExtraTreeClassifier(**rs)
|
||||
gpc = GaussianProcessClassifier(**rs)
|
||||
gbc = GradientBoostingClassifier(**rs)
|
||||
qda = QuadraticDiscriminantAnalysis()
|
||||
|
@ -181,14 +180,13 @@ def MultModelsCl(input_df, target, skf_cv
|
|||
, ('Gaussian NB' , gnb)
|
||||
, ('Naive Bayes' , nb)
|
||||
, ('K-Nearest Neighbors' , knn)
|
||||
, ('SVM' , svc)
|
||||
, ('SVC' , svc)
|
||||
, ('MLP' , mlp)
|
||||
, ('Decision Tree' , dt)
|
||||
, ('Extra Trees' , ets)
|
||||
, ('Extra Tree' , et)
|
||||
, ('Random Forest' , rf)
|
||||
, ('Random Forest2' , rf2)
|
||||
, ('Naive Bayes' , nb)
|
||||
, ('XGBoost' , xgb)
|
||||
, ('LDA' , lda)
|
||||
, ('Multinomial' , mnb)
|
||||
|
|
|
@ -32,15 +32,36 @@ from ml_data import *
|
|||
# TT run all ML clfs: baseline mode
|
||||
from MultModelsCl import MultModelsCl
|
||||
|
||||
#%%###########################################################################
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
|
||||
print('TESTING cmd:'
|
||||
############################################################################
|
||||
print('\n#####################################################################\n'
|
||||
, '\nRunning ML analysis: UQ [without AA index but with active site annotations]'
|
||||
, '\nGene name:', gene
|
||||
, '\nDrug name:', drug
|
||||
, '\nTotal input features:', X.shape
|
||||
, '\n', Counter(y))
|
||||
, '\nDrug name:', drug)
|
||||
|
||||
#==================
|
||||
# Specify outdir
|
||||
#==================
|
||||
|
||||
outdir_ml = outdir + 'ml/uq_v1/'
|
||||
|
||||
print('\nOutput directory:', outdir_ml)
|
||||
|
||||
#%%###########################################################################
|
||||
print('\nSanity checks:'
|
||||
, '\nTotal input features:', len(X.columns)
|
||||
, '\n'
|
||||
, '\nTraining data size:', X.shape
|
||||
, '\nTest data size:', X_bts.shape
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (training data):', Counter(y)
|
||||
, '\nTarget features ratio (training data:', yc1_ratio
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (test data):', Counter(y_bts)
|
||||
, '\nTarget features ratio (test data):', yc2_ratio
|
||||
|
||||
, '\n\n#####################################################################\n')
|
||||
|
||||
print('\n================================================================\n')
|
||||
|
||||
print('Strucutral features (n):'
|
||||
, len(X_ssFN)
|
||||
|
@ -50,11 +71,11 @@ print('Strucutral features (n):'
|
|||
, '\nOther struc columns:', X_str
|
||||
, '\n================================================================\n')
|
||||
|
||||
print('AAindex features (n):'
|
||||
, len(X_aaindexFN)
|
||||
, '\nThese are:\n'
|
||||
, X_aaindexFN
|
||||
, '\n================================================================\n')
|
||||
# print('AAindex features (n):'
|
||||
# , len(X_aaindexFN)
|
||||
# , '\nThese are:\n'
|
||||
# , X_aaindexFN
|
||||
# , '\n================================================================\n')
|
||||
|
||||
print('Evolutionary features (n):'
|
||||
, len(X_evolFN)
|
||||
|
@ -75,20 +96,15 @@ print('Categorical features (n):'
|
|||
, categorical_FN
|
||||
, '\n================================================================\n')
|
||||
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
print('\nPass: No. of features match')
|
||||
else:
|
||||
sys.exit('\nFail: Count of feature mismatch')
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
################################################################################
|
||||
#==================
|
||||
# Specify outdir
|
||||
#==================
|
||||
|
||||
outdir_ml = outdir + 'ml/v2/'
|
||||
|
||||
################################################################################
|
||||
###############################################################################
|
||||
#==================
|
||||
# Baseline models
|
||||
#==================
|
||||
|
|
|
@ -32,15 +32,36 @@ from ml_data import *
|
|||
# TT run all ML clfs: baseline mode
|
||||
from MultModelsCl import MultModelsCl
|
||||
|
||||
#%%###########################################################################
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
|
||||
print('TESTING cmd:'
|
||||
############################################################################
|
||||
print('\n#####################################################################\n'
|
||||
, '\nRunning ML analysis: UQ [without AA index but with active site annotations]'
|
||||
, '\nGene name:', gene
|
||||
, '\nDrug name:', drug
|
||||
, '\nTotal input features:', X.shape
|
||||
, '\n', Counter(y))
|
||||
, '\nDrug name:', drug)
|
||||
|
||||
#==================
|
||||
# Specify outdir
|
||||
#==================
|
||||
|
||||
outdir_ml = outdir + 'ml/uq_v1/'
|
||||
|
||||
print('\nOutput directory:', outdir_ml)
|
||||
|
||||
#%%###########################################################################
|
||||
print('\nSanity checks:'
|
||||
, '\nTotal input features:', len(X.columns)
|
||||
, '\n'
|
||||
, '\nTraining data size:', X.shape
|
||||
, '\nTest data size:', X_bts.shape
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (training data):', Counter(y)
|
||||
, '\nTarget features ratio (training data:', yc1_ratio
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (test data):', Counter(y_bts)
|
||||
, '\nTarget features ratio (test data):', yc2_ratio
|
||||
|
||||
, '\n\n#####################################################################\n')
|
||||
|
||||
print('\n================================================================\n')
|
||||
|
||||
print('Strucutral features (n):'
|
||||
, len(X_ssFN)
|
||||
|
@ -50,11 +71,11 @@ print('Strucutral features (n):'
|
|||
, '\nOther struc columns:', X_str
|
||||
, '\n================================================================\n')
|
||||
|
||||
print('AAindex features (n):'
|
||||
, len(X_aaindexFN)
|
||||
, '\nThese are:\n'
|
||||
, X_aaindexFN
|
||||
, '\n================================================================\n')
|
||||
# print('AAindex features (n):'
|
||||
# , len(X_aaindexFN)
|
||||
# , '\nThese are:\n'
|
||||
# , X_aaindexFN
|
||||
# , '\n================================================================\n')
|
||||
|
||||
print('Evolutionary features (n):'
|
||||
, len(X_evolFN)
|
||||
|
@ -75,20 +96,15 @@ print('Categorical features (n):'
|
|||
, categorical_FN
|
||||
, '\n================================================================\n')
|
||||
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
print('\nPass: No. of features match')
|
||||
else:
|
||||
sys.exit('\nFail: Count of feature mismatch')
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
################################################################################
|
||||
#==================
|
||||
# Specify outdir
|
||||
#==================
|
||||
|
||||
outdir_ml = outdir + 'ml/v2/'
|
||||
|
||||
################################################################################
|
||||
###############################################################################
|
||||
#==================
|
||||
# Baseline models
|
||||
#==================
|
||||
|
|
|
@ -32,15 +32,36 @@ from ml_data import *
|
|||
# TT run all ML clfs: baseline mode
|
||||
from MultModelsCl import MultModelsCl
|
||||
|
||||
#%%###########################################################################
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
|
||||
print('TESTING cmd:'
|
||||
############################################################################
|
||||
print('\n#####################################################################\n'
|
||||
, '\nRunning ML analysis: UQ [without AA index but with active site annotations]'
|
||||
, '\nGene name:', gene
|
||||
, '\nDrug name:', drug
|
||||
, '\nTotal input features:', X.shape
|
||||
, '\n', Counter(y))
|
||||
, '\nDrug name:', drug)
|
||||
|
||||
#==================
|
||||
# Specify outdir
|
||||
#==================
|
||||
|
||||
outdir_ml = outdir + 'ml/uq_v1/'
|
||||
|
||||
print('\nOutput directory:', outdir_ml)
|
||||
|
||||
#%%###########################################################################
|
||||
print('\nSanity checks:'
|
||||
, '\nTotal input features:', len(X.columns)
|
||||
, '\n'
|
||||
, '\nTraining data size:', X.shape
|
||||
, '\nTest data size:', X_bts.shape
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (training data):', Counter(y)
|
||||
, '\nTarget features ratio (training data:', yc1_ratio
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (test data):', Counter(y_bts)
|
||||
, '\nTarget features ratio (test data):', yc2_ratio
|
||||
|
||||
, '\n\n#####################################################################\n')
|
||||
|
||||
print('\n================================================================\n')
|
||||
|
||||
print('Strucutral features (n):'
|
||||
, len(X_ssFN)
|
||||
|
@ -50,11 +71,11 @@ print('Strucutral features (n):'
|
|||
, '\nOther struc columns:', X_str
|
||||
, '\n================================================================\n')
|
||||
|
||||
print('AAindex features (n):'
|
||||
, len(X_aaindexFN)
|
||||
, '\nThese are:\n'
|
||||
, X_aaindexFN
|
||||
, '\n================================================================\n')
|
||||
# print('AAindex features (n):'
|
||||
# , len(X_aaindexFN)
|
||||
# , '\nThese are:\n'
|
||||
# , X_aaindexFN
|
||||
# , '\n================================================================\n')
|
||||
|
||||
print('Evolutionary features (n):'
|
||||
, len(X_evolFN)
|
||||
|
@ -75,20 +96,15 @@ print('Categorical features (n):'
|
|||
, categorical_FN
|
||||
, '\n================================================================\n')
|
||||
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
print('\nPass: No. of features match')
|
||||
else:
|
||||
sys.exit('\nFail: Count of feature mismatch')
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
################################################################################
|
||||
#==================
|
||||
# Specify outdir
|
||||
#==================
|
||||
|
||||
outdir_ml = outdir + 'ml/v2/'
|
||||
|
||||
################################################################################
|
||||
###############################################################################
|
||||
#==================
|
||||
# Baseline models
|
||||
#==================
|
||||
|
|
|
@ -32,15 +32,36 @@ from ml_data import *
|
|||
# TT run all ML clfs: baseline mode
|
||||
from MultModelsCl import MultModelsCl
|
||||
|
||||
#%%###########################################################################
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
|
||||
print('TESTING cmd:'
|
||||
############################################################################
|
||||
print('\n#####################################################################\n'
|
||||
, '\nRunning ML analysis: UQ [without AA index but with active site annotations]'
|
||||
, '\nGene name:', gene
|
||||
, '\nDrug name:', drug
|
||||
, '\nTotal input features:', X.shape
|
||||
, '\n', Counter(y))
|
||||
, '\nDrug name:', drug)
|
||||
|
||||
#==================
|
||||
# Specify outdir
|
||||
#==================
|
||||
|
||||
outdir_ml = outdir + 'ml/uq_v1/'
|
||||
|
||||
print('\nOutput directory:', outdir_ml)
|
||||
|
||||
#%%###########################################################################
|
||||
print('\nSanity checks:'
|
||||
, '\nTotal input features:', len(X.columns)
|
||||
, '\n'
|
||||
, '\nTraining data size:', X.shape
|
||||
, '\nTest data size:', X_bts.shape
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (training data):', Counter(y)
|
||||
, '\nTarget features ratio (training data:', yc1_ratio
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (test data):', Counter(y_bts)
|
||||
, '\nTarget features ratio (test data):', yc2_ratio
|
||||
|
||||
, '\n\n#####################################################################\n')
|
||||
|
||||
print('\n================================================================\n')
|
||||
|
||||
print('Strucutral features (n):'
|
||||
, len(X_ssFN)
|
||||
|
@ -50,11 +71,11 @@ print('Strucutral features (n):'
|
|||
, '\nOther struc columns:', X_str
|
||||
, '\n================================================================\n')
|
||||
|
||||
print('AAindex features (n):'
|
||||
, len(X_aaindexFN)
|
||||
, '\nThese are:\n'
|
||||
, X_aaindexFN
|
||||
, '\n================================================================\n')
|
||||
# print('AAindex features (n):'
|
||||
# , len(X_aaindexFN)
|
||||
# , '\nThese are:\n'
|
||||
# , X_aaindexFN
|
||||
# , '\n================================================================\n')
|
||||
|
||||
print('Evolutionary features (n):'
|
||||
, len(X_evolFN)
|
||||
|
@ -75,20 +96,15 @@ print('Categorical features (n):'
|
|||
, categorical_FN
|
||||
, '\n================================================================\n')
|
||||
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
print('\nPass: No. of features match')
|
||||
else:
|
||||
sys.exit('\nFail: Count of feature mismatch')
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
################################################################################
|
||||
#==================
|
||||
# Specify outdir
|
||||
#==================
|
||||
|
||||
outdir_ml = outdir + 'ml/v2/'
|
||||
|
||||
################################################################################
|
||||
###############################################################################
|
||||
#==================
|
||||
# Baseline models
|
||||
#==================
|
||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -552,18 +552,18 @@ def setvars(gene,drug):
|
|||
|
||||
#=================================================
|
||||
# Training and BLIND test set: imputed vs actual
|
||||
# BUT in REVERSE i.e
|
||||
# BUT in REVERSE i.e.
|
||||
# dst with actual values : blind test
|
||||
# dst with imputed values : training set
|
||||
#==================================================
|
||||
my_df_ml[drug].isna().sum() #'na' ones are now training set
|
||||
|
||||
training_df = my_df_ml[my_df_ml[drug].isna()]
|
||||
training_df.shape
|
||||
|
||||
blind_test_df = my_df_ml[my_df_ml[drug].notna()]
|
||||
blind_test_df.shape
|
||||
|
||||
training_df = my_df_ml[my_df_ml[drug].isna()]
|
||||
training_df.shape
|
||||
|
||||
# Target 1: dst_mode
|
||||
training_df[drug].value_counts()
|
||||
training_df['dst_mode'].value_counts()
|
||||
|
|
|
@ -32,15 +32,36 @@ from ml_data import *
|
|||
# TT run all ML clfs: baseline mode
|
||||
from MultModelsCl import MultModelsCl
|
||||
|
||||
#%%###########################################################################
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
|
||||
print('TESTING cmd:'
|
||||
############################################################################
|
||||
print('\n#####################################################################\n'
|
||||
, '\nRunning ML analysis: UQ [without AA index but with active site annotations]'
|
||||
, '\nGene name:', gene
|
||||
, '\nDrug name:', drug
|
||||
, '\nTotal input features:', X.shape
|
||||
, '\n', Counter(y))
|
||||
, '\nDrug name:', drug)
|
||||
|
||||
#==================
|
||||
# Specify outdir
|
||||
#==================
|
||||
|
||||
outdir_ml = outdir + 'ml/uq_v1/'
|
||||
|
||||
print('\nOutput directory:', outdir_ml)
|
||||
|
||||
#%%###########################################################################
|
||||
print('\nSanity checks:'
|
||||
, '\nTotal input features:', len(X.columns)
|
||||
, '\n'
|
||||
, '\nTraining data size:', X.shape
|
||||
, '\nTest data size:', X_bts.shape
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (training data):', Counter(y)
|
||||
, '\nTarget features ratio (training data:', yc1_ratio
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (test data):', Counter(y_bts)
|
||||
, '\nTarget features ratio (test data):', yc2_ratio
|
||||
|
||||
, '\n\n#####################################################################\n')
|
||||
|
||||
print('\n================================================================\n')
|
||||
|
||||
print('Strucutral features (n):'
|
||||
, len(X_ssFN)
|
||||
|
@ -50,11 +71,11 @@ print('Strucutral features (n):'
|
|||
, '\nOther struc columns:', X_str
|
||||
, '\n================================================================\n')
|
||||
|
||||
print('AAindex features (n):'
|
||||
, len(X_aaindexFN)
|
||||
, '\nThese are:\n'
|
||||
, X_aaindexFN
|
||||
, '\n================================================================\n')
|
||||
# print('AAindex features (n):'
|
||||
# , len(X_aaindexFN)
|
||||
# , '\nThese are:\n'
|
||||
# , X_aaindexFN
|
||||
# , '\n================================================================\n')
|
||||
|
||||
print('Evolutionary features (n):'
|
||||
, len(X_evolFN)
|
||||
|
@ -75,20 +96,15 @@ print('Categorical features (n):'
|
|||
, categorical_FN
|
||||
, '\n================================================================\n')
|
||||
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
print('\nPass: No. of features match')
|
||||
else:
|
||||
sys.exit('\nFail: Count of feature mismatch')
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
################################################################################
|
||||
#==================
|
||||
# Specify outdir
|
||||
#==================
|
||||
|
||||
outdir_ml = outdir + 'ml/v2/'
|
||||
|
||||
################################################################################
|
||||
###############################################################################
|
||||
#==================
|
||||
# Baseline models
|
||||
#==================
|
||||
|
|
|
@ -32,15 +32,36 @@ from ml_data import *
|
|||
# TT run all ML clfs: baseline mode
|
||||
from MultModelsCl import MultModelsCl
|
||||
|
||||
#%%###########################################################################
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
|
||||
print('TESTING cmd:'
|
||||
############################################################################
|
||||
print('\n#####################################################################\n'
|
||||
, '\nRunning ML analysis: UQ [without AA index but with active site annotations]'
|
||||
, '\nGene name:', gene
|
||||
, '\nDrug name:', drug
|
||||
, '\nTotal input features:', X.shape
|
||||
, '\n', Counter(y))
|
||||
, '\nDrug name:', drug)
|
||||
|
||||
#==================
|
||||
# Specify outdir
|
||||
#==================
|
||||
|
||||
outdir_ml = outdir + 'ml/uq_v1/'
|
||||
|
||||
print('\nOutput directory:', outdir_ml)
|
||||
|
||||
#%%###########################################################################
|
||||
print('\nSanity checks:'
|
||||
, '\nTotal input features:', len(X.columns)
|
||||
, '\n'
|
||||
, '\nTraining data size:', X.shape
|
||||
, '\nTest data size:', X_bts.shape
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (training data):', Counter(y)
|
||||
, '\nTarget features ratio (training data:', yc1_ratio
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (test data):', Counter(y_bts)
|
||||
, '\nTarget features ratio (test data):', yc2_ratio
|
||||
|
||||
, '\n\n#####################################################################\n')
|
||||
|
||||
print('\n================================================================\n')
|
||||
|
||||
print('Strucutral features (n):'
|
||||
, len(X_ssFN)
|
||||
|
@ -50,11 +71,11 @@ print('Strucutral features (n):'
|
|||
, '\nOther struc columns:', X_str
|
||||
, '\n================================================================\n')
|
||||
|
||||
print('AAindex features (n):'
|
||||
, len(X_aaindexFN)
|
||||
, '\nThese are:\n'
|
||||
, X_aaindexFN
|
||||
, '\n================================================================\n')
|
||||
# print('AAindex features (n):'
|
||||
# , len(X_aaindexFN)
|
||||
# , '\nThese are:\n'
|
||||
# , X_aaindexFN
|
||||
# , '\n================================================================\n')
|
||||
|
||||
print('Evolutionary features (n):'
|
||||
, len(X_evolFN)
|
||||
|
@ -75,20 +96,15 @@ print('Categorical features (n):'
|
|||
, categorical_FN
|
||||
, '\n================================================================\n')
|
||||
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
print('\nPass: No. of features match')
|
||||
else:
|
||||
sys.exit('\nFail: Count of feature mismatch')
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
################################################################################
|
||||
#==================
|
||||
# Specify outdir
|
||||
#==================
|
||||
|
||||
outdir_ml = outdir + 'ml/v2/'
|
||||
|
||||
################################################################################
|
||||
###############################################################################
|
||||
#==================
|
||||
# Baseline models
|
||||
#==================
|
||||
|
|
|
@ -1,49 +1,51 @@
|
|||
==================================
|
||||
# BTS data: UQ
|
||||
# Features NOT including AA index
|
||||
# Date: 30/05/2022, but reran with the active_site indication column that was added later on
|
||||
# TODO: put in folder called v1, and UQ in folder called UQ_v0 (will need to rename!)
|
||||
# UQ run: same as ORIGINAL except
|
||||
# Features NOT including AA index, but includes active site annotations
|
||||
# Date: 30/05/2022, but reran from my desktop on 19/05/2022 to capture the log and include the active_site indication column
|
||||
# captures error: 2>&1
|
||||
=================================
|
||||
./pnca_config.py
|
||||
./embb_config.py
|
||||
./gid_config.py
|
||||
./katg_config.py
|
||||
./rpob_config.py
|
||||
./pnca_config.py 2>&1 | tee log_pnca_config.txt
|
||||
./embb_config.py 2>&1 | tee log_embb_config.txt
|
||||
./gid_config.py 2>&1 | tee log_gid_config.txt
|
||||
./katg_config.py 2>&1 | tee log_katg_config.txt
|
||||
./rpob_config.py 2>&1 | tee log_rpob_config.txt
|
||||
./alr_config.py 2>&1 | tee log_alr_config.txt
|
||||
|
||||
# ERROR, as expected, too few values!
|
||||
|
||||
## ./alr_config.py = NA
|
||||
# ^^^^^^^^ FIXME!
|
||||
########################################################################
|
||||
|
||||
==================================
|
||||
# BTS data: imputed values
|
||||
# All features including AA index
|
||||
# Date: 16/05/2022
|
||||
=================================
|
||||
# Split: ORIGINAL
|
||||
# actual values: training set
|
||||
# imputed values: blind set
|
||||
# All features including AA index
|
||||
# Date: 18/05/2022 # reran with minor formatting and log capture
|
||||
# captures error: 2>&1
|
||||
=================================
|
||||
./pnca_orig.py 2>&1 | tee log_pnca_orig.txt
|
||||
./embb_orig.py 2>&1 | tee log_embb_orig.txt
|
||||
./gid_orig.py 2>&1 | tee log_gid_orig.txt
|
||||
./katg_orig.py 2>&1 | tee log_katg_orig.txt
|
||||
./rpob_orig.py 2>&1 | tee log_rpob_orig.txt
|
||||
./alr_orig.py 2>&1 | tee log_alr_orig.txt # ERROR, as expected, too few values!
|
||||
|
||||
./pnca_config.py
|
||||
./embb_config.py
|
||||
./gid_config.py
|
||||
./katg_config.py
|
||||
./rpob_config.py
|
||||
|
||||
##./alr_config.py = NA
|
||||
########################################################################
|
||||
|
||||
=================================
|
||||
# Split: 70/30
|
||||
# All features including AA index
|
||||
# Date: 17/05/2022 and 18/05/2022
|
||||
# Date: 17/05/2022 and 18/05/2022, reran with minor formatting and log capture
|
||||
# captures error: 2>&1
|
||||
=================================
|
||||
|
||||
./pnca_7030.py
|
||||
./embb_7030.py
|
||||
# ./gid_7030.py: problems, CT values are non existing except for rouC
|
||||
./pnca_7030.py 2>&1 | tee log_pnca_7030.txt
|
||||
./embb_7030.py 2>&1 | tee log_embb_7030.txt
|
||||
./gid_7030.py 2>&1 | tee log_gid_7030.txt
|
||||
./katg_7030.py
|
||||
./rpob_7030.py
|
||||
./katg_7030.py 2>&1 | tee log_katg_7030.txt
|
||||
./rpob_7030.py 2>&1 | tee log_rpob_7030.txt
|
||||
./alr_7030.py 2>&1 | tee log_alr_7030.txt # ERROR, as expected, too few values!
|
||||
|
||||
##./alr_7030.py = NA
|
||||
problems, CT values are non existing except for rouC
|
||||
########################################################################
|
||||
|
||||
=================================
|
||||
|
@ -57,7 +59,7 @@
|
|||
./gid_8020.py 2>&1 | tee log_gid_8020.txt
|
||||
./katg_8020.py 2>&1 | tee log_katg_8020.txt
|
||||
./rpob_8020.py 2>&1 | tee log_rpob_8020.txt
|
||||
./alr_8020.py 2>&1 | tee log_alr_8020.txt
|
||||
./alr_8020.py 2>&1 | tee log_alr_8020.txt # ERROR, as expected, too few values!
|
||||
|
||||
########################################################################
|
||||
|
||||
|
@ -72,11 +74,11 @@
|
|||
./gid_sl.py 2>&1 | tee log_gid_sl.txt
|
||||
./katg_sl.py 2>&1 | tee log_katg_sl.txt
|
||||
./rpob_sl.py 2>&1 | tee log_rpob_sl.txt
|
||||
./alr_sl.py 2>&1 | tee log_alr_sl.txt
|
||||
./alr_sl.py 2>&1 | tee log_alr_sl.txt # ERROR, as expected, too few values!
|
||||
|
||||
########################################################################
|
||||
=================================
|
||||
# Split: Reverse training
|
||||
# Split: REVERSE training
|
||||
# imputed values: training set
|
||||
# actual values: blind set
|
||||
# All features including AA index
|
||||
|
@ -88,7 +90,7 @@
|
|||
./gid_rt.py 2>&1 | tee log_gid_rt.txt
|
||||
./katg_rt.py 2>&1 | tee log_katg_rt.txt
|
||||
./rpob_rt.py 2>&1 | tee log_rpob_rt.txt
|
||||
./alr_rt.py 2>&1 | tee log_alr_rt.txt
|
||||
./alr_rt.py 2>&1 | tee log_alr_rt.txt # ERROR, as expected, too few values!
|
||||
|
||||
########################################################################
|
||||
# COMPLETE Data: actual + na, i.e. imputed
|
||||
|
@ -105,7 +107,7 @@
|
|||
./gid_cd_7030.py 2>&1 | tee log_gid_cd_7030.txt
|
||||
./katg_cd_7030.py 2>&1 | tee log_katg_cd_7030.txt
|
||||
./rpob_cd_7030.py 2>&1 | tee log_rpob_cd_7030.txt
|
||||
./alr_cd_7030.py 2>&1 | tee log_alr_cd_7030.txt
|
||||
./alr_cd_7030.py 2>&1 | tee log_alr_cd_7030.txt # ERROR, as expected, too few values!
|
||||
|
||||
########################################################################
|
||||
=================================
|
||||
|
@ -119,5 +121,18 @@
|
|||
./gid_cd_8020.py 2>&1 | tee log_gid_cd_8020.txt
|
||||
./katg_cd_8020.py 2>&1 | tee log_katg_cd_8020.txt
|
||||
./rpob_cd_8020.py 2>&1 | tee log_rpob_cd_8020.txt
|
||||
./alr_cd_8020.py 2>&1 | tee log_alr_cd_8020.txt
|
||||
./alr_cd_8020.py 2>&1 | tee log_alr_cd_8020.txt # ERROR, as expected, too few values!
|
||||
|
||||
=================================
|
||||
# Split: scaling law [COMPLETE DATA]
|
||||
# All features including AA index
|
||||
# Date: 18/05/2022
|
||||
# captures error: 2>&1
|
||||
=================================
|
||||
./pnca_cd_sl.py 2>&1 | tee log_pnca_cd_sl.txt
|
||||
./embb_cd_sl.py 2>&1 | tee log_embb_cd_sl.txt
|
||||
./gid_cd_sl.py 2>&1 | tee log_gid_cd_sl.txt
|
||||
./katg_cd_sl.py 2>&1 | tee log_katg_cd_sl.txt
|
||||
./rpob_cd_sl.py 2>&1 | tee log_rpob_cd_sl.txt
|
||||
./alr_cd_sl.py 2>&1 | tee log_alr_cd_sl.txt # ERROR, as expected, too few values!
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue