Saving yesterday's work, in which the UQ runs were repeated

This commit is contained in:
Tanushree Tunstall 2022-06-20 14:57:11 +01:00
parent efeaf52cde
commit 4ab99dcbd2
11 changed files with 10993 additions and 10660 deletions

View file

@ -147,7 +147,7 @@ def MultModelsCl(input_df, target, skf_cv
mlp = MLPClassifier(max_iter = 500, **rs) mlp = MLPClassifier(max_iter = 500, **rs)
dt = DecisionTreeClassifier(**rs) dt = DecisionTreeClassifier(**rs)
ets = ExtraTreesClassifier(**rs) ets = ExtraTreesClassifier(**rs)
et = ExtraTreeClassifier(**rs)
rf = RandomForestClassifier(**rs, n_estimators = 1000 ) rf = RandomForestClassifier(**rs, n_estimators = 1000 )
rf2 = RandomForestClassifier( rf2 = RandomForestClassifier(
min_samples_leaf = 5 min_samples_leaf = 5
@ -169,7 +169,6 @@ def MultModelsCl(input_df, target, skf_cv
abc = AdaBoostClassifier(**rs) abc = AdaBoostClassifier(**rs)
bc = BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True) bc = BaggingClassifier(**rs, **njobs, bootstrap = True, oob_score = True)
et = ExtraTreeClassifier(**rs)
gpc = GaussianProcessClassifier(**rs) gpc = GaussianProcessClassifier(**rs)
gbc = GradientBoostingClassifier(**rs) gbc = GradientBoostingClassifier(**rs)
qda = QuadraticDiscriminantAnalysis() qda = QuadraticDiscriminantAnalysis()
@ -181,14 +180,13 @@ def MultModelsCl(input_df, target, skf_cv
, ('Gaussian NB' , gnb) , ('Gaussian NB' , gnb)
, ('Naive Bayes' , nb) , ('Naive Bayes' , nb)
, ('K-Nearest Neighbors' , knn) , ('K-Nearest Neighbors' , knn)
, ('SVM' , svc) , ('SVC' , svc)
, ('MLP' , mlp) , ('MLP' , mlp)
, ('Decision Tree' , dt) , ('Decision Tree' , dt)
, ('Extra Trees' , ets) , ('Extra Trees' , ets)
, ('Extra Tree' , et) , ('Extra Tree' , et)
, ('Random Forest' , rf) , ('Random Forest' , rf)
, ('Random Forest2' , rf2) , ('Random Forest2' , rf2)
, ('Naive Bayes' , nb)
, ('XGBoost' , xgb) , ('XGBoost' , xgb)
, ('LDA' , lda) , ('LDA' , lda)
, ('Multinomial' , mnb) , ('Multinomial' , mnb)

View file

@ -32,15 +32,36 @@ from ml_data import *
# TT run all ML clfs: baseline mode # TT run all ML clfs: baseline mode
from MultModelsCl import MultModelsCl from MultModelsCl import MultModelsCl
#%%########################################################################### ############################################################################
print('\n#####################################################################\n'
print('\n#####################################################################\n') , '\nRunning ML analysis: UQ [without AA index but with active site annotations]'
print('TESTING cmd:'
, '\nGene name:', gene , '\nGene name:', gene
, '\nDrug name:', drug , '\nDrug name:', drug)
, '\nTotal input features:', X.shape
, '\n', Counter(y)) #==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/uq_v1/'
print('\nOutput directory:', outdir_ml)
#%%###########################################################################
print('\nSanity checks:'
, '\nTotal input features:', len(X.columns)
, '\n'
, '\nTraining data size:', X.shape
, '\nTest data size:', X_bts.shape
, '\n'
, '\nTarget feature numbers (training data):', Counter(y)
, '\nTarget features ratio (training data:', yc1_ratio
, '\n'
, '\nTarget feature numbers (test data):', Counter(y_bts)
, '\nTarget features ratio (test data):', yc2_ratio
, '\n\n#####################################################################\n')
print('\n================================================================\n')
print('Strucutral features (n):' print('Strucutral features (n):'
, len(X_ssFN) , len(X_ssFN)
@ -50,11 +71,11 @@ print('Strucutral features (n):'
, '\nOther struc columns:', X_str , '\nOther struc columns:', X_str
, '\n================================================================\n') , '\n================================================================\n')
print('AAindex features (n):' # print('AAindex features (n):'
, len(X_aaindexFN) # , len(X_aaindexFN)
, '\nThese are:\n' # , '\nThese are:\n'
, X_aaindexFN # , X_aaindexFN
, '\n================================================================\n') # , '\n================================================================\n')
print('Evolutionary features (n):' print('Evolutionary features (n):'
, len(X_evolFN) , len(X_evolFN)
@ -75,20 +96,15 @@ print('Categorical features (n):'
, categorical_FN , categorical_FN
, '\n================================================================\n') , '\n================================================================\n')
if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): #if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
print('\nPass: No. of features match') print('\nPass: No. of features match')
else: else:
sys.exit('\nFail: Count of feature mismatch') sys.exit('\nFail: Count of feature mismatch')
print('\n#####################################################################\n') print('\n#####################################################################\n')
################################################################################
#==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/v2/' ###############################################################################
################################################################################
#================== #==================
# Baseline models # Baseline models
#================== #==================

View file

@ -32,15 +32,36 @@ from ml_data import *
# TT run all ML clfs: baseline mode # TT run all ML clfs: baseline mode
from MultModelsCl import MultModelsCl from MultModelsCl import MultModelsCl
#%%########################################################################### ############################################################################
print('\n#####################################################################\n'
print('\n#####################################################################\n') , '\nRunning ML analysis: UQ [without AA index but with active site annotations]'
print('TESTING cmd:'
, '\nGene name:', gene , '\nGene name:', gene
, '\nDrug name:', drug , '\nDrug name:', drug)
, '\nTotal input features:', X.shape
, '\n', Counter(y)) #==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/uq_v1/'
print('\nOutput directory:', outdir_ml)
#%%###########################################################################
print('\nSanity checks:'
, '\nTotal input features:', len(X.columns)
, '\n'
, '\nTraining data size:', X.shape
, '\nTest data size:', X_bts.shape
, '\n'
, '\nTarget feature numbers (training data):', Counter(y)
, '\nTarget features ratio (training data:', yc1_ratio
, '\n'
, '\nTarget feature numbers (test data):', Counter(y_bts)
, '\nTarget features ratio (test data):', yc2_ratio
, '\n\n#####################################################################\n')
print('\n================================================================\n')
print('Strucutral features (n):' print('Strucutral features (n):'
, len(X_ssFN) , len(X_ssFN)
@ -50,11 +71,11 @@ print('Strucutral features (n):'
, '\nOther struc columns:', X_str , '\nOther struc columns:', X_str
, '\n================================================================\n') , '\n================================================================\n')
print('AAindex features (n):' # print('AAindex features (n):'
, len(X_aaindexFN) # , len(X_aaindexFN)
, '\nThese are:\n' # , '\nThese are:\n'
, X_aaindexFN # , X_aaindexFN
, '\n================================================================\n') # , '\n================================================================\n')
print('Evolutionary features (n):' print('Evolutionary features (n):'
, len(X_evolFN) , len(X_evolFN)
@ -75,20 +96,15 @@ print('Categorical features (n):'
, categorical_FN , categorical_FN
, '\n================================================================\n') , '\n================================================================\n')
if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): #if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
print('\nPass: No. of features match') print('\nPass: No. of features match')
else: else:
sys.exit('\nFail: Count of feature mismatch') sys.exit('\nFail: Count of feature mismatch')
print('\n#####################################################################\n') print('\n#####################################################################\n')
################################################################################
#==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/v2/' ###############################################################################
################################################################################
#================== #==================
# Baseline models # Baseline models
#================== #==================

View file

@ -32,15 +32,36 @@ from ml_data import *
# TT run all ML clfs: baseline mode # TT run all ML clfs: baseline mode
from MultModelsCl import MultModelsCl from MultModelsCl import MultModelsCl
#%%########################################################################### ############################################################################
print('\n#####################################################################\n'
print('\n#####################################################################\n') , '\nRunning ML analysis: UQ [without AA index but with active site annotations]'
print('TESTING cmd:'
, '\nGene name:', gene , '\nGene name:', gene
, '\nDrug name:', drug , '\nDrug name:', drug)
, '\nTotal input features:', X.shape
, '\n', Counter(y)) #==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/uq_v1/'
print('\nOutput directory:', outdir_ml)
#%%###########################################################################
print('\nSanity checks:'
, '\nTotal input features:', len(X.columns)
, '\n'
, '\nTraining data size:', X.shape
, '\nTest data size:', X_bts.shape
, '\n'
, '\nTarget feature numbers (training data):', Counter(y)
, '\nTarget features ratio (training data:', yc1_ratio
, '\n'
, '\nTarget feature numbers (test data):', Counter(y_bts)
, '\nTarget features ratio (test data):', yc2_ratio
, '\n\n#####################################################################\n')
print('\n================================================================\n')
print('Strucutral features (n):' print('Strucutral features (n):'
, len(X_ssFN) , len(X_ssFN)
@ -50,11 +71,11 @@ print('Strucutral features (n):'
, '\nOther struc columns:', X_str , '\nOther struc columns:', X_str
, '\n================================================================\n') , '\n================================================================\n')
print('AAindex features (n):' # print('AAindex features (n):'
, len(X_aaindexFN) # , len(X_aaindexFN)
, '\nThese are:\n' # , '\nThese are:\n'
, X_aaindexFN # , X_aaindexFN
, '\n================================================================\n') # , '\n================================================================\n')
print('Evolutionary features (n):' print('Evolutionary features (n):'
, len(X_evolFN) , len(X_evolFN)
@ -75,20 +96,15 @@ print('Categorical features (n):'
, categorical_FN , categorical_FN
, '\n================================================================\n') , '\n================================================================\n')
if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): #if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
print('\nPass: No. of features match') print('\nPass: No. of features match')
else: else:
sys.exit('\nFail: Count of feature mismatch') sys.exit('\nFail: Count of feature mismatch')
print('\n#####################################################################\n') print('\n#####################################################################\n')
################################################################################
#==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/v2/' ###############################################################################
################################################################################
#================== #==================
# Baseline models # Baseline models
#================== #==================

View file

@ -32,15 +32,36 @@ from ml_data import *
# TT run all ML clfs: baseline mode # TT run all ML clfs: baseline mode
from MultModelsCl import MultModelsCl from MultModelsCl import MultModelsCl
#%%########################################################################### ############################################################################
print('\n#####################################################################\n'
print('\n#####################################################################\n') , '\nRunning ML analysis: UQ [without AA index but with active site annotations]'
print('TESTING cmd:'
, '\nGene name:', gene , '\nGene name:', gene
, '\nDrug name:', drug , '\nDrug name:', drug)
, '\nTotal input features:', X.shape
, '\n', Counter(y)) #==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/uq_v1/'
print('\nOutput directory:', outdir_ml)
#%%###########################################################################
print('\nSanity checks:'
, '\nTotal input features:', len(X.columns)
, '\n'
, '\nTraining data size:', X.shape
, '\nTest data size:', X_bts.shape
, '\n'
, '\nTarget feature numbers (training data):', Counter(y)
, '\nTarget features ratio (training data:', yc1_ratio
, '\n'
, '\nTarget feature numbers (test data):', Counter(y_bts)
, '\nTarget features ratio (test data):', yc2_ratio
, '\n\n#####################################################################\n')
print('\n================================================================\n')
print('Strucutral features (n):' print('Strucutral features (n):'
, len(X_ssFN) , len(X_ssFN)
@ -50,11 +71,11 @@ print('Strucutral features (n):'
, '\nOther struc columns:', X_str , '\nOther struc columns:', X_str
, '\n================================================================\n') , '\n================================================================\n')
print('AAindex features (n):' # print('AAindex features (n):'
, len(X_aaindexFN) # , len(X_aaindexFN)
, '\nThese are:\n' # , '\nThese are:\n'
, X_aaindexFN # , X_aaindexFN
, '\n================================================================\n') # , '\n================================================================\n')
print('Evolutionary features (n):' print('Evolutionary features (n):'
, len(X_evolFN) , len(X_evolFN)
@ -75,20 +96,15 @@ print('Categorical features (n):'
, categorical_FN , categorical_FN
, '\n================================================================\n') , '\n================================================================\n')
if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): #if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
print('\nPass: No. of features match') print('\nPass: No. of features match')
else: else:
sys.exit('\nFail: Count of feature mismatch') sys.exit('\nFail: Count of feature mismatch')
print('\n#####################################################################\n') print('\n#####################################################################\n')
################################################################################
#==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/v2/' ###############################################################################
################################################################################
#================== #==================
# Baseline models # Baseline models
#================== #==================

File diff suppressed because it is too large Load diff

View file

@ -5,7 +5,7 @@ Created on Sun Mar 6 13:41:54 2022
@author: tanu @author: tanu
""" """
def setvars(gene,drug): #def setvars(gene,drug):
#https://stackoverflow.com/questions/51695322/compare-multiple-algorithms-with-sklearn-pipeline #https://stackoverflow.com/questions/51695322/compare-multiple-algorithms-with-sklearn-pipeline
import os, sys import os, sys
import pandas as pd import pandas as pd
@ -494,12 +494,12 @@ def setvars(gene,drug):
X_genomicFN = X_genomic_mafor + X_genomic_linegae X_genomicFN = X_genomic_mafor + X_genomic_linegae
X_aaindexFN = list(aa_df_cols) #X_aaindexFN = list(aa_df_cols)
print('\nTotal no. of features for aaindex:', len(X_aaindexFN)) #print('\nTotal no. of features for aaindex:', len(X_aaindexFN))
# numerical feature names # numerical feature names [NO aa_index]
numerical_FN = X_ssFN + X_evolFN + X_genomicFN + X_aaindexFN numerical_FN = X_ssFN + X_evolFN + X_genomicFN
# categorical feature names # categorical feature names
@ -551,7 +551,8 @@ def setvars(gene,drug):
mask_check.to_csv(outdir + 'ml/' + gene.lower() + '_mask_check.csv') mask_check.to_csv(outdir + 'ml/' + gene.lower() + '_mask_check.csv')
#=================================================== #===================================================
# Training and BLIND test set: actual vs imputed # Training and BLIND test set [UQ]: actual vs imputed
# No aa index but active_site included
# dst with actual values : training set # dst with actual values : training set
# dst with imputed values : blind test # dst with imputed values : blind test
#================================================== #==================================================
@ -625,6 +626,25 @@ def setvars(gene,drug):
print('Original Data\n', Counter(y) print('Original Data\n', Counter(y)
, 'Data dim:', X.shape) , 'Data dim:', X.shape)
yc1 = Counter(y)
yc1_ratio = yc1[0]/yc1[1]
yc2 = Counter(y_bts)
yc2_ratio = yc2[0]/yc2[1]
print('\n-------------------------------------------------------------'
, '\nSuccessfully split data: UQ [no aa_index but active site included] training'
, '\nactual values: training set'
, '\nimputed values: blind test set'
, '\nTrain data size:', X.shape
, '\nTest data size:', X_bts.shape
, '\ny_train numbers:', yc1
, '\ny_train ratio:',yc1_ratio
, '\n'
, '\ny_test_numbers:', yc2
, '\ny_test ratio:', yc2_ratio
, '\n-------------------------------------------------------------'
)
########################################################################### ###########################################################################
#%% #%%
########################################################################### ###########################################################################

View file

@ -552,18 +552,18 @@ def setvars(gene,drug):
#================================================= #=================================================
# Training and BLIND test set: imputed vs actual # Training and BLIND test set: imputed vs actual
# BUT in REVERSE i.e # BUT in REVERSE i.e.
# dst with actual values : blind test # dst with actual values : blind test
# dst with imputed values : training set # dst with imputed values : training set
#================================================== #==================================================
my_df_ml[drug].isna().sum() #'na' ones are now training set my_df_ml[drug].isna().sum() #'na' ones are now training set
training_df = my_df_ml[my_df_ml[drug].isna()]
training_df.shape
blind_test_df = my_df_ml[my_df_ml[drug].notna()] blind_test_df = my_df_ml[my_df_ml[drug].notna()]
blind_test_df.shape blind_test_df.shape
training_df = my_df_ml[my_df_ml[drug].isna()]
training_df.shape
# Target 1: dst_mode # Target 1: dst_mode
training_df[drug].value_counts() training_df[drug].value_counts()
training_df['dst_mode'].value_counts() training_df['dst_mode'].value_counts()

View file

@ -32,15 +32,36 @@ from ml_data import *
# TT run all ML clfs: baseline mode # TT run all ML clfs: baseline mode
from MultModelsCl import MultModelsCl from MultModelsCl import MultModelsCl
#%%########################################################################### ############################################################################
print('\n#####################################################################\n'
print('\n#####################################################################\n') , '\nRunning ML analysis: UQ [without AA index but with active site annotations]'
print('TESTING cmd:'
, '\nGene name:', gene , '\nGene name:', gene
, '\nDrug name:', drug , '\nDrug name:', drug)
, '\nTotal input features:', X.shape
, '\n', Counter(y)) #==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/uq_v1/'
print('\nOutput directory:', outdir_ml)
#%%###########################################################################
print('\nSanity checks:'
, '\nTotal input features:', len(X.columns)
, '\n'
, '\nTraining data size:', X.shape
, '\nTest data size:', X_bts.shape
, '\n'
, '\nTarget feature numbers (training data):', Counter(y)
, '\nTarget features ratio (training data:', yc1_ratio
, '\n'
, '\nTarget feature numbers (test data):', Counter(y_bts)
, '\nTarget features ratio (test data):', yc2_ratio
, '\n\n#####################################################################\n')
print('\n================================================================\n')
print('Strucutral features (n):' print('Strucutral features (n):'
, len(X_ssFN) , len(X_ssFN)
@ -50,11 +71,11 @@ print('Strucutral features (n):'
, '\nOther struc columns:', X_str , '\nOther struc columns:', X_str
, '\n================================================================\n') , '\n================================================================\n')
print('AAindex features (n):' # print('AAindex features (n):'
, len(X_aaindexFN) # , len(X_aaindexFN)
, '\nThese are:\n' # , '\nThese are:\n'
, X_aaindexFN # , X_aaindexFN
, '\n================================================================\n') # , '\n================================================================\n')
print('Evolutionary features (n):' print('Evolutionary features (n):'
, len(X_evolFN) , len(X_evolFN)
@ -75,20 +96,15 @@ print('Categorical features (n):'
, categorical_FN , categorical_FN
, '\n================================================================\n') , '\n================================================================\n')
if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): #if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
print('\nPass: No. of features match') print('\nPass: No. of features match')
else: else:
sys.exit('\nFail: Count of feature mismatch') sys.exit('\nFail: Count of feature mismatch')
print('\n#####################################################################\n') print('\n#####################################################################\n')
################################################################################
#==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/v2/' ###############################################################################
################################################################################
#================== #==================
# Baseline models # Baseline models
#================== #==================

View file

@ -32,15 +32,36 @@ from ml_data import *
# TT run all ML clfs: baseline mode # TT run all ML clfs: baseline mode
from MultModelsCl import MultModelsCl from MultModelsCl import MultModelsCl
#%%########################################################################### ############################################################################
print('\n#####################################################################\n'
print('\n#####################################################################\n') , '\nRunning ML analysis: UQ [without AA index but with active site annotations]'
print('TESTING cmd:'
, '\nGene name:', gene , '\nGene name:', gene
, '\nDrug name:', drug , '\nDrug name:', drug)
, '\nTotal input features:', X.shape
, '\n', Counter(y)) #==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/uq_v1/'
print('\nOutput directory:', outdir_ml)
#%%###########################################################################
print('\nSanity checks:'
, '\nTotal input features:', len(X.columns)
, '\n'
, '\nTraining data size:', X.shape
, '\nTest data size:', X_bts.shape
, '\n'
, '\nTarget feature numbers (training data):', Counter(y)
, '\nTarget features ratio (training data:', yc1_ratio
, '\n'
, '\nTarget feature numbers (test data):', Counter(y_bts)
, '\nTarget features ratio (test data):', yc2_ratio
, '\n\n#####################################################################\n')
print('\n================================================================\n')
print('Strucutral features (n):' print('Strucutral features (n):'
, len(X_ssFN) , len(X_ssFN)
@ -50,11 +71,11 @@ print('Strucutral features (n):'
, '\nOther struc columns:', X_str , '\nOther struc columns:', X_str
, '\n================================================================\n') , '\n================================================================\n')
print('AAindex features (n):' # print('AAindex features (n):'
, len(X_aaindexFN) # , len(X_aaindexFN)
, '\nThese are:\n' # , '\nThese are:\n'
, X_aaindexFN # , X_aaindexFN
, '\n================================================================\n') # , '\n================================================================\n')
print('Evolutionary features (n):' print('Evolutionary features (n):'
, len(X_evolFN) , len(X_evolFN)
@ -75,20 +96,15 @@ print('Categorical features (n):'
, categorical_FN , categorical_FN
, '\n================================================================\n') , '\n================================================================\n')
if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ): #if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
print('\nPass: No. of features match') print('\nPass: No. of features match')
else: else:
sys.exit('\nFail: Count of feature mismatch') sys.exit('\nFail: Count of feature mismatch')
print('\n#####################################################################\n') print('\n#####################################################################\n')
################################################################################
#==================
# Specify outdir
#==================
outdir_ml = outdir + 'ml/v2/' ###############################################################################
################################################################################
#================== #==================
# Baseline models # Baseline models
#================== #==================

View file

@ -1,49 +1,51 @@
================================== ==================================
# BTS data: UQ # UQ run: same as ORIGINAL except
# Features NOT including AA index # Features NOT including AA index, but includes active site annotations
# Date: 30/05/2022, but reran with active_site indication column that was added later on # Date: 30/05/2022, but reran from my desktop on 19/05/2022 to capture log and include active_site indication column
# TODO: put in folder called v1, and UQ in folder called UQ_v0 (will need to rename!) # captures error: 2>&1
================================= =================================
./pnca_config.py ./pnca_config.py 2>&1 | tee log_pnca_config.txt
./embb_config.py ./embb_config.py 2>&1 | tee log_embb_config.txt
./gid_config.py ./gid_config.py 2>&1 | tee log_gid_config.txt
./katg_config.py ./katg_config.py 2>&1 | tee log_katg_config.txt
./rpob_config.py ./rpob_config.py 2>&1 | tee log_rpob_config.txt
./alr_config.py 2>&1 | tee log_alr_config.txt
# ERROR, as expected, too few values!
## ./alr_config.py = NA
# ^^^^^^^^ FIXME!
######################################################################## ########################################################################
==================================
# BTS data: imputed values
# All features including AA index
# Date: 16/05/2022
================================= =================================
# Split: ORIGINAL
# actual values: training set
# imputed values: blind set
# All features including AA index
# Date: 18/05/2022 # reran with minor formatting and log capture
# captures error: 2>&1
=================================
./pnca_orig.py 2>&1 | tee log_pnca_orig.txt
./embb_orig.py 2>&1 | tee log_embb_orig.txt
./gid_orig.py 2>&1 | tee log_gid_orig.txt
./katg_orig.py 2>&1 | tee log_katg_orig.txt
./rpob_orig.py 2>&1 | tee log_rpob_orig.txt
./alr_orig.py 2>&1 | tee log_alr_orig.txt # ERROR, as expected, too few values!
./pnca_config.py
./embb_config.py
./gid_config.py
./katg_config.py
./rpob_config.py
##./alr_config.py = NA
######################################################################## ########################################################################
================================= =================================
# Split: 70/30 # Split: 70/30
# All features including AA index # All features including AA index
# Date: 17/05/2022 and 18/05/2022 # Date: 17/05/2022 and 18/05/2022, reran with minor formatting and log capture
# captures error: 2>&1 # captures error: 2>&1
================================= =================================
./pnca_7030.py 2>&1 | tee log_pnca_7030.txt
./pnca_7030.py ./embb_7030.py 2>&1 | tee log_embb_7030.txt
./embb_7030.py
# ./gid_7030.py: problems, CT values are non existing except for rouC
./gid_7030.py 2>&1 | tee log_gid_7030.txt ./gid_7030.py 2>&1 | tee log_gid_7030.txt
./katg_7030.py ./katg_7030.py 2>&1 | tee log_katg_7030.txt
./rpob_7030.py ./rpob_7030.py 2>&1 | tee log_rpob_7030.txt
./alr_7030.py 2>&1 | tee log_alr_7030.txt # ERROR, as expected, too few values!
##./alr_7030.py = NA problems, CT values are non existing except for rouC
######################################################################## ########################################################################
================================= =================================
@ -57,7 +59,7 @@
./gid_8020.py 2>&1 | tee log_gid_8020.txt ./gid_8020.py 2>&1 | tee log_gid_8020.txt
./katg_8020.py 2>&1 | tee log_katg_8020.txt ./katg_8020.py 2>&1 | tee log_katg_8020.txt
./rpob_8020.py 2>&1 | tee log_rpob_8020.txt ./rpob_8020.py 2>&1 | tee log_rpob_8020.txt
./alr_8020.py 2>&1 | tee log_alr_8020.txt ./alr_8020.py 2>&1 | tee log_alr_8020.txt # ERROR, as expected, too few values!
######################################################################## ########################################################################
@ -72,11 +74,11 @@
./gid_sl.py 2>&1 | tee log_gid_sl.txt ./gid_sl.py 2>&1 | tee log_gid_sl.txt
./katg_sl.py 2>&1 | tee log_katg_sl.txt ./katg_sl.py 2>&1 | tee log_katg_sl.txt
./rpob_sl.py 2>&1 | tee log_rpob_sl.txt ./rpob_sl.py 2>&1 | tee log_rpob_sl.txt
./alr_sl.py 2>&1 | tee log_alr_sl.txt ./alr_sl.py 2>&1 | tee log_alr_sl.txt # ERROR, as expected, too few values!
######################################################################## ########################################################################
================================= =================================
# Split: Reverse training # Split: REVERSE training
# imputed values: training set # imputed values: training set
# actual values: blind set # actual values: blind set
# All features including AA index # All features including AA index
@ -88,7 +90,7 @@
./gid_rt.py 2>&1 | tee log_gid_rt.txt ./gid_rt.py 2>&1 | tee log_gid_rt.txt
./katg_rt.py 2>&1 | tee log_katg_rt.txt ./katg_rt.py 2>&1 | tee log_katg_rt.txt
./rpob_rt.py 2>&1 | tee log_rpob_rt.txt ./rpob_rt.py 2>&1 | tee log_rpob_rt.txt
./alr_rt.py 2>&1 | tee log_alr_rt.txt ./alr_rt.py 2>&1 | tee log_alr_rt.txt # ERROR, as expected, too few values!
######################################################################## ########################################################################
# COMPLETE Data: actual + na i.e. imputed # COMPLETE Data: actual + na i.e. imputed
@ -105,7 +107,7 @@
./gid_cd_7030.py 2>&1 | tee log_gid_cd_7030.txt ./gid_cd_7030.py 2>&1 | tee log_gid_cd_7030.txt
./katg_cd_7030.py 2>&1 | tee log_katg_cd_7030.txt ./katg_cd_7030.py 2>&1 | tee log_katg_cd_7030.txt
./rpob_cd_7030.py 2>&1 | tee log_rpob_cd_7030.txt ./rpob_cd_7030.py 2>&1 | tee log_rpob_cd_7030.txt
./alr_cd_7030.py 2>&1 | tee log_alr_cd_7030.txt ./alr_cd_7030.py 2>&1 | tee log_alr_cd_7030.txt # ERROR, as expected, too few values!
######################################################################## ########################################################################
================================= =================================
@ -119,5 +121,18 @@
./gid_cd_8020.py 2>&1 | tee log_gid_cd_8020.txt ./gid_cd_8020.py 2>&1 | tee log_gid_cd_8020.txt
./katg_cd_8020.py 2>&1 | tee log_katg_cd_8020.txt ./katg_cd_8020.py 2>&1 | tee log_katg_cd_8020.txt
./rpob_cd_8020.py 2>&1 | tee log_rpob_cd_8020.txt ./rpob_cd_8020.py 2>&1 | tee log_rpob_cd_8020.txt
./alr_cd_8020.py 2>&1 | tee log_alr_cd_8020.txt ./alr_cd_8020.py 2>&1 | tee log_alr_cd_8020.txt # ERROR, as expected, too few values!
=================================
# Split: scaling law [COMPLETE DATA]
# All features including AA index
# Date: 18/05/2022
# captures error: 2>&1
=================================
./pnca_cd_sl.py 2>&1 | tee log_pnca_cd_sl.txt
./embb_cd_sl.py 2>&1 | tee log_embb_cd_sl.txt
./gid_cd_sl.py 2>&1 | tee log_gid_cd_sl.txt
./katg_cd_sl.py 2>&1 | tee log_katg_cd_sl.txt
./rpob_cd_sl.py 2>&1 | tee log_rpob_cd_sl.txt
./alr_cd_sl.py 2>&1 | tee log_alr_cd_sl.txt # ERROR, as expected, too few values!