slight formatting for existing scripts
This commit is contained in:
parent
a53fce5455
commit
9bc26c1947
4 changed files with 33 additions and 7 deletions
|
@ -667,6 +667,7 @@ def setvars(gene,drug):
|
|||
|
||||
print('\n-------------------------------------------------------------'
|
||||
, '\nSuccessfully split data with stratification: 70/30'
|
||||
, '\nInput features data size:', x_features.shape
|
||||
, '\nTrain data size:', X.shape
|
||||
, '\nTest data size:', X_bts.shape
|
||||
, '\ny_train numbers:', yc1
|
||||
|
|
|
@ -655,6 +655,7 @@ def setvars(gene,drug):
|
|||
|
||||
print('\n-------------------------------------------------------------'
|
||||
, '\nSuccessfully split data with stratification: 80/20 '
|
||||
, '\nInput features data size:', x_features.shape
|
||||
, '\nTrain data size:', X.shape
|
||||
, '\nTest data size:', X_bts.shape
|
||||
, '\ny_train numbers:', yc1
|
||||
|
|
|
@ -553,9 +553,7 @@ def setvars(gene,drug):
|
|||
# Training and BLIND test set: scaling law split
|
||||
# https://towardsdatascience.com/finally-why-we-use-an-80-20-split-for-training-and-test-data-plus-an-alternative-method-oh-yes-edc77e96295d
|
||||
|
||||
# Throw away previous blind_test_df, and call the 30% data as blind_test
|
||||
# as these were imputed values and initial analysis shows that this
|
||||
# is not very representative
|
||||
# test data size ~ 1/sqrt(features NOT including target variable)
|
||||
#================================================================
|
||||
my_df_ml[drug].isna().sum()
|
||||
# blind_test_df = my_df_ml[my_df_ml[drug].isna()]
|
||||
|
@ -650,7 +648,6 @@ def setvars(gene,drug):
|
|||
x_ncols = len(x_features.columns)
|
||||
print('\nNo. of columns for x_features:', x_ncols)
|
||||
# NEED It for scaling law split
|
||||
#https://towardsdatascience.com/finally-why-we-use-an-80-20-split-for-training-and-test-data-plus-an-alternative-method-oh-yes-edc77e96295d
|
||||
else:
|
||||
sys.exit('\nFAIL: x_features has target variable included. FIX it and rerun!')
|
||||
|
||||
|
@ -670,7 +667,8 @@ def setvars(gene,drug):
|
|||
yc2_ratio = yc2[0]/yc2[1]
|
||||
|
||||
print('\n-------------------------------------------------------------'
|
||||
, '\nSuccessfully split data according to scaling law: 1/np.sqrt(x_ncols)'
|
||||
, '\nSuccessfully split data with stratification according to scaling law: 1/sqrt(x_ncols)'
|
||||
, '\nInput features data size:', x_features.shape
|
||||
, '\nTrain data size:', X.shape
|
||||
, '\nTest data size:', sl_test_size, ' ', X_bts.shape
|
||||
, '\ny_train numbers:', yc1
|
||||
|
|
|
@ -90,8 +90,34 @@
|
|||
./rpob_rt.py 2>&1 | tee log_rpob_rt.txt
|
||||
./alr_rt.py 2>&1 | tee log_alr_rt.txt
|
||||
|
||||
########################################################################
|
||||
# COMPLETE Data: actual + na (i.e. imputed)
|
||||
########################################################################
|
||||
|
||||
=================================
|
||||
# Split: 70/30 [COMPLETE DATA]
|
||||
# All features including AA index
|
||||
# Date: 18/05/2022
|
||||
# captures error (stderr redirected to stdout): 2>&1
|
||||
=================================
|
||||
./pnca_cd_7030.py 2>&1 | tee log_pnca_cd_7030.txt
|
||||
./embb_cd_7030.py 2>&1 | tee log_embb_cd_7030.txt
|
||||
./gid_cd_7030.py 2>&1 | tee log_gid_cd_7030.txt
|
||||
./katg_cd_7030.py 2>&1 | tee log_katg_cd_7030.txt
|
||||
./rpob_cd_7030.py 2>&1 | tee log_rpob_cd_7030.txt
|
||||
./alr_cd_7030.py 2>&1 | tee log_alr_cd_7030.txt
|
||||
|
||||
|
||||
|
||||
########################################################################
|
||||
=================================
|
||||
# Split: 80/20 [COMPLETE DATA]
|
||||
# All features including AA index
|
||||
# Date: 18/05/2022
|
||||
# captures error (stderr redirected to stdout): 2>&1
|
||||
=================================
|
||||
./pnca_cd_8020.py 2>&1 | tee log_pnca_cd_8020.txt
|
||||
./embb_cd_8020.py 2>&1 | tee log_embb_cd_8020.txt
|
||||
./gid_cd_8020.py 2>&1 | tee log_gid_cd_8020.txt
|
||||
./katg_cd_8020.py 2>&1 | tee log_katg_cd_8020.txt
|
||||
./rpob_cd_8020.py 2>&1 | tee log_rpob_cd_8020.txt
|
||||
./alr_cd_8020.py 2>&1 | tee log_alr_cd_8020.txt
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue