added log files for these ml runs
This commit is contained in:
parent
5bd8ba33f7
commit
e176d018cb
20 changed files with 303568 additions and 0 deletions
75
scripts/ml/log_alr_8020.txt
Normal file
75
scripts/ml/log_alr_8020.txt
Normal file
|
@ -0,0 +1,75 @@
|
|||
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_8020.py:549: SettingWithCopyWarning:
|
||||
A value is trying to be set on a copy of a slice from a DataFrame
|
||||
|
||||
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
||||
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
||||
1.22.4
|
||||
1.4.1
|
||||
|
||||
aaindex_df contains non-numerical data
|
||||
|
||||
Total no. of non-numerial columns: 2
|
||||
|
||||
Selecting numerical data only
|
||||
|
||||
PASS: successfully selected numerical columns only for aaindex_df
|
||||
|
||||
Now checking for NA in the remaining aaindex_cols
|
||||
|
||||
Counting aaindex_df cols with NA
|
||||
ncols with NA: 4 columns
|
||||
Dropping these...
|
||||
Original ncols: 127
|
||||
|
||||
Revised df ncols: 123
|
||||
|
||||
Checking NA in revised df...
|
||||
|
||||
PASS: cols with NA successfully dropped from aaindex_df
|
||||
Proceeding with combining aa_df with other features_df
|
||||
|
||||
PASS: ncols match
|
||||
Expected ncols: 123
|
||||
Got: 123
|
||||
|
||||
Total no. of columns in clean aa_df: 123
|
||||
|
||||
Proceeding to merge, expected nrows in merged_df: 271
|
||||
|
||||
PASS: my_features_df and aa_df successfully combined
|
||||
nrows: 271
|
||||
ncols: 269
|
||||
count of NULL values before imputation
|
||||
|
||||
or_mychisq 256
|
||||
log10_or_mychisq 256
|
||||
dtype: int64
|
||||
count of NULL values AFTER imputation
|
||||
|
||||
mutationinformation 0
|
||||
or_rawI 0
|
||||
logorI 0
|
||||
dtype: int64
|
||||
|
||||
PASS: OR values imputed, data ready for ML
|
||||
|
||||
Total no. of features for aaindex: 123
|
||||
|
||||
No. of numerical features: 168
|
||||
No. of categorical features: 7
|
||||
|
||||
PASS: x_features has no target variable
|
||||
|
||||
No. of columns for x_features: 175
|
||||
Traceback (most recent call last):
|
||||
File "/home/tanu/git/LSHTM_analysis/scripts/ml/./alr_8020.py", line 19, in <module>
|
||||
setvars(gene,drug)
|
||||
File "/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_8020.py", line 656, in setvars
|
||||
X, X_bts, y, y_bts = train_test_split(x_features, y_target
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_split.py", line 2454, in train_test_split
|
||||
train, test = next(cv.split(X=arrays[0], y=stratify))
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_split.py", line 1613, in split
|
||||
for train, test in self._iter_indices(X, y, groups):
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_split.py", line 1953, in _iter_indices
|
||||
raise ValueError(
|
||||
ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
|
107
scripts/ml/log_alr_rt.txt
Normal file
107
scripts/ml/log_alr_rt.txt
Normal file
|
@ -0,0 +1,107 @@
|
|||
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_rt.py:550: SettingWithCopyWarning:
|
||||
A value is trying to be set on a copy of a slice from a DataFrame
|
||||
|
||||
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
||||
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
||||
1.22.4
|
||||
1.4.1
|
||||
|
||||
aaindex_df contains non-numerical data
|
||||
|
||||
Total no. of non-numerial columns: 2
|
||||
|
||||
Selecting numerical data only
|
||||
|
||||
PASS: successfully selected numerical columns only for aaindex_df
|
||||
|
||||
Now checking for NA in the remaining aaindex_cols
|
||||
|
||||
Counting aaindex_df cols with NA
|
||||
ncols with NA: 4 columns
|
||||
Dropping these...
|
||||
Original ncols: 127
|
||||
|
||||
Revised df ncols: 123
|
||||
|
||||
Checking NA in revised df...
|
||||
|
||||
PASS: cols with NA successfully dropped from aaindex_df
|
||||
Proceeding with combining aa_df with other features_df
|
||||
|
||||
PASS: ncols match
|
||||
Expected ncols: 123
|
||||
Got: 123
|
||||
|
||||
Total no. of columns in clean aa_df: 123
|
||||
|
||||
Proceeding to merge, expected nrows in merged_df: 271
|
||||
|
||||
PASS: my_features_df and aa_df successfully combined
|
||||
nrows: 271
|
||||
ncols: 269
|
||||
count of NULL values before imputation
|
||||
|
||||
or_mychisq 256
|
||||
log10_or_mychisq 256
|
||||
dtype: int64
|
||||
count of NULL values AFTER imputation
|
||||
|
||||
mutationinformation 0
|
||||
or_rawI 0
|
||||
logorI 0
|
||||
dtype: int64
|
||||
|
||||
PASS: OR values imputed, data ready for ML
|
||||
|
||||
Total no. of features for aaindex: 123
|
||||
|
||||
No. of numerical features: 168
|
||||
No. of categorical features: 7
|
||||
|
||||
index: 0
|
||||
ind: 1
|
||||
|
||||
Mask count check: True
|
||||
|
||||
index: 1
|
||||
ind: 2
|
||||
|
||||
Mask count check: True
|
||||
Original Data
|
||||
Counter({0: 262, 1: 1}) Data dim: (263, 175)
|
||||
|
||||
-------------------------------------------------------------
|
||||
Successfully split data: REVERSE training
|
||||
imputed values: training set
|
||||
actual values: blind test set
|
||||
Train data size: (263, 175)
|
||||
Test data size: (8, 175)
|
||||
y_train numbers: Counter({0: 262, 1: 1})
|
||||
y_train ratio: 262.0
|
||||
|
||||
y_test_numbers: Counter({0: 7, 1: 1})
|
||||
y_test ratio: 7.0
|
||||
-------------------------------------------------------------
|
||||
Simple Random OverSampling
|
||||
Counter({0: 262, 1: 262})
|
||||
(524, 175)
|
||||
Simple Random UnderSampling
|
||||
Counter({0: 1, 1: 1})
|
||||
(2, 175)
|
||||
Simple Combined Over and UnderSampling
|
||||
Counter({0: 262, 1: 262})
|
||||
(524, 175)
|
||||
Traceback (most recent call last):
|
||||
File "/home/tanu/git/LSHTM_analysis/scripts/ml/./alr_rt.py", line 19, in <module>
|
||||
setvars(gene,drug)
|
||||
File "/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_rt.py", line 701, in setvars
|
||||
X_smnc, y_smnc = sm_nc.fit_resample(X, y)
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/base.py", line 83, in fit_resample
|
||||
output = self._fit_resample(X, y)
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/over_sampling/_smote/base.py", line 533, in _fit_resample
|
||||
X_resampled, y_resampled = super()._fit_resample(X_encoded, y)
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/over_sampling/_smote/base.py", line 324, in _fit_resample
|
||||
nns = self.nn_k_.kneighbors(X_class, return_distance=False)[:, 1:]
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neighbors/_base.py", line 749, in kneighbors
|
||||
raise ValueError(
|
||||
ValueError: Expected n_neighbors <= n_samples, but n_samples = 1, n_neighbors = 6
|
75
scripts/ml/log_alr_sl.txt
Normal file
75
scripts/ml/log_alr_sl.txt
Normal file
|
@ -0,0 +1,75 @@
|
|||
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_sl.py:549: SettingWithCopyWarning:
|
||||
A value is trying to be set on a copy of a slice from a DataFrame
|
||||
|
||||
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
||||
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
||||
1.22.4
|
||||
1.4.1
|
||||
|
||||
aaindex_df contains non-numerical data
|
||||
|
||||
Total no. of non-numerial columns: 2
|
||||
|
||||
Selecting numerical data only
|
||||
|
||||
PASS: successfully selected numerical columns only for aaindex_df
|
||||
|
||||
Now checking for NA in the remaining aaindex_cols
|
||||
|
||||
Counting aaindex_df cols with NA
|
||||
ncols with NA: 4 columns
|
||||
Dropping these...
|
||||
Original ncols: 127
|
||||
|
||||
Revised df ncols: 123
|
||||
|
||||
Checking NA in revised df...
|
||||
|
||||
PASS: cols with NA successfully dropped from aaindex_df
|
||||
Proceeding with combining aa_df with other features_df
|
||||
|
||||
PASS: ncols match
|
||||
Expected ncols: 123
|
||||
Got: 123
|
||||
|
||||
Total no. of columns in clean aa_df: 123
|
||||
|
||||
Proceeding to merge, expected nrows in merged_df: 271
|
||||
|
||||
PASS: my_features_df and aa_df successfully combined
|
||||
nrows: 271
|
||||
ncols: 269
|
||||
count of NULL values before imputation
|
||||
|
||||
or_mychisq 256
|
||||
log10_or_mychisq 256
|
||||
dtype: int64
|
||||
count of NULL values AFTER imputation
|
||||
|
||||
mutationinformation 0
|
||||
or_rawI 0
|
||||
logorI 0
|
||||
dtype: int64
|
||||
|
||||
PASS: OR values imputed, data ready for ML
|
||||
|
||||
Total no. of features for aaindex: 123
|
||||
|
||||
No. of numerical features: 168
|
||||
No. of categorical features: 7
|
||||
|
||||
PASS: x_features has no target variable
|
||||
|
||||
No. of columns for x_features: 175
|
||||
Traceback (most recent call last):
|
||||
File "/home/tanu/git/LSHTM_analysis/scripts/ml/./alr_sl.py", line 19, in <module>
|
||||
setvars(gene,drug)
|
||||
File "/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_sl.py", line 660, in setvars
|
||||
X, X_bts, y, y_bts = train_test_split(x_features, y_target
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_split.py", line 2454, in train_test_split
|
||||
train, test = next(cv.split(X=arrays[0], y=stratify))
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_split.py", line 1613, in split
|
||||
for train, test in self._iter_indices(X, y, groups):
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_split.py", line 1953, in _iter_indices
|
||||
raise ValueError(
|
||||
ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
|
19647
scripts/ml/log_embb_8020.txt
Normal file
19647
scripts/ml/log_embb_8020.txt
Normal file
File diff suppressed because it is too large
Load diff
19312
scripts/ml/log_embb_rt.txt
Normal file
19312
scripts/ml/log_embb_rt.txt
Normal file
File diff suppressed because it is too large
Load diff
19739
scripts/ml/log_embb_sl.txt
Normal file
19739
scripts/ml/log_embb_sl.txt
Normal file
File diff suppressed because it is too large
Load diff
25421
scripts/ml/log_gid_7030.txt
Normal file
25421
scripts/ml/log_gid_7030.txt
Normal file
File diff suppressed because it is too large
Load diff
18915
scripts/ml/log_gid_8020.txt
Normal file
18915
scripts/ml/log_gid_8020.txt
Normal file
File diff suppressed because it is too large
Load diff
14264
scripts/ml/log_gid_rt.txt
Normal file
14264
scripts/ml/log_gid_rt.txt
Normal file
File diff suppressed because it is too large
Load diff
107
scripts/ml/log_gid_rt_v1.txt
Normal file
107
scripts/ml/log_gid_rt_v1.txt
Normal file
|
@ -0,0 +1,107 @@
|
|||
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_rt.py:550: SettingWithCopyWarning:
|
||||
A value is trying to be set on a copy of a slice from a DataFrame
|
||||
|
||||
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
||||
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
||||
1.22.4
|
||||
1.4.1
|
||||
|
||||
aaindex_df contains non-numerical data
|
||||
|
||||
Total no. of non-numerial columns: 2
|
||||
|
||||
Selecting numerical data only
|
||||
|
||||
PASS: successfully selected numerical columns only for aaindex_df
|
||||
|
||||
Now checking for NA in the remaining aaindex_cols
|
||||
|
||||
Counting aaindex_df cols with NA
|
||||
ncols with NA: 4 columns
|
||||
Dropping these...
|
||||
Original ncols: 127
|
||||
|
||||
Revised df ncols: 123
|
||||
|
||||
Checking NA in revised df...
|
||||
|
||||
PASS: cols with NA successfully dropped from aaindex_df
|
||||
Proceeding with combining aa_df with other features_df
|
||||
|
||||
PASS: ncols match
|
||||
Expected ncols: 123
|
||||
Got: 123
|
||||
|
||||
Total no. of columns in clean aa_df: 123
|
||||
|
||||
Proceeding to merge, expected nrows in merged_df: 531
|
||||
|
||||
PASS: my_features_df and aa_df successfully combined
|
||||
nrows: 531
|
||||
ncols: 286
|
||||
count of NULL values before imputation
|
||||
|
||||
or_mychisq 263
|
||||
log10_or_mychisq 263
|
||||
dtype: int64
|
||||
count of NULL values AFTER imputation
|
||||
|
||||
mutationinformation 0
|
||||
or_rawI 0
|
||||
logorI 0
|
||||
dtype: int64
|
||||
|
||||
PASS: OR values imputed, data ready for ML
|
||||
|
||||
Total no. of features for aaindex: 123
|
||||
|
||||
No. of numerical features: 167
|
||||
No. of categorical features: 7
|
||||
|
||||
index: 0
|
||||
ind: 1
|
||||
|
||||
Mask count check: True
|
||||
|
||||
index: 1
|
||||
ind: 2
|
||||
|
||||
Mask count check: True
|
||||
Original Data
|
||||
Counter({0: 409, 1: 3}) Data dim: (412, 174)
|
||||
|
||||
-------------------------------------------------------------
|
||||
Successfully split data: REVERSE training
|
||||
imputed values: training set
|
||||
actual values: blind test set
|
||||
Train data size: (412, 174)
|
||||
Test data size: (119, 174)
|
||||
y_train numbers: Counter({0: 409, 1: 3})
|
||||
y_train ratio: 136.33333333333334
|
||||
|
||||
y_test_numbers: Counter({0: 76, 1: 43})
|
||||
y_test ratio: 1.7674418604651163
|
||||
-------------------------------------------------------------
|
||||
Simple Random OverSampling
|
||||
Counter({0: 409, 1: 409})
|
||||
(818, 174)
|
||||
Simple Random UnderSampling
|
||||
Counter({0: 3, 1: 3})
|
||||
(6, 174)
|
||||
Simple Combined Over and UnderSampling
|
||||
Counter({0: 409, 1: 409})
|
||||
(818, 174)
|
||||
Traceback (most recent call last):
|
||||
File "/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_rt.py", line 19, in <module>
|
||||
setvars(gene,drug)
|
||||
File "/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_rt.py", line 701, in setvars
|
||||
X_smnc, y_smnc = sm_nc.fit_resample(X, y)
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/base.py", line 83, in fit_resample
|
||||
output = self._fit_resample(X, y)
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/over_sampling/_smote/base.py", line 533, in _fit_resample
|
||||
X_resampled, y_resampled = super()._fit_resample(X_encoded, y)
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/over_sampling/_smote/base.py", line 324, in _fit_resample
|
||||
nns = self.nn_k_.kneighbors(X_class, return_distance=False)[:, 1:]
|
||||
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neighbors/_base.py", line 749, in kneighbors
|
||||
raise ValueError(
|
||||
ValueError: Expected n_neighbors <= n_samples, but n_samples = 3, n_neighbors = 6
|
18779
scripts/ml/log_gid_sl.txt
Normal file
18779
scripts/ml/log_gid_sl.txt
Normal file
File diff suppressed because it is too large
Load diff
19521
scripts/ml/log_katg_8020.txt
Normal file
19521
scripts/ml/log_katg_8020.txt
Normal file
File diff suppressed because it is too large
Load diff
19607
scripts/ml/log_katg_rt.txt
Normal file
19607
scripts/ml/log_katg_rt.txt
Normal file
File diff suppressed because it is too large
Load diff
19707
scripts/ml/log_katg_sl.txt
Normal file
19707
scripts/ml/log_katg_sl.txt
Normal file
File diff suppressed because it is too large
Load diff
19103
scripts/ml/log_pnca_8020.txt
Normal file
19103
scripts/ml/log_pnca_8020.txt
Normal file
File diff suppressed because it is too large
Load diff
18962
scripts/ml/log_pnca_rt.txt
Normal file
18962
scripts/ml/log_pnca_rt.txt
Normal file
File diff suppressed because it is too large
Load diff
19294
scripts/ml/log_pnca_sl.txt
Normal file
19294
scripts/ml/log_pnca_sl.txt
Normal file
File diff suppressed because it is too large
Load diff
19319
scripts/ml/log_rpob_8020.txt
Normal file
19319
scripts/ml/log_rpob_8020.txt
Normal file
File diff suppressed because it is too large
Load diff
11890
scripts/ml/log_rpob_rt.txt
Normal file
11890
scripts/ml/log_rpob_rt.txt
Normal file
File diff suppressed because it is too large
Load diff
19724
scripts/ml/log_rpob_sl.txt
Normal file
19724
scripts/ml/log_rpob_sl.txt
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue