ML logs
This commit is contained in:
parent
4c5afa614f
commit
cadaed2ba7
35 changed files with 568790 additions and 0 deletions
75
scripts/ml/log_alr_7030.txt
Normal file
75
scripts/ml/log_alr_7030.txt
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_7030.py:548: SettingWithCopyWarning:
|
||||||
|
A value is trying to be set on a copy of a slice from a DataFrame
|
||||||
|
|
||||||
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
||||||
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
||||||
|
1.22.4
|
||||||
|
1.4.1
|
||||||
|
|
||||||
|
aaindex_df contains non-numerical data
|
||||||
|
|
||||||
|
Total no. of non-numerial columns: 2
|
||||||
|
|
||||||
|
Selecting numerical data only
|
||||||
|
|
||||||
|
PASS: successfully selected numerical columns only for aaindex_df
|
||||||
|
|
||||||
|
Now checking for NA in the remaining aaindex_cols
|
||||||
|
|
||||||
|
Counting aaindex_df cols with NA
|
||||||
|
ncols with NA: 4 columns
|
||||||
|
Dropping these...
|
||||||
|
Original ncols: 127
|
||||||
|
|
||||||
|
Revised df ncols: 123
|
||||||
|
|
||||||
|
Checking NA in revised df...
|
||||||
|
|
||||||
|
PASS: cols with NA successfully dropped from aaindex_df
|
||||||
|
Proceeding with combining aa_df with other features_df
|
||||||
|
|
||||||
|
PASS: ncols match
|
||||||
|
Expected ncols: 123
|
||||||
|
Got: 123
|
||||||
|
|
||||||
|
Total no. of columns in clean aa_df: 123
|
||||||
|
|
||||||
|
Proceeding to merge, expected nrows in merged_df: 271
|
||||||
|
|
||||||
|
PASS: my_features_df and aa_df successfully combined
|
||||||
|
nrows: 271
|
||||||
|
ncols: 269
|
||||||
|
count of NULL values before imputation
|
||||||
|
|
||||||
|
or_mychisq 256
|
||||||
|
log10_or_mychisq 256
|
||||||
|
dtype: int64
|
||||||
|
count of NULL values AFTER imputation
|
||||||
|
|
||||||
|
mutationinformation 0
|
||||||
|
or_rawI 0
|
||||||
|
logorI 0
|
||||||
|
dtype: int64
|
||||||
|
|
||||||
|
PASS: OR values imputed, data ready for ML
|
||||||
|
|
||||||
|
Total no. of features for aaindex: 123
|
||||||
|
|
||||||
|
No. of numerical features: 168
|
||||||
|
No. of categorical features: 7
|
||||||
|
|
||||||
|
PASS: x_features has no target variable
|
||||||
|
|
||||||
|
No. of columns for x_features: 175
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/home/tanu/git/LSHTM_analysis/scripts/ml/./alr_7030.py", line 19, in <module>
|
||||||
|
setvars(gene,drug)
|
||||||
|
File "/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_7030.py", line 658, in setvars
|
||||||
|
X, X_bts, y, y_bts = train_test_split(x_features, y_target
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_split.py", line 2454, in train_test_split
|
||||||
|
train, test = next(cv.split(X=arrays[0], y=stratify))
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_split.py", line 1613, in split
|
||||||
|
for train, test in self._iter_indices(X, y, groups):
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/model_selection/_split.py", line 1953, in _iter_indices
|
||||||
|
raise ValueError(
|
||||||
|
ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
|
113
scripts/ml/log_alr_cd_7030.txt
Normal file
113
scripts/ml/log_alr_cd_7030.txt
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_7030.py:548: SettingWithCopyWarning:
|
||||||
|
A value is trying to be set on a copy of a slice from a DataFrame
|
||||||
|
|
||||||
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
||||||
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
||||||
|
1.22.4
|
||||||
|
1.4.1
|
||||||
|
|
||||||
|
aaindex_df contains non-numerical data
|
||||||
|
|
||||||
|
Total no. of non-numerial columns: 2
|
||||||
|
|
||||||
|
Selecting numerical data only
|
||||||
|
|
||||||
|
PASS: successfully selected numerical columns only for aaindex_df
|
||||||
|
|
||||||
|
Now checking for NA in the remaining aaindex_cols
|
||||||
|
|
||||||
|
Counting aaindex_df cols with NA
|
||||||
|
ncols with NA: 4 columns
|
||||||
|
Dropping these...
|
||||||
|
Original ncols: 127
|
||||||
|
|
||||||
|
Revised df ncols: 123
|
||||||
|
|
||||||
|
Checking NA in revised df...
|
||||||
|
|
||||||
|
PASS: cols with NA successfully dropped from aaindex_df
|
||||||
|
Proceeding with combining aa_df with other features_df
|
||||||
|
|
||||||
|
PASS: ncols match
|
||||||
|
Expected ncols: 123
|
||||||
|
Got: 123
|
||||||
|
|
||||||
|
Total no. of columns in clean aa_df: 123
|
||||||
|
|
||||||
|
Proceeding to merge, expected nrows in merged_df: 271
|
||||||
|
|
||||||
|
PASS: my_features_df and aa_df successfully combined
|
||||||
|
nrows: 271
|
||||||
|
ncols: 269
|
||||||
|
count of NULL values before imputation
|
||||||
|
|
||||||
|
or_mychisq 256
|
||||||
|
log10_or_mychisq 256
|
||||||
|
dtype: int64
|
||||||
|
count of NULL values AFTER imputation
|
||||||
|
|
||||||
|
mutationinformation 0
|
||||||
|
or_rawI 0
|
||||||
|
logorI 0
|
||||||
|
dtype: int64
|
||||||
|
|
||||||
|
PASS: OR values imputed, data ready for ML
|
||||||
|
|
||||||
|
Total no. of features for aaindex: 123
|
||||||
|
|
||||||
|
No. of numerical features: 168
|
||||||
|
No. of categorical features: 7
|
||||||
|
|
||||||
|
PASS: x_features has no target variable
|
||||||
|
|
||||||
|
No. of columns for x_features: 175
|
||||||
|
|
||||||
|
-------------------------------------------------------------
|
||||||
|
Successfully split data with stratification [COMPLETE data]: 70/30
|
||||||
|
Original data size: (271, 175)
|
||||||
|
Train data size: (181, 175)
|
||||||
|
Test data size: (90, 175)
|
||||||
|
y_train numbers: Counter({0: 180, 1: 1})
|
||||||
|
y_train ratio: 180.0
|
||||||
|
|
||||||
|
y_test_numbers: Counter({0: 89, 1: 1})
|
||||||
|
y_test ratio: 89.0
|
||||||
|
-------------------------------------------------------------
|
||||||
|
|
||||||
|
index: 0
|
||||||
|
ind: 1
|
||||||
|
|
||||||
|
Mask count check: True
|
||||||
|
|
||||||
|
index: 1
|
||||||
|
ind: 2
|
||||||
|
|
||||||
|
Mask count check: True
|
||||||
|
Original Data
|
||||||
|
Counter({0: 180, 1: 1}) Data dim: (181, 175)
|
||||||
|
|
||||||
|
Simple Random OverSampling
|
||||||
|
Counter({0: 180, 1: 180})
|
||||||
|
(360, 175)
|
||||||
|
|
||||||
|
Simple Random UnderSampling
|
||||||
|
Counter({0: 1, 1: 1})
|
||||||
|
(2, 175)
|
||||||
|
|
||||||
|
Simple Combined Over and UnderSampling
|
||||||
|
Counter({0: 180, 1: 180})
|
||||||
|
(360, 175)
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/home/tanu/git/LSHTM_analysis/scripts/ml/./alr_cd_7030.py", line 19, in <module>
|
||||||
|
setvars(gene,drug)
|
||||||
|
File "/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_7030.py", line 745, in setvars
|
||||||
|
X_smnc, y_smnc = sm_nc.fit_resample(X, y)
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/base.py", line 83, in fit_resample
|
||||||
|
output = self._fit_resample(X, y)
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/over_sampling/_smote/base.py", line 533, in _fit_resample
|
||||||
|
X_resampled, y_resampled = super()._fit_resample(X_encoded, y)
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/over_sampling/_smote/base.py", line 324, in _fit_resample
|
||||||
|
nns = self.nn_k_.kneighbors(X_class, return_distance=False)[:, 1:]
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neighbors/_base.py", line 749, in kneighbors
|
||||||
|
raise ValueError(
|
||||||
|
ValueError: Expected n_neighbors <= n_samples, but n_samples = 1, n_neighbors = 6
|
69
scripts/ml/log_alr_cd_8020.txt
Normal file
69
scripts/ml/log_alr_cd_8020.txt
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_8020.py:548: SettingWithCopyWarning:
|
||||||
|
A value is trying to be set on a copy of a slice from a DataFrame
|
||||||
|
|
||||||
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
||||||
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
||||||
|
1.22.4
|
||||||
|
1.4.1
|
||||||
|
|
||||||
|
aaindex_df contains non-numerical data
|
||||||
|
|
||||||
|
Total no. of non-numerial columns: 2
|
||||||
|
|
||||||
|
Selecting numerical data only
|
||||||
|
|
||||||
|
PASS: successfully selected numerical columns only for aaindex_df
|
||||||
|
|
||||||
|
Now checking for NA in the remaining aaindex_cols
|
||||||
|
|
||||||
|
Counting aaindex_df cols with NA
|
||||||
|
ncols with NA: 4 columns
|
||||||
|
Dropping these...
|
||||||
|
Original ncols: 127
|
||||||
|
|
||||||
|
Revised df ncols: 123
|
||||||
|
|
||||||
|
Checking NA in revised df...
|
||||||
|
|
||||||
|
PASS: cols with NA successfully dropped from aaindex_df
|
||||||
|
Proceeding with combining aa_df with other features_df
|
||||||
|
|
||||||
|
PASS: ncols match
|
||||||
|
Expected ncols: 123
|
||||||
|
Got: 123
|
||||||
|
|
||||||
|
Total no. of columns in clean aa_df: 123
|
||||||
|
|
||||||
|
Proceeding to merge, expected nrows in merged_df: 271
|
||||||
|
|
||||||
|
PASS: my_features_df and aa_df successfully combined
|
||||||
|
nrows: 271
|
||||||
|
ncols: 269
|
||||||
|
count of NULL values before imputation
|
||||||
|
|
||||||
|
or_mychisq 256
|
||||||
|
log10_or_mychisq 256
|
||||||
|
dtype: int64
|
||||||
|
count of NULL values AFTER imputation
|
||||||
|
|
||||||
|
mutationinformation 0
|
||||||
|
or_rawI 0
|
||||||
|
logorI 0
|
||||||
|
dtype: int64
|
||||||
|
|
||||||
|
PASS: OR values imputed, data ready for ML
|
||||||
|
|
||||||
|
Total no. of features for aaindex: 123
|
||||||
|
|
||||||
|
No. of numerical features: 168
|
||||||
|
No. of categorical features: 7
|
||||||
|
|
||||||
|
PASS: x_features has no target variable
|
||||||
|
|
||||||
|
No. of columns for x_features: 175
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/home/tanu/git/LSHTM_analysis/scripts/ml/./alr_cd_8020.py", line 19, in <module>
|
||||||
|
setvars(gene,drug)
|
||||||
|
File "/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_8020.py", line 666, in setvars
|
||||||
|
yc2_ratio = yc2[0]/yc2[1]
|
||||||
|
ZeroDivisionError: division by zero
|
69
scripts/ml/log_alr_cd_sl.txt
Normal file
69
scripts/ml/log_alr_cd_sl.txt
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_sl.py:548: SettingWithCopyWarning:
|
||||||
|
A value is trying to be set on a copy of a slice from a DataFrame
|
||||||
|
|
||||||
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
||||||
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
||||||
|
1.22.4
|
||||||
|
1.4.1
|
||||||
|
|
||||||
|
aaindex_df contains non-numerical data
|
||||||
|
|
||||||
|
Total no. of non-numerial columns: 2
|
||||||
|
|
||||||
|
Selecting numerical data only
|
||||||
|
|
||||||
|
PASS: successfully selected numerical columns only for aaindex_df
|
||||||
|
|
||||||
|
Now checking for NA in the remaining aaindex_cols
|
||||||
|
|
||||||
|
Counting aaindex_df cols with NA
|
||||||
|
ncols with NA: 4 columns
|
||||||
|
Dropping these...
|
||||||
|
Original ncols: 127
|
||||||
|
|
||||||
|
Revised df ncols: 123
|
||||||
|
|
||||||
|
Checking NA in revised df...
|
||||||
|
|
||||||
|
PASS: cols with NA successfully dropped from aaindex_df
|
||||||
|
Proceeding with combining aa_df with other features_df
|
||||||
|
|
||||||
|
PASS: ncols match
|
||||||
|
Expected ncols: 123
|
||||||
|
Got: 123
|
||||||
|
|
||||||
|
Total no. of columns in clean aa_df: 123
|
||||||
|
|
||||||
|
Proceeding to merge, expected nrows in merged_df: 271
|
||||||
|
|
||||||
|
PASS: my_features_df and aa_df successfully combined
|
||||||
|
nrows: 271
|
||||||
|
ncols: 269
|
||||||
|
count of NULL values before imputation
|
||||||
|
|
||||||
|
or_mychisq 256
|
||||||
|
log10_or_mychisq 256
|
||||||
|
dtype: int64
|
||||||
|
count of NULL values AFTER imputation
|
||||||
|
|
||||||
|
mutationinformation 0
|
||||||
|
or_rawI 0
|
||||||
|
logorI 0
|
||||||
|
dtype: int64
|
||||||
|
|
||||||
|
PASS: OR values imputed, data ready for ML
|
||||||
|
|
||||||
|
Total no. of features for aaindex: 123
|
||||||
|
|
||||||
|
No. of numerical features: 168
|
||||||
|
No. of categorical features: 7
|
||||||
|
|
||||||
|
PASS: x_features has no target variable
|
||||||
|
|
||||||
|
No. of columns for x_features: 175
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/home/tanu/git/LSHTM_analysis/scripts/ml/./alr_cd_sl.py", line 19, in <module>
|
||||||
|
setvars(gene,drug)
|
||||||
|
File "/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_sl.py", line 669, in setvars
|
||||||
|
yc2_ratio = yc2[0]/yc2[1]
|
||||||
|
ZeroDivisionError: division by zero
|
105
scripts/ml/log_alr_config.txt
Normal file
105
scripts/ml/log_alr_config.txt
Normal file
|
@ -0,0 +1,105 @@
|
||||||
|
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data.py:550: SettingWithCopyWarning:
|
||||||
|
A value is trying to be set on a copy of a slice from a DataFrame
|
||||||
|
|
||||||
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
||||||
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
||||||
|
1.22.4
|
||||||
|
1.4.1
|
||||||
|
|
||||||
|
aaindex_df contains non-numerical data
|
||||||
|
|
||||||
|
Total no. of non-numerial columns: 2
|
||||||
|
|
||||||
|
Selecting numerical data only
|
||||||
|
|
||||||
|
PASS: successfully selected numerical columns only for aaindex_df
|
||||||
|
|
||||||
|
Now checking for NA in the remaining aaindex_cols
|
||||||
|
|
||||||
|
Counting aaindex_df cols with NA
|
||||||
|
ncols with NA: 4 columns
|
||||||
|
Dropping these...
|
||||||
|
Original ncols: 127
|
||||||
|
|
||||||
|
Revised df ncols: 123
|
||||||
|
|
||||||
|
Checking NA in revised df...
|
||||||
|
|
||||||
|
PASS: cols with NA successfully dropped from aaindex_df
|
||||||
|
Proceeding with combining aa_df with other features_df
|
||||||
|
|
||||||
|
PASS: ncols match
|
||||||
|
Expected ncols: 123
|
||||||
|
Got: 123
|
||||||
|
|
||||||
|
Total no. of columns in clean aa_df: 123
|
||||||
|
|
||||||
|
Proceeding to merge, expected nrows in merged_df: 271
|
||||||
|
|
||||||
|
PASS: my_features_df and aa_df successfully combined
|
||||||
|
nrows: 271
|
||||||
|
ncols: 269
|
||||||
|
count of NULL values before imputation
|
||||||
|
|
||||||
|
or_mychisq 256
|
||||||
|
log10_or_mychisq 256
|
||||||
|
dtype: int64
|
||||||
|
count of NULL values AFTER imputation
|
||||||
|
|
||||||
|
mutationinformation 0
|
||||||
|
or_rawI 0
|
||||||
|
logorI 0
|
||||||
|
dtype: int64
|
||||||
|
|
||||||
|
PASS: OR values imputed, data ready for ML
|
||||||
|
|
||||||
|
No. of numerical features: 45
|
||||||
|
No. of categorical features: 7
|
||||||
|
|
||||||
|
index: 0
|
||||||
|
ind: 1
|
||||||
|
|
||||||
|
Mask count check: True
|
||||||
|
|
||||||
|
index: 1
|
||||||
|
ind: 2
|
||||||
|
|
||||||
|
Mask count check: True
|
||||||
|
Original Data
|
||||||
|
Counter({0: 7, 1: 1}) Data dim: (8, 52)
|
||||||
|
|
||||||
|
-------------------------------------------------------------
|
||||||
|
Successfully split data: UQ [no aa_index but active site included] training
|
||||||
|
actual values: training set
|
||||||
|
imputed values: blind test set
|
||||||
|
Train data size: (8, 52)
|
||||||
|
Test data size: (263, 52)
|
||||||
|
y_train numbers: Counter({0: 7, 1: 1})
|
||||||
|
y_train ratio: 7.0
|
||||||
|
|
||||||
|
y_test_numbers: Counter({0: 262, 1: 1})
|
||||||
|
y_test ratio: 262.0
|
||||||
|
-------------------------------------------------------------
|
||||||
|
Simple Random OverSampling
|
||||||
|
Counter({0: 7, 1: 7})
|
||||||
|
(14, 52)
|
||||||
|
Simple Random UnderSampling
|
||||||
|
Counter({0: 1, 1: 1})
|
||||||
|
(2, 52)
|
||||||
|
Simple Combined Over and UnderSampling
|
||||||
|
Counter({0: 7, 1: 7})
|
||||||
|
(14, 52)
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/home/tanu/git/LSHTM_analysis/scripts/ml/./alr_config.py", line 26, in <module>
|
||||||
|
setvars(gene,drug)
|
||||||
|
File "/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data.py", line 701, in setvars
|
||||||
|
X_smnc, y_smnc = sm_nc.fit_resample(X, y)
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/base.py", line 83, in fit_resample
|
||||||
|
output = self._fit_resample(X, y)
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/over_sampling/_smote/base.py", line 533, in _fit_resample
|
||||||
|
X_resampled, y_resampled = super()._fit_resample(X_encoded, y)
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/over_sampling/_smote/base.py", line 324, in _fit_resample
|
||||||
|
nns = self.nn_k_.kneighbors(X_class, return_distance=False)[:, 1:]
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neighbors/_base.py", line 749, in kneighbors
|
||||||
|
raise ValueError(
|
||||||
|
ValueError: Expected n_neighbors <= n_samples, but n_samples = 1, n_neighbors = 6
|
107
scripts/ml/log_alr_orig.txt
Normal file
107
scripts/ml/log_alr_orig.txt
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_orig.py:550: SettingWithCopyWarning:
|
||||||
|
A value is trying to be set on a copy of a slice from a DataFrame
|
||||||
|
|
||||||
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
||||||
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
||||||
|
1.22.4
|
||||||
|
1.4.1
|
||||||
|
|
||||||
|
aaindex_df contains non-numerical data
|
||||||
|
|
||||||
|
Total no. of non-numerial columns: 2
|
||||||
|
|
||||||
|
Selecting numerical data only
|
||||||
|
|
||||||
|
PASS: successfully selected numerical columns only for aaindex_df
|
||||||
|
|
||||||
|
Now checking for NA in the remaining aaindex_cols
|
||||||
|
|
||||||
|
Counting aaindex_df cols with NA
|
||||||
|
ncols with NA: 4 columns
|
||||||
|
Dropping these...
|
||||||
|
Original ncols: 127
|
||||||
|
|
||||||
|
Revised df ncols: 123
|
||||||
|
|
||||||
|
Checking NA in revised df...
|
||||||
|
|
||||||
|
PASS: cols with NA successfully dropped from aaindex_df
|
||||||
|
Proceeding with combining aa_df with other features_df
|
||||||
|
|
||||||
|
PASS: ncols match
|
||||||
|
Expected ncols: 123
|
||||||
|
Got: 123
|
||||||
|
|
||||||
|
Total no. of columns in clean aa_df: 123
|
||||||
|
|
||||||
|
Proceeding to merge, expected nrows in merged_df: 271
|
||||||
|
|
||||||
|
PASS: my_features_df and aa_df successfully combined
|
||||||
|
nrows: 271
|
||||||
|
ncols: 269
|
||||||
|
count of NULL values before imputation
|
||||||
|
|
||||||
|
or_mychisq 256
|
||||||
|
log10_or_mychisq 256
|
||||||
|
dtype: int64
|
||||||
|
count of NULL values AFTER imputation
|
||||||
|
|
||||||
|
mutationinformation 0
|
||||||
|
or_rawI 0
|
||||||
|
logorI 0
|
||||||
|
dtype: int64
|
||||||
|
|
||||||
|
PASS: OR values imputed, data ready for ML
|
||||||
|
|
||||||
|
Total no. of features for aaindex: 123
|
||||||
|
|
||||||
|
No. of numerical features: 168
|
||||||
|
No. of categorical features: 7
|
||||||
|
|
||||||
|
index: 0
|
||||||
|
ind: 1
|
||||||
|
|
||||||
|
Mask count check: True
|
||||||
|
|
||||||
|
index: 1
|
||||||
|
ind: 2
|
||||||
|
|
||||||
|
Mask count check: True
|
||||||
|
Original Data
|
||||||
|
Counter({0: 7, 1: 1}) Data dim: (8, 175)
|
||||||
|
|
||||||
|
-------------------------------------------------------------
|
||||||
|
Successfully split data: ORIGINAL training
|
||||||
|
actual values: training set
|
||||||
|
imputed values: blind test set
|
||||||
|
Train data size: (8, 175)
|
||||||
|
Test data size: (263, 175)
|
||||||
|
y_train numbers: Counter({0: 7, 1: 1})
|
||||||
|
y_train ratio: 7.0
|
||||||
|
|
||||||
|
y_test_numbers: Counter({0: 262, 1: 1})
|
||||||
|
y_test ratio: 262.0
|
||||||
|
-------------------------------------------------------------
|
||||||
|
Simple Random OverSampling
|
||||||
|
Counter({0: 7, 1: 7})
|
||||||
|
(14, 175)
|
||||||
|
Simple Random UnderSampling
|
||||||
|
Counter({0: 1, 1: 1})
|
||||||
|
(2, 175)
|
||||||
|
Simple Combined Over and UnderSampling
|
||||||
|
Counter({0: 7, 1: 7})
|
||||||
|
(14, 175)
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/home/tanu/git/LSHTM_analysis/scripts/ml/./alr_orig.py", line 19, in <module>
|
||||||
|
setvars(gene,drug)
|
||||||
|
File "/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_orig.py", line 701, in setvars
|
||||||
|
X_smnc, y_smnc = sm_nc.fit_resample(X, y)
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/base.py", line 83, in fit_resample
|
||||||
|
output = self._fit_resample(X, y)
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/over_sampling/_smote/base.py", line 533, in _fit_resample
|
||||||
|
X_resampled, y_resampled = super()._fit_resample(X_encoded, y)
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/imblearn/over_sampling/_smote/base.py", line 324, in _fit_resample
|
||||||
|
nns = self.nn_k_.kneighbors(X_class, return_distance=False)[:, 1:]
|
||||||
|
File "/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neighbors/_base.py", line 749, in kneighbors
|
||||||
|
raise ValueError(
|
||||||
|
ValueError: Expected n_neighbors <= n_samples, but n_samples = 1, n_neighbors = 6
|
19598
scripts/ml/log_embb_7030.txt
Normal file
19598
scripts/ml/log_embb_7030.txt
Normal file
File diff suppressed because it is too large
Load diff
19713
scripts/ml/log_embb_cd_7030.txt
Normal file
19713
scripts/ml/log_embb_cd_7030.txt
Normal file
File diff suppressed because it is too large
Load diff
19759
scripts/ml/log_embb_cd_8020.txt
Normal file
19759
scripts/ml/log_embb_cd_8020.txt
Normal file
File diff suppressed because it is too large
Load diff
19822
scripts/ml/log_embb_cd_sl.txt
Normal file
19822
scripts/ml/log_embb_cd_sl.txt
Normal file
File diff suppressed because it is too large
Load diff
19445
scripts/ml/log_embb_config.txt
Normal file
19445
scripts/ml/log_embb_config.txt
Normal file
File diff suppressed because it is too large
Load diff
19714
scripts/ml/log_embb_orig.txt
Normal file
19714
scripts/ml/log_embb_orig.txt
Normal file
File diff suppressed because it is too large
Load diff
19498
scripts/ml/log_gid_cd_7030.txt
Normal file
19498
scripts/ml/log_gid_cd_7030.txt
Normal file
File diff suppressed because it is too large
Load diff
20887
scripts/ml/log_gid_cd_8020.txt
Normal file
20887
scripts/ml/log_gid_cd_8020.txt
Normal file
File diff suppressed because it is too large
Load diff
19624
scripts/ml/log_gid_cd_sl.txt
Normal file
19624
scripts/ml/log_gid_cd_sl.txt
Normal file
File diff suppressed because it is too large
Load diff
18905
scripts/ml/log_gid_config.txt
Normal file
18905
scripts/ml/log_gid_config.txt
Normal file
File diff suppressed because it is too large
Load diff
18829
scripts/ml/log_gid_orig.txt
Normal file
18829
scripts/ml/log_gid_orig.txt
Normal file
File diff suppressed because it is too large
Load diff
19453
scripts/ml/log_katg_7030.txt
Normal file
19453
scripts/ml/log_katg_7030.txt
Normal file
File diff suppressed because it is too large
Load diff
19916
scripts/ml/log_katg_cd_7030.txt
Normal file
19916
scripts/ml/log_katg_cd_7030.txt
Normal file
File diff suppressed because it is too large
Load diff
19946
scripts/ml/log_katg_cd_8020.txt
Normal file
19946
scripts/ml/log_katg_cd_8020.txt
Normal file
File diff suppressed because it is too large
Load diff
19928
scripts/ml/log_katg_cd_sl.txt
Normal file
19928
scripts/ml/log_katg_cd_sl.txt
Normal file
File diff suppressed because it is too large
Load diff
19492
scripts/ml/log_katg_config.txt
Normal file
19492
scripts/ml/log_katg_config.txt
Normal file
File diff suppressed because it is too large
Load diff
19742
scripts/ml/log_katg_orig.txt
Normal file
19742
scripts/ml/log_katg_orig.txt
Normal file
File diff suppressed because it is too large
Load diff
18975
scripts/ml/log_pnca_7030.txt
Normal file
18975
scripts/ml/log_pnca_7030.txt
Normal file
File diff suppressed because it is too large
Load diff
19358
scripts/ml/log_pnca_cd_7030.txt
Normal file
19358
scripts/ml/log_pnca_cd_7030.txt
Normal file
File diff suppressed because it is too large
Load diff
19376
scripts/ml/log_pnca_cd_8020.txt
Normal file
19376
scripts/ml/log_pnca_cd_8020.txt
Normal file
File diff suppressed because it is too large
Load diff
19550
scripts/ml/log_pnca_cd_sl.txt
Normal file
19550
scripts/ml/log_pnca_cd_sl.txt
Normal file
File diff suppressed because it is too large
Load diff
19198
scripts/ml/log_pnca_config.txt
Normal file
19198
scripts/ml/log_pnca_config.txt
Normal file
File diff suppressed because it is too large
Load diff
19325
scripts/ml/log_pnca_orig.txt
Normal file
19325
scripts/ml/log_pnca_orig.txt
Normal file
File diff suppressed because it is too large
Load diff
19357
scripts/ml/log_rpob_7030.txt
Normal file
19357
scripts/ml/log_rpob_7030.txt
Normal file
File diff suppressed because it is too large
Load diff
19846
scripts/ml/log_rpob_cd_7030.txt
Normal file
19846
scripts/ml/log_rpob_cd_7030.txt
Normal file
File diff suppressed because it is too large
Load diff
19838
scripts/ml/log_rpob_cd_8020.txt
Normal file
19838
scripts/ml/log_rpob_cd_8020.txt
Normal file
File diff suppressed because it is too large
Load diff
19863
scripts/ml/log_rpob_cd_sl.txt
Normal file
19863
scripts/ml/log_rpob_cd_sl.txt
Normal file
File diff suppressed because it is too large
Load diff
19530
scripts/ml/log_rpob_config.txt
Normal file
19530
scripts/ml/log_rpob_config.txt
Normal file
File diff suppressed because it is too large
Load diff
19765
scripts/ml/log_rpob_orig.txt
Normal file
19765
scripts/ml/log_rpob_orig.txt
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue