Added FS to MultClfs.py and modified the data for the different splits for consistency
This commit is contained in:
parent
edb7aebd6a
commit
e2bc384155
12 changed files with 1585 additions and 994 deletions
|
@ -37,7 +37,7 @@ def setvars(gene,drug):
|
|||
import argparse
|
||||
import re
|
||||
#%% GLOBALS
|
||||
tts_split = "70/30"
|
||||
tts_split = "70_30"
|
||||
|
||||
rs = {'random_state': 42}
|
||||
njobs = {'n_jobs': 10}
|
||||
|
@ -727,7 +727,7 @@ def setvars(gene,drug):
|
|||
#------------------------------
|
||||
oversample = RandomOverSampler(sampling_strategy='minority')
|
||||
X_ros, y_ros = oversample.fit_resample(X, y)
|
||||
print('Simple Random OverSampling\n', Counter(y_ros))
|
||||
print('\nSimple Random OverSampling\n', Counter(y_ros))
|
||||
print(X_ros.shape)
|
||||
|
||||
#------------------------------
|
||||
|
@ -736,7 +736,7 @@ def setvars(gene,drug):
|
|||
#------------------------------
|
||||
undersample = RandomUnderSampler(sampling_strategy='majority')
|
||||
X_rus, y_rus = undersample.fit_resample(X, y)
|
||||
print('Simple Random UnderSampling\n', Counter(y_rus))
|
||||
print('\nSimple Random UnderSampling\n', Counter(y_rus))
|
||||
print(X_rus.shape)
|
||||
|
||||
#------------------------------
|
||||
|
@ -747,7 +747,7 @@ def setvars(gene,drug):
|
|||
X_ros, y_ros = oversample.fit_resample(X, y)
|
||||
undersample = RandomUnderSampler(sampling_strategy='majority')
|
||||
X_rouC, y_rouC = undersample.fit_resample(X_ros, y_ros)
|
||||
print('Simple Combined Over and UnderSampling\n', Counter(y_rouC))
|
||||
print('\nSimple Combined Over and UnderSampling\n', Counter(y_rouC))
|
||||
print(X_rouC.shape)
|
||||
|
||||
#------------------------------
|
||||
|
@ -767,7 +767,7 @@ def setvars(gene,drug):
|
|||
categorical_colind = X.columns.get_indexer(list(categorical_ix))
|
||||
categorical_colind
|
||||
|
||||
k_sm = 5 # 5 is deafult
|
||||
k_sm = 5 # 5 is default
|
||||
sm_nc = SMOTENC(categorical_features=categorical_colind, k_neighbors = k_sm, **rs, **njobs)
|
||||
X_smnc, y_smnc = sm_nc.fit_resample(X, y)
|
||||
print('\nSMOTE_NC OverSampling\n', Counter(y_smnc))
|
||||
|
@ -797,5 +797,10 @@ def setvars(gene,drug):
|
|||
# print(X_enn.shape)
|
||||
# print('\nSMOTE Over+Under Sampling combined\n', Counter(y_enn))
|
||||
|
||||
###############################################################################
|
||||
###########################################################################
|
||||
# TODO: Find over- and undersampling methods JUST for categorical data
|
||||
###########################################################################
|
||||
|
||||
print('\n#################################################################'
|
||||
, '\nDim of X for gene:', gene.lower(), '\n', X.shape
|
||||
, '\n###############################################################')
|
Loading…
Add table
Add a link
Reference in a new issue