added OR impute code in UQ_pnca_ML
This commit is contained in:
parent
1da87ba177
commit
77fc14e19d
1 changed files with 33 additions and 6 deletions
|
@ -14,6 +14,7 @@ import pprint as pp
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from sklearn import linear_model
|
from sklearn import linear_model
|
||||||
from sklearn import datasets
|
from sklearn import datasets
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
from sklearn.linear_model import LogisticRegression, LinearRegression
|
from sklearn.linear_model import LogisticRegression, LinearRegression
|
||||||
from sklearn.naive_bayes import BernoulliNB
|
from sklearn.naive_bayes import BernoulliNB
|
||||||
|
@ -62,8 +63,8 @@ from sklearn.model_selection import StratifiedKFold
|
||||||
from sklearn.pipeline import Pipeline
|
from sklearn.pipeline import Pipeline
|
||||||
from sklearn.pipeline import make_pipeline
|
from sklearn.pipeline import make_pipeline
|
||||||
|
|
||||||
#from sklearn.feature_selection import RFE
|
from sklearn.feature_selection import RFE
|
||||||
#from sklearn.feature_selection import RFECV
|
from sklearn.feature_selection import RFECV
|
||||||
import itertools
|
import itertools
|
||||||
#import seaborn as sns
|
#import seaborn as sns
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
@ -87,11 +88,12 @@ from imblearn.over_sampling import SMOTENC
|
||||||
from imblearn.under_sampling import EditedNearestNeighbours
|
from imblearn.under_sampling import EditedNearestNeighbours
|
||||||
from imblearn.under_sampling import RepeatedEditedNearestNeighbours
|
from imblearn.under_sampling import RepeatedEditedNearestNeighbours
|
||||||
|
|
||||||
|
|
||||||
from sklearn.model_selection import GridSearchCV
|
from sklearn.model_selection import GridSearchCV
|
||||||
from sklearn.base import BaseEstimator
|
from sklearn.base import BaseEstimator
|
||||||
import json
|
import json
|
||||||
|
from sklearn.impute import KNNImputer as KNN
|
||||||
|
|
||||||
|
# My functions and globals
|
||||||
scoring_fn = ({'accuracy' : make_scorer(accuracy_score)
|
scoring_fn = ({'accuracy' : make_scorer(accuracy_score)
|
||||||
, 'fscore' : make_scorer(f1_score)
|
, 'fscore' : make_scorer(f1_score)
|
||||||
, 'mcc' : make_scorer(matthews_corrcoef)
|
, 'mcc' : make_scorer(matthews_corrcoef)
|
||||||
|
@ -124,8 +126,8 @@ from loopity_loop import MultClassPipeSKFLoop
|
||||||
#from MultClassPipe3 import MultClassPipeSKFCV
|
#from MultClassPipe3 import MultClassPipeSKFCV
|
||||||
from UQ_MultClassPipe4 import MultClassPipeSKFCV
|
from UQ_MultClassPipe4 import MultClassPipeSKFCV
|
||||||
|
|
||||||
gene = 'pncA'
|
#gene = 'pncA'
|
||||||
drug = 'pyrazinamide'
|
#drug = 'pyrazinamide'
|
||||||
|
|
||||||
#gene = 'katG'
|
#gene = 'katG'
|
||||||
#drug = 'isoniazid'
|
#drug = 'isoniazid'
|
||||||
|
@ -167,7 +169,32 @@ cat_type = ['object', 'bool']
|
||||||
|
|
||||||
# -- CHECK script -- imports.py
|
# -- CHECK script -- imports.py
|
||||||
#%%============================================================================
|
#%%============================================================================
|
||||||
#%% IMPUTE values for OR
|
#%% IMPUTE values for OR [check script for exploration: UQ_or_imputer]
|
||||||
|
#or_cols = ['or_mychisq', 'log10_or_mychisq', 'or_fisher']
|
||||||
|
sel_cols = ['mutationinformation', 'or_mychisq', 'log10_or_mychisq']
|
||||||
|
or_cols = ['or_mychisq', 'log10_or_mychisq']
|
||||||
|
|
||||||
|
print("count of NULL values before imputation\n")
|
||||||
|
my_df[or_cols].isnull().sum()  # NOTE(review): bare expression; the counts are only echoed in an interactive console -- wrap in print() if the script should report them
|
||||||
|
|
||||||
|
my_dfI = pd.DataFrame(index = my_df['mutationinformation'] )  # NOTE(review): this empty frame is reassigned by the KNN-imputed frame a few lines below before being used
|
||||||
|
|
||||||
|
|
||||||
|
my_dfI = pd.DataFrame(KNN(n_neighbors= 5, weights="uniform").fit_transform(my_df[or_cols])
|
||||||
|
, index = my_df['mutationinformation']
|
||||||
|
, columns = or_cols )
|
||||||
|
my_dfI.columns = ['or_rawI', 'logorI']
|
||||||
|
my_dfI.columns
|
||||||
|
my_dfI = my_dfI.reset_index(drop = False) # drop=False keeps the old index ('mutationinformation') as a regular column, which the merge below joins on
|
||||||
|
my_dfI.head()
|
||||||
|
|
||||||
|
# merge with original on 'mutationinformation'; 'index_bm' saves the original index so it can be restored after the merge
|
||||||
|
my_df['index_bm'] = my_df.index
|
||||||
|
mydf_imputed = pd.merge(my_df  # NOTE(review): assumes 'mutationinformation' values are unique; repeated keys would multiply rows in the merge -- TODO confirm
|
||||||
|
, my_dfI
|
||||||
|
, on = 'mutationinformation')
|
||||||
|
mydf_imputed = mydf_imputed.set_index(['index_bm'])
|
||||||
|
|
||||||
|
|
||||||
#%% Combine mmCSM_lig Data
|
#%% Combine mmCSM_lig Data
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue