added OR impute code in UQ_pnca_ML
This commit is contained in:
parent
1da87ba177
commit
77fc14e19d
1 changed files with 33 additions and 6 deletions
|
@ -14,6 +14,7 @@ import pprint as pp
|
|||
from copy import deepcopy
|
||||
from sklearn import linear_model
|
||||
from sklearn import datasets
|
||||
from collections import Counter
|
||||
|
||||
from sklearn.linear_model import LogisticRegression, LinearRegression
|
||||
from sklearn.naive_bayes import BernoulliNB
|
||||
|
@ -62,8 +63,8 @@ from sklearn.model_selection import StratifiedKFold
|
|||
from sklearn.pipeline import Pipeline
|
||||
from sklearn.pipeline import make_pipeline
|
||||
|
||||
#from sklearn.feature_selection import RFE
|
||||
#from sklearn.feature_selection import RFECV
|
||||
from sklearn.feature_selection import RFE
|
||||
from sklearn.feature_selection import RFECV
|
||||
import itertools
|
||||
#import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
|
@ -87,11 +88,12 @@ from imblearn.over_sampling import SMOTENC
|
|||
from imblearn.under_sampling import EditedNearestNeighbours
|
||||
from imblearn.under_sampling import RepeatedEditedNearestNeighbours
|
||||
|
||||
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.base import BaseEstimator
|
||||
import json
|
||||
from sklearn.impute import KNNImputer as KNN
|
||||
|
||||
# My functions and globals
|
||||
scoring_fn = ({'accuracy' : make_scorer(accuracy_score)
|
||||
, 'fscore' : make_scorer(f1_score)
|
||||
, 'mcc' : make_scorer(matthews_corrcoef)
|
||||
|
@ -124,8 +126,8 @@ from loopity_loop import MultClassPipeSKFLoop
|
|||
#from MultClassPipe3 import MultClassPipeSKFCV
|
||||
from UQ_MultClassPipe4 import MultClassPipeSKFCV
|
||||
|
||||
gene = 'pncA'
|
||||
drug = 'pyrazinamide'
|
||||
#gene = 'pncA'
|
||||
#drug = 'pyrazinamide'
|
||||
|
||||
#gene = 'katG'
|
||||
#drug = 'isoniazid'
|
||||
|
@ -167,7 +169,32 @@ cat_type = ['object', 'bool']
|
|||
|
||||
# -- CHECK script -- imports.py
|
||||
#%%============================================================================
|
||||
#%% IMPUTE values for OR
|
||||
#%% IMPUTE values for OR [check script for exploration: UQ_or_imputer]
|
||||
#or_cols = ['or_mychisq', 'log10_or_mychisq', 'or_fisher']
|
||||
# Columns involved in odds-ratio (OR) imputation.
# NOTE(review): sel_cols is not used in this section; it is kept in case code
# outside this section reads it - confirm before removing.
sel_cols = ['mutationinformation', 'or_mychisq', 'log10_or_mychisq']
or_cols = ['or_mychisq', 'log10_or_mychisq']

print("count of NULL values before imputation\n")
# Print explicitly: a bare expression produces no output when run as a script.
print(my_df[or_cols].isnull().sum())

# Fill the missing OR values with KNNImputer (imported as KNN), using
# 5 neighbours and uniform weights. fit_transform returns a plain array, so
# the result is wrapped in a frame indexed by mutation and given new column
# names that mark the values as imputed.
or_imputed = KNN(n_neighbors = 5, weights = "uniform").fit_transform(my_df[or_cols])
my_dfI = pd.DataFrame(or_imputed
, index = my_df['mutationinformation']
, columns = ['or_rawI', 'logorI'])

# drop = False moves the 'mutationinformation' index back into a regular
# column; that column is the key used by the merge below.
my_dfI = my_dfI.reset_index(drop = False)

# Keep the original row labels in a column so they can be restored after the
# merge (a column-on-column merge does not carry the left frame's index over).
my_df['index_bm'] = my_df.index

# Merge on the 'mutationinformation' column (not on the index).
# validate = 'one_to_one' makes pandas raise a MergeError if a mutation occurs
# more than once in either frame, instead of silently multiplying rows.
mydf_imputed = pd.merge(my_df
, my_dfI
, on = 'mutationinformation'
, validate = 'one_to_one')
mydf_imputed = mydf_imputed.set_index(['index_bm'])
|
||||
|
||||
|
||||
#%% Combine mmCSM_lig Data
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue