Added odds-ratio (OR) imputation code in UQ_pnca_ML

This commit is contained in:
Tanushree Tunstall 2022-05-27 08:49:24 +01:00
parent 1da87ba177
commit 77fc14e19d

View file

@ -14,6 +14,7 @@ import pprint as pp
from copy import deepcopy from copy import deepcopy
from sklearn import linear_model from sklearn import linear_model
from sklearn import datasets from sklearn import datasets
from collections import Counter
from sklearn.linear_model import LogisticRegression, LinearRegression from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.naive_bayes import BernoulliNB from sklearn.naive_bayes import BernoulliNB
@ -62,8 +63,8 @@ from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline from sklearn.pipeline import make_pipeline
#from sklearn.feature_selection import RFE from sklearn.feature_selection import RFE
#from sklearn.feature_selection import RFECV from sklearn.feature_selection import RFECV
import itertools import itertools
#import seaborn as sns #import seaborn as sns
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
@ -87,11 +88,12 @@ from imblearn.over_sampling import SMOTENC
from imblearn.under_sampling import EditedNearestNeighbours from imblearn.under_sampling import EditedNearestNeighbours
from imblearn.under_sampling import RepeatedEditedNearestNeighbours from imblearn.under_sampling import RepeatedEditedNearestNeighbours
from sklearn.model_selection import GridSearchCV from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator from sklearn.base import BaseEstimator
import json import json
from sklearn.impute import KNNImputer as KNN
# My functions and globals
scoring_fn = ({'accuracy' : make_scorer(accuracy_score) scoring_fn = ({'accuracy' : make_scorer(accuracy_score)
, 'fscore' : make_scorer(f1_score) , 'fscore' : make_scorer(f1_score)
, 'mcc' : make_scorer(matthews_corrcoef) , 'mcc' : make_scorer(matthews_corrcoef)
@ -124,8 +126,8 @@ from loopity_loop import MultClassPipeSKFLoop
#from MultClassPipe3 import MultClassPipeSKFCV #from MultClassPipe3 import MultClassPipeSKFCV
from UQ_MultClassPipe4 import MultClassPipeSKFCV from UQ_MultClassPipe4 import MultClassPipeSKFCV
gene = 'pncA' #gene = 'pncA'
drug = 'pyrazinamide' #drug = 'pyrazinamide'
#gene = 'katG' #gene = 'katG'
#drug = 'isoniazid' #drug = 'isoniazid'
@ -167,7 +169,32 @@ cat_type = ['object', 'bool']
# -- CHECK script -- imports.py # -- CHECK script -- imports.py
#%%============================================================================ #%%============================================================================
#%% IMPUTE values for OR #%% IMPUTE values for OR [check script for exploration: UQ_or_imputer]
#or_cols = ['or_mychisq', 'log10_or_mychisq', 'or_fisher']
sel_cols = ['mutationinformation', 'or_mychisq', 'log10_or_mychisq']
or_cols = ['or_mychisq', 'log10_or_mychisq']

# Report how many values are missing in each OR column before imputing.
# The counts are printed explicitly: a bare expression shows nothing when the
# script (or a whole cell) is run non-interactively.
print("count of NULL values before imputation\n")
print(my_df[or_cols].isnull().sum())

# KNN-impute the missing values in the OR columns. fit_transform returns a
# plain array with one row per input row, in input order, so it is re-labelled
# with the mutation identifiers and given new column names so that the imputed
# columns sit alongside the raw ones instead of replacing them.
# NOTE(review): the two columns are imputed as separate features, so an imputed
# 'logorI' need not equal log10 of the imputed 'or_rawI' -- confirm intended.
or_imputed = KNN(n_neighbors = 5, weights = "uniform").fit_transform(my_df[or_cols])
my_dfI = pd.DataFrame(or_imputed
                      , index = my_df['mutationinformation']
                      , columns = ['or_rawI', 'logorI'])

# drop = False moves the 'mutationinformation' index back into a regular
# column (and leaves a default integer index behind).
my_dfI = my_dfI.reset_index(drop = False)

# Keep the original row labels so they can be restored as the index below.
my_df['index_bm'] = my_df.index

# Attach the imputed columns by row position rather than by a key merge on
# 'mutationinformation': the imputer output is already row-aligned with my_df,
# and a key merge would multiply rows whenever a mutation appears more than
# once. For unique keys the result is identical to the merge.
mydf_imputed = pd.concat([my_df.reset_index(drop = True)
                          , my_dfI[['or_rawI', 'logorI']]]
                         , axis = 1)
mydf_imputed = mydf_imputed.set_index(['index_bm'])
#%% Combine mmCSM_lig Data #%% Combine mmCSM_lig Data