made var names more meaningful
This commit is contained in:
parent
e2b997badf
commit
6160d943f5
2 changed files with 85 additions and 8 deletions
46
my_data6.py
46
my_data6.py
|
@@ -28,8 +28,8 @@ os.chdir(homedir + "/git/ML_AI_training/")
|
|||
# my function
|
||||
from MultClassPipe import MultClassPipeline
|
||||
|
||||
#gene = 'pncA'
|
||||
#drug = 'pyrazinamide'
|
||||
gene = 'pncA'
|
||||
drug = 'pyrazinamide'
|
||||
|
||||
#==============
|
||||
# directories
|
||||
|
@@ -48,10 +48,10 @@ my_df = pd.read_csv(infile_ml1)
|
|||
my_df.dtypes
|
||||
my_df_cols = my_df.columns
|
||||
|
||||
geneL_basic = ['pnca']
|
||||
geneL_na = ['gid']
|
||||
geneL_basic = ['pnca']
|
||||
geneL_na = ['gid']
|
||||
geneL_na_ppi2 = ['rpob']
|
||||
geneL_ppi2 = ['alr', 'embb', 'katg']
|
||||
geneL_ppi2 = ['alr', 'embb', 'katg']
|
||||
#%% get cols
|
||||
mycols = my_df.columns
|
||||
|
||||
|
@@ -82,6 +82,17 @@ my_df[drtype_labels] = my_df['drtype'].map({'Sensitive' : 0
|
|||
# target3 = my_df['drtype']
|
||||
target3 = my_df[drtype_labels]
|
||||
|
||||
# target4
|
||||
drtype_labels2 = 'drtype_labels2'
|
||||
my_df[drtype_labels2] = my_df['drtype'].map({'Sensitive' : 0
|
||||
, 'Other' : 0
|
||||
, 'Pre-MDR' : 1
|
||||
, 'MDR' : 1
|
||||
, 'Pre-XDR' : 2
|
||||
, 'XDR' : 2})
|
||||
|
||||
target4 = my_df[drtype_labels2]
|
||||
|
||||
# sanity checks
|
||||
target1.value_counts()
|
||||
my_df['mutation_info_labels'].value_counts()
|
||||
|
@@ -91,6 +102,8 @@ my_df[drug_labels].value_counts()
|
|||
|
||||
target3.value_counts()
|
||||
my_df['drtype'].value_counts()
|
||||
target4.value_counts()
|
||||
my_df['drtype'].value_counts()
|
||||
|
||||
#%%
|
||||
# GET X
|
||||
|
@@ -147,9 +160,30 @@ X_vars6 = my_df[x_stability_cols + X_evolF]
|
|||
X_vars8 = my_df[X_strF + X_evolF]
|
||||
#X_vars9 = my_df[X_strF + X_genomicF]
|
||||
#X_vars10 = my_df[X_evolF + X_genomicF]
|
||||
X_vars11 = my_df[x_stability_cols + X_strF + X_evolF ]
|
||||
X_vars11 = my_df[x_stability_cols + X_strF + X_evolF]
|
||||
#X_vars12 = my_df[x_stability_cols + X_strF + X_evolF + X_genomicF]
|
||||
|
||||
numerical_features_names = x_stability_cols + X_strF + X_evolF
|
||||
|
||||
# separate ones for foldx?
|
||||
categorical_features_names = ['ss_class'
|
||||
, 'wt_prop_water'
|
||||
# , 'lineage_labels' # misleading if using merged_df3
|
||||
, 'mut_prop_water'
|
||||
, 'wt_prop_polarity'
|
||||
, 'mut_prop_polarity'
|
||||
, 'wt_calcprop'
|
||||
, 'mut_calcprop'
|
||||
, 'active_aa_pos']
|
||||
|
||||
numerical_features_df = my_df[numerical_features_names]
|
||||
numerical_features_df.shape
|
||||
|
||||
categorical_features_df = my_df[categorical_features_names]
|
||||
categorical_features_df.shape
|
||||
|
||||
all_features_df = my_df[numerical_features_names + categorical_features_names]
|
||||
all_features_df.shape
|
||||
#%%
|
||||
X_vars1.shape[1]
|
||||
X_vars5.shape[1]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue