finalised categorical and lineage col classifications
This commit is contained in:
parent
c37780350e
commit
084c280f16
2 changed files with 94 additions and 61 deletions
|
@ -6,64 +6,104 @@ Created on Wed May 25 02:01:19 2022
|
|||
@author: tanu
|
||||
"""
|
||||
# TODO
|
||||
categorical_cols = ['ss_class', 'wt_prop_water', 'mut_prop_water', 'wt_prop_polarity',
|
||||
'mut_prop_polarity', 'wt_calcprop', 'mut_calcprop']
|
||||
# categorical_cols = ['ss_class'
|
||||
# , 'wt_prop_water'
|
||||
# , 'mut_prop_water'
|
||||
# , 'wt_prop_polarity'
|
||||
# , 'mut_prop_polarity'
|
||||
# , 'wt_calcprop'
|
||||
# , 'mut_calcprop']
|
||||
|
||||
foo['water_prop_change'] = foo['wt_prop_water'] + str('_to_') + foo['mut_prop_water']
|
||||
foo['water_prop_change'].value_counts()
|
||||
my_df['water_change'] = my_df['wt_prop_water'] + str('_to_') + my_df['mut_prop_water']
|
||||
my_df['water_change'].value_counts()
|
||||
water_prop_changeD = {
|
||||
'hydrophobic_to_neutral' : ''
|
||||
'hydrophobic_to_neutral' : 'change'
|
||||
, 'hydrophobic_to_hydrophobic' : 'no_change'
|
||||
, 'neutral_to_neutral' : 'no_change'
|
||||
, 'neutral_to_hydrophobic' : ''
|
||||
, 'hydrophobic_to_hydrophilic' : ''
|
||||
, 'neutral_to_hydrophilic' : ''
|
||||
, 'hydrophilic_to_neutral' : ''
|
||||
, 'hydrophilic_to_hydrophobic' : ''
|
||||
, 'neutral_to_hydrophobic' : 'change'
|
||||
, 'hydrophobic_to_hydrophilic' : 'change'
|
||||
, 'neutral_to_hydrophilic' : 'change'
|
||||
, 'hydrophilic_to_neutral' : 'change'
|
||||
, 'hydrophilic_to_hydrophobic' : 'change'
|
||||
, 'hydrophilic_to_hydrophilic' : 'no_change'
|
||||
}
|
||||
|
||||
foo['polarity_prop_change'] = foo['wt_prop_polarity'] + str('_to_') + foo['mut_prop_polarity']
|
||||
foo['polarity_prop_change'].value_counts()
|
||||
my_df['water_change'] = my_df['water_change'].map(water_prop_changeD)
|
||||
my_df['water_change'].value_counts()
|
||||
|
||||
#%%
|
||||
my_df['polarity_change'] = my_df['wt_prop_polarity'] + str('_to_') + my_df['mut_prop_polarity']
|
||||
my_df['polarity_change'].value_counts()
|
||||
# add a no change category
|
||||
|
||||
polarity_prop_changeD = {
|
||||
'non-polar_to_non-polar' : 'no_change'
|
||||
, 'non-polar_to_neutral' : ''
|
||||
, 'neutral_to_non-polar' : ''
|
||||
, 'neutral_to_neutral' : ''
|
||||
, 'non-polar_to_basic' : ''
|
||||
, 'acidic_to_neutral' : ''
|
||||
, 'basic_to_neutral' : ''
|
||||
, 'non-polar_to_acidic' : ''
|
||||
, 'neutral_to_basic' : ''
|
||||
, 'acidic_to_non-polar' : ''
|
||||
, 'basic_to_non-polar' : ''
|
||||
, 'neutral_to_acidic' : ''
|
||||
, 'non-polar_to_neutral' : 'change'
|
||||
, 'neutral_to_non-polar' : 'change'
|
||||
, 'neutral_to_neutral' : 'no_change'
|
||||
, 'non-polar_to_basic' : 'change'
|
||||
, 'acidic_to_neutral' : 'change'
|
||||
, 'basic_to_neutral' : 'change'
|
||||
, 'non-polar_to_acidic' : 'change'
|
||||
, 'neutral_to_basic' : 'change'
|
||||
, 'acidic_to_non-polar' : 'change'
|
||||
, 'basic_to_non-polar' : 'change'
|
||||
, 'neutral_to_acidic' : 'change'
|
||||
, 'acidic_to_acidic' : 'no_change'
|
||||
, 'basic_to_acidic' : ''
|
||||
, 'basic_to_acidic' : 'change'
|
||||
, 'basic_to_basic' : 'no_change'
|
||||
, 'acidic_to_basic' : ''}
|
||||
, 'acidic_to_basic' : 'change'}
|
||||
|
||||
my_df['polarity_change'] = my_df['polarity_change'].map(polarity_prop_changeD)
|
||||
my_df['polarity_change'].value_counts()
|
||||
|
||||
foo['calc_prop_change'] = foo['wt_calcprop'] + str('_to_') + foo['mut_calcprop']
|
||||
foo['calc_prop_change'].value_counts()
|
||||
#%%
|
||||
my_df['electrostatics_change'] = my_df['wt_calcprop'] + str('_to_') + my_df['mut_calcprop']
|
||||
my_df['electrostatics_change'].value_counts()
|
||||
|
||||
calc_prop_changeD = {
|
||||
'non-polar_to_non-polar' : 'no_change'
|
||||
, 'non-polar_to_polar' : ''
|
||||
, 'polar_to_non-polar' : ''
|
||||
, 'non-polar_to_pos' : ''
|
||||
, 'neg_to_non-polar' : ''
|
||||
, 'non-polar_to_neg' : ''
|
||||
, 'pos_to_polar' : ''
|
||||
, 'pos_to_non-polar' : ''
|
||||
, 'non-polar_to_polar' : 'change'
|
||||
, 'polar_to_non-polar' : 'change'
|
||||
, 'non-polar_to_pos' : 'change'
|
||||
, 'neg_to_non-polar' : 'change'
|
||||
, 'non-polar_to_neg' : 'change'
|
||||
, 'pos_to_polar' : 'change'
|
||||
, 'pos_to_non-polar' : 'change'
|
||||
, 'polar_to_polar' : 'no_change'
|
||||
, 'neg_to_neg' : 'no_change'
|
||||
, 'polar_to_neg' : ''
|
||||
, 'pos_to_neg' : ''
|
||||
, 'pos_to_pos' : ''
|
||||
, 'polar_to_pos' : ''
|
||||
, 'neg_to_polar' : ''
|
||||
, 'neg_to_pos' : ''
|
||||
, 'polar_to_neg' : 'change'
|
||||
, 'pos_to_neg' : 'change'
|
||||
, 'pos_to_pos' : 'no_change'
|
||||
, 'polar_to_pos' : 'change'
|
||||
, 'neg_to_polar' : 'change'
|
||||
, 'neg_to_pos' : 'change'
|
||||
}
|
||||
|
||||
my_df['electrostatics_change'] = my_df['electrostatics_change'].map(calc_prop_changeD)
|
||||
my_df['electrostatics_change'].value_counts()
|
||||
|
||||
#%%
|
||||
#https://stackoverflow.com/questions/47181187/finding-string-over-multiple-columns-in-pandas
|
||||
detect_change = 'change'
|
||||
|
||||
# if detect_change in my_df['water_change'] | my_df['polarity_change'] | my_df['electrostatics_change']:
|
||||
# print('\nChange detected')
|
||||
|
||||
check = ['mutationinformation', 'wild_type', 'water_change', 'polarity_change', 'electrostatics_change']
|
||||
check_prop_cols = ['water_change', 'polarity_change', 'electrostatics_change']
|
||||
foo = my_df[check]
|
||||
|
||||
foo['new'] = (foo.values == detect_change).any(1).astype(int)
|
||||
#foo['new2'] = foo[check_prop_cols].applymap(lambda x: detect_change in x).any(1).astype(int) # loose (substring) match: 'change' is also found in 'no_change', so always 1
|
||||
foo['new3'] = (foo[check_prop_cols].values == detect_change).any(1).astype(int)
|
||||
|
||||
all(foo['new'] == foo['new3'])
|
||||
#%%lineage
|
||||
lineage_colnames = ['lineage', 'lineage_list_all', 'lineage_count_all', 'lineage_count_unique', 'lineage_list_unique', 'lineage_multimode']
|
||||
bar = my_df[lineage_colnames]
|
||||
|
||||
|
||||
tot_lineage_u = 8
|
||||
bar['lineage_proportion'] = bar['lineage_count_unique']/bar['lineage_count_all']
|
||||
bar['dist_lineage_proportion'] = bar['lineage_count_unique']/tot_lineage_u
|
29
pnca_config.py
Normal file → Executable file
29
pnca_config.py
Normal file → Executable file
|
@ -5,29 +5,22 @@ Created on Sat May 28 05:25:30 2022
|
|||
|
||||
@author: tanu
|
||||
"""
|
||||
import os, sys
|
||||
|
||||
def MyGlobalVars():
|
||||
global gene
|
||||
global drug
|
||||
global homedir
|
||||
import os
|
||||
|
||||
gene = 'pncA'
|
||||
drug = 'pyrazinamide'
|
||||
total_mtblineage_u = 8
|
||||
|
||||
|
||||
homedir = os.path.expanduser("~")
|
||||
os.chdir( homedir + '/git/ML_AI_training/')
|
||||
|
||||
MyGlobalVars()
|
||||
from UQ_ML_data import *
|
||||
setvars(gene,drug)
|
||||
from UQ_ML_data import *
|
||||
|
||||
os.chdir(homedir + "/git/ML_AI_training/")
|
||||
|
||||
# my function
|
||||
# from YC run_all_ML: run locally
|
||||
from UQ_MultModelsCl import MultModelsCl
|
||||
from UQ_pnca_ML.py import *
|
||||
# from YC run_all_ML
|
||||
|
||||
# YC_resD2 = run_all_ML(input_pd=X, target_label=y, blind_test_input_df=X_bts, blind_test_target=y_bts, preprocess = True, var_type = 'mixed')
|
||||
|
||||
# CVResultsDF = YC_resD2['CrossValResultsDF']
|
||||
# CVResultsDF.sort_values(by=['matthew'], ascending=False, inplace=True)
|
||||
# BTSResultsDF = YC_resD2['BlindTestResultsDF']
|
||||
# BTSResultsDF.sort_values(by=['matthew'], ascending=False, inplace=True)
|
||||
|
||||
print('TESTING cmd:', Counter(y))
|
Loading…
Add table
Add a link
Reference in a new issue