Working on the dissected model; testing different feature groups
This commit is contained in:
parent
135efcee41
commit
e68a153883
4 changed files with 270 additions and 161 deletions
|
@ -47,60 +47,78 @@ outdir_ml = outdir + 'ml/uq_v1/dissected'
|
|||
print('\nOutput directory:', outdir_ml)
|
||||
|
||||
#%%###########################################################################
|
||||
print('\nSanity checks:'
|
||||
, '\nTotal input features:', len(X.columns)
|
||||
, '\n'
|
||||
, '\nTraining data size:', X.shape
|
||||
, '\nTest data size:', X_bts.shape
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (training data):', Counter(y)
|
||||
, '\nTarget features ratio (training data:', yc1_ratio
|
||||
, '\n'
|
||||
, '\nTarget feature numbers (test data):', Counter(y_bts)
|
||||
, '\nTarget features ratio (test data):', yc2_ratio
|
||||
|
||||
, '\n\n#####################################################################\n')
|
||||
|
||||
print('\n================================================================\n')
|
||||
|
||||
print('Strucutral features (n):'
|
||||
, len(X_ssFN)
|
||||
, '\nThese are:'
|
||||
, '\nCommon stablity features:', X_stabilityN
|
||||
, '\nFoldX columns:', X_foldX_cols
|
||||
, '\nOther struc columns:', X_str
|
||||
, '\n================================================================\n')
|
||||
, '\n\nTotal no. of evolutionary features:' , len(X_evolFN)
|
||||
|
||||
, '\n\nTotal no. of stability features:' , len(X_stability_FN)
|
||||
, '\n--------Common stabilty cols:' , len(X_common_stability_Fnum)
|
||||
, '\n--------Foldx cols:' , len(X_foldX_Fnum)
|
||||
|
||||
, '\n\nTotal no. of affinity features:' , len(X_affinityFN)
|
||||
, '\n--------Common affinity cols:' , len(common_affinity_Fnum)
|
||||
, '\n--------Gene specific affinity cols:' , len(gene_affinity_colnames)
|
||||
|
||||
, '\n\nTotal no. of residue level features:', len(X_resprop_FN)
|
||||
, '\n--------AA index cols:' , len(X_aaindex_Fnum)
|
||||
, '\n--------Residue Prop cols:' , len(X_str_Fnum)
|
||||
, '\n--------AA change Prop cols:' , len(X_aap_Fcat)
|
||||
|
||||
, '\n\nTotal no. of genomic features:' , len(X_genomicFN)
|
||||
, '\n--------MAF+OR cols:' , len(X_gn_mafor_Fnum)
|
||||
, '\n--------Lineage cols:' , len(X_gn_linegae_Fnum)
|
||||
, '\n--------Other cols:' , len(X_gn_Fcat)
|
||||
|
||||
# print('AAindex features (n):'
|
||||
# , len(X_aaindexFN)
|
||||
# , '\nThese are:\n'
|
||||
# , X_aaindexFN
|
||||
# , '\n================================================================\n')
|
||||
X_structural_FN = X_stability_FN + X_affinityFN + X_resprop_FN
|
||||
X_aaindex_Fnum + X_str_Fnum + X_aap_Fcat
|
||||
all_featuresN = X_evolFN + X_structural_FN + X_genomicFN
|
||||
|
||||
###############################################################################
|
||||
|
||||
print('Evolutionary features (n):'
|
||||
, len(X_evolFN)
|
||||
, '\nThese are:\n'
|
||||
, X_evolFN
|
||||
, '\n================================================================\n')
|
||||
print('\n================================================================'
|
||||
|
||||
, '\nTotal Evolutionary features (n):' , len(X_evolFN)
|
||||
, '\n--------------Evol. feature colnames:', X_evolFN
|
||||
|
||||
, '\n================================================================'
|
||||
|
||||
, '\n\nTotal structural features (n):', len(X_structural_FN)
|
||||
|
||||
, '\n--------Stability ncols:' , len(X_stability_FN)
|
||||
, '\n--------------Common stability colnames:' , X_common_stability_Fnum
|
||||
, '\n--------------Foldx colnames:' , X_foldX_Fnum
|
||||
|
||||
, '\n--------Affinity ncols:' , len(X_affinityFN)
|
||||
, '\n--------------Common affinity colnames:' , common_affinity_Fnum
|
||||
, '\n--------------Gene specific affinity colnames:', gene_affinity_colnames
|
||||
|
||||
print('Genomic features (n):'
|
||||
, len(X_genomicFN)
|
||||
, '\nThese are:\n'
|
||||
, X_genomic_mafor, '\n'
|
||||
, X_genomic_linegae
|
||||
, '\n================================================================\n')
|
||||
, '\n--------Residue prop ncols:' , len(X_resprop_FN)
|
||||
, '\n--------------Residue Prop cols:' , X_str_Fnum
|
||||
, '\n--------------AA change Prop cols:' , X_aap_Fcat
|
||||
, '\n--------------AA index cols:' , X_aaindex_Fnum
|
||||
|
||||
, '\n================================================================'
|
||||
|
||||
, '\n\nTotal Genomic features (n):' , len(X_genomicFN)
|
||||
, '\n--------MAF+OR cols:' , len(X_gn_mafor_Fnum)
|
||||
, '\n--------------MAF+OR colnames:' , X_gn_mafor_Fnum
|
||||
|
||||
print('Categorical features (n):'
|
||||
, len(categorical_FN)
|
||||
, '\nThese are:\n'
|
||||
, categorical_FN
|
||||
, '\n================================================================\n')
|
||||
, '\n--------Lineage cols:' , len(X_gn_linegae_Fnum)
|
||||
, '\n--------------Lineage cols:' , X_gn_linegae_Fnum
|
||||
|
||||
#if ( len(X.columns) == len(X_ssFN) + len(X_aaindexFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
if ( len(X.columns) == len(X_ssFN) + len(X_evolFN) + len(X_genomicFN) + len(categorical_FN) ):
|
||||
, '\n--------Other cols:' , len(X_gn_Fcat)
|
||||
, '\n--------------Other cols:' , X_gn_Fcat
|
||||
|
||||
, '\n================================================================')
|
||||
|
||||
# Sanity check: the sizes of the evolutionary, structural and genomic feature
# lists must add up to the number of columns in X.
expected_n_features = len(X_evolFN) + len(X_structural_FN) + len(X_genomicFN)
if len(X.columns) == expected_n_features:
    print('\nPass: No. of features match')
else:
    print('\nFail: Count of feature mismatch'
          , '\nExpected:', expected_n_features
          , '\nGot:', len(X.columns))
    # A bare sys.exit() terminates with status 0, i.e. "success". Exit with a
    # non-zero status so a failed check is visible to whatever launched the
    # script (shell, scheduler, pipeline).
    sys.exit(1)
|
||||
|
||||
print('\n#####################################################################\n')
|
||||
|
||||
|
@ -108,7 +126,7 @@ print('\n#####################################################################\n
|
|||
# #==================
|
||||
# # Baseline models
|
||||
# #==================
|
||||
# mm_skf_scoresD = MultModelsCl(input_df = X
|
||||
# mm_skf_scoresD = MultModelsCl_dissected(input_df = X
|
||||
# , target = y
|
||||
# , var_type = 'mixed'
|
||||
# , skf_cv = skf_cv
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue