saving work

This commit is contained in:
Tanushree Tunstall 2022-07-29 00:12:43 +01:00
parent 1695e90b42
commit e55906d2c7
3 changed files with 11 additions and 8 deletions

View file

@@ -269,7 +269,9 @@ def split_tts(ml_input_data
#k_sm = 5 # default
k_sm = k_smote
sm_nc = SMOTENC(categorical_features=categorical_colind, k_neighbors = k_sm, **rs, **njobs)
sm_nc = SMOTENC(categorical_features=categorical_colind
, k_neighbors = k_sm
, **rs, **njobs)
X_smnc, y_smnc = sm_nc.fit_resample(X, y)
print('\nSMOTE_NC OverSampling\n', Counter(y_smnc))
print(X_smnc.shape)

View file

@@ -54,6 +54,7 @@ expected_ncols
if len(common_cols) == expected_ncols:
print('\nProceeding to combine based on common cols (n):', len(common_cols))
combined_df = pd.concat([df[common_cols] for df in dfs_combine], ignore_index = False)
print('\nSuccessfully combined dfs:'
, '\nNo. of dfs combined:', len(dfs_combine)
, '\nDim of combined df:', combined_df.shape)
@@ -76,7 +77,6 @@ cm_input_df5 = combined_df[~combined_df['gene_name'].isin(omit_gene_alr)]
combined_df['dst'].isna().sum()
combined_df['dst'].value_counts().sum()
combined_df_actual = combined_df[~combined_df['dst'].isna()]
##############################################################################