saving work
This commit is contained in:
parent
1695e90b42
commit
e55906d2c7
3 changed files with 11 additions and 8 deletions
|
@ -139,9 +139,9 @@ def CMLogoSkf(cm_input_df
|
||||||
# else:
|
# else:
|
||||||
# file_suffix = file_suffix
|
# file_suffix = file_suffix
|
||||||
|
|
||||||
outFile = output_dir + str(n_tr_genes+1) + "genes_" + tts_split_type + '_' + file_suffix + ".csv"
|
#outFile = output_dir + str(n_tr_genes+1) + "genes_" + tts_split_type + '_' + file_suffix + ".csv"
|
||||||
|
|
||||||
print(outFile)
|
#print(outFile)
|
||||||
|
|
||||||
#-------
|
#-------
|
||||||
# training
|
# training
|
||||||
|
@ -175,6 +175,7 @@ def CMLogoSkf(cm_input_df
|
||||||
, '\nTEST Target dim:' , cm_bts_y.shape)
|
, '\nTEST Target dim:' , cm_bts_y.shape)
|
||||||
|
|
||||||
print("Running Multiple models on LOGO with SKF")
|
print("Running Multiple models on LOGO with SKF")
|
||||||
|
|
||||||
#%%:Running Multiple models on LOGO with SKF
|
#%%:Running Multiple models on LOGO with SKF
|
||||||
# cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X # two functions were identical except for name
|
# cD3_v2 = MultModelsCl_logo_skf(input_df = cm_X # two functions were identical except for name
|
||||||
cD3_v2 = MultModelsCl(input_df = cm_X
|
cD3_v2 = MultModelsCl(input_df = cm_X
|
||||||
|
@ -203,11 +204,11 @@ def CMLogoSkf(cm_input_df
|
||||||
#===============
|
#===============
|
||||||
# Complete Data
|
# Complete Data
|
||||||
#===============
|
#===============
|
||||||
CMLogoSkf(cm_input_df = combined_df,file_suffix = "complete")
|
#CMLogoSkf(cm_input_df = combined_df,file_suffix = "complete")
|
||||||
CMLogoSkf(cm_input_df = combined_df, std_gene_omit=['alr'], file_suffix = "complete")
|
#CMLogoSkf(cm_input_df = combined_df, std_gene_omit=['alr'], file_suffix = "complete")
|
||||||
|
|
||||||
#===============
|
#===============
|
||||||
# Actual Data
|
# Actual Data
|
||||||
#===============
|
#===============
|
||||||
CMLogoSkf(cm_input_df = combined_df_actual, file_suffix = "actual")
|
#CMLogoSkf(cm_input_df = combined_df_actual, file_suffix = "actual")
|
||||||
CMLogoSkf(cm_input_df = combined_df_actual, std_gene_omit=['alr'], file_suffix = "actual")
|
#CMLogoSkf(cm_input_df = combined_df_actual, std_gene_omit=['alr'], file_suffix = "actual")
|
||||||
|
|
|
@ -269,7 +269,9 @@ def split_tts(ml_input_data
|
||||||
|
|
||||||
#k_sm = 5 # default
|
#k_sm = 5 # default
|
||||||
k_sm = k_smote
|
k_sm = k_smote
|
||||||
sm_nc = SMOTENC(categorical_features=categorical_colind, k_neighbors = k_sm, **rs, **njobs)
|
sm_nc = SMOTENC(categorical_features=categorical_colind
|
||||||
|
, k_neighbors = k_sm
|
||||||
|
, **rs, **njobs)
|
||||||
X_smnc, y_smnc = sm_nc.fit_resample(X, y)
|
X_smnc, y_smnc = sm_nc.fit_resample(X, y)
|
||||||
print('\nSMOTE_NC OverSampling\n', Counter(y_smnc))
|
print('\nSMOTE_NC OverSampling\n', Counter(y_smnc))
|
||||||
print(X_smnc.shape)
|
print(X_smnc.shape)
|
||||||
|
|
|
@ -54,6 +54,7 @@ expected_ncols
|
||||||
if len(common_cols) == expected_ncols:
|
if len(common_cols) == expected_ncols:
|
||||||
print('\nProceeding to combine based on common cols (n):', len(common_cols))
|
print('\nProceeding to combine based on common cols (n):', len(common_cols))
|
||||||
combined_df = pd.concat([df[common_cols] for df in dfs_combine], ignore_index = False)
|
combined_df = pd.concat([df[common_cols] for df in dfs_combine], ignore_index = False)
|
||||||
|
|
||||||
print('\nSuccessfully combined dfs:'
|
print('\nSuccessfully combined dfs:'
|
||||||
, '\nNo. of dfs combined:', len(dfs_combine)
|
, '\nNo. of dfs combined:', len(dfs_combine)
|
||||||
, '\nDim of combined df:', combined_df.shape)
|
, '\nDim of combined df:', combined_df.shape)
|
||||||
|
@ -76,7 +77,6 @@ cm_input_df5 = combined_df[~combined_df['gene_name'].isin(omit_gene_alr)]
|
||||||
combined_df['dst'].isna().sum()
|
combined_df['dst'].isna().sum()
|
||||||
combined_df['dst'].value_counts().sum()
|
combined_df['dst'].value_counts().sum()
|
||||||
|
|
||||||
|
|
||||||
combined_df_actual = combined_df[~combined_df['dst'].isna()]
|
combined_df_actual = combined_df[~combined_df['dst'].isna()]
|
||||||
|
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue