Added run_7030.py, which runs as a command-line script for all gene targets and sampling methods and outputs a single CSV
This commit is contained in:
parent
5b0ccdfec4
commit
bc12dbd7c2
5 changed files with 749 additions and 229 deletions
|
@ -30,9 +30,9 @@ os.chdir( homedir + '/git/LSHTM_analysis/scripts/ml/')
|
|||
#==================
|
||||
# Import data
|
||||
#==================
|
||||
from ml_data_dissected import *
|
||||
from ml_data_fg import *
|
||||
setvars(gene,drug)
|
||||
from ml_data_dissected import *
|
||||
from ml_data_fg import *
|
||||
|
||||
# from YC run_all_ML: run locally
|
||||
#from UQ_yc_RunAllClfs import run_all_ML
|
||||
|
@ -60,7 +60,7 @@ outFile = outdir_ml + gene.lower() + '_baseline_FG.csv'
|
|||
#==================
|
||||
# other vars
|
||||
#==================
|
||||
tts_split_name = 'original'
|
||||
tts_split = 'original'
|
||||
resampling = 'none'
|
||||
|
||||
###############################################################################
|
||||
|
@ -177,7 +177,7 @@ else:
|
|||
|
||||
baseline_EV['feature_group'] = feature_gp_nameEV
|
||||
baseline_EV['resampling'] = resampling
|
||||
baseline_EV['tts_split'] = tts_split_name
|
||||
baseline_EV['tts_split'] = tts_split
|
||||
baseline_EV['n_features'] = n_featuresEV
|
||||
###############################################################################
|
||||
#================
|
||||
|
@ -221,7 +221,7 @@ else:
|
|||
|
||||
baseline_GN['feature_group'] = feature_gp_nameGN
|
||||
baseline_GN['resampling'] = resampling
|
||||
baseline_GN['tts_split'] = tts_split_name
|
||||
baseline_GN['tts_split'] = tts_split
|
||||
baseline_GN['n_features'] = n_featuresGN
|
||||
###############################################################################
|
||||
#all_featuresN = X_evolFN + X_structural_FN + X_genomicFN
|
||||
|
@ -268,7 +268,7 @@ else:
|
|||
|
||||
baseline_STR['feature_group'] = feature_gp_nameSTR
|
||||
baseline_STR['resampling'] = resampling
|
||||
baseline_STR['tts_split'] = tts_split_name
|
||||
baseline_STR['tts_split'] = tts_split
|
||||
baseline_STR['n_features'] = n_featuresSTR
|
||||
##############################################################################
|
||||
#================
|
||||
|
@ -312,7 +312,7 @@ else:
|
|||
|
||||
baseline_STB['feature_group'] = feature_gp_nameSTB
|
||||
baseline_STB['resampling'] = resampling
|
||||
baseline_STB['tts_split'] = tts_split_name
|
||||
baseline_STB['tts_split'] = tts_split
|
||||
baseline_STB['n_features'] = n_featuresSTB
|
||||
###############################################################################
|
||||
#================
|
||||
|
@ -356,7 +356,7 @@ else:
|
|||
|
||||
baseline_AFF['feature_group'] = feature_gp_nameAFF
|
||||
baseline_AFF['resampling'] = resampling
|
||||
baseline_AFF['tts_split'] = tts_split_name
|
||||
baseline_AFF['tts_split'] = tts_split
|
||||
baseline_AFF['n_features'] = n_featuresAFF
|
||||
###############################################################################
|
||||
#================
|
||||
|
@ -400,7 +400,7 @@ else:
|
|||
|
||||
baseline_RES['feature_group'] = feature_gp_nameRES
|
||||
baseline_RES['resampling'] = resampling
|
||||
baseline_RES['tts_split'] = tts_split_name
|
||||
baseline_RES['tts_split'] = tts_split
|
||||
baseline_RES['n_features'] = n_featuresRES
|
||||
###############################################################################
|
||||
#================
|
||||
|
@ -446,7 +446,7 @@ else:
|
|||
|
||||
baseline_RNAA['feature_group'] = feature_gp_nameRNAA
|
||||
baseline_RNAA['resampling'] = resampling
|
||||
baseline_RNAA['tts_split'] = tts_split_name
|
||||
baseline_RNAA['tts_split'] = tts_split
|
||||
baseline_RNAA['n_features'] = n_featuresRNAA
|
||||
###############################################################################
|
||||
#================
|
||||
|
@ -492,7 +492,7 @@ else:
|
|||
|
||||
baseline_SNAA['feature_group'] = feature_gp_nameSNAA
|
||||
baseline_SNAA['resampling'] = resampling
|
||||
baseline_SNAA['tts_split'] = tts_split_name
|
||||
baseline_SNAA['tts_split'] = tts_split
|
||||
baseline_SNAA['n_features'] = n_featuresSNAA
|
||||
###############################################################################
|
||||
#%% COMBINING all FG dfs
|
||||
|
@ -525,7 +525,7 @@ if len(common_cols) == dfs_ncols :
|
|||
combined_FG_baseline = pd.concat([df[common_cols] for df in dfs_combine], ignore_index=True)
|
||||
fgs = combined_FG_baseline[['feature_group', 'n_features']]
|
||||
fgs = fgs.drop_duplicates()
|
||||
print('\nConcatenating dfs with feature groups after ML analysis (sampling type):'
|
||||
print('\nConcatenating dfs with feature groups after ML analysis:'
|
||||
, '\nNo. of dfs combining:', len(dfs_combine)
|
||||
, '\nSampling type:', resampling
|
||||
, '\nThe feature groups are:'
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue