Added run_7030.py, which runs from the command line for all gene targets and sampling methods and outputs a single CSV

This commit is contained in:
Tanushree Tunstall 2022-06-21 20:37:53 +01:00
parent 5b0ccdfec4
commit bc12dbd7c2
5 changed files with 749 additions and 229 deletions

View file

@ -30,9 +30,9 @@ os.chdir( homedir + '/git/LSHTM_analysis/scripts/ml/')
#==================
# Import data
#==================
from ml_data_dissected import *
from ml_data_fg import *
setvars(gene,drug)
from ml_data_dissected import *
from ml_data_fg import *
# from YC run_all_ML: run locally
#from UQ_yc_RunAllClfs import run_all_ML
@ -60,7 +60,7 @@ outFile = outdir_ml + gene.lower() + '_baseline_FG.csv'
#==================
# other vars
#==================
tts_split_name = 'original'
tts_split = 'original'
resampling = 'none'
###############################################################################
@ -177,7 +177,7 @@ else:
baseline_EV['feature_group'] = feature_gp_nameEV
baseline_EV['resampling'] = resampling
baseline_EV['tts_split'] = tts_split_name
baseline_EV['tts_split'] = tts_split
baseline_EV['n_features'] = n_featuresEV
###############################################################################
#================
@ -221,7 +221,7 @@ else:
baseline_GN['feature_group'] = feature_gp_nameGN
baseline_GN['resampling'] = resampling
baseline_GN['tts_split'] = tts_split_name
baseline_GN['tts_split'] = tts_split
baseline_GN['n_features'] = n_featuresGN
###############################################################################
#all_featuresN = X_evolFN + X_structural_FN + X_genomicFN
@ -268,7 +268,7 @@ else:
baseline_STR['feature_group'] = feature_gp_nameSTR
baseline_STR['resampling'] = resampling
baseline_STR['tts_split'] = tts_split_name
baseline_STR['tts_split'] = tts_split
baseline_STR['n_features'] = n_featuresSTR
##############################################################################
#================
@ -312,7 +312,7 @@ else:
baseline_STB['feature_group'] = feature_gp_nameSTB
baseline_STB['resampling'] = resampling
baseline_STB['tts_split'] = tts_split_name
baseline_STB['tts_split'] = tts_split
baseline_STB['n_features'] = n_featuresSTB
###############################################################################
#================
@ -356,7 +356,7 @@ else:
baseline_AFF['feature_group'] = feature_gp_nameAFF
baseline_AFF['resampling'] = resampling
baseline_AFF['tts_split'] = tts_split_name
baseline_AFF['tts_split'] = tts_split
baseline_AFF['n_features'] = n_featuresAFF
###############################################################################
#================
@ -400,7 +400,7 @@ else:
baseline_RES['feature_group'] = feature_gp_nameRES
baseline_RES['resampling'] = resampling
baseline_RES['tts_split'] = tts_split_name
baseline_RES['tts_split'] = tts_split
baseline_RES['n_features'] = n_featuresRES
###############################################################################
#================
@ -446,7 +446,7 @@ else:
baseline_RNAA['feature_group'] = feature_gp_nameRNAA
baseline_RNAA['resampling'] = resampling
baseline_RNAA['tts_split'] = tts_split_name
baseline_RNAA['tts_split'] = tts_split
baseline_RNAA['n_features'] = n_featuresRNAA
###############################################################################
#================
@ -492,7 +492,7 @@ else:
baseline_SNAA['feature_group'] = feature_gp_nameSNAA
baseline_SNAA['resampling'] = resampling
baseline_SNAA['tts_split'] = tts_split_name
baseline_SNAA['tts_split'] = tts_split
baseline_SNAA['n_features'] = n_featuresSNAA
###############################################################################
#%% COMBINING all FG dfs
@ -525,7 +525,7 @@ if len(common_cols) == dfs_ncols :
combined_FG_baseline = pd.concat([df[common_cols] for df in dfs_combine], ignore_index=True)
fgs = combined_FG_baseline[['feature_group', 'n_features']]
fgs = fgs.drop_duplicates()
print('\nConcatenating dfs with feature groups after ML analysis (sampling type):'
print('\nConcatenating dfs with feature groups after ML analysis:'
, '\nNo. of dfs combining:', len(dfs_combine)
, '\nSampling type:', resampling
, '\nThe feature groups are:'