This commit is contained in:
Tanushree Tunstall 2022-09-03 12:28:21 +01:00
parent 889bea1e63
commit 78704dec5a
3 changed files with 10 additions and 720 deletions

View file

@ -80,7 +80,7 @@ homedir = os.path.expanduser("~")
# Add the ml_functions directory under the user's home to the module search
# path so that modules stored there can be imported by the statements below.
sys.path.append(homedir + '/git/LSHTM_analysis/scripts/ml/ml_functions')
# NOTE(review): bare expression; it has no effect when the file is run as a
# script. Presumably left over from inspecting the path interactively.
sys.path
###############################################################################
outdir = homedir + '/git/LSHTM_ML/output/combined/'
#outdir = homedir + '/git/LSHTM_ML/output/combined/'
#====================
# Import ML functions
@ -92,20 +92,20 @@ from MultClfs import *
# Stratified 10-fold cross-validation splitter. Shuffling is enabled with a
# fixed seed (random_state = 42) so the fold assignment is repeatable.
skf_cv = StratifiedKFold(n_splits = 10 , shuffle = True, random_state = 42)
#logo = LeaveOneGroupOut()
# logo = LeaveOneGroupOut()
########################################################################
# COMPLETE data: No tts_split
########################################################################
#%%
def CMLogoSkf(cm_input_df
def CombinedModelML(cm_input_df
, all_genes = ["embb", "katg", "rpob", "pnca", "gid", "alr"]
, bts_genes = ["embb", "katg", "rpob", "pnca", "gid"]
, cols_to_drop = ['dst', 'dst_mode', 'gene_name']
, target_var = 'dst_mode'
, gene_group = 'gene_name'
, std_gene_omit = []
, output_dir = outdir
, output_dir = "/tmp/"
, file_suffix = ""
):
@ -133,15 +133,10 @@ def CMLogoSkf(cm_input_df
print('\nDim of data:', cm_input_df.shape)
tts_split_type = "logo_skf_BT_" + bts_gene
# if len(file_suffix) > 0:
# file_suffix = '_' + file_suffix
# else:
# file_suffix = file_suffix
#outFile = output_dir + str(n_tr_genes+1) + "genes_" + tts_split_type + '_' + file_suffix + ".csv"
outFile = output_dir + str(n_tr_genes+1) + "genes_" + tts_split_type + '_' + file_suffix + ".csv"
#print(outFile)
print("XXXXXXXXXXXXXXXXXXXXXXXXXXX", outFile)
#-------
# training
@ -204,11 +199,11 @@ def CMLogoSkf(cm_input_df
#===============
# Complete Data
#===============
#CMLogoSkf(cm_input_df = combined_df,file_suffix = "complete")
#CMLogoSkf(cm_input_df = combined_df, std_gene_omit=['alr'], file_suffix = "complete")
#CombinedModelML(cm_input_df = combined_df, output_dir = homedir + '/git/LSHTM_ML/output/combined/', file_suffix = "complete")
#CombinedModelML(cm_input_df = combined_df, std_gene_omit=['alr'], file_suffix = "complete")
#===============
# Actual Data
#===============
#CMLogoSkf(cm_input_df = combined_df_actual, file_suffix = "actual")
#CMLogoSkf(cm_input_df = combined_df_actual, std_gene_omit=['alr'], file_suffix = "actual")
#CombinedModelML(cm_input_df = combined_df_actual, file_suffix = "actual")
#CombinedModelML(cm_input_df = combined_df_actual, std_gene_omit=['alr'], file_suffix = "actual")