horrible lineage analysis hell

This commit is contained in:
Tanushree Tunstall 2022-06-28 21:51:02 +01:00
parent ce0f12382e
commit 478df927cc
10 changed files with 1669 additions and 101 deletions

View file

@@ -603,19 +603,20 @@ def getmldata(gene, drug
# training_df[drug].value_counts()
# training_df['dst_mode'].value_counts()
all_training_df = my_df_ml[all_featuresN]
#all_training_df = my_df_ml[all_featuresN]
# Getting the dst column as this will be required for tts_split()
if 'dst' in my_df_ml:
print('\ndst column exists')
if my_df_ml['dst'].equals(my_df_ml[drug]):
print('\nand this is identical to drug column:', drug)
all_featuresN2 = all_featuresN + ['dst', 'dst_mode']
all_training_df = my_df_ml[all_featuresN2]
print('\nAll feature names:', all_featuresN2)
####################################################################
print('\n#################################################################'
, '\nSUCCESS: Extacted training data for gene:', gene.lower()
, '\nDim of training_df:', all_training_df.shape)
if use_or:
print('\nThis includes Odds Ratio')
else:
print('\nThis EXCLUDES Odds Ratio'
, '\n###############################################################')
#==========================================================================
if write_maskfile:
print('\nPASS: and now writing file to check masked columns and values:', outFile_mask_ml )
@@ -630,4 +631,15 @@ def getmldata(gene, drug
else:
print('\nPASS: But NOT writing processed file')
#==========================================================================
print('\n#################################################################'
, '\nSUCCESS: Extacted training data for gene:', gene.lower()
, '\nDim of training_df:', all_training_df.shape)
if use_or:
print('\nThis includes Odds Ratio'
, '\n###########################################################')
else:
print('\nThis EXCLUDES Odds Ratio'
, '\n############################################################')
return(all_training_df)