saving data extraction updated script
This commit is contained in:
parent
e419d320ac
commit
5429b8fed7
1 changed files with 54 additions and 7 deletions
|
@ -93,13 +93,13 @@ if not datadir:
|
|||
datadir = homedir + '/' + 'git/Data'
|
||||
|
||||
if not indir:
|
||||
indir = datadir + '/' + drug + '/input_v2'
|
||||
indir = datadir + '/' + drug + '/input'
|
||||
|
||||
if not outdir:
|
||||
outdir = datadir + '/' + drug + '/output_v2'
|
||||
outdir = datadir + '/' + drug + '/output'
|
||||
|
||||
if make_dirs:
|
||||
print('make_dirs is turned on, creating data dir:', datadir)
|
||||
print('make_dirs is turned on, creating data dir (unless it already exists):', datadir)
|
||||
try:
|
||||
os.makedirs(datadir, exist_ok = True)
|
||||
print("Directory '%s' created successfully" %datadir)
|
||||
|
@ -1411,7 +1411,7 @@ gene_LF3['drtype_numeric'] = gene_LF3['drtype'].map(drtype_map)
|
|||
gene_LF3['drtype'].value_counts()
|
||||
gene_LF3['drtype_numeric'].value_counts()
|
||||
|
||||
#%% Multimode: drtype
|
||||
# Multimode: drtype
|
||||
#=============================
|
||||
# Recalculation: Revised drtype
|
||||
# max(multimode)
|
||||
|
@ -1520,7 +1520,7 @@ gene_LF3['dst_multimode'].value_counts()
|
|||
# Now get the max from multimode
|
||||
#gene_LF3['dst_mode'] = gene_LF3.groupby('mutationinformation')['dst_noNA'].max() # this somehow is not right!
|
||||
#gene_LF3['dst_noNA'] = gene_LF3['dst_multimode'].apply(lambda x: np.nanmax(x))
|
||||
gene_LF3['dst_mode'] = gene_LF3['dst_multimode'].apply(lambda x: np.nanmax(x))
|
||||
gene_LF3['dst_mode'] = gene_LF3['dst_multimode'].apply(lambda x: np.nanmax(x)) #ML
|
||||
|
||||
# sanity checks
|
||||
#gene_LF3['dst_noNA'].equals(gene_LF3['dst_mode'])
|
||||
|
@ -1969,6 +1969,53 @@ if dr_muts.isin(other_muts).sum() & other_muts.isin(dr_muts).sum() > 0:
|
|||
, '\n============================================================='
|
||||
, '\nPost resolving ambiguity\n'
|
||||
, ambig_muts_rev_df['mutation_info_REV'].value_counts())
|
||||
|
||||
# Summary report: class counts, totals and proportions for the columns used
# by the ML workflows. Everything is emitted through a single print() call so
# the report appears as one contiguous section in the log.
# Reads `gene_LF4` and `drug`, both defined earlier in the script.
# Fixes relative to the previous version:
#   * the opening '#' banner used the invalid escape '\#' (printed a literal
#     backslash and triggers an invalid-escape warning); it is now '\n###...'
#     to match the closing banner.
#   * the 'drtye_mode' label typo is corrected to 'drtype_mode', the name of
#     the column actually being summarised.
print('\n============================================================='
      , '\n============================================================='
      , '\n###############################\n'
      , '\nNumbers for ML workflows...'
      , '\n###############################\n'

      # Original drug column (named by the value of `drug`)
      , '\ncolumn name [drug, old]:', drug, '\n'
      , gene_LF4[drug].value_counts()
      , '\nTotal drug samples[old]:', gene_LF4[drug].value_counts().sum()
      , '\nPercentages:\n'
      , gene_LF4[drug].value_counts(normalize = True)
      , '\n-------------------------------------------------------------'

      # Revised drug column: dst_mode
      , '\ncolumn name [drug, revised]: dst_mode\n'
      , gene_LF4['dst_mode'].value_counts()
      , '\nTotal drug samples[revised]:', gene_LF4['dst_mode'].value_counts().sum()
      , '\nPercentages:\n'
      , gene_LF4['dst_mode'].value_counts(normalize = True)
      , '\n-------------------------------------------------------------'

      # drtype_mode column
      , '\n-------------------------------------------------------------'
      , '\ncolumn name: drtype_mode\n'
      , gene_LF4['drtype_mode'].value_counts()
      , '\nTotal drtype_mode:', gene_LF4['drtype_mode'].value_counts().sum()
      , '\nPercentages:\n'
      , gene_LF4['drtype_mode'].value_counts(normalize = True)
      , '\n-------------------------------------------------------------'

      # Original mutation_info column (kept as mutation_info_orig)
      , '\n-------------------------------------------------------------'
      , '\ncolumn name [dm_om, old]: mutation_info_orig\n'
      , gene_LF4['mutation_info_orig'].value_counts()
      , '\nTotal mutation_info_orig:', gene_LF4['mutation_info_orig'].value_counts().sum()
      , '\nPercentages:\n'
      , gene_LF4['mutation_info_orig'].value_counts(normalize = True)
      , '\n-------------------------------------------------------------'

      # Revised mutation_info column
      , '\n-------------------------------------------------------------'
      , '\ncolumn name [dm_om, revised]: mutation_info\n'
      , gene_LF4['mutation_info'].value_counts()
      , '\nTotal mutation_info:', gene_LF4['mutation_info'].value_counts().sum()
      , '\nPercentages:\n'
      , gene_LF4['mutation_info'].value_counts(normalize = True)
      , '\n-------------------------------------------------------------'

      , '\n============================================================='
      )
|
||||
#=======================================================================
|
||||
print(u'\u2698' * 50,
|
||||
'\nEnd of script: Data extraction and writing files'
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue