From 95a73efdd2894b72a5839b5c065ebd064cc13aa9 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 14 Apr 2022 19:43:14 +0100 Subject: [PATCH] saving work with sections reflecting activities --- scripts/data_extraction_v2.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/data_extraction_v2.py b/scripts/data_extraction_v2.py index 16212e3..b21fa0d 100644 --- a/scripts/data_extraction_v2.py +++ b/scripts/data_extraction_v2.py @@ -1602,14 +1602,6 @@ gene_LF3.head() gene_LF3['drtype_max'] = gene_LF3.groupby(['mutationinformation'])['drtype_numeric'].max() gene_LF3.head() -#%% Reset index: original indices -#gene_LF3 = gene_LF3.reset_index() -gene_LF3.index -gene_LF3['mutationinformation'] = gene_LF3.index -gene_LF3 = gene_LF3.set_index(['index_orig']) - -gene_LF3[['mutationinformation']] -gene_LF3.index #%% Revised counts gene_LF3['dst_mode'].value_counts() gene_LF3[drug].value_counts() @@ -1688,7 +1680,6 @@ foo2['lineage_corrupt_ucount'] foo2.index foo2 = foo2.set_index(['mutationinformation']) - # now merge foo.index foo.index.nunique() @@ -1699,6 +1690,7 @@ foo_copy['lineage_ucount'] = foo_copy['lineage'] foo_copy.loc[foo2.index, 'lineage_ucount'] = foo2['lineage_corrupt_ucount'] #%%FIXME: do regex for lineage for meta data else the ; messes it up +# MOVE THIS TO THE RELEVANT section #-------------------------- # lineage multimode mode #-------------------------- @@ -1741,8 +1733,7 @@ c2 = foo2[foo2.loc[:, 'MUT'].isin(['A102P'])] c2['lineage_numeric'].value_counts() - -#%% Lineage counts (including the ones containing multiple entries) +#%% Lineage counts (including the ones containing multiple entries)[[<<< INCOMPLETE, TB finished]] # Get information about how many distinct lineages each mutation comes from gene_LF3['lineage'].value_counts() @@ -1781,4 +1772,13 @@ check1 = gene_LF3[['mutationinformation', 'lineage', 'lineage_ucount']] check2 = check1[check1.loc[:, 'mutationinformation'].isin(['H57D'])] check2.value_counts() -#%% \ No newline at end of file +#%% Reset index: original indices [WAS above section Revised counts] +#gene_LF3 = gene_LF3.reset_index() +gene_LF3.index +gene_LF3['mutationinformation'] = gene_LF3.index +gene_LF3 = gene_LF3.set_index(['index_orig']) + +gene_LF3[['mutationinformation']] +gene_LF3.index +#%% ADD summary results +#%% final output file with selected columns \ No newline at end of file