various debug, doc, and args

2020-05-25 14:27:25 +01:00 · 2020-05-25 14:27:25 +01:00 · b28d0afded
commit b28d0afded
parent 3a0ff9b35e
4 changed files with 77 additions and 40 deletions
--- a/mcsm/mcsm.py
+++ b/mcsm/mcsm.py
@ -135,7 +135,7 @@ def scrape_results(result_url):
        else:
            return web_result_raw
    else:
-        print('FAIL: Could not fetch results'
+        sys.exit('FAIL: Could not fetch results'
                , '\nCheck if url is valid')


@ -234,7 +234,7 @@ def format_mcsm_output(mcsm_outputcsv):
                , '\nDim of data:', mcsm_data.shape
                , '\n===============================================================')
    else:
-        print('FAIL (but not fatal): Duplicate mutations detected'
+        print('WARNING: Duplicate mutations detected'
                , '\nDim of df with duplicates:', mcsm_data.shape
                , 'Removing duplicate entries')
        mcsm_data = mcsm_data.drop_duplicates(['mutation_information'])
@ -252,14 +252,14 @@ def format_mcsm_output(mcsm_outputcsv):
    DUET_pos = c.get(key = 'duet_stability_change')
    # Assign category based on sign (>= 0: Stabilising, < 0: Destabilising). Note the British spelling.
    mcsm_data['duet_outcome'] = np.where(mcsm_data['duet_stability_change']>=0, 'Stabilising', 'Destabilising')
-    mcsm_data['duet_outcome'].value_counts()
-    if DUET_pos == mcsm_data['duet_outcome'].value_counts()['Stabilising']:
-        print('PASS: DUET outcome assigned correctly')
-    else:
-        print('FAIL: DUET outcome assigned incorrectly'
-            , '\nExpected no. of stabilising mutations:', DUET_pos
-            , '\nGot no. of stabilising mutations', mcsm_data['duet_outcome'].value_counts()['Stabilising']
-            , '\n===============================================================')
+    print('DUET Outcome:', mcsm_data['duet_outcome'].value_counts())
+    #if DUET_pos == mcsm_data['duet_outcome'].value_counts()['Stabilising']:
+    #    print('PASS: DUET outcome assigned correctly')
+    #else:
+    #    print('FAIL: DUET outcome assigned incorrectly'
+    #        , '\nExpected no. of stabilising mutations:', DUET_pos
+    #        , '\nGot no. of stabilising mutations', mcsm_data['duet_outcome'].value_counts()['Stabilising']
+    #        , '\n===============================================================')
 #%%===========================================================================
    #############
    # Extract numeric
@ -270,7 +270,7 @@ def format_mcsm_output(mcsm_outputcsv):
    mcsm_data['ligand_distance']
    print('extracting numeric part of col: ligand_distance')
    mcsm_data['ligand_distance'] = mcsm_data['ligand_distance'].str.extract('(\d+\.?\d*)')
-    mcsm_data['ligand_distance']
+    print('Ligand Distance:',mcsm_data['ligand_distance'])
 #%%===========================================================================
    #############
    # Create 2 columns:
@ -310,7 +310,7 @@ def format_mcsm_output(mcsm_outputcsv):
                , '\nNo. of predicted affinity changes:\n', british_spl
                , '\n===============================================================')
    else:
-        print('FAIL: spelling change unsucessfull'
+        sys.exit('FAIL: spelling change unsucessfull'
                , '\nExpected:\n', american_spl
                , '\nGot:\n', british_spl
                , '\n===============================================================')
@ -338,7 +338,7 @@ def format_mcsm_output(mcsm_outputcsv):
                , '\nchanged to numeric'
                , '\n===============================================================')
    else:
-        print('FAIL:dtype change to numeric for selected cols unsuccessful'
+        sys.exit('FAIL:dtype change to numeric for selected cols unsuccessful'
                , '\n===============================================================')
        print(mcsm_data.dtypes)
 #%%===========================================================================
@ -403,7 +403,7 @@ def format_mcsm_output(mcsm_outputcsv):
        print('PASS: dtypes for char cols:', char_cols, 'are indeed string'
                , '\n===============================================================')
    else:
-        print('FAIL:dtype change to numeric for selected cols unsuccessful'
+        sys.exit('FAIL:dtype change to numeric for selected cols unsuccessful'
                , '\n===============================================================')
    #mcsm_data['ligand_distance', 'ligand_affinity_change'].apply(is_numeric_dtype(mcsm_data['ligand_distance', 'ligand_affinity_change']))
    print(mcsm_data.dtypes)
@ -430,7 +430,7 @@ def format_mcsm_output(mcsm_outputcsv):
                , '\nformatted df shape:', mcsm_dataf.shape
                , '\n===============================================================')
    else: 
-        print('FAIL: something went wrong in formatting df'
+        sys.exit('FAIL: something went wrong in formatting df'
                , '\nLen of orig df:', dforig_len
                , '\nExpected number of cols to add:', expected_ncols_toadd
                , '\nExpected no. of cols:', expected_cols, '(', dforig_len, '+', expected_ncols_toadd, ')'