various debug, doc, and args

This commit is contained in:
Tanushree Tunstall 2020-05-25 14:27:25 +01:00
parent 3a0ff9b35e
commit b28d0afded
4 changed files with 77 additions and 40 deletions

View file

@ -135,7 +135,7 @@ def scrape_results(result_url):
else:
return web_result_raw
else:
print('FAIL: Could not fetch results'
sys.exit('FAIL: Could not fetch results'
, '\nCheck if url is valid')
@ -234,7 +234,7 @@ def format_mcsm_output(mcsm_outputcsv):
, '\nDim of data:', mcsm_data.shape
, '\n===============================================================')
else:
print('FAIL (but not fatal): Duplicate mutations detected'
print('WARNING: Duplicate mutations detected'
, '\nDim of df with duplicates:', mcsm_data.shape
, 'Removing duplicate entries')
mcsm_data = mcsm_data.drop_duplicates(['mutation_information'])
@ -252,14 +252,14 @@ def format_mcsm_output(mcsm_outputcsv):
DUET_pos = c.get(key = 'duet_stability_change')
# Assign category based on sign (+ve: Stabilising, -ve: Destabilising). Mind the spelling: British spelling is used.
mcsm_data['duet_outcome'] = np.where(mcsm_data['duet_stability_change']>=0, 'Stabilising', 'Destabilising')
mcsm_data['duet_outcome'].value_counts()
if DUET_pos == mcsm_data['duet_outcome'].value_counts()['Stabilising']:
print('PASS: DUET outcome assigned correctly')
else:
print('FAIL: DUET outcome assigned incorrectly'
, '\nExpected no. of stabilising mutations:', DUET_pos
, '\nGot no. of stabilising mutations', mcsm_data['duet_outcome'].value_counts()['Stabilising']
, '\n===============================================================')
print('DUET Outcome:', mcsm_data['duet_outcome'].value_counts())
#if DUET_pos == mcsm_data['duet_outcome'].value_counts()['Stabilising']:
# print('PASS: DUET outcome assigned correctly')
#else:
# print('FAIL: DUET outcome assigned incorrectly'
# , '\nExpected no. of stabilising mutations:', DUET_pos
# , '\nGot no. of stabilising mutations', mcsm_data['duet_outcome'].value_counts()['Stabilising']
# , '\n===============================================================')
#%%===========================================================================
#############
# Extract numeric
@ -270,7 +270,7 @@ def format_mcsm_output(mcsm_outputcsv):
mcsm_data['ligand_distance']
print('extracting numeric part of col: ligand_distance')
mcsm_data['ligand_distance'] = mcsm_data['ligand_distance'].str.extract('(\d+\.?\d*)')
mcsm_data['ligand_distance']
print('Ligand Distance:',mcsm_data['ligand_distance'])
#%%===========================================================================
#############
# Create 2 columns:
@ -310,7 +310,7 @@ def format_mcsm_output(mcsm_outputcsv):
, '\nNo. of predicted affinity changes:\n', british_spl
, '\n===============================================================')
else:
print('FAIL: spelling change unsucessfull'
sys.exit('FAIL: spelling change unsucessfull'
, '\nExpected:\n', american_spl
, '\nGot:\n', british_spl
, '\n===============================================================')
@ -338,7 +338,7 @@ def format_mcsm_output(mcsm_outputcsv):
, '\nchanged to numeric'
, '\n===============================================================')
else:
print('FAIL:dtype change to numeric for selected cols unsuccessful'
sys.exit('FAIL:dtype change to numeric for selected cols unsuccessful'
, '\n===============================================================')
print(mcsm_data.dtypes)
#%%===========================================================================
@ -403,7 +403,7 @@ def format_mcsm_output(mcsm_outputcsv):
print('PASS: dtypes for char cols:', char_cols, 'are indeed string'
, '\n===============================================================')
else:
print('FAIL:dtype change to numeric for selected cols unsuccessful'
sys.exit('FAIL:dtype change to numeric for selected cols unsuccessful'
, '\n===============================================================')
#mcsm_data['ligand_distance', 'ligand_affinity_change'].apply(is_numeric_dtype(mcsm_data['ligand_distance', 'ligand_affinity_change']))
print(mcsm_data.dtypes)
@ -430,7 +430,7 @@ def format_mcsm_output(mcsm_outputcsv):
, '\nformatted df shape:', mcsm_dataf.shape
, '\n===============================================================')
else:
print('FAIL: something went wrong in formatting df'
sys.exit('FAIL: something went wrong in formatting df'
, '\nLen of orig df:', dforig_len
, '\nExpected number of cols to add:', expected_ncols_toadd
, '\nExpected no. of cols:', expected_cols, '(', dforig_len, '+', expected_ncols_toadd, ')'