Fixed white space problem in mcsm input that affected merging
This commit is contained in:
parent
5a2084ba11
commit
8dc2fa7326
6 changed files with 108 additions and 98 deletions
13
mcsm/mcsm.py
13
mcsm/mcsm.py
|
@ -183,7 +183,11 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
#############
|
||||
# Read file
|
||||
#############
|
||||
mcsm_data = pd.read_csv(mcsm_outputcsv, sep = ',')
|
||||
mcsm_data_raw = pd.read_csv(mcsm_outputcsv, sep = ',')
|
||||
|
||||
# strip white space from both ends in all columns
|
||||
mcsm_data = mcsm_data_raw.apply(lambda x: x.str.strip() if x.dtype == 'object' else x)
|
||||
|
||||
dforig_shape = mcsm_data.shape
|
||||
print('dimensions of input file:', dforig_shape)
|
||||
|
||||
|
@ -396,8 +400,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
print('removing white space within created column: wild_chain_pos')
|
||||
mcsm_data['wild_chain_pos'] = mcsm_data['wild_chain_pos'].str.replace(' ', '')
|
||||
print('Correctly formatted column: wild_chain_pos\n', mcsm_data['wild_chain_pos'].head()
|
||||
, '\n=========================================================')
|
||||
|
||||
, '\n=========================================================')
|
||||
#%%=====================================================================
|
||||
#############
|
||||
# ensuring correct dtype in non-numeric cols
|
||||
|
@ -426,8 +429,8 @@ def format_mcsm_output(mcsm_outputcsv):
|
|||
mcsm_data_fs = mcsm_data_f.sort_values(by = ['position'])
|
||||
print('sorted df:\n', mcsm_data_fs.head())
|
||||
|
||||
# Remove white space from column names before output: stray spaces caused problems when merging
|
||||
mcsm_data_fs.columns = mcsm_data_fs.columns.str.replace(' ', '')
|
||||
# Ensuring column names are lowercase before output
|
||||
mcsm_data_fs.columns = mcsm_data_fs.columns.str.lower()
|
||||
#%%=====================================================================
|
||||
#############
|
||||
# sanity check before writing file
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue