tidy code and saving work for the day

This commit is contained in:
Tanushree Tunstall 2020-03-26 17:58:39 +00:00
parent 69e2567ffc
commit afd6ca8881
6 changed files with 307 additions and 284 deletions

View file

@ -56,9 +56,8 @@ indir = datadir + '/' + drug + '/' + 'input'
in_filename = '3pl1.fasta.txt'
infile = indir + '/' + in_filename
print('Input filename:', in_filename
, '\nInput path:', indir)
print('======================================================================')
, '\nInput path:', indir
, '\n============================================================')
#=======
# output
@ -67,9 +66,8 @@ outdir = datadir + '/' + drug + '/' + 'output'
out_filename = gene.lower() + '_kd.csv'
outfile = outdir + '/' + out_filename
print('Output filename:', out_filename
, '\nOutput path:', outdir)
print('======================================================================')
, '\nOutput path:', outdir
, '\n=============================================================')
#%% end of variable assignment for input and output files
#=======================================================================
#%%specify window size for hydropathy profile computation
@ -96,7 +94,7 @@ print('Sequence Length:', num_residues)
print('kd_values Length:',len(kd_values))
print('Window Length:', my_window)
print('Window Offset:', offset)
print('======================================================================')
print('=================================================================')
print('Checking:len(kd values) is as expected for the given window size & offset...')
expected_length = num_residues - (my_window - offset)
if len(kd_values) == expected_length:
@ -104,9 +102,8 @@ if len(kd_values) == expected_length:
else:
print('FAIL: length mismatch'
,'\nExpected length:', expected_length
,'\nActual length:', len(kd_values))
print('======================================================================')
,'\nActual length:', len(kd_values)
, '\n=========================================================')
#%% make 2 dfs; 1) aa sequence and 2) kd_values. Then reset index for each df
# which will allow easy merging of the two dfs.
@ -138,10 +135,11 @@ kd_df = pd.concat([dfSeq, dfVals], axis = 1)
#============================
kd_df = kd_df.rename_axis('position')
kd_df.head
print('======================================================================')
print('=================================================================')
print('position col i.e. index should be numeric')
print('======================================================================')
# Announce the index dtype check that follows, then print a separator rule.
# The first string literal must be closed before the next argument.
print('position col i.e. index should be numeric'
      , '\n===============================================================')
if kd_df.index.dtype == 'int64':
print('PASS: position col is numeric'
, '\ndtype is:', kd_df.index.dtype)
@ -150,19 +148,20 @@ else:
, '\nConverting to numeric')
# Index.astype returns a new Index rather than converting in place, so the
# result must be assigned back for the conversion to take effect.
kd_df.index = kd_df.index.astype('int64')
print('Checking dtype for after conversion:\n'
,'\ndtype is:', kd_df.index.dtype)
print('======================================================================')
, '\ndtype is:', kd_df.index.dtype
, '\n=========================================================')
#%% write file
print('Writing file:', out_filename
, '\nFilename:', out_filename
, '\nPath:', outdir)
, '\nPath:', outdir
, '\n=============================================================')
kd_df.to_csv(outfile, header = True, index = True)
print('Finished writing:', out_filename
, '\nNo. of rows:', len(kd_df)
, '\nNo. of cols:', len(kd_df.columns))
, '\nNo. of cols:', len(kd_df.columns)
, '\n=============================================================')
#%% plot
# http://www.dalkescientific.com/writings/NBN/plotting.html
@ -176,7 +175,6 @@ xlabel('Residue Number')
ylabel('Hydrophobicity')
title('K&D Hydrophobicity for ' + id)
show()
print('======================================================================')
#%% end of script
#=======================================================================