changed outcols in dssp and kd outputs
This commit is contained in:
parent
5bab99c15f
commit
c0bac6fd7b
3 changed files with 14 additions and 9 deletions
|
@ -60,16 +60,19 @@ print('Input filename:', in_filename
|
||||||
# specify PDB chain
|
# specify PDB chain
|
||||||
my_chain = 'A'
|
my_chain = 'A'
|
||||||
|
|
||||||
|
print('======================================================================')
|
||||||
|
|
||||||
#=======
|
#=======
|
||||||
# output
|
# output
|
||||||
#=======
|
#=======
|
||||||
outdir = datadir + '/' + drug + '/' + 'output'
|
outdir = datadir + '/' + drug + '/' + 'output'
|
||||||
out_filename = gene.lower() + '_dssp_df'
|
out_filename = gene.lower() + '_dssp.csv'
|
||||||
outfile = outdir + '/' + out_filename
|
outfile = outdir + '/' + out_filename
|
||||||
print('Output filename:', out_filename
|
print('Output filename:', out_filename
|
||||||
, '\nOutput path:', outdir
|
, '\nOutput path:', outdir
|
||||||
,'\nOutfile: ', outfile)
|
,'\nOutfile: ', outfile)
|
||||||
|
|
||||||
|
print('======================================================================')
|
||||||
#%% end of variable assignment for input and output files
|
#%% end of variable assignment for input and output files
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
# Process dssp output and extract into df
|
# Process dssp output and extract into df
|
||||||
|
@ -87,7 +90,7 @@ pp.pprint(dssp_df)
|
||||||
# Rename column (amino acid) as 'wild_type' and (site) as 'position'
|
# Rename column (amino acid) as 'wild_type' and (site) as 'position'
|
||||||
# to be the same names as used in the file required for merging later.
|
# to be the same names as used in the file required for merging later.
|
||||||
dssp_df.columns
|
dssp_df.columns
|
||||||
dssp_df.rename(columns = {'site':'position', 'amino_acid':'wild_type'}, inplace = True)
|
dssp_df.rename(columns = {'site':'position', 'amino_acid':'wild_type_dssp'}, inplace = True)
|
||||||
dssp_df.columns
|
dssp_df.columns
|
||||||
|
|
||||||
#%% Write output csv file
|
#%% Write output csv file
|
||||||
|
|
|
@ -59,6 +59,7 @@ print('Input filename:', in_filename
|
||||||
, '\nInput path:', indir)
|
, '\nInput path:', indir)
|
||||||
|
|
||||||
print('======================================================================')
|
print('======================================================================')
|
||||||
|
|
||||||
#=======
|
#=======
|
||||||
# output
|
# output
|
||||||
#=======
|
#=======
|
||||||
|
@ -111,7 +112,7 @@ print('======================================================================')
|
||||||
|
|
||||||
# df1: df of aa seq with index reset to start from 1 (reflective of the actual aa position in a sequence)
|
# df1: df of aa seq with index reset to start from 1 (reflective of the actual aa position in a sequence)
|
||||||
# Name column of wt as 'wild_type' to be the same name used in the file required for merging later.
|
# Name column of wt as 'wild_type' to be the same name used in the file required for merging later.
|
||||||
dfSeq = pd.DataFrame({'wild_type':list(sequence)})
|
dfSeq = pd.DataFrame({'wild_type_kd':list(sequence)})
|
||||||
dfSeq.index = np.arange(1, len(dfSeq) + 1) # python is not inclusive
|
dfSeq.index = np.arange(1, len(dfSeq) + 1) # python is not inclusive
|
||||||
|
|
||||||
# df2: df of kd_values with index reset to start from offset + 1 and subsequent matched length of the kd_values
|
# df2: df of kd_values with index reset to start from offset + 1 and subsequent matched length of the kd_values
|
||||||
|
@ -122,9 +123,9 @@ dfVals.index = np.arange(offset + 1, len(dfVals) + 1 + offset)
|
||||||
max(dfVals['kd_values'])
|
max(dfVals['kd_values'])
|
||||||
min(dfVals['kd_values'])
|
min(dfVals['kd_values'])
|
||||||
|
|
||||||
#============
|
#===================
|
||||||
# merging dfs
|
# concatenating dfs
|
||||||
#============
|
#===================
|
||||||
# Merge the two on index (as these are now reflective of the aa position numbers): df1 and df2
|
# Merge the two on index (as these are now reflective of the aa position numbers): df1 and df2
|
||||||
# This will introduce NaN where there are missing values. In our case this will be 2 (first and last ones)
|
# This will introduce NaN where there are missing values. In our case this will be 2 (first and last ones)
|
||||||
# Conveniently, the last position in this case is not part of the struc, so not much loss of info
|
# Conveniently, the last position in this case is not part of the struc, so not much loss of info
|
||||||
|
@ -133,7 +134,7 @@ min(dfVals['kd_values'])
|
||||||
kd_df = pd.concat([dfSeq, dfVals], axis = 1)
|
kd_df = pd.concat([dfSeq, dfVals], axis = 1)
|
||||||
|
|
||||||
#============================
|
#============================
|
||||||
# Renaming index to position
|
# renaming index to position
|
||||||
#============================
|
#============================
|
||||||
kd_df = kd_df.rename_axis('position')
|
kd_df = kd_df.rename_axis('position')
|
||||||
kd_df.head
|
kd_df.head
|
||||||
|
|
|
@ -50,6 +50,7 @@ infile = indir + '/' + in_filename
|
||||||
print('Input filename:', in_filename
|
print('Input filename:', in_filename
|
||||||
, '\nInput path:', indir)
|
, '\nInput path:', indir)
|
||||||
|
|
||||||
|
print('======================================================================')
|
||||||
#=======
|
#=======
|
||||||
# output
|
# output
|
||||||
#=======
|
#=======
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue