Changed output column names (outcols) in dssp and kd outputs

2020-03-26 17:12:59 +00:00 · 2020-03-26 17:12:59 +00:00 · c0bac6fd7b
commit c0bac6fd7b
parent 5bab99c15f
3 changed files with 14 additions and 9 deletions
--- a/meta_data_analysis/dssp_df.py
+++ b/meta_data_analysis/dssp_df.py
@ -60,16 +60,19 @@ print('Input filename:', in_filename
 # specify PDB chain
 my_chain = 'A'

+print('======================================================================')
+
 #=======
 # output 
 #=======
 outdir = datadir + '/' + drug + '/' + 'output'
-out_filename = gene.lower() + '_dssp_df'
+out_filename = gene.lower() + '_dssp.csv'
 outfile =  outdir + '/' + out_filename
 print('Output filename:', out_filename
      , '\nOutput path:', outdir
      ,'\nOutfile: ', outfile)
      
+print('======================================================================')
 #%% end of variable assignment for input and output files
 #=======================================================================
 # Process dssp output and extract into df
@ -87,7 +90,7 @@ pp.pprint(dssp_df)
 # Rename column (amino acid) as 'wild_type' and (site} as 'position' 
 # to be the same names as used in the file required for merging later.
 dssp_df.columns
-dssp_df.rename(columns = {'site':'position', 'amino_acid':'wild_type'}, inplace = True)
+dssp_df.rename(columns = {'site':'position', 'amino_acid':'wild_type_dssp'}, inplace = True)
 dssp_df.columns

 #%% Write ouput csv file
--- a/meta_data_analysis/kd_df.py
+++ b/meta_data_analysis/kd_df.py
@ -59,6 +59,7 @@ print('Input filename:', in_filename
      , '\nInput path:', indir)

 print('======================================================================')
+
 #=======
 # output 
 #=======
@ -111,7 +112,7 @@ print('======================================================================')

 # df1: df of aa seq with index reset to start from 1 (reflective of the actual aa position in a sequence)
 # Name column of wt as 'wild_type' to be the same name used in the file required for merging later.
-dfSeq = pd.DataFrame({'wild_type':list(sequence)})
+dfSeq = pd.DataFrame({'wild_type_kd':list(sequence)})
 dfSeq.index = np.arange(1, len(dfSeq) + 1) # python is not inclusive

 # df2: df of kd_values with index reset to start from offset + 1 and subsequent matched length of the kd_values
@ -122,9 +123,9 @@ dfVals.index = np.arange(offset + 1, len(dfVals) + 1 + offset)
 max(dfVals['kd_values'])
 min(dfVals['kd_values'])

-#============
-# merging dfs
-#============
+#===================
+# concatenating dfs
+#===================
 # Merge the two on index (as these are now reflective of the aa position numbers): df1 and df2 
 # This will introduce NaN where there is missing values. In our case this will be 2 (first and last ones)
 # Conveniently, the last position in this case is not part of the struc, so not much loss of info
@ -133,7 +134,7 @@ min(dfVals['kd_values'])
 kd_df = pd.concat([dfSeq, dfVals], axis = 1)

 #============================
-# Renaming index to position
+# renaming index to position
 #============================
 kd_df = kd_df.rename_axis('position')
 kd_df.head
--- a/meta_data_analysis/rd_df.py
+++ b/meta_data_analysis/rd_df.py
@ -50,6 +50,7 @@ infile = indir + '/' + in_filename
 print('Input filename:', in_filename
      , '\nInput path:', indir)

+print('======================================================================')
 #=======
 # output 
 #=======