From c0bac6fd7b8e6c2cad629515ebede7efe7127a20 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 26 Mar 2020 17:12:59 +0000 Subject: [PATCH] changed outcols in dssp and kd outputs --- meta_data_analysis/dssp_df.py | 11 +++++++---- meta_data_analysis/kd_df.py | 11 ++++++----- meta_data_analysis/rd_df.py | 1 + 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/meta_data_analysis/dssp_df.py b/meta_data_analysis/dssp_df.py index fa07fe4..5a5b479 100755 --- a/meta_data_analysis/dssp_df.py +++ b/meta_data_analysis/dssp_df.py @@ -56,20 +56,23 @@ in_filename = gene.lower() +'.dssp' infile = indir + '/' + in_filename print('Input filename:', in_filename , '\nInput path:', indir) - + # specify PDB chain my_chain = 'A' +print('======================================================================') + #======= # output #======= outdir = datadir + '/' + drug + '/' + 'output' -out_filename = gene.lower() + '_dssp_df' +out_filename = gene.lower() + '_dssp.csv' outfile = outdir + '/' + out_filename print('Output filename:', out_filename , '\nOutput path:', outdir ,'\nOutfile: ', outfile) - + +print('======================================================================') #%% end of variable assignment for input and output files #======================================================================= # Process dssp output and extract into df @@ -87,7 +90,7 @@ pp.pprint(dssp_df) # Rename column (amino acid) as 'wild_type' and (site} as 'position' # to be the same names as used in the file required for merging later. dssp_df.columns -dssp_df.rename(columns = {'site':'position', 'amino_acid':'wild_type'}, inplace = True) +dssp_df.rename(columns = {'site':'position', 'amino_acid':'wild_type_dssp'}, inplace = True) dssp_df.columns #%% Write ouput csv file diff --git a/meta_data_analysis/kd_df.py b/meta_data_analysis/kd_df.py index 54e7544..0fd1ccc 100644 --- a/meta_data_analysis/kd_df.py +++ b/meta_data_analysis/kd_df.py @@ -59,6 +59,7 @@ print('Input filename:', in_filename , '\nInput path:', indir) print('======================================================================') + #======= # output #======= @@ -111,7 +112,7 @@ print('======================================================================') # df1: df of aa seq with index reset to start from 1 (reflective of the actual aa position in a sequence) # Name column of wt as 'wild_type' to be the same name used in the file required for merging later. -dfSeq = pd.DataFrame({'wild_type':list(sequence)}) +dfSeq = pd.DataFrame({'wild_type_kd':list(sequence)}) dfSeq.index = np.arange(1, len(dfSeq) + 1) # python is not inclusive # df2: df of kd_values with index reset to start from offset + 1 and subsequent matched length of the kd_values @@ -122,9 +123,9 @@ dfVals.index = np.arange(offset + 1, len(dfVals) + 1 + offset) max(dfVals['kd_values']) min(dfVals['kd_values']) -#============ -# merging dfs -#============ +#=================== +# concatenating dfs +#=================== # Merge the two on index (as these are now reflective of the aa position numbers): df1 and df2 # This will introduce NaN where there is missing values. In our case this will be 2 (first and last ones) # Conveniently, the last position in this case is not part of the struc, so not much loss of info @@ -133,7 +134,7 @@ min(dfVals['kd_values']) kd_df = pd.concat([dfSeq, dfVals], axis = 1) #============================ -# Renaming index to position +# renaming index to position #============================ kd_df = kd_df.rename_axis('position') kd_df.head diff --git a/meta_data_analysis/rd_df.py b/meta_data_analysis/rd_df.py index 96dbe6e..b0921b8 100755 --- a/meta_data_analysis/rd_df.py +++ b/meta_data_analysis/rd_df.py @@ -50,6 +50,7 @@ infile = indir + '/' + in_filename print('Input filename:', in_filename , '\nInput path:', indir) +print('======================================================================') #======= # output #=======