added deep ddg formatted data to combining_dfs.py
This commit is contained in:
parent
3ff9604002
commit
1155959e67
1 changed file with 21 additions and 41 deletions
|
@ -84,9 +84,6 @@ arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode')
|
|||
args = arg_parser.parse_args()
|
||||
#=======================================================================
|
||||
#%% variable assignment: input and output
|
||||
#drug = 'pyrazinamide'
|
||||
#gene = 'pncA'
|
||||
|
||||
drug = args.drug
|
||||
gene = args.gene
|
||||
datadir = args.datadir
|
||||
|
@ -128,39 +125,40 @@ if not outdir:
|
|||
#in_filename_mcsm = gene.lower() + '_complex_mcsm_norm.csv'
|
||||
in_filename_mcsm = gene.lower() + '_complex_mcsm_norm_SAM.csv' # gidb
|
||||
in_filename_foldx = gene.lower() + '_foldx.csv'
|
||||
in_filename_deepddg = gene.lower() + '_ni_deepddg.csv' # change to decent filename and put it in the correct dir
|
||||
|
||||
in_filename_dssp = gene.lower() + '_dssp.csv'
|
||||
in_filename_kd = gene.lower() + '_kd.csv'
|
||||
in_filename_rd = gene.lower() + '_rd.csv'
|
||||
in_filename_deepddg = gene.lower() + '_complex_ddg_results.txt' # change to decent filename and put it in the correct dir
|
||||
|
||||
in_filename_snpinfo = 'ns' + gene.lower() + '_snp_info_f.csv' # gwas f info
|
||||
in_filename_afor = gene.lower() + '_af_or.csv'
|
||||
in_filename_afor_kin = gene.lower() + '_af_or_kinship.csv'
|
||||
|
||||
#in_filename_snpinfo = 'ns' + gene.lower() + '_snp_info_f.csv' # gwas f info
|
||||
#in_filename_afor = gene.lower() + '_af_or.csv'
|
||||
#in_filename_afor_kin = gene.lower() + '_af_or_kinship.csv'
|
||||
|
||||
infile_mcsm = outdir + in_filename_mcsm
|
||||
infile_foldx = outdir + in_filename_foldx
|
||||
infile_deepddg = outdir + in_filename_deepddg
|
||||
|
||||
infile_dssp = outdir + in_filename_dssp
|
||||
infile_kd = outdir + in_filename_kd
|
||||
infile_rd = outdir + in_filename_rd
|
||||
infile_deepddg = outdir + 'deep_ddg/' + in_filename_deepddg
|
||||
|
||||
infile_snpinfo = outdir + '/' + in_filename_snpinfo
|
||||
infile_afor = outdir + '/' + in_filename_afor
|
||||
infile_afor_kin = outdir + '/' + in_filename_afor_kin
|
||||
#infile_snpinfo = outdir + '/' + in_filename_snpinfo
|
||||
#infile_afor = outdir + '/' + in_filename_afor
|
||||
#infile_afor_kin = outdir + '/' + in_filename_afor_kin
|
||||
|
||||
print('\nInput path:', indir
|
||||
, '\nOutput path:', outdir, '\n'
|
||||
, '\nInput filename mcsm:', infile_mcsm
|
||||
, '\nInput filename foldx:', infile_foldx, '\n'
|
||||
, '\nInput filename deepddg', infile_deepddg , '\n'
|
||||
, '\nInput filename dssp:', infile_dssp
|
||||
, '\nInput filename kd:', infile_kd
|
||||
, '\nInput filename rd', infile_rd
|
||||
# , '\nInput filename rd', infile_deepddg , '\n'
|
||||
|
||||
, '\nInput filename snp info:', infile_snpinfo, '\n'
|
||||
, '\nInput filename af or:', infile_afor
|
||||
, '\nInput filename afor kinship:', infile_afor_kin
|
||||
#, '\nInput filename snp info:', infile_snpinfo, '\n'
|
||||
#, '\nInput filename af or:', infile_afor
|
||||
#, '\nInput filename afor kinship:', infile_afor_kin
|
||||
, '\n============================================================')
|
||||
|
||||
#=======
|
||||
|
@ -208,29 +206,11 @@ print('==================================='
|
|||
, '\nSecond merge: mcsm_foldx_dfs + deepddg'
|
||||
, '\n===================================')
|
||||
|
||||
deepddg_df = pd.read_csv(infile_deepddg, sep = ' ')
|
||||
deepddg_df = pd.read_csv(infile_deepddg, sep = ',')
|
||||
deepddg_df.columns
|
||||
|
||||
deepddg_df.rename(columns = {'#chain' : 'chain_id'
|
||||
, 'WT' : 'wild_type_deepddg'
|
||||
, 'ResID' : 'position'
|
||||
, 'Mut' : 'mutant_type_deepddg'}
|
||||
, inplace = True)
|
||||
|
||||
deepddg_df['mutationinformation'] = deepddg_df['wild_type_deepddg'] + deepddg_df['position'].map(str) + deepddg_df['mutant_type_deepddg']
|
||||
|
||||
# add deepddg outcome column: < 0 --> Destabilising, >= 0 --> Stabilising
|
||||
deepddg_df['deepddg_outcome'] = np.where(deepddg_df['deepddg'] < 0, 'Destabilising', 'Stabilising')
|
||||
deepddg_df['deepddg_outcome'].value_counts()
|
||||
|
||||
# drop extra columns to allow clean merging
|
||||
deepddg_short_df = deepddg_df.drop(['chain_id', 'wild_type_deepddg', 'position', 'mutant_type_deepddg'], axis = 1)
|
||||
|
||||
# rearrange columns
|
||||
deepddg_short_df.columns
|
||||
deepddg_short_df = deepddg_short_df[["mutationinformation", "deepddg", "deepddg_outcome"]]
|
||||
|
||||
mcsm_foldx_deepddg_dfs = pd.merge(mcsm_foldx_dfs, deepddg_short_df, on = 'mutationinformation', how = l_join)
|
||||
# merge mcsm_foldx_dfs with deepddg_df on 'mutationinformation'
|
||||
mcsm_foldx_deepddg_dfs = pd.merge(mcsm_foldx_dfs, deepddg_df, on = 'mutationinformation', how = l_join)
|
||||
mcsm_foldx_deepddg_dfs['deepddg_outcome'].value_counts()
|
||||
|
||||
ncols_deepddg_merge = len(mcsm_foldx_deepddg_dfs.columns)
|
||||
|
@ -317,10 +297,10 @@ print('Output filename:', outfile_stab_struc
|
|||
|
||||
# write csv
|
||||
print('Writing file: combined stability and structural parameters')
|
||||
combined_df.to_csv(outfile_stab_struc, index = False)
|
||||
combined_df_clean.to_csv(outfile_stab_struc, index = False)
|
||||
print('\nFinished writing file:'
|
||||
, '\nNo. of rows:', combined_df.shape[0]
|
||||
, '\nNo. of cols:', combined_df.shape[1])
|
||||
, '\nNo. of rows:', combined_df_clean.shape[0]
|
||||
, '\nNo. of cols:', combined_df_clean.shape[1])
|
||||
|
||||
|
||||
#%% end of script
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue