#!/usr/bin/env python
"""List the ligands (HETATM residue names) found in PDB files.

Exploratory, cell-based (``#%%``) script. It changes into the examples
directory, reads each PDB file in ``file_list`` with BioPandas, collects the
unique residue names of the HETATM records, drops the names listed in
``BORING_LIGANDS`` and prints the result. The remaining cells repeat the
extraction for a single file and show how to get a file name from a path.
"""
import os

from biopandas.pdb import PandasPdb

#%% working directory
homedir = os.path.expanduser('~')
# Built once and reused in the last cell instead of repeating the literal.
examples_dir = os.path.join(homedir, 'git', 'LSHTM_analysis', 'scripts', 'examples')
os.chdir(examples_dir)

#%% inputs
# Alternative inputs; only one assignment is active at a time (the earlier
# ones used to be overwritten immediately, so they are kept as comments).
#file_list = ['7bvf_b.pdb', 'pnca_complex.pdb', 'alr_complex.pdb']
#file_list = ['7bvf_b.pdb']
#file_list = ['pnca_complex.pdb']
file_list = ['alr_complex.pdb']

# HETATM residue names that are filtered out of the reported ligands.
BORING_LIGANDS = ["HOH", "CA", "SO4", "IOD", "NA", "CL", "GOL", "PO4"]


def _drop_boring(residue_names):
    """Return *residue_names*, in order, without the BORING_LIGANDS entries."""
    return [name for name in residue_names if name not in BORING_LIGANDS]


#%% df with list
# Maps each input file name to the list of its non-boring ligands.
ligands_dict = {}
for pdb_id in file_list:
    ppdb = PandasPdb()
    pdb_file = ppdb.read_pdb(pdb_id)
    het = pdb_file.df['HETATM']
    # sorted() rather than list(): set iteration order of strings is arbitrary
    # and may differ between runs, which made the printed output unstable.
    het_list = sorted(set(het['residue_name']))
    ligands = _drop_boring(het_list)
    ligands_dict[pdb_id] = ligands
    #ligands_dict[pdb_id] = het_list # include BORING_LIGANDS

print(ligands_dict)

# The lines below report on the last file read by the loop above.
print('pdb_code:', pdb_file.code) # works only in case of valid pdb
print('pdb_code:', pdb_file.pdb_path) # works for bespoke pdb but prints the full path
print('pdb_code:', os.path.basename(pdb_file.pdb_path)) # prints only the last part, i.e. the filename

#%% test with one
ppdb = PandasPdb()
pdb_file = ppdb.read_pdb('7bvf_b.pdb')
het = pdb_file.df['HETATM']
het_list = sorted(set(het['residue_name']))
print(het_list)

ligands = _drop_boring(het_list)
print(ligands)

#%% extract last part from file path
print(os.path.basename(examples_dir))
print(os.path.basename('alr_complex.pdb'))

foo = os.path.basename(pdb_file.pdb_path)
print(foo)