Skip to content

Commit 4824011

Browse files
committed
Install missing chain IDs into input PDB files
1 parent 6d885b4 commit 4824011

File tree

2 files changed

+11
-4
lines changed

2 files changed

+11
-4
lines changed

Diff for: project/utils/deepinteract_utils.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -628,8 +628,8 @@ def recover_any_missing_chain_ids(interim_dataset_dir: str, new_pdb_filepath: st
628628
orig_pdb_chain_id = '_' # Default value for missing chain IDs
629629
new_pdb_code = db.get_pdb_code(new_pdb_filepath)
630630
orig_pdb_name = db.get_pdb_name(orig_pdb_filepath)
631-
orig_pdb_df = PandasPdb().read_pdb(new_pdb_filepath).df['ATOM']
632-
unique_chain_ids = np.unique(orig_pdb_df['chain_id'].values)
631+
new_pdb_obj = PandasPdb().read_pdb(new_pdb_filepath)
632+
unique_chain_ids = np.unique(new_pdb_obj.df['ATOM']['chain_id'].values)
633633

634634
"""Ascertain the chain ID corresponding to the original PDB file, using one of two available methods.
635635
Method 1: Used with datasets such as EVCoupling adopting .atom filename extensions (e.g., 4DI3C.atom)
@@ -645,6 +645,13 @@ def recover_any_missing_chain_ids(interim_dataset_dir: str, new_pdb_filepath: st
645645
# Assume the first/second index is the first non-empty chain ID (e.g., 'A')
646646
orig_pdb_chain_id = unique_chain_ids[0] if (unique_chain_ids[0] != '') else unique_chain_ids[1]
647647

648+
# Update version of the input PDB file copied to input_dataset_dir
649+
new_pdb_obj.df['ATOM']['chain_id'] = orig_pdb_chain_id
650+
new_pdb_obj.df['HETATM']['chain_id'] = orig_pdb_chain_id
651+
new_pdb_obj.df['ANISOU']['chain_id'] = orig_pdb_chain_id
652+
new_pdb_obj.df['OTHERS']['chain_id'] = orig_pdb_chain_id
653+
new_pdb_obj.to_pdb(new_pdb_filepath, records=None, gz=False, append_newline=True)
654+
648655
# Update existing parsed chains to contain the newly-recovered chain ID
649656
parsed_dir = os.path.join(interim_dataset_dir, 'parsed', pdb_code)
650657
parsed_filenames = [
@@ -818,7 +825,7 @@ def convert_input_pdb_files_to_pair(left_pdb_filepath: str, right_pdb_filepath:
818825
output_dir = os.path.join(input_dataset_dir, 'final', 'raw')
819826
produced_filenames = db.get_structures_filenames(output_dir, extension='.dill')
820827
produced_keys = [db.get_pdb_name(x) for x in produced_filenames
821-
if db.get_pdb_code(x).upper() in db.get_pdb_code(left_pdb_filepath).upper()]
828+
if db.get_pdb_code(x).upper() in db.get_pdb_code(new_l_u_filepath).upper()]
822829
pair_filepath = [os.path.join(output_dir, db.get_pdb_code(key)[1:3], key)
823830
for key in produced_keys][0]
824831
# Impute any missing feature values in the postprocessed input pairs

Diff for: setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
setup(
66
name='DeepInteract',
7-
version='1.0.4',
7+
version='1.0.5',
88
description='A geometric deep learning pipeline for predicting protein interface contacts.',
99
author='Alex Morehead',
1010
author_email='[email protected]',

0 commit comments

Comments
 (0)