Skip to content

Commit a0876f5

Browse files
authored
Merge pull request #73 from ccdc-opensource/jira_sci_1804_hbp_class
Moved hbp calculator into class SCI-1804
2 parents 2435e41 + cdae933 commit a0876f5

File tree

2 files changed

+129
-94
lines changed

2 files changed

+129
-94
lines changed

scripts/hydrogen_bond_propensity/hydrogen_bond_propensity_report.py

+77-55
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
# 2017-08-10: Created by Andy Maloney, the Cambridge Crystallographic Data Centre
1010
# 2020-08-21: made available by the Cambridge Crystallographic Data Centre
1111
# 2022-12-21: Updated by Joanna S. Stevens, the Cambridge Crystallographic Data Centre
12+
# 2025-01-08: Updated by Pablo Martinez-Bulit, the Cambridge Crystallographic Data Centre
1213
#
1314

1415
"""
@@ -53,6 +54,75 @@
5354

5455

5556
###############################################################################
57+
class PropensityCalc:
58+
"HBP Calculator"
59+
def __init__(self, crystal, work_directory, min_donor_coordination, min_acceptor_coordination, fg_count):
60+
self.crystal = crystal
61+
self.directory = work_directory
62+
self.min_donor_coordination = min_donor_coordination
63+
self.min_acceptor_coordination = min_acceptor_coordination
64+
self.fg_count = fg_count
65+
self.settings = self._hbp_settings()
66+
self.hbp = CrystalDescriptors.HBondPropensities()
67+
68+
def _hbp_settings(self):
69+
settings = CrystalDescriptors.HBondPropensities.Settings()
70+
settings.hbond_criterion.require_hydrogens = True
71+
settings.hbond_criterion.path_length_range = (3, 999)
72+
settings.working_directory = self.directory
73+
74+
return settings
75+
76+
def calculate(self):
77+
self.hbp.settings = self.settings
78+
79+
# Set up the target structure for the calculation
80+
self.hbp.set_target(self.crystal)
81+
print(self.hbp.functional_groups)
82+
83+
# Generate Training Dataset
84+
self.hbp.match_fitting_data(count=self.fg_count) # set to >300, preferably 500 for better representation of functional groups
85+
86+
self.hbp.analyse_fitting_data()
87+
88+
for d in self.hbp.donors:
89+
print(d.identifier, d.npositive, d.nnegative)
90+
for a in self.hbp.acceptors:
91+
print(a.identifier, a.npositive, a.nnegative)
92+
93+
# Perform regression
94+
model = self.hbp.perform_regression()
95+
96+
print(model.equation)
97+
print('Area under ROC curve: {} -- {}'.format(round(model.area_under_roc_curve, 3), model.advice_comment))
98+
99+
propensities = self.hbp.calculate_propensities()
100+
101+
intra_flag = True if len(self.hbp.intra_propensities) > 0 else False
102+
intra_count = len([p for p in propensities if not p.is_intermolecular and p.is_observed])
103+
intra_obs = True if intra_count > 0 else False
104+
105+
# Use default coordination cutoffs unless user overrides
106+
groups = self.hbp.generate_hbond_groupings(min_donor_prob=self.min_donor_coordination,
107+
min_acceptor_prob=self.min_acceptor_coordination)
108+
109+
observed_group = self.hbp.target_hbond_grouping()
110+
111+
return (self.hbp.functional_groups,
112+
self.hbp.fitting_data,
113+
self.hbp.donors,
114+
self.hbp.acceptors,
115+
model,
116+
propensities,
117+
intra_flag,
118+
groups,
119+
observed_group,
120+
self.min_donor_coordination,
121+
self.min_acceptor_coordination,
122+
intra_count,
123+
intra_obs)
124+
125+
56126
def make_diagram(mol, directory):
57127
# Generates a diagram from a given structure
58128
molecule_diagram_generator = DiagramGenerator()
@@ -143,59 +213,6 @@ def launch_word_processor(output_file):
143213
subprocess.Popen(['open', output_file])
144214

145215

146-
def propensity_calc(crystal, directory, min_donor_coordination, min_acceptor_coordination, fg_count):
147-
# Perform a Hydrogen Bond Propensity calculation
148-
149-
# Provide settings for the calculation
150-
settings = CrystalDescriptors.HBondPropensities.Settings()
151-
settings.working_directory = directory
152-
settings.hbond_criterion.require_hydrogens = True
153-
settings.hbond_criterion.path_length_range = (3, 999)
154-
155-
# Set up the HBP calculator
156-
hbp = CrystalDescriptors.HBondPropensities(settings)
157-
158-
# Set up the target structure for the calculation
159-
hbp.set_target(crystal)
160-
161-
print(hbp.functional_groups)
162-
163-
# Generate Training Dataset
164-
hbp.match_fitting_data(count=fg_count) # set to >300, preferably 500 for better representation of functional groups
165-
166-
hbp.analyse_fitting_data()
167-
168-
for d in hbp.donors:
169-
print(d.identifier, d.npositive, d.nnegative)
170-
for a in hbp.acceptors:
171-
print(a.identifier, a.npositive, a.nnegative)
172-
173-
# Perform regression
174-
model = hbp.perform_regression()
175-
176-
print(model.equation)
177-
print('Area under ROC curve: {} -- {}'.format(round(model.area_under_roc_curve, 3), model.advice_comment))
178-
179-
propensities = hbp.calculate_propensities()
180-
181-
intra_flag = False
182-
intra_obs = False
183-
intra_count = 0
184-
if len(hbp.intra_propensities) > 0:
185-
intra_flag = True
186-
intra_count = len([p for p in propensities if not p.is_intermolecular and p.is_observed])
187-
if intra_count > 0:
188-
intra_obs = True
189-
190-
# Use default coordination cutoffs unless user overrides
191-
groups = hbp.generate_hbond_groupings(min_donor_prob=min_donor_coordination, min_acceptor_prob=min_acceptor_coordination)
192-
193-
observed_group = hbp.target_hbond_grouping()
194-
195-
return hbp.functional_groups, hbp.fitting_data, hbp.donors, hbp.acceptors, model, propensities, \
196-
intra_flag, groups, observed_group, min_donor_coordination, min_acceptor_coordination, intra_count, intra_obs
197-
198-
199216
def coordination_scores_calc(crystal, directory):
200217
# Calculate coordination scores for the target structure
201218

@@ -220,6 +237,8 @@ def format_scores(scores, das, d_type):
220237

221238
def normalize_molecule(molecule):
222239
# Normalise bond types for the input structure (important for cifs)
240+
if any(bond.bond_type == 'Unknown' for bond in molecule.bonds):
241+
print('Unknown type bonds in molecule will be auto assigned')
223242
molecule.assign_bond_types(which='unknown')
224243
molecule.standardise_aromatic_bonds()
225244
molecule.standardise_delocalised_bonds()
@@ -356,9 +375,12 @@ def main(structure, directory, csdrefcode, min_donor_coordination, min_acceptor_
356375
# Set up a work directory for the HBP files
357376
work_directory = os.path.join(directory, str(structure).split('.')[0])
358377

378+
hbp_calculator = PropensityCalc(crystal, work_directory, min_donor_coordination,
379+
min_acceptor_coordination, fg_count)
380+
359381
# Get all the necessary data from a HBP calculation
360-
functional_groups, fitting_data, donors, acceptors, model, propensities, intra_flag, \
361-
groups, observed_groups, min_donor_coordination, min_acceptor_coordination, intra_count, intra_obs = propensity_calc(crystal, work_directory, min_donor_coordination, min_acceptor_coordination, fg_count)
382+
(functional_groups, fitting_data, donors, acceptors, model, propensities, intra_flag, groups, observed_groups,
383+
min_donor_coordination, min_acceptor_coordination, intra_count, intra_obs) = hbp_calculator.calculate()
362384

363385
# Calculate the coordination scores separately
364386
coordination_scores = coordination_scores_calc(crystal, work_directory)

scripts/multi_component_hydrogen_bond_propensity/multi_component_hydrogen_bond_propensity_report.py

+52-39
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#
99
# 2017-08-10: Created by Andy Maloney, the Cambridge Crystallographic Data Centre
1010
# 2020-08-21: made available by the Cambridge Crystallographic Data Centre
11+
# 2025-01-08: Updated by Pablo Martinez-Bulit, the Cambridge Crystallographic Data Centre
1112
#
1213
"""
1314
multi_component_hydrogen_bond_propensity_report.py
@@ -54,6 +55,52 @@
5455

5556

5657
###############################################################################
58+
class PropensityCalc:
59+
"HBP Calculator"
60+
def __init__(self):
61+
self.crystal = None
62+
self.directory = None
63+
self.fg_count = None
64+
self.settings = self._hbp_settings()
65+
self.hbp = CrystalDescriptors.HBondPropensities()
66+
67+
@staticmethod
68+
def _hbp_settings():
69+
settings = CrystalDescriptors.HBondPropensities.Settings()
70+
settings.hbond_criterion.require_hydrogens = True
71+
settings.hbond_criterion.path_length_range = (3, 999)
72+
73+
return settings
74+
75+
def calculate(self):
76+
self.hbp.settings = self.settings
77+
self.settings.working_directory = self.directory
78+
79+
# Set up the target structure for the calculation
80+
self.hbp.set_target(self.crystal)
81+
print(self.hbp.functional_groups)
82+
83+
# Generate Training Dataset
84+
self.hbp.match_fitting_data(count=500) # set to 500 for better representation of functional groups
85+
86+
self.hbp.analyse_fitting_data()
87+
88+
for d in self.hbp.donors:
89+
print(d.identifier, d.npositive, d.nnegative)
90+
for a in self.hbp.acceptors:
91+
print(a.identifier, a.npositive, a.nnegative)
92+
93+
# Perform regression
94+
model = self.hbp.perform_regression()
95+
96+
print(model.equation)
97+
print('Area under ROC curve: {} -- {}'.format(round(model.area_under_roc_curve, 3), model.advice_comment))
98+
99+
propensities = self.hbp.calculate_propensities()
100+
101+
return propensities, self.hbp.donors, self.hbp.acceptors
102+
103+
57104
def cm2inch(*tupl):
58105
inch = 2.54
59106
if isinstance(tupl[0], tuple):
@@ -119,44 +166,6 @@ def add_picture_subdoc(picture_location, docx_template, wd=7):
119166
docx_template, image_descriptor=picture_location, width=Cm(wd))
120167

121168

122-
def propensity_calc(crystal, directory):
123-
# Perform a Hydrogen Bond Propensity calculation
124-
125-
# Provide settings for the calculation
126-
settings = CrystalDescriptors.HBondPropensities.Settings()
127-
settings.working_directory = directory
128-
settings.hbond_criterion.require_hydrogens = True
129-
settings.hbond_criterion.path_length_range = (3, 999)
130-
131-
# Set up the HBP calculator
132-
hbp = CrystalDescriptors.HBondPropensities(settings)
133-
134-
# Set up the target structure for the calculation
135-
hbp.set_target(crystal)
136-
137-
print(hbp.functional_groups)
138-
139-
# Generate Training Dataset
140-
141-
hbp.match_fitting_data(count=500) # set to >300, preferably 500 for better representation of functional groups
142-
143-
hbp.analyse_fitting_data()
144-
145-
for d in hbp.donors:
146-
print(d.identifier, d.npositive, d.nnegative)
147-
for a in hbp.acceptors:
148-
print(a.identifier, a.npositive, a.nnegative)
149-
150-
# Perform regression
151-
model = hbp.perform_regression()
152-
print(model.equation)
153-
print('Area under ROC curve: {} -- {}'.format(round(model.area_under_roc_curve, 3), model.advice_comment))
154-
hbp.calculate_propensities()
155-
propensities = hbp.inter_propensities
156-
157-
return propensities, hbp.donors, hbp.acceptors
158-
159-
160169
def coordination_scores_calc(crystal, directory):
161170
# Calculate coordination scores for the target structure
162171

@@ -341,6 +350,8 @@ def main(structure, work_directory, failure_directory, library, csdrefcode, forc
341350
mc_dictionary = {}
342351
failures = []
343352

353+
hbp_calculator = PropensityCalc()
354+
344355
# for each coformer in the library, make a pair file for the api/coformer and run a HBP calculation
345356
for f in coformer_files:
346357
molecule_file, coformer_name = make_pair_file(api_molecule, tempdir, f)
@@ -355,7 +366,9 @@ def main(structure, work_directory, failure_directory, library, csdrefcode, forc
355366
mc_dictionary[coformer_name] = tloaded
356367
else:
357368
try:
358-
propensities, donors, acceptors = propensity_calc(crystal, directory)
369+
hbp_calculator.crystal = crystal
370+
hbp_calculator.directory = directory
371+
propensities, donors, acceptors = hbp_calculator.calculate()
359372
coordination_scores = coordination_scores_calc(crystal, directory)
360373
pair_output(crystal.identifier, propensities, donors, acceptors, coordination_scores, directory)
361374
with open(os.path.join(directory, "success.json"), "w") as file:

0 commit comments

Comments
 (0)