9
9
# 2017-08-10: Created by Andy Maloney, the Cambridge Crystallographic Data Centre
10
10
# 2020-08-21: made available by the Cambridge Crystallographic Data Centre
11
11
# 2022-12-21: Updated by Joanna S. Stevens, the Cambridge Crystallographic Data Centre
12
+ # 2025-01-08: Updated by Pablo Martinez-Bulit, the Cambridge Crystallographic Data Centre
12
13
#
13
14
14
15
"""
53
54
54
55
55
56
###############################################################################
57
+ class PropensityCalc :
58
+ "HBP Calculator"
59
+ def __init__ (self , crystal , work_directory , min_donor_coordination , min_acceptor_coordination , fg_count ):
60
+ self .crystal = crystal
61
+ self .directory = work_directory
62
+ self .min_donor_coordination = min_donor_coordination
63
+ self .min_acceptor_coordination = min_acceptor_coordination
64
+ self .fg_count = fg_count
65
+ self .settings = self ._hbp_settings ()
66
+ self .hbp = CrystalDescriptors .HBondPropensities ()
67
+
68
+ def _hbp_settings (self ):
69
+ settings = CrystalDescriptors .HBondPropensities .Settings ()
70
+ settings .hbond_criterion .require_hydrogens = True
71
+ settings .hbond_criterion .path_length_range = (3 , 999 )
72
+ settings .working_directory = self .directory
73
+
74
+ return settings
75
+
76
+ def calculate (self ):
77
+ self .hbp .settings = self .settings
78
+
79
+ # Set up the target structure for the calculation
80
+ self .hbp .set_target (self .crystal )
81
+ print (self .hbp .functional_groups )
82
+
83
+ # Generate Training Dataset
84
+ self .hbp .match_fitting_data (count = self .fg_count ) # set to >300, preferably 500 for better representation of functional groups
85
+
86
+ self .hbp .analyse_fitting_data ()
87
+
88
+ for d in self .hbp .donors :
89
+ print (d .identifier , d .npositive , d .nnegative )
90
+ for a in self .hbp .acceptors :
91
+ print (a .identifier , a .npositive , a .nnegative )
92
+
93
+ # Perform regression
94
+ model = self .hbp .perform_regression ()
95
+
96
+ print (model .equation )
97
+ print ('Area under ROC curve: {} -- {}' .format (round (model .area_under_roc_curve , 3 ), model .advice_comment ))
98
+
99
+ propensities = self .hbp .calculate_propensities ()
100
+
101
+ intra_flag = True if len (self .hbp .intra_propensities ) > 0 else False
102
+ intra_count = len ([p for p in propensities if not p .is_intermolecular and p .is_observed ])
103
+ intra_obs = True if intra_count > 0 else False
104
+
105
+ # Use default coordination cutoffs unless user overrides
106
+ groups = self .hbp .generate_hbond_groupings (min_donor_prob = self .min_donor_coordination ,
107
+ min_acceptor_prob = self .min_acceptor_coordination )
108
+
109
+ observed_group = self .hbp .target_hbond_grouping ()
110
+
111
+ return (self .hbp .functional_groups ,
112
+ self .hbp .fitting_data ,
113
+ self .hbp .donors ,
114
+ self .hbp .acceptors ,
115
+ model ,
116
+ propensities ,
117
+ intra_flag ,
118
+ groups ,
119
+ observed_group ,
120
+ self .min_donor_coordination ,
121
+ self .min_acceptor_coordination ,
122
+ intra_count ,
123
+ intra_obs )
124
+
125
+
56
126
def make_diagram (mol , directory ):
57
127
# Generates a diagram from a given structure
58
128
molecule_diagram_generator = DiagramGenerator ()
@@ -143,59 +213,6 @@ def launch_word_processor(output_file):
143
213
subprocess .Popen (['open' , output_file ])
144
214
145
215
146
- def propensity_calc (crystal , directory , min_donor_coordination , min_acceptor_coordination , fg_count ):
147
- # Perform a Hydrogen Bond Propensity calculation
148
-
149
- # Provide settings for the calculation
150
- settings = CrystalDescriptors .HBondPropensities .Settings ()
151
- settings .working_directory = directory
152
- settings .hbond_criterion .require_hydrogens = True
153
- settings .hbond_criterion .path_length_range = (3 , 999 )
154
-
155
- # Set up the HBP calculator
156
- hbp = CrystalDescriptors .HBondPropensities (settings )
157
-
158
- # Set up the target structure for the calculation
159
- hbp .set_target (crystal )
160
-
161
- print (hbp .functional_groups )
162
-
163
- # Generate Training Dataset
164
- hbp .match_fitting_data (count = fg_count ) # set to >300, preferably 500 for better representation of functional groups
165
-
166
- hbp .analyse_fitting_data ()
167
-
168
- for d in hbp .donors :
169
- print (d .identifier , d .npositive , d .nnegative )
170
- for a in hbp .acceptors :
171
- print (a .identifier , a .npositive , a .nnegative )
172
-
173
- # Perform regression
174
- model = hbp .perform_regression ()
175
-
176
- print (model .equation )
177
- print ('Area under ROC curve: {} -- {}' .format (round (model .area_under_roc_curve , 3 ), model .advice_comment ))
178
-
179
- propensities = hbp .calculate_propensities ()
180
-
181
- intra_flag = False
182
- intra_obs = False
183
- intra_count = 0
184
- if len (hbp .intra_propensities ) > 0 :
185
- intra_flag = True
186
- intra_count = len ([p for p in propensities if not p .is_intermolecular and p .is_observed ])
187
- if intra_count > 0 :
188
- intra_obs = True
189
-
190
- # Use default coordination cutoffs unless user overrides
191
- groups = hbp .generate_hbond_groupings (min_donor_prob = min_donor_coordination , min_acceptor_prob = min_acceptor_coordination )
192
-
193
- observed_group = hbp .target_hbond_grouping ()
194
-
195
- return hbp .functional_groups , hbp .fitting_data , hbp .donors , hbp .acceptors , model , propensities , \
196
- intra_flag , groups , observed_group , min_donor_coordination , min_acceptor_coordination , intra_count , intra_obs
197
-
198
-
199
216
def coordination_scores_calc (crystal , directory ):
200
217
# Calculate coordination scores for the target structure
201
218
@@ -220,6 +237,8 @@ def format_scores(scores, das, d_type):
220
237
221
238
def normalize_molecule (molecule ):
222
239
# Normalise bond types for the input structure (important for cifs)
240
+ if any (bond .bond_type == 'Unknown' for bond in molecule .bonds ):
241
+ print ('Unknown type bonds in molecule will be auto assigned' )
223
242
molecule .assign_bond_types (which = 'unknown' )
224
243
molecule .standardise_aromatic_bonds ()
225
244
molecule .standardise_delocalised_bonds ()
@@ -356,9 +375,12 @@ def main(structure, directory, csdrefcode, min_donor_coordination, min_acceptor_
356
375
# Set up a work directory for the HBP files
357
376
work_directory = os .path .join (directory , str (structure ).split ('.' )[0 ])
358
377
378
+ hbp_calculator = PropensityCalc (crystal , work_directory , min_donor_coordination ,
379
+ min_acceptor_coordination , fg_count )
380
+
359
381
# Get all the necessary data from a HBP calculation
360
- functional_groups , fitting_data , donors , acceptors , model , propensities , intra_flag , \
361
- groups , observed_groups , min_donor_coordination , min_acceptor_coordination , intra_count , intra_obs = propensity_calc ( crystal , work_directory , min_donor_coordination , min_acceptor_coordination , fg_count )
382
+ ( functional_groups , fitting_data , donors , acceptors , model , propensities , intra_flag , groups , observed_groups ,
383
+ min_donor_coordination , min_acceptor_coordination , intra_count , intra_obs ) = hbp_calculator . calculate ( )
362
384
363
385
# Calculate the coordination scores separately
364
386
coordination_scores = coordination_scores_calc (crystal , work_directory )
0 commit comments