diff --git a/scripts/hydrogen_bond_propensity/hydrogen_bond_propensity_report.py b/scripts/hydrogen_bond_propensity/hydrogen_bond_propensity_report.py index 8704fd5..164dd7d 100644 --- a/scripts/hydrogen_bond_propensity/hydrogen_bond_propensity_report.py +++ b/scripts/hydrogen_bond_propensity/hydrogen_bond_propensity_report.py @@ -9,6 +9,7 @@ # 2017-08-10: Created by Andy Maloney, the Cambridge Crystallographic Data Centre # 2020-08-21: made available by the Cambridge Crystallographic Data Centre # 2022-12-21: Updated by Joanna S. Stevens, the Cambridge Crystallographic Data Centre +# 2025-01-08: Updated by Pablo Martinez-Bulit, the Cambridge Crystallographic Data Centre # """ @@ -53,6 +54,75 @@ ############################################################################### +class PropensityCalc: + "HBP Calculator" + def __init__(self, crystal, work_directory, min_donor_coordination, min_acceptor_coordination, fg_count): + self.crystal = crystal + self.directory = work_directory + self.min_donor_coordination = min_donor_coordination + self.min_acceptor_coordination = min_acceptor_coordination + self.fg_count = fg_count + self.settings = self._hbp_settings() + self.hbp = CrystalDescriptors.HBondPropensities() + + def _hbp_settings(self): + settings = CrystalDescriptors.HBondPropensities.Settings() + settings.hbond_criterion.require_hydrogens = True + settings.hbond_criterion.path_length_range = (3, 999) + settings.working_directory = self.directory + + return settings + + def calculate(self): + self.hbp.settings = self.settings + + # Set up the target structure for the calculation + self.hbp.set_target(self.crystal) + print(self.hbp.functional_groups) + + # Generate Training Dataset + self.hbp.match_fitting_data(count=self.fg_count) # set to >300, preferably 500 for better representation of functional groups + + self.hbp.analyse_fitting_data() + + for d in self.hbp.donors: + print(d.identifier, d.npositive, d.nnegative) + for a in self.hbp.acceptors: + print(a.identifier, a.npositive, a.nnegative) + + # Perform regression + model = self.hbp.perform_regression() + + print(model.equation) + print('Area under ROC curve: {} -- {}'.format(round(model.area_under_roc_curve, 3), model.advice_comment)) + + propensities = self.hbp.calculate_propensities() + + intra_flag = True if len(self.hbp.intra_propensities) > 0 else False + intra_count = len([p for p in propensities if not p.is_intermolecular and p.is_observed]) + intra_obs = True if intra_count > 0 else False + + # Use default coordination cutoffs unless user overrides + groups = self.hbp.generate_hbond_groupings(min_donor_prob=self.min_donor_coordination, + min_acceptor_prob=self.min_acceptor_coordination) + + observed_group = self.hbp.target_hbond_grouping() + + return (self.hbp.functional_groups, + self.hbp.fitting_data, + self.hbp.donors, + self.hbp.acceptors, + model, + propensities, + intra_flag, + groups, + observed_group, + self.min_donor_coordination, + self.min_acceptor_coordination, + intra_count, + intra_obs) + + def make_diagram(mol, directory): # Generates a diagram from a given structure molecule_diagram_generator = DiagramGenerator() @@ -143,59 +213,6 @@ def launch_word_processor(output_file): subprocess.Popen(['open', output_file]) -def propensity_calc(crystal, directory, min_donor_coordination, min_acceptor_coordination, fg_count): - # Perform a Hydrogen Bond Propensity calculation - - # Provide settings for the calculation - settings = CrystalDescriptors.HBondPropensities.Settings() - settings.working_directory = directory - settings.hbond_criterion.require_hydrogens = True - settings.hbond_criterion.path_length_range = (3, 999) - - # Set up the HBP calculator - hbp = CrystalDescriptors.HBondPropensities(settings) - - # Set up the target structure for the calculation - hbp.set_target(crystal) - - print(hbp.functional_groups) - - # Generate Training Dataset - hbp.match_fitting_data(count=fg_count) # set to >300, preferably 500 for better representation of functional groups - - hbp.analyse_fitting_data() - - for d in hbp.donors: - print(d.identifier, d.npositive, d.nnegative) - for a in hbp.acceptors: - print(a.identifier, a.npositive, a.nnegative) - - # Perform regression - model = hbp.perform_regression() - - print(model.equation) - print('Area under ROC curve: {} -- {}'.format(round(model.area_under_roc_curve, 3), model.advice_comment)) - - propensities = hbp.calculate_propensities() - - intra_flag = False - intra_obs = False - intra_count = 0 - if len(hbp.intra_propensities) > 0: - intra_flag = True - intra_count = len([p for p in propensities if not p.is_intermolecular and p.is_observed]) - if intra_count > 0: - intra_obs = True - - # Use default coordination cutoffs unless user overrides - groups = hbp.generate_hbond_groupings(min_donor_prob=min_donor_coordination, min_acceptor_prob=min_acceptor_coordination) - - observed_group = hbp.target_hbond_grouping() - - return hbp.functional_groups, hbp.fitting_data, hbp.donors, hbp.acceptors, model, propensities, \ - intra_flag, groups, observed_group, min_donor_coordination, min_acceptor_coordination, intra_count, intra_obs - - def coordination_scores_calc(crystal, directory): # Calculate coordination scores for the target structure @@ -220,6 +237,8 @@ def format_scores(scores, das, d_type): def normalize_molecule(molecule): # Normalise bond types for the input structure (important for cifs) + if any(bond.bond_type == 'Unknown' for bond in molecule.bonds): + print('Unknown type bonds in molecule will be auto assigned') molecule.assign_bond_types(which='unknown') molecule.standardise_aromatic_bonds() molecule.standardise_delocalised_bonds() @@ -356,9 +375,12 @@ def main(structure, directory, csdrefcode, min_donor_coordination, min_acceptor_ # Set up a work directory for the HBP files work_directory = os.path.join(directory, str(structure).split('.')[0]) + hbp_calculator = PropensityCalc(crystal, work_directory, min_donor_coordination, + min_acceptor_coordination, fg_count) + # Get all the necessary data from a HBP calculation - functional_groups, fitting_data, donors, acceptors, model, propensities, intra_flag, \ - groups, observed_groups, min_donor_coordination, min_acceptor_coordination, intra_count, intra_obs = propensity_calc(crystal, work_directory, min_donor_coordination, min_acceptor_coordination, fg_count) + (functional_groups, fitting_data, donors, acceptors, model, propensities, intra_flag, groups, observed_groups, + min_donor_coordination, min_acceptor_coordination, intra_count, intra_obs) = hbp_calculator.calculate() # Calculate the coordination scores separately coordination_scores = coordination_scores_calc(crystal, work_directory) diff --git a/scripts/multi_component_hydrogen_bond_propensity/multi_component_hydrogen_bond_propensity_report.py b/scripts/multi_component_hydrogen_bond_propensity/multi_component_hydrogen_bond_propensity_report.py index a98a39a..7adb2dd 100644 --- a/scripts/multi_component_hydrogen_bond_propensity/multi_component_hydrogen_bond_propensity_report.py +++ b/scripts/multi_component_hydrogen_bond_propensity/multi_component_hydrogen_bond_propensity_report.py @@ -8,6 +8,7 @@ # # 2017-08-10: Created by Andy Maloney, the Cambridge Crystallographic Data Centre # 2020-08-21: made available by the Cambridge Crystallographic Data Centre +# 2025-01-08: Updated by Pablo Martinez-Bulit, the Cambridge Crystallographic Data Centre # """ multi_component_hydrogen_bond_propensity_report.py @@ -54,6 +55,52 @@ ############################################################################### +class PropensityCalc: + "HBP Calculator" + def __init__(self): + self.crystal = None + self.directory = None + self.fg_count = None + self.settings = self._hbp_settings() + self.hbp = CrystalDescriptors.HBondPropensities() + + @staticmethod + def _hbp_settings(): + settings = CrystalDescriptors.HBondPropensities.Settings() + settings.hbond_criterion.require_hydrogens = True + settings.hbond_criterion.path_length_range = (3, 999) + + return settings + + def calculate(self): + self.hbp.settings = self.settings + self.settings.working_directory = self.directory + + # Set up the target structure for the calculation + self.hbp.set_target(self.crystal) + print(self.hbp.functional_groups) + + # Generate Training Dataset + self.hbp.match_fitting_data(count=500) # set to 500 for better representation of functional groups + + self.hbp.analyse_fitting_data() + + for d in self.hbp.donors: + print(d.identifier, d.npositive, d.nnegative) + for a in self.hbp.acceptors: + print(a.identifier, a.npositive, a.nnegative) + + # Perform regression + model = self.hbp.perform_regression() + + print(model.equation) + print('Area under ROC curve: {} -- {}'.format(round(model.area_under_roc_curve, 3), model.advice_comment)) + + propensities = self.hbp.calculate_propensities() + + return propensities, self.hbp.donors, self.hbp.acceptors + + def cm2inch(*tupl): inch = 2.54 if isinstance(tupl[0], tuple): @@ -119,44 +166,6 @@ def add_picture_subdoc(picture_location, docx_template, wd=7): docx_template, image_descriptor=picture_location, width=Cm(wd)) -def propensity_calc(crystal, directory): - # Perform a Hydrogen Bond Propensity calculation - - # Provide settings for the calculation - settings = CrystalDescriptors.HBondPropensities.Settings() - settings.working_directory = directory - settings.hbond_criterion.require_hydrogens = True - settings.hbond_criterion.path_length_range = (3, 999) - - # Set up the HBP calculator - hbp = CrystalDescriptors.HBondPropensities(settings) - - # Set up the target structure for the calculation - hbp.set_target(crystal) - - print(hbp.functional_groups) - - # Generate Training Dataset - - hbp.match_fitting_data(count=500) # set to >300, preferably 500 for better representation of functional groups - - hbp.analyse_fitting_data() - - for d in hbp.donors: - print(d.identifier, d.npositive, d.nnegative) - for a in hbp.acceptors: - print(a.identifier, a.npositive, a.nnegative) - - # Perform regression - model = hbp.perform_regression() - print(model.equation) - print('Area under ROC curve: {} -- {}'.format(round(model.area_under_roc_curve, 3), model.advice_comment)) - hbp.calculate_propensities() - propensities = hbp.inter_propensities - - return propensities, hbp.donors, hbp.acceptors - - def coordination_scores_calc(crystal, directory): # Calculate coordination scores for the target structure @@ -341,6 +350,8 @@ def main(structure, work_directory, failure_directory, library, csdrefcode, forc mc_dictionary = {} failures = [] + hbp_calculator = PropensityCalc() + # for each coformer in the library, make a pair file for the api/coformer and run a HBP calculation for f in coformer_files: molecule_file, coformer_name = make_pair_file(api_molecule, tempdir, f) @@ -355,7 +366,9 @@ def main(structure, work_directory, failure_directory, library, csdrefcode, forc mc_dictionary[coformer_name] = tloaded else: try: - propensities, donors, acceptors = propensity_calc(crystal, directory) + hbp_calculator.crystal = crystal + hbp_calculator.directory = directory + propensities, donors, acceptors = hbp_calculator.calculate() coordination_scores = coordination_scores_calc(crystal, directory) pair_output(crystal.identifier, propensities, donors, acceptors, coordination_scores, directory) with open(os.path.join(directory, "success.json"), "w") as file: