diff --git a/nvmolkit/_mmffOptimization.pyi b/nvmolkit/_mmffOptimization.pyi index 313dde0..5228b6b 100644 --- a/nvmolkit/_mmffOptimization.pyi +++ b/nvmolkit/_mmffOptimization.pyi @@ -4,6 +4,6 @@ from rdkit.Chem import Mol def MMFFOptimizeMoleculesConfs( molecules: List[Mol], maxIters: int = 200, - nonBondedThreshold: float = 100.0, + properties: Any = None, hardwareOptions: Any = None ) -> List[List[float]]: ... diff --git a/nvmolkit/mmffOptimization.cpp b/nvmolkit/mmffOptimization.cpp index ef3b587..df9a590 100644 --- a/nvmolkit/mmffOptimization.cpp +++ b/nvmolkit/mmffOptimization.cpp @@ -36,14 +36,47 @@ template boost::python::list vectorOfVectorsToList(const std::vecto return outerList; } +nvMolKit::MMFFProperties extractMMFFProperties(const boost::python::object& obj) { + nvMolKit::MMFFProperties props; + if (obj.is_none()) { + return props; + } + props.variant = boost::python::extract(obj.attr("variant")); + props.dielectricConstant = boost::python::extract(obj.attr("dielectricConstant")); + props.dielectricModel = boost::python::extract(obj.attr("dielectricModel")); + props.nonBondedThreshold = boost::python::extract(obj.attr("nonBondedThreshold")); + props.ignoreInterfragInteractions = boost::python::extract(obj.attr("ignoreInterfragInteractions")); + props.bondTerm = boost::python::extract(obj.attr("bondTerm")); + props.angleTerm = boost::python::extract(obj.attr("angleTerm")); + props.stretchBendTerm = boost::python::extract(obj.attr("stretchBendTerm")); + props.oopTerm = boost::python::extract(obj.attr("oopTerm")); + props.torsionTerm = boost::python::extract(obj.attr("torsionTerm")); + props.vdwTerm = boost::python::extract(obj.attr("vdwTerm")); + props.eleTerm = boost::python::extract(obj.attr("eleTerm")); + return props; +} + +std::vector extractMMFFPropertiesList(const boost::python::list& properties, + const int expectedSize) { + if (boost::python::len(properties) != expectedSize) { + throw std::invalid_argument("Expected " + std::to_string(expectedSize) + " MMFF properties objects, got " + + std::to_string(boost::python::len(properties))); + } + std::vector out; + out.reserve(expectedSize); + for (int i = 0; i < expectedSize; ++i) { + out.push_back(extractMMFFProperties(boost::python::object(properties[i]))); + } + return out; +} + BOOST_PYTHON_MODULE(_mmffOptimization) { boost::python::def( "MMFFOptimizeMoleculesConfs", +[](const boost::python::list& molecules, int maxIters, - double nonBondedThreshold, + const boost::python::list& propertiesList, const nvMolKit::BatchHardwareOptions& hardwareOptions) -> boost::python::list { - // Convert Python list to std::vector std::vector molsVec; molsVec.reserve(len(molecules)); @@ -55,23 +88,23 @@ BOOST_PYTHON_MODULE(_mmffOptimization) { molsVec.push_back(mol); } - nvMolKit::MMFFProperties properties; - properties.nonBondedThreshold = nonBondedThreshold; - auto result = nvMolKit::MMFF::MMFFOptimizeMoleculesConfsBfgs(molsVec, maxIters, properties, hardwareOptions); + const auto properties = extractMMFFPropertiesList(propertiesList, static_cast(molsVec.size())); + const auto result = + nvMolKit::MMFF::MMFFOptimizeMoleculesConfsBfgs(molsVec, maxIters, properties, hardwareOptions); // Convert result back to Python list of lists return vectorOfVectorsToList(result); }, (boost::python::arg("molecules"), - boost::python::arg("maxIters") = 200, - boost::python::arg("nonBondedThreshold") = 100.0, - boost::python::arg("hardwareOptions") = nvMolKit::BatchHardwareOptions()), + boost::python::arg("maxIters") = 200, + boost::python::arg("properties") = boost::python::list(), + boost::python::arg("hardwareOptions") = nvMolKit::BatchHardwareOptions()), "Optimize conformers for multiple molecules using MMFF force field.\n" "\n" "Args:\n" " molecules: List of RDKit molecules to optimize\n" " maxIters: Maximum number of optimization iterations (default: 200)\n" - " nonBondedThreshold: Radius threshold for non-bonded interactions (default: 100.0)\n" + " properties: MMFFProperties-compatible object with forcefield settings\n" " hardwareOptions: BatchHardwareOptions object with hardware settings (default: default options)\n" "\n" "Returns:\n" diff --git a/nvmolkit/mmffOptimization.py b/nvmolkit/mmffOptimization.py index 0fe3b83..ff8940a 100644 --- a/nvmolkit/mmffOptimization.py +++ b/nvmolkit/mmffOptimization.py @@ -19,21 +19,26 @@ optimization for multiple molecules and conformers using CUDA and OpenMP. """ +from collections.abc import Sequence from typing import TYPE_CHECKING from rdkit.Chem import AllChem if TYPE_CHECKING: from rdkit.Chem import Mol + from rdkit.ForceField.rdForceField import MMFFMolProperties from nvmolkit import _mmffOptimization +from nvmolkit._mmff_bridge import default_rdkit_mmff_properties, make_internal_mmff_properties from nvmolkit.types import HardwareOptions def MMFFOptimizeMoleculesConfs( molecules: list["Mol"], maxIters: int = 200, - nonBondedThreshold: float = 100.0, + properties: "MMFFMolProperties | Sequence[MMFFMolProperties | None] | None" = None, + nonBondedThreshold: float | Sequence[float] = 100.0, + ignoreInterfragInteractions: bool | Sequence[bool] = True, hardwareOptions: HardwareOptions | None = None, ) -> list[list[float]]: """Optimize conformers for multiple molecules using MMFF force field with BFGS minimization. @@ -46,7 +51,12 @@ def MMFFOptimizeMoleculesConfs( molecules: List of RDKit molecules to optimize. Each molecule should have conformers already generated. maxIters: Maximum number of BFGS optimization iterations (default: 200) - nonBondedThreshold: Radius threshold for non-bonded interactions in Ångströms (default: 100.0) + properties: RDKit ``MMFFMolProperties`` object, a per-molecule sequence + of those objects, or ``None`` to use default MMFF94 settings. + nonBondedThreshold: Radius threshold used to exclude long-range + non-bonded interactions, either as a scalar or per-molecule sequence. + ignoreInterfragInteractions: If ``True``, omit non-bonded terms between + fragments. May also be provided as a per-molecule sequence. hardwareOptions: Configures CPU and GPU batching, threading, and device selection. Will attempt to use reasonable defaults if not set. Returns: @@ -117,8 +127,37 @@ def MMFFOptimizeMoleculesConfs( {"none": none_indices, "no_params": no_params_indices}, ) + def _normalize_scalar_or_list(value, name: str): + if isinstance(value, Sequence) and not isinstance(value, (str, bytes)): + if len(value) != len(molecules): + raise ValueError(f"Expected {len(molecules)} values for {name}, got {len(value)}") + return list(value) + return [value for _ in molecules] + + def _normalize_properties(value): + if value is None: + return [default_rdkit_mmff_properties(mol) for mol in molecules] + if isinstance(value, Sequence) and not hasattr(value, "SetMMFFVariant"): + if len(value) != len(molecules): + raise ValueError(f"Expected {len(molecules)} MMFFMolProperties objects, got {len(value)}") + return [ + default_rdkit_mmff_properties(mol) if props is None else props for mol, props in zip(molecules, value) + ] + return [value for _ in molecules] + # Call the C++ implementation if hardwareOptions is None: hardwareOptions = HardwareOptions() native_options = hardwareOptions._as_native() - return _mmffOptimization.MMFFOptimizeMoleculesConfs(molecules, maxIters, nonBondedThreshold, native_options) + properties_list = _normalize_properties(properties) + thresholds = _normalize_scalar_or_list(nonBondedThreshold, "nonBondedThreshold") + interfrag_flags = _normalize_scalar_or_list(ignoreInterfragInteractions, "ignoreInterfragInteractions") + native_properties = [ + make_internal_mmff_properties( + props, + non_bonded_threshold=float(threshold), + ignore_interfrag_interactions=bool(ignore_interfrag), + ) + for props, threshold, ignore_interfrag in zip(properties_list, thresholds, interfrag_flags) + ] + return _mmffOptimization.MMFFOptimizeMoleculesConfs(molecules, maxIters, native_properties, native_options) diff --git a/nvmolkit/tests/test_mmff_optimization.py b/nvmolkit/tests/test_mmff_optimization.py index bc407c8..158ba53 100644 --- a/nvmolkit/tests/test_mmff_optimization.py +++ b/nvmolkit/tests/test_mmff_optimization.py @@ -19,7 +19,10 @@ from rdkit import Chem from rdkit.Chem import rdDistGeom, rdForceFieldHelpers from rdkit.Chem.AllChem import ETKDGv3 +from rdkit.ForceField import rdForceField as _rdForceField # noqa: F401 +from rdkit.Geometry import Point3D +from nvmolkit._mmff_bridge import capture_mmff_settings from nvmolkit.embedMolecules import EmbedMolecules import nvmolkit.mmffOptimization as nvmolkit_mmff from nvmolkit.types import HardwareOptions @@ -82,7 +85,51 @@ def create_hard_copy_mols(molecules): return copied_mols -def calculate_rdkit_mmff_energies(molecules, maxIters=200, nonBondedThreshold=100.0): +def make_fragmented_mol(): + mol = Chem.AddHs(Chem.MolFromSmiles("CC.CC")) + params = ETKDGv3() + params.useRandomCoords = True + params.randomSeed = 42 + rdDistGeom.EmbedMultipleConfs(mol, numConfs=1, params=params) + conf = mol.GetConformer() + fragments = Chem.GetMolFrags(mol) + if len(fragments) != 2: + raise AssertionError("Expected two fragments for interfragment interaction test") + anchor = conf.GetAtomPosition(fragments[0][0]) + moved = conf.GetAtomPosition(fragments[1][0]) + shift = Point3D(anchor.x - moved.x + 2.0, anchor.y - moved.y, anchor.z - moved.z) + for atom_idx in fragments[1]: + pos = conf.GetAtomPosition(atom_idx) + conf.SetAtomPosition(atom_idx, Point3D(pos.x + shift.x, pos.y + shift.y, pos.z + shift.z)) + return mol + + +def make_rdkit_mmff_properties(mol, settings: dict | None = None): + settings = {} if settings is None else settings + variant = settings.get("variant", "MMFF94") + mmff_props = rdForceFieldHelpers.MMFFGetMoleculeProperties(mol, mmffVariant=variant) + if mmff_props is None: + raise ValueError("RDKit could not create MMFF properties for molecule") + mmff_props.SetMMFFVariant(variant) + mmff_props.SetMMFFDielectricConstant(settings.get("dielectric_constant", 1.0)) + mmff_props.SetMMFFDielectricModel(settings.get("dielectric_model", 1)) + mmff_props.SetMMFFBondTerm(settings.get("bond_term", True)) + mmff_props.SetMMFFAngleTerm(settings.get("angle_term", True)) + mmff_props.SetMMFFStretchBendTerm(settings.get("stretch_bend_term", True)) + mmff_props.SetMMFFOopTerm(settings.get("oop_term", True)) + mmff_props.SetMMFFTorsionTerm(settings.get("torsion_term", True)) + mmff_props.SetMMFFVdWTerm(settings.get("vdw_term", True)) + mmff_props.SetMMFFEleTerm(settings.get("ele_term", True)) + return capture_mmff_settings(mmff_props, settings) + + +def calculate_rdkit_mmff_energies( + molecules, + maxIters=200, + property_settings: dict | None = None, + nonBondedThreshold: float = 100.0, + ignoreInterfragInteractions: bool = True, +): """Calculate MMFF energies using RDKit for all conformers of all molecules. Args: @@ -103,13 +150,18 @@ def calculate_rdkit_mmff_energies(molecules, maxIters=200, nonBondedThreshold=10 # Optimize all conformers for this molecule using RDKit # The signature shows it's a method on the molecule object - results = rdForceFieldHelpers.MMFFOptimizeMoleculeConfs( - mol, maxIters=maxIters, mmffVariant="MMFF94", nonBondedThresh=nonBondedThreshold - ) - - if results: - for _, energy in results: - mol_energies.append(energy) + mmff_props = make_rdkit_mmff_properties(mol, property_settings) + for conf_id in range(num_conformers): + ff = rdForceFieldHelpers.MMFFGetMoleculeForceField( + mol, + mmff_props, + nonBondedThresh=nonBondedThreshold, + confId=conf_id, + ignoreInterfragInteractions=ignoreInterfragInteractions, + ) + ff.Initialize() + ff.Minimize(maxIts=maxIters) + mol_energies.append(ff.CalcEnergy()) all_energies.append(mol_energies) @@ -140,7 +192,6 @@ def test_mmff_optimization_serial_vs_rdkit(mmff_test_mols): mol_energies = nvmolkit_mmff.MMFFOptimizeMoleculesConfs( [mol], maxIters=200, - nonBondedThreshold=100.0, ) nvmolkit_energies.extend(mol_energies) @@ -194,7 +245,7 @@ def test_mmff_optimization_batch_vs_rdkit(mmff_test_mols, gpu_ids, batchesize, b # Get nvMolKit energies in batch mode (all molecules at once) nvmolkit_energies = nvmolkit_mmff.MMFFOptimizeMoleculesConfs( - nvmolkit_mols, maxIters=200, nonBondedThreshold=100.0, hardwareOptions=hardware_options + nvmolkit_mols, maxIters=200, hardwareOptions=hardware_options ) # Verify we have the same number of molecules @@ -246,9 +297,9 @@ def test_mmff_optimization_allows_large_molecule_interleaved(): mols = [small1, big, small2] rdkit_mols = create_hard_copy_mols(mols) - rdkit_energies = calculate_rdkit_mmff_energies(rdkit_mols, maxIters=10, nonBondedThreshold=100.0) + rdkit_energies = calculate_rdkit_mmff_energies(rdkit_mols, maxIters=10) - energies = nvmolkit_mmff.MMFFOptimizeMoleculesConfs(mols, maxIters=10, nonBondedThreshold=100.0) + energies = nvmolkit_mmff.MMFFOptimizeMoleculesConfs(mols, maxIters=10) assert len(energies) == 3 for mol_idx, (rdkit_mol_energies, nvmolkit_mol_energies) in enumerate(zip(rdkit_energies, energies)): @@ -268,6 +319,148 @@ def test_mmff_optimization_allows_large_molecule_interleaved(): ) +def test_mmff_optimization_custom_properties_vs_rdkit(mmff_test_mols): + custom_property_settings = { + "dielectric_constant": 2.0, + "dielectric_model": 2, + } + custom_props = make_rdkit_mmff_properties(mmff_test_mols[0], custom_property_settings) + + # Step 0: compare initial energies (no minimization) to verify properties are applied + for label, props, settings in [("default", None, None), ("custom", custom_props, custom_property_settings)]: + rdkit_mols_0 = create_hard_copy_mols(mmff_test_mols[:2]) + nvmolkit_mols_0 = create_hard_copy_mols(mmff_test_mols[:2]) + rdkit_e0 = calculate_rdkit_mmff_energies(rdkit_mols_0, maxIters=0, property_settings=settings) + nvmolkit_e0 = nvmolkit_mmff.MMFFOptimizeMoleculesConfs( + nvmolkit_mols_0, + maxIters=0, + properties=props, + ) + for mol_idx, (r, n) in enumerate(zip(rdkit_e0, nvmolkit_e0)): + for conf_idx, (re, ne) in enumerate(zip(r, n)): + diff = abs(re - ne) + rel = diff / abs(re) if abs(re) > 1e-10 else diff + assert rel < 1e-3, ( + f"[{label}] Step-0 mol {mol_idx} conf {conf_idx}: RDKit={re:.6f} nvMolKit={ne:.6f} rel={rel:.6f}" + ) + + # Verify custom properties actually change the energy + default_mols = create_hard_copy_mols(mmff_test_mols[:2]) + custom_mols = create_hard_copy_mols(mmff_test_mols[:2]) + default_e0 = calculate_rdkit_mmff_energies(default_mols, maxIters=0) + custom_e0 = calculate_rdkit_mmff_energies(custom_mols, maxIters=0, property_settings=custom_property_settings) + for mol_idx, (de, ce) in enumerate(zip(default_e0, custom_e0)): + for conf_idx, (d, c) in enumerate(zip(de, ce)): + assert abs(d - c) > 1e-3, ( + f"Mol {mol_idx} conf {conf_idx}: default and custom energies " + f"should differ: default={d:.6f} custom={c:.6f}" + ) + + # Now test with minimization + rdkit_mols = create_hard_copy_mols(mmff_test_mols[:2]) + nvmolkit_mols = create_hard_copy_mols(mmff_test_mols[:2]) + rdkit_energies = calculate_rdkit_mmff_energies( + rdkit_mols, + maxIters=100, + property_settings=custom_property_settings, + ) + nvmolkit_energies = nvmolkit_mmff.MMFFOptimizeMoleculesConfs( + nvmolkit_mols, + maxIters=100, + properties=custom_props, + ) + + assert len(rdkit_energies) == len(nvmolkit_energies) + for mol_idx, (rdkit_mol_energies, nvmolkit_mol_energies) in enumerate(zip(rdkit_energies, nvmolkit_energies)): + assert len(rdkit_mol_energies) == len(nvmolkit_mol_energies) + for conf_idx, (rdkit_energy, nvmolkit_energy) in enumerate(zip(rdkit_mol_energies, nvmolkit_mol_energies)): + energy_diff = abs(rdkit_energy - nvmolkit_energy) + rel_error = energy_diff / abs(rdkit_energy) if abs(rdkit_energy) > 1e-10 else energy_diff + assert rel_error < 1e-2, ( + f"Molecule {mol_idx}, Conformer {conf_idx}: energy mismatch: " + f"RDKit={rdkit_energy:.6f}, nvMolKit={nvmolkit_energy:.6f}, " + f"abs_diff={energy_diff:.6f}, rel_error={rel_error:.6f}" + ) + + +def test_mmff_optimization_per_molecule_properties_vs_rdkit(mmff_test_mols): + mols = create_hard_copy_mols(mmff_test_mols[:2]) + rdkit_mols = create_hard_copy_mols(mols) + nvmolkit_mols = create_hard_copy_mols(mols) + + property_settings = [ + {"variant": "MMFF94s", "dielectric_constant": 2.0, "dielectric_model": 2}, + { + "bond_term": False, + "angle_term": False, + "stretch_bend_term": False, + "oop_term": False, + "torsion_term": False, + }, + ] + properties = [make_rdkit_mmff_properties(mol, settings) for mol, settings in zip(nvmolkit_mols, property_settings)] + + rdkit_energies = [ + calculate_rdkit_mmff_energies([mol], maxIters=100, property_settings=settings)[0] + for mol, settings in zip(rdkit_mols, property_settings) + ] + nvmolkit_energies = nvmolkit_mmff.MMFFOptimizeMoleculesConfs( + nvmolkit_mols, + maxIters=100, + properties=properties, + ) + + assert len(rdkit_energies) == len(nvmolkit_energies) + for mol_idx, (rdkit_mol_energies, nvmolkit_mol_energies) in enumerate(zip(rdkit_energies, nvmolkit_energies)): + assert len(rdkit_mol_energies) == len(nvmolkit_mol_energies) + for conf_idx, (rdkit_energy, nvmolkit_energy) in enumerate(zip(rdkit_mol_energies, nvmolkit_mol_energies)): + energy_diff = abs(rdkit_energy - nvmolkit_energy) + rel_error = energy_diff / abs(rdkit_energy) if abs(rdkit_energy) > 1e-10 else energy_diff + assert rel_error < 1e-2, ( + f"Molecule {mol_idx}, Conformer {conf_idx}: energy mismatch: " + f"RDKit={rdkit_energy:.6f}, nvMolKit={nvmolkit_energy:.6f}, " + f"abs_diff={energy_diff:.6f}, rel_error={rel_error:.6f}" + ) + + +def test_mmff_optimization_per_molecule_thresholds_and_interfrag_vs_rdkit(): + mols = [make_fragmented_mol(), make_fragmented_mol()] + rdkit_mols = create_hard_copy_mols(mols) + nvmolkit_mols = create_hard_copy_mols(mols) + properties = [make_rdkit_mmff_properties(mol) for mol in nvmolkit_mols] + non_bonded_thresholds = [25.0, 100.0] + ignore_interfrag_interactions = [False, True] + + rdkit_energies = [ + calculate_rdkit_mmff_energies( + [mol], + maxIters=100, + nonBondedThreshold=threshold, + ignoreInterfragInteractions=ignore_interfrag, + )[0] + for mol, threshold, ignore_interfrag in zip(rdkit_mols, non_bonded_thresholds, ignore_interfrag_interactions) + ] + nvmolkit_energies = nvmolkit_mmff.MMFFOptimizeMoleculesConfs( + nvmolkit_mols, + maxIters=100, + properties=properties, + nonBondedThreshold=non_bonded_thresholds, + ignoreInterfragInteractions=ignore_interfrag_interactions, + ) + + assert len(rdkit_energies) == len(nvmolkit_energies) + for mol_idx, (rdkit_mol_energies, nvmolkit_mol_energies) in enumerate(zip(rdkit_energies, nvmolkit_energies)): + assert len(rdkit_mol_energies) == len(nvmolkit_mol_energies) + for conf_idx, (rdkit_energy, nvmolkit_energy) in enumerate(zip(rdkit_mol_energies, nvmolkit_mol_energies)): + energy_diff = abs(rdkit_energy - nvmolkit_energy) + rel_error = energy_diff / abs(rdkit_energy) if abs(rdkit_energy) > 1e-10 else energy_diff + assert rel_error < 1e-2, ( + f"Molecule {mol_idx}, Conformer {conf_idx}: energy mismatch: " + f"RDKit={rdkit_energy:.6f}, nvMolKit={nvmolkit_energy:.6f}, " + f"abs_diff={energy_diff:.6f}, rel_error={rel_error:.6f}" + ) + + # Testing github issue 9 - openmp error handling def test_error_case_throws_properly(): smiles = "CC1(C)OB(CC2=CC=CC=C2)OC1(C)C" diff --git a/nvmolkit/types.py b/nvmolkit/types.py index 327dde9..112cdce 100644 --- a/nvmolkit/types.py +++ b/nvmolkit/types.py @@ -17,6 +17,8 @@ import torch from typing import Iterable, List + + from nvmolkit import _embedMolecules # type: ignore