Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions rdtools/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import traceback

import numpy as np
from collections import defaultdict

from rdkit import Chem
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
Expand Down Expand Up @@ -282,3 +283,70 @@ def is_same_connectivity_conf(

else:
return is_same_connectivity_mol(mol, new_mol)


def is_symmetric_to_substructure(mol : Chem.Mol, substructure: Chem.Mol) -> bool:
'''
Check whether a mol is symmetric to a provided substructure.

Args:
mol1 (RWMol): The molecule to check.
substructure (RWMol): A molecule representing the SMARTS substructure to check.

Returns:
bool: Whether the molecule is symmetric w.r.t. the substructure.
'''
matches = mol.GetSubstructMatches(substructure)

classes = find_symmetry_classes(mol)

if len(matches) == 0: # Substructure isn't in molecule.
return False
elif len(matches) == 1: # Molecule has only one match and is therefore symmetric w.r.t. substructure
return True

# Assumes that 'matches' contains sets of equal size.
length_matches = len(matches[0])
num_matches = len(matches)
for match in matches:
assert len(match) == length_matches

# There is a match if all of the nth elements of each list in the matches is in the classes set.
for cla in classes: # Example: classes = {(2, 4), (1,3), (0,5)}; cla = (2, 4)

# Loop through the matches.
for j in range(length_matches): # Example: 0, 1 (length_matches = 2, the substructure is 2 atoms long)

match_index = 0
for i in range(num_matches): # Example: 0, 1 (num_matches = 2, we have 2 substructure matches)
# Logic here is that matches[i][j] should be in the cla set for all i.
if matches[i][j] in cla:
match_index += 1

# 2 possibilities:
if match_index == num_matches: # symmetric: all symmetry classes match all substructure matches at the same ID
pass
elif match_index == 0: # nothing matches, but other iterations of i, j, and cla might match
pass
else: # asymmetric, matches are out of order
return False

return True

def find_symmetry_classes(mol : Chem.Mol) -> set:
'''
Find set of symmetry classes for a given mol.
Adapted from code by Greg Landrum, 2011:
https://sourceforge.net/p/rdkit/mailman/rdkit-discuss/thread/CAD4fdRSofifYy_GFeZfxoHd_z4Y=4tVNR3UuBTea3sr81e8UMQ@mail.gmail.com/

Args:
mol: Molecule to examine symmetry classes.
'''

equivs = defaultdict(set)
matches = mol.GetSubstructMatches(mol,uniquify=False)
for match in matches:
for idx1,idx2 in enumerate(match): equivs[idx1].add(idx2)
classes = set()
for s in equivs.values(): classes.add(tuple(s))
return classes
125 changes: 124 additions & 1 deletion rdtools/mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
from typing import List, Optional, Union, Sequence

import numpy as np
import warnings

from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem.MolStandardize.rdMolStandardize import ChargeParent
from rdkit.Geometry.rdGeometry import Point3D

from rdtools.atommap import has_atom_map_numbers
Expand All @@ -18,7 +20,7 @@
set_conformer_coordinates,
)
from rdtools.conversion.xyz import xyz_to_coords

from rdtools.conversion.smiles import mol_from_smiles, mol_to_smiles

def get_spin_multiplicity(mol: Chem.Mol) -> int:
"""
Expand Down Expand Up @@ -224,6 +226,127 @@ def fast_sanitize(mol: Chem.RWMol):
)


def is_implicit(mol : Chem.RWMol):
"""
Infer whether a molecule has implicit hydrogens.
"""

for atom in mol.GetAtoms():
if atom.GetNumImplicitHs() > 0:
return True

return False


def uncharge_mol(mol : Chem.RWMol,
method : str = "all"):
"""
Uncharges a molecule, adding or removing hydrogens wherever necessary.

Args:
mol (Chem.Mol): The molecule to uncharge.
method (str, optional): Algorithm for uncharging: "rdkit" for RDKit's
built-in ChargeParent method, "nocharge" for
Neil O'Boyle's nocharge algorithm (as adapted
by Vincent Scalfani)
Default is to use "all", starting with uncharger.
"""
if isinstance(mol, Chem.RWMol):
mol = copy.copy(mol)
else:
mol = Chem.RWMol(mol)

METHOD_LIST = ["all", "rdkit", "nocharge"]

if method not in METHOD_LIST:
raise KeyError(f"Method must be in {METHOD_LIST}; got: {method}")

if method in ("rdkit", "all"):
mol = ChargeParent(mol)
if get_formal_charge(mol) == 0:
return mol

if method in ("nocharge", "all"):
# Algorithm adapted from Noel O’Boyle (Vincent Scalfani adapted code for RDKit)
# See https://www.rdkit.org/docs/Cookbook.html#neutralizing-molecules)

implicit_h = is_implicit(mol)
if implicit_h:
pattern = Chem.MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")
else:
pattern = Chem.MolFromSmarts("[+1!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")

at_matches = mol.GetSubstructMatches(pattern)
at_matches_list = [y[0] for y in at_matches]
if len(at_matches_list) > 0:
for at_idx in at_matches_list:
atom = mol.GetAtomWithIdx(at_idx)
chg = atom.GetFormalCharge()

if chg > 0:
mol = deprotonate_at_site(mol, at_idx)
elif chg < 0:
mol = protonate_at_site(mol, at_idx)

if get_formal_charge(mol) == 0:
return mol

warnings.warn(f"Unable to uncharge: got {mol_to_smiles(mol)}")
return mol


def protonate_at_site(mol : Chem.RWMol, site : int):
'''
Add a proton of a mol object at the provided index.

Args:
mol: Mol object
site: RDKit atom index of the site to be de/protonated.
'''

length = mol.GetNumAtoms()
atom = mol.GetAtomWithIdx(site)
atom.SetFormalCharge(atom.GetFormalCharge() + 1)

if is_implicit(mol):
hcount = atom.GetTotalNumHs(includeNeighbors=True)
newcharge = hcount + 1
atom.SetNumExplicitHs(newcharge)
else:
h_atom = Chem.MolFromSmiles('[H]')
mol = combine_mols(mol, h_atom)
mol = Chem.RWMol(mol) # as it appears to get un-RWmol from combining
mol.AddBond(site, length, order=Chem.rdchem.BondType.SINGLE)

return mol


def deprotonate_at_site(mol : Chem.RWMol, site : int):
'''
Remove a proton of a mol object at the provided index.

Args:
mol: Mol object
site: RDKit atom index of the site to be de/protonated.

'''

atom = mol.GetAtomWithIdx(site)
atom.SetFormalCharge(atom.GetFormalCharge() - 1)

if is_implicit(mol):
hcount = atom.GetTotalNumHs(includeNeighbors=True)
newcharge = hcount - 1
atom.SetNumExplicitHs(newcharge)
else:
for neighbor in atom.GetNeighbors():
if neighbor.GetAtomicNum() == 1:
mol.RemoveAtom(neighbor.GetIdx())
break

return mol


def get_closed_shell_mol(
mol: Chem.RWMol,
sanitize: bool = True,
Expand Down
18 changes: 17 additions & 1 deletion test/rdtools/test_compare.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import pytest

from rdtools.compare import get_match_and_recover_recipe, get_unique_mols, has_matched_mol, is_same_connectivity_mol
from rdtools.compare import get_match_and_recover_recipe, get_unique_mols, has_matched_mol, is_same_connectivity_mol, is_symmetric_to_substructure
from rdtools.conversion import mol_from_smiles, mol_to_smiles

from rdkit.Chem import MolFromSmarts

@pytest.mark.parametrize(
"smi1, smi2, expected",
Expand Down Expand Up @@ -147,3 +148,18 @@ def test_has_same_connectivity(smi1, smi2, expect_match):
mol1 = mol_from_smiles(smi1)
mol2 = mol_from_smiles(smi2)
assert is_same_connectivity_mol(mol1, mol2) == expect_match


@pytest.mark.parametrize(
'smi, sma, expect_match',
[
('CC(=O)C', '[CX3]=[OX1]', True),
('CC(=O)C(=O)C', '[CX3]=[OX1]', True),
('CC(=O)CC(=O)', '[CX3]=[OX1]', False),
('C', '[CX3]=[OX1]', False),
('OCC(CO)(CO)CO', '[CX4]-[OX2]', True),
])
def test_is_symmetric_to_substructure(smi, sma, expect_match):
mol = mol_from_smiles(smi)
substructure = MolFromSmarts(sma)
assert is_symmetric_to_substructure(mol, substructure) == expect_match
22 changes: 22 additions & 0 deletions test/rdtools/test_mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
get_atomic_nums,
get_atom_masses,
get_closed_shell_mol,
is_implicit,
uncharge_mol
)
from rdtools.conf import (
embed_multiple_null_confs,
Expand Down Expand Up @@ -175,3 +177,23 @@ def test_get_closed_shell_mol(rad_smi, expect_smi, cheap, atommap):
rad_mol = mol_from_smiles(rad_smi, assign_atom_map=atommap)
cs_mol = get_closed_shell_mol(rad_mol, cheap=cheap)
assert mol_to_smiles(cs_mol) == expect_smi


@pytest.mark.parametrize(
"ion_smi, expected_smi",
[
("CCCCC(=O)[O-]", "CCCCC(=O)O"),
("c1ccccc1[O-]", "Oc1ccccc1"),
("CCC[NH3+]", "CCCN"),
("[NH3+]CC(=O)[O-]", "NCC(=O)O"),
("S(=O)(=O)([O-])[O-]", "O=S(=O)(O)O"),
("C", "C"),
],
)
@pytest.mark.parametrize("method", ["all", "rdkit", "nocharge"])
def test_uncharge_mol(ion_smi, expected_smi, method):

ion_mol = mol_from_smiles(ion_smi)
neut_mol = uncharge_mol(ion_mol, method=method)
assert mol_to_smiles(neut_mol) == expected_smi