Source code for MDAnalysis.converters.RDKitInferring
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Lesser General Public License, v2 or higher
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""RDKit bond order inferring --- :mod:`MDAnalysis.converters.RDKitInferring`
=============================================================================
Bond order and formal charge inferring for RDKit molecules. Because most MD
file formats don't preserve bond order information directly (or formal charges
to some extent), the classes provided here give users different options to
either provide or infer this information to the RDKit molecule constructed from
the topology. Having bond orders and formal charges properly defined is a
requirement for almost all cheminformatics-related task, hence the different
approaches proposed here to cover most use cases. You can also defined your own
function if need be, see the :mod:`~MDAnalysis.converters.RDKit` module for an
example.
These classes are meant to be passed directly to the RDKit converter::
>>> import MDAnalysis as mda
>>> from rdkit import Chem
>>> u = mda.Universe("aspirin.pdb")
>>> template = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O")
>>> inferrer = mda.converters.RDKitInferring.TemplateInferrer(template)
>>> rdkit_mol = u.atoms.convert_to.rdkit(inferrer=inferrer)
Classes
-------
.. autoclass:: MDAnalysisInferrer
:members:
:private-members:
.. autoclass:: TemplateInferrer
:members:
.. autoclass:: RDKitInferrer
:members:
.. autofunction:: sanitize_mol
.. autofunction:: reorder_atoms
"""
import warnings
from collections import defaultdict
from contextlib import suppress
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional, Tuple
import numpy as np
RDBONDORDER = {}
with suppress(ImportError):
from rdkit import Chem
from rdkit.Chem.AllChem import AssignBondOrdersFromTemplate
from rdkit.Chem.rdChemReactions import ChemicalReaction, ReactionFromSmarts
RDBONDORDER = {
1: Chem.BondType.SINGLE,
1.5: Chem.BondType.AROMATIC,
"ar": Chem.BondType.AROMATIC,
2: Chem.BondType.DOUBLE,
3: Chem.BondType.TRIPLE,
}
# add string version of the key for each bond
RDBONDORDER.update({str(key): value for key, value in RDBONDORDER.items()})
PERIODIC_TABLE = Chem.GetPeriodicTable()
with suppress(ImportError):
from rdkit.Chem.rdDetermineBonds import (
DetermineBondOrders,
) # available since 2022.09.1
[docs]
def reorder_atoms(
mol: "Chem.Mol", field: str = "_MDAnalysis_index"
) -> "Chem.Mol":
"""Reorder atoms based on the given field. Defaults to sorting in the same
order as the input AtomGroup.
Notes
-----
If the field is not found on the molecule, skips reordering.
"""
with suppress(KeyError):
order = np.argsort([atom.GetIntProp(field) for atom in mol.GetAtoms()])
return Chem.RenumberAtoms(mol, order.astype(int).tolist())
# not a molecule converted by MDAnalysis
warnings.warn(
f"{field!r} not available on the input mol atoms, skipping reordering of atoms."
)
return mol
[docs]
def sanitize_mol(mol: "Chem.Mol") -> None:
"""Sanitizes the molecule."""
Chem.SanitizeMol(mol)
[docs]
@dataclass(frozen=True)
class MDAnalysisInferrer:
"""Bond order and formal charge inferring as originally implemented for
the RDKit converter. This algorithm only relies on the topology with
explicit hydrogens to assign bond orders and formal charges.
Attributes
----------
max_iter : int
Maximum number of iterations to standardize conjugated systems.
See :meth:`~MDAnalysisInferrer._rebuild_conjugated_bonds`
sanitize : bool
Whether to sanitize the molecule or not.
MONATOMIC_CATION_CHARGES : ClassVar[Dict[int, int]]
Charges that should be assigned to monatomic cations. Maps atomic
number to their formal charge. Anion charges are directly handled by
the code using the typical valence of the atom.
STANDARDIZATION_REACTIONS : ClassVar[List[str]]
Reactions uses by :meth:`~MDAnalysisInferrer._standardize_patterns` to
fix challenging cases must have single reactant and product, and
cannot add any atom.
Notes
-----
There are some molecules containing specific substructures that this
inferrer cannot currently tackle correctly. See
`Issue #3339 <https://github.com/MDAnalysis/mdanalysis/issues/3339>`__ for
more info.
.. versionadded:: 2.10.0
"""
MONATOMIC_CATION_CHARGES: ClassVar[Dict[int, int]] = {
3: 1,
11: 1,
19: 1,
37: 1,
47: 1,
55: 1,
12: 2,
20: 2,
29: 2,
30: 2,
38: 2,
56: 2,
26: 2, # Fe could also be +3
13: 3,
}
STANDARDIZATION_REACTIONS: ClassVar[List[str]] = [
"[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]", # Cterm
"[N-;X2;H1;$(N-[*^3]):1]>>[N+0:1]", # Nterm
"[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]", # keto-enolate
"[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]", # ARG
"[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]"
">>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]", # HIP
"[S;D4;!v6:1]-[*-:2]>>[S;v6:1]=[*+0:2]", # sulfone
"[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]", # charged-N
]
max_iter: int = 200
sanitize: bool = True
def __call__(self, mol: "Chem.Mol") -> "Chem.Mol":
"""Infer bond orders and formal charges in the molecule.
Parameters
----------
mol : Chem.Mol
The molecule to infer bond orders and charges for.
Returns
-------
A new molecule with proper bond orders and charges.
"""
self._infer_bo_and_charges(mol)
mol = self._standardize_patterns(mol)
mol = reorder_atoms(mol)
if self.sanitize:
sanitize_mol(mol)
return mol
[docs]
@classmethod
def _atom_sorter(cls, atom: "Chem.Atom") -> Tuple[int, int]:
"""Sorts atoms in the molecule in a way that makes it easy for the bond
order and charge infering code to get the correct state on the first
try. Currently sorts by number of unpaired electrons, then by number of
heavy atom neighbors (i.e. atoms at the edge first)."""
num_heavy_neighbors = len(
[
neighbor
for neighbor in atom.GetNeighbors()
if neighbor.GetAtomicNum() > 1
]
)
return (-cls._get_nb_unpaired_electrons(atom)[0], num_heavy_neighbors)
[docs]
@classmethod
def _infer_bo_and_charges(cls, mol: "Chem.Mol") -> None:
"""Infer bond orders and formal charges from a molecule.
Since most MD topology files don't explicitly retain information on
bond orders or charges, it has to be guessed from the topology. This
is done by looping over each atom and comparing its expected valence
to the current valence to get the Number of Unpaired Electrons (NUE).
If an atom has a negative NUE, it needs a positive formal charge
(-NUE). If two neighbouring atoms have UEs, the bond between them most
likely has to be increased by the value of the smallest NUE.
If after this process, an atom still has UEs, it needs a negative
formal charge of -NUE.
Parameters
----------
mol : rdkit.Chem.rdchem.RWMol
The molecule is modified inplace and must have all hydrogens added
Notes
-----
This algorithm is order dependant. For example, for a carboxylate group
``R-C(-O)-O`` the first oxygen read will receive a double bond and the
other one will be charged. It will also affect more complex conjugated
systems.
"""
# heavy atoms sorted by number of heavy atom neighbors (lower first)
# then NUE (higher first)
atoms = sorted(
[atom for atom in mol.GetAtoms() if atom.GetAtomicNum() > 1],
key=cls._atom_sorter,
)
for atom in atoms:
# monatomic ions
if atom.GetDegree() == 0:
atom.SetFormalCharge(
cls.MONATOMIC_CATION_CHARGES.get(
atom.GetAtomicNum(),
-cls._get_nb_unpaired_electrons(atom)[0],
)
)
mol.UpdatePropertyCache(strict=False)
continue
# get NUE for each possible valence
nue = cls._get_nb_unpaired_electrons(atom)
# if there's only one possible valence state and the corresponding
# NUE is negative, it means we can only add a positive charge to
# the atom
if (len(nue) == 1) and (nue[0] < 0):
atom.SetFormalCharge(-nue[0])
mol.UpdatePropertyCache(strict=False)
# go to next atom if above case or atom has no unpaired electron
if (len(nue) == 1) and (nue[0] <= 0):
continue
neighbors = sorted(
atom.GetNeighbors(),
reverse=True,
key=lambda a: cls._get_nb_unpaired_electrons(a)[0],
)
# check if one of the neighbors has a common NUE
for na in neighbors:
# get NUE for the neighbor
na_nue = cls._get_nb_unpaired_electrons(na)
# smallest common NUE
common_nue = min(
[i for i in [*nue, *na_nue] if i >= 0],
default=0,
)
# a common NUE of 0 means we don't need to do anything
if common_nue != 0:
# increase bond order
bond = mol.GetBondBetweenAtoms(atom.GetIdx(), na.GetIdx())
order = common_nue + 1
bond.SetBondType(RDBONDORDER[order])
mol.UpdatePropertyCache(strict=False)
# go to next atom if one of the valences is complete
nue = cls._get_nb_unpaired_electrons(atom)
if any(n == 0 for n in nue):
break
# if atom valence is still not filled
nue = cls._get_nb_unpaired_electrons(atom)
if not any(n == 0 for n in nue):
# transform nue to charge
atom.SetFormalCharge(-nue[0])
atom.SetNumRadicalElectrons(0)
mol.UpdatePropertyCache(strict=False)
[docs]
@staticmethod
def _get_nb_unpaired_electrons(atom: "Chem.Atom") -> List[int]:
"""Calculate the number of unpaired electrons (NUE) of an atom
Parameters
----------
atom: rdkit.Chem.rdchem.Atom
The atom for which the NUE will be computed
Returns
-------
nue : List[int]
The NUE for each possible valence of the atom
"""
expected_vs = PERIODIC_TABLE.GetValenceList(atom.GetAtomicNum())
current_v = atom.GetTotalValence() - atom.GetFormalCharge()
return [v - current_v for v in expected_vs]
[docs]
def _standardize_patterns(
self, mol: "Chem.Mol", max_iter: Optional[int] = None
) -> "Chem.Mol":
"""Standardizes functional groups
Uses :meth:`~MDAnalysisInferrer._rebuild_conjugated_bonds` to
standardize conjugated systems, and SMARTS reactions for other
functional groups.
Due to the way reactions work, we first have to split the molecule by
fragments. Then, for each fragment, we apply the standardization
reactions. Finally, the fragments are recombined.
Parameters
----------
mol : rdkit.Chem.rdchem.RWMol
The molecule to standardize
max_iter : Optional[int]
Maximum number of iterations to standardize conjugated systems.
.. deprecated:: 2.10.0
Will be removed in 3.0, use
``inferrer=MDAnalysisInferrer(max_iter=...)`` instead.
Returns
-------
mol : rdkit.Chem.rdchem.Mol
The standardized molecule
Notes
-----
The following functional groups are transformed in this order:
+---------------+------------------------------------------------------------------------------+
| Name | Reaction |
+===============+==============================================================================+
| conjugated | ``[*-:1]-[*:2]=[*:3]-[*-:4]>>[*+0:1]=[*:2]-[*:3]=[*+0:4]`` |
+---------------+------------------------------------------------------------------------------+
| conjugated N+ | ``[N;X3;v3:1]-[*:2]=[*:3]-[*-:4]>>[N+:1]=[*:2]-[*:3]=[*+0:4]`` |
+---------------+------------------------------------------------------------------------------+
| conjugated O- | ``[O:1]=[#6+0,#7+:2]-[*:3]=[*:4]-[*-:5]>>[O-:1]-[*:2]=[*:3]-[*:4]=[*+0:5]`` |
+---------------+------------------------------------------------------------------------------+
| conjug. S=O | ``[O-:1]-[S;D4;v4:2]-[*:3]=[*:4]-[*-:5]>>[O+0:1]=[*:2]=[*:3]-[*:4]=[*+0:5]`` |
+---------------+------------------------------------------------------------------------------+
| Cterm | ``[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]`` |
+---------------+------------------------------------------------------------------------------+
| Nterm | ``[N-;X2;H1;$(N-[*^3]):1]>>[N+0:1]`` |
+---------------+------------------------------------------------------------------------------+
| keto-enolate | ``[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]`` |
+---------------+------------------------------------------------------------------------------+
| arginine | ``[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]`` |
+---------------+------------------------------------------------------------------------------+
| histidine | ``[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]>>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]``|
+---------------+------------------------------------------------------------------------------+
| sulfone | ``[S;D4;!v6:1]-[*-:2]>>[S;v6:1]=[*+0:2]`` |
+---------------+------------------------------------------------------------------------------+
| charged N | ``[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]`` |
+---------------+------------------------------------------------------------------------------+
"""
if max_iter is None:
max_iter = self.max_iter
else:
warnings.warn(
"Specifying `max_iter` is deprecated and will be removed in a "
"future update. Directly create an instance of "
"`MDAnalysisInferrer` with `MDAnalysisInferrer(max_iter=...)` "
"instead.",
DeprecationWarning,
)
# standardize conjugated systems
self._rebuild_conjugated_bonds(mol, max_iter)
# list of sanitized reactions
reactions = [
ReactionFromSmarts(rxn) for rxn in self.STANDARDIZATION_REACTIONS
]
# fragment mol (reactions must have single reactant and product)
fragments = list(
Chem.GetMolFrags(mol, asMols=True, sanitizeFrags=self.sanitize)
)
for reactant in fragments:
self._apply_reactions(reactions, reactant)
# recombine fragments
newmol = fragments.pop(0)
for fragment in fragments:
newmol = Chem.CombineMols(newmol, fragment)
return newmol
[docs]
@staticmethod
def _apply_reactions(
reactions: List["ChemicalReaction"], reactant: "Chem.Mol"
) -> None:
"""Applies a series of unimolecular reactions to a molecule. The
reactions must not add any atom to the product. The molecule is
modified inplace.
Parameters
----------
reactions : List[rdkit.Chem.rdChemReactions.ChemicalReaction]
Reactions from SMARTS. Each reaction is applied until no more
transformations can be made.
reactant : rdkit.Chem.rdchem.Mol
The molecule to transform inplace
"""
reactant.UpdatePropertyCache(strict=False)
Chem.KekulizeIfPossible(reactant)
for reaction in reactions:
while reaction.RunReactantInPlace(reactant):
reactant.UpdatePropertyCache(strict=False)
reactant.UpdatePropertyCache(strict=False)
Chem.KekulizeIfPossible(reactant)
[docs]
def _rebuild_conjugated_bonds(
self, mol: "Chem.Mol", max_iter: Optional[int] = None
) -> None:
"""Rebuild conjugated bonds without negatively charged atoms at the
beginning and end of the conjugated system
Depending on the order in which atoms are read during the conversion,
the :meth:`~MDAnalysisInferrer._infer_bo_and_charges` function might
write conjugated systems with a double bond less and both edges of the
system as anions instead of the usual alternating single and double
bonds. This function corrects this behaviour by using an iterative
procedure.
The problematic molecules always follow the same pattern:
``anion[-*=*]n-anion`` instead of ``*=[*-*=]n*``, where ``n`` is the
number of successive single and double bonds. The goal of the
iterative procedure is to make ``n`` as small as possible by
consecutively transforming ``anion-*=*`` into ``*=*-anion`` until it
reaches the smallest pattern with ``n=1``. This last pattern is then
transformed ``anion-*=*-anion`` to ``*=*-*=*``.
Since ``anion-*=*`` is the same as ``*=*-anion`` in terms of SMARTS,
we can control that we don't transform the same triplet of atoms back
and forth by adding their indices to a list.
This function also handles conjugated systems with triple bonds.
The molecule needs to be kekulized first to also cover systems
with aromatic rings.
Parameters
----------
mol : rdkit.Chem.rdchem.RWMol
The molecule to transform, modified inplace
max_iter : Optional[int]
Maximum number of iterations to standardize conjugated systems.
.. deprecated:: 2.10.0
Will be removed in 3.0, use
``inferrer=MDAnalysisInferrer(max_iter=...)`` instead.
Notes
-----
The molecule is modified inplace
"""
max_iter = self.max_iter if max_iter is None else max_iter
mol.UpdatePropertyCache(strict=False)
Chem.KekulizeIfPossible(mol)
# pattern used to find problematic conjugated bonds
# there's usually an even number of matches for this
pattern = Chem.MolFromSmarts("[*-{1-2}]-,=[*+0]=,#[*+0]")
# pattern used to finish fixing a series of conjugated bonds
base_end_pattern = Chem.MolFromSmarts(
"[*-{1-2}]-,=[*+0]=,#[*+0]-,=[*-{1-2}]"
)
# used when there's an odd number of matches for `pattern`
odd_end_pattern = Chem.MolFromSmarts(
"[*-]-[*+0]=[*+0]-[*-,$([#7;X3;v3]),$([#6+0,#7+1]=O),$([S;D4;v4]-[O-])]"
)
# number of unique matches with the pattern
n_matches = len(
{match[0] for match in mol.GetSubstructMatches(pattern)}
)
# nothing to standardize
if n_matches == 0:
return
# single match (unusual)
if n_matches == 1:
# as a last resort, the only way to standardize is to find a
# nitrogen that can accept a double bond and a positive charge
# or a carbonyl that will become an enolate
end_pattern = odd_end_pattern
# at least 2 matches
else:
# give priority to base end pattern and then deal with the odd one
# if necessary
end_pattern = base_end_pattern
backtrack = []
backtrack_cycles = 0
for _ in range(max_iter):
# check for ending pattern
end_match = mol.GetSubstructMatch(end_pattern)
if end_match:
# index of each atom
anion1, a1, a2, anion2 = end_match
term_atom = mol.GetAtomWithIdx(anion2)
# edge-case 1: C-[O-] or [N+]-[O-]
# [*-]-*=*-[C,N+]=O --> *=*-*=[C,N+]-[O-]
# transform the =O to -[O-]
if (
term_atom.GetAtomicNum() == 6
and term_atom.GetFormalCharge() == 0
) or (
term_atom.GetAtomicNum() == 7
and term_atom.GetFormalCharge() == 1
):
for neighbor in term_atom.GetNeighbors():
bond = mol.GetBondBetweenAtoms(
anion2, neighbor.GetIdx()
)
if (
neighbor.GetAtomicNum() == 8
and bond.GetBondTypeAsDouble() == 2
):
bond.SetBondType(Chem.BondType.SINGLE)
neighbor.SetFormalCharge(-1)
break
# edge-case 2: S=O
# [*-]-*=*-[Sv4]-[O-] --> *=*-*=[Sv6]=O
# transform -[O-] to =O
elif (
term_atom.GetAtomicNum() == 16
and term_atom.GetFormalCharge() == 0
):
for neighbor in term_atom.GetNeighbors():
bond = mol.GetBondBetweenAtoms(
anion2, neighbor.GetIdx()
)
if (
neighbor.GetAtomicNum() == 8
and neighbor.GetFormalCharge() == -1
and bond.GetBondTypeAsDouble() == 1
):
bond.SetBondType(Chem.BondType.DOUBLE)
neighbor.SetFormalCharge(0)
break
# not an edge case:
# increment the charge
else:
term_atom.SetFormalCharge(term_atom.GetFormalCharge() + 1)
# common to all cases:
# [*-]-*=*-[R] --> *=*-*=[R]
# increment the charge and switch single and double bonds
a = mol.GetAtomWithIdx(anion1)
a.SetFormalCharge(a.GetFormalCharge() + 1)
b = mol.GetBondBetweenAtoms(anion1, a1)
b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1])
b = mol.GetBondBetweenAtoms(a1, a2)
b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() - 1])
b = mol.GetBondBetweenAtoms(a2, anion2)
b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1])
mol.UpdatePropertyCache(strict=False)
continue
# switch the position of the charge and the double bond
matches = mol.GetSubstructMatches(pattern)
if matches:
# check if we haven't already transformed this triplet
for match in matches:
# store order-independent atom indices
g = set(match)
# already transformed --> try the next one
if g in backtrack:
continue
# add to backtracking and start the switch
anion, a1, a2 = match
backtrack.append(g)
break
# already performed all changes
else:
if backtrack_cycles == 1:
# might be stuck because there were 2 "odd" end
# patterns misqualified as a single base one
end_pattern = odd_end_pattern
elif backtrack_cycles > 1:
# likely stuck changing the same bonds over and over so
# let's stop here
mol.UpdatePropertyCache(strict=False)
return
match = matches[0]
anion, a1, a2 = match
backtrack = [set(match)]
backtrack_cycles += 1
# switch charges
a = mol.GetAtomWithIdx(anion)
a.SetFormalCharge(a.GetFormalCharge() + 1)
a = mol.GetAtomWithIdx(a2)
a.SetFormalCharge(a.GetFormalCharge() - 1)
# switch bond orders
b = mol.GetBondBetweenAtoms(anion, a1)
b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1])
b = mol.GetBondBetweenAtoms(a1, a2)
b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() - 1])
mol.UpdatePropertyCache(strict=False)
# update number of matches for the end pattern
n_matches = len({match[0] for match in matches})
if n_matches == 1:
end_pattern = odd_end_pattern
# start new iteration
continue
# no more changes to apply
mol.UpdatePropertyCache(strict=False)
return
# reached max_iter
warnings.warn(
"The standardization could not be completed within a "
"reasonable number of iterations"
)
[docs]
@dataclass(frozen=True)
class TemplateInferrer:
"""Infer bond orders and charges by matching the molecule with a template
molecule containing bond orders and charges.
Attributes
----------
template : rdkit.Chem.rdchem.Mol
Molecule containing the bond orders and charges.
adjust_hydrogens: bool, default = True
If ``True``, temporarily removes hydrogens from the molecule to
assign bond orders and charges from the template, then adds them
back. Useful to avoid adding explicit hydrogens on the template which
can prevent RDKit from finding a match between the template and the
molecule. Setting this to ``False`` can be useful to speed things up
for inorganic molecules that don't have any hydrogens.
.. versionadded:: 2.10.0
"""
template: "Chem.Mol"
adjust_hydrogens: bool = True
def __call__(self, mol: "Chem.Mol") -> "Chem.Mol":
if self.adjust_hydrogens:
return self.assign_from_template_with_adjusted_hydrogens(mol)
return AssignBondOrdersFromTemplate(self.template, mol)
[docs]
def assign_from_template_with_adjusted_hydrogens(
self, mol: "Chem.Mol", index_field: str = "_MDAnalysis_index"
) -> "Chem.Mol":
"""Temporarily removes hydrogens from the molecule before assigning
bond orders and charges from the template, then adds them back.
Parameters
----------
mol : rdkit.Chem.rdchem.Mol
Molecule to assign bond orders and charges to.
index_field : str, default = "_MDAnalysis_index"
Atom property corresponding to a unique integer that can used to
put back the hydrogen atoms that were removed before matching,
and sort them back to the original order. Must be accessible
through ``atom.GetIntProp``.
"""
# remove explicit hydrogens and assign BO from template
mol_no_h = Chem.RemoveAllHs(mol, sanitize=False)
new = AssignBondOrdersFromTemplate(self.template, mol_no_h)
# mapping between AtomGroup index of atom bearing H --> H atom(s)
atoms_with_hydrogens = defaultdict(list)
for atom in mol.GetAtoms():
if atom.GetAtomicNum() == 1:
atoms_with_hydrogens[
atom.GetNeighbors()[0].GetIntProp(index_field)
].append(atom)
# mapping between atom that should be bearing a H in RWMol and
# corresponding H(s)
reverse_mapping = {}
for atom in new.GetAtoms():
if (idx := atom.GetIntProp(index_field)) in atoms_with_hydrogens:
reverse_mapping[atom.GetIdx()] = atoms_with_hydrogens[idx]
# Set ExplicitH to 0 to avoid valence errors when adding Hs
atom.SetNumExplicitHs(0)
# let UpdatePropertyCache recalculate the number of radicals later
atom.SetNumRadicalElectrons(0)
# add missing Hs
rwmol = Chem.RWMol(new)
for atom_idx, hydrogens in reverse_mapping.items():
for hydrogen in hydrogens:
h_idx = rwmol.AddAtom(hydrogen)
rwmol.AddBond(atom_idx, h_idx, Chem.BondType.SINGLE)
new = rwmol.GetMol()
# sanitize
new.UpdatePropertyCache()
sanitize_mol(new)
# reorder atoms as input atomgroup (through _MDAnalysis_index)
return reorder_atoms(new, field=index_field)
[docs]
@dataclass(frozen=True)
class RDKitInferrer:
"""Uses RDKit's :func:`~rdkit.Chem.rdDetermineBonds.DetermineBondOrders`
to infer bond orders and formal charges. This is the same algorithm used
by the :ref:`xyz2mol <https://github.com/jensengroup/xyz2mol>` package.
.. versionadded:: 2.10.0
"""
charge: int = 0
def __call__(self, mol: "Chem.Mol") -> "Chem.Mol":
new = Chem.Mol(mol)
DetermineBondOrders(new, charge=self.charge)
return new