Source code for MDAnalysis.converters.RDKitInferring

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Lesser General Public License, v2 or higher
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

"""RDKit bond order inferring --- :mod:`MDAnalysis.converters.RDKitInferring`
=============================================================================
Bond order and formal charge inferring for RDKit molecules. Because most MD
file formats don't preserve bond order information directly (or formal charges
to some extent), the classes provided here give users different options to
either provide or infer this information to the RDKit molecule constructed from
the topology. Having bond orders and formal charges properly defined is a
requirement for almost all cheminformatics-related task, hence the different
approaches proposed here to cover most use cases. You can also defined your own
function if need be, see the :mod:`~MDAnalysis.converters.RDKit` module for an
example.

These classes are meant to be passed directly to the RDKit converter::

    >>> import MDAnalysis as mda
    >>> from rdkit import Chem
    >>> u = mda.Universe("aspirin.pdb")
    >>> template = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O")
    >>> inferrer = mda.converters.RDKitInferring.TemplateInferrer(template)
    >>> rdkit_mol = u.atoms.convert_to.rdkit(inferrer=inferrer)

Classes
-------

.. autoclass:: MDAnalysisInferrer
   :members:
   :private-members:

.. autoclass:: TemplateInferrer
   :members:

.. autoclass:: RDKitInferrer
   :members:

.. autofunction:: sanitize_mol

.. autofunction:: reorder_atoms

"""

import warnings
from collections import defaultdict
from contextlib import suppress
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional, Tuple

import numpy as np

RDBONDORDER = {}
with suppress(ImportError):
    from rdkit import Chem
    from rdkit.Chem.AllChem import AssignBondOrdersFromTemplate
    from rdkit.Chem.rdChemReactions import ChemicalReaction, ReactionFromSmarts

    RDBONDORDER = {
        1: Chem.BondType.SINGLE,
        1.5: Chem.BondType.AROMATIC,
        "ar": Chem.BondType.AROMATIC,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE,
    }
    # add string version of the key for each bond
    RDBONDORDER.update({str(key): value for key, value in RDBONDORDER.items()})
    PERIODIC_TABLE = Chem.GetPeriodicTable()

with suppress(ImportError):
    from rdkit.Chem.rdDetermineBonds import (
        DetermineBondOrders,
    )  # available since 2022.09.1



[docs]
def reorder_atoms(
    mol: "Chem.Mol", field: str = "_MDAnalysis_index"
) -> "Chem.Mol":
    """Reorder atoms based on the given field. Defaults to sorting in the same
    order as the input AtomGroup.

    Notes
    -----
    If the field is not found on the molecule, skips reordering.
    """
    with suppress(KeyError):
        order = np.argsort([atom.GetIntProp(field) for atom in mol.GetAtoms()])
        return Chem.RenumberAtoms(mol, order.astype(int).tolist())
    # not a molecule converted by MDAnalysis
    warnings.warn(
        f"{field!r} not available on the input mol atoms, skipping reordering of atoms."
    )
    return mol




[docs]
def sanitize_mol(mol: "Chem.Mol") -> None:
    """Sanitizes the molecule."""
    Chem.SanitizeMol(mol)




[docs]
@dataclass(frozen=True)
class MDAnalysisInferrer:
    """Bond order and formal charge inferring as originally implemented for
    the RDKit converter. This algorithm only relies on the topology with
    explicit hydrogens to assign bond orders and formal charges.

    Attributes
    ----------
    max_iter : int
        Maximum number of iterations to standardize conjugated systems.
        See :meth:`~MDAnalysisInferrer._rebuild_conjugated_bonds`
    sanitize : bool
        Whether to sanitize the molecule or not.
    MONATOMIC_CATION_CHARGES : ClassVar[Dict[int, int]]
        Charges that should be assigned to monatomic cations. Maps atomic
        number to  their formal charge. Anion charges are directly handled by
        the code using the typical valence of the atom.
    STANDARDIZATION_REACTIONS : ClassVar[List[str]]
        Reactions uses by :meth:`~MDAnalysisInferrer._standardize_patterns` to
        fix challenging cases must have single reactant and product, and
        cannot add any atom.

    Notes
    -----
    There are some molecules containing specific substructures that this
    inferrer cannot currently tackle correctly. See
    `Issue #3339 <https://github.com/MDAnalysis/mdanalysis/issues/3339>`__ for
    more info.

    .. versionadded:: 2.10.0
    """

    MONATOMIC_CATION_CHARGES: ClassVar[Dict[int, int]] = {
        3: 1,
        11: 1,
        19: 1,
        37: 1,
        47: 1,
        55: 1,
        12: 2,
        20: 2,
        29: 2,
        30: 2,
        38: 2,
        56: 2,
        26: 2,  # Fe could also be +3
        13: 3,
    }
    STANDARDIZATION_REACTIONS: ClassVar[List[str]] = [
        "[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]",  # Cterm
        "[N-;X2;H1;$(N-[*^3]):1]>>[N+0:1]",  # Nterm
        "[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]",  # keto-enolate
        "[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]",  # ARG
        "[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]"
        ">>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]",  # HIP
        "[S;D4;!v6:1]-[*-:2]>>[S;v6:1]=[*+0:2]",  # sulfone
        "[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]",  # charged-N
    ]

    max_iter: int = 200
    sanitize: bool = True

    def __call__(self, mol: "Chem.Mol") -> "Chem.Mol":
        """Infer bond orders and formal charges in the molecule.

        Parameters
        ----------
        mol : Chem.Mol
            The molecule to infer bond orders and charges for.

        Returns
        -------
        A new molecule with proper bond orders and charges.
        """
        self._infer_bo_and_charges(mol)
        mol = self._standardize_patterns(mol)
        mol = reorder_atoms(mol)
        if self.sanitize:
            sanitize_mol(mol)
        return mol


[docs]
    @classmethod
    def _atom_sorter(cls, atom: "Chem.Atom") -> Tuple[int, int]:
        """Sorts atoms in the molecule in a way that makes it easy for the bond
        order and charge infering code to get the correct state on the first
        try. Currently sorts by number of unpaired electrons, then by number of
        heavy atom neighbors (i.e. atoms at the edge first)."""
        num_heavy_neighbors = len(
            [
                neighbor
                for neighbor in atom.GetNeighbors()
                if neighbor.GetAtomicNum() > 1
            ]
        )
        return (-cls._get_nb_unpaired_electrons(atom)[0], num_heavy_neighbors)



[docs]
    @classmethod
    def _infer_bo_and_charges(cls, mol: "Chem.Mol") -> None:
        """Infer bond orders and formal charges from a molecule.

        Since most MD topology files don't explicitly retain information on
        bond orders or charges, it has to be guessed from the topology. This
        is done by looping over each atom and comparing its expected valence
        to the current valence to get the Number of Unpaired Electrons (NUE).
        If an atom has a negative NUE, it needs a positive formal charge
        (-NUE). If two neighbouring atoms have UEs, the bond between them most
        likely has to be increased by the value of the smallest NUE.
        If after this process, an atom still has UEs, it needs a negative
        formal charge of -NUE.

        Parameters
        ----------
        mol : rdkit.Chem.rdchem.RWMol
            The molecule is modified inplace and must have all hydrogens added

        Notes
        -----
        This algorithm is order dependant. For example, for a carboxylate group
        ``R-C(-O)-O`` the first oxygen read will receive a double bond and the
        other one will be charged. It will also affect more complex conjugated
        systems.
        """
        # heavy atoms sorted by number of heavy atom neighbors (lower first)
        # then NUE (higher first)
        atoms = sorted(
            [atom for atom in mol.GetAtoms() if atom.GetAtomicNum() > 1],
            key=cls._atom_sorter,
        )

        for atom in atoms:
            # monatomic ions
            if atom.GetDegree() == 0:
                atom.SetFormalCharge(
                    cls.MONATOMIC_CATION_CHARGES.get(
                        atom.GetAtomicNum(),
                        -cls._get_nb_unpaired_electrons(atom)[0],
                    )
                )
                mol.UpdatePropertyCache(strict=False)
                continue
            # get NUE for each possible valence
            nue = cls._get_nb_unpaired_electrons(atom)
            # if there's only one possible valence state and the corresponding
            # NUE is negative, it means we can only add a positive charge to
            # the atom
            if (len(nue) == 1) and (nue[0] < 0):
                atom.SetFormalCharge(-nue[0])
                mol.UpdatePropertyCache(strict=False)
            # go to next atom if above case or atom has no unpaired electron
            if (len(nue) == 1) and (nue[0] <= 0):
                continue
            neighbors = sorted(
                atom.GetNeighbors(),
                reverse=True,
                key=lambda a: cls._get_nb_unpaired_electrons(a)[0],
            )
            # check if one of the neighbors has a common NUE
            for na in neighbors:
                # get NUE for the neighbor
                na_nue = cls._get_nb_unpaired_electrons(na)
                # smallest common NUE
                common_nue = min(
                    [i for i in [*nue, *na_nue] if i >= 0],
                    default=0,
                )
                # a common NUE of 0 means we don't need to do anything
                if common_nue != 0:
                    # increase bond order
                    bond = mol.GetBondBetweenAtoms(atom.GetIdx(), na.GetIdx())
                    order = common_nue + 1
                    bond.SetBondType(RDBONDORDER[order])
                    mol.UpdatePropertyCache(strict=False)
                    # go to next atom if one of the valences is complete
                    nue = cls._get_nb_unpaired_electrons(atom)
                    if any(n == 0 for n in nue):
                        break

            # if atom valence is still not filled
            nue = cls._get_nb_unpaired_electrons(atom)
            if not any(n == 0 for n in nue):
                # transform nue to charge
                atom.SetFormalCharge(-nue[0])
                atom.SetNumRadicalElectrons(0)
                mol.UpdatePropertyCache(strict=False)



[docs]
    @staticmethod
    def _get_nb_unpaired_electrons(atom: "Chem.Atom") -> List[int]:
        """Calculate the number of unpaired electrons (NUE) of an atom

        Parameters
        ----------
        atom: rdkit.Chem.rdchem.Atom
            The atom for which the NUE will be computed

        Returns
        -------
        nue : List[int]
            The NUE for each possible valence of the atom
        """
        expected_vs = PERIODIC_TABLE.GetValenceList(atom.GetAtomicNum())
        current_v = atom.GetTotalValence() - atom.GetFormalCharge()
        return [v - current_v for v in expected_vs]



[docs]
    def _standardize_patterns(
        self, mol: "Chem.Mol", max_iter: Optional[int] = None
    ) -> "Chem.Mol":
        """Standardizes functional groups

        Uses :meth:`~MDAnalysisInferrer._rebuild_conjugated_bonds` to
        standardize conjugated systems, and SMARTS reactions for other
        functional groups.
        Due to the way reactions work, we first have to split the molecule by
        fragments. Then, for each fragment, we apply the standardization
        reactions. Finally, the fragments are recombined.

        Parameters
        ----------
        mol : rdkit.Chem.rdchem.RWMol
            The molecule to standardize
        max_iter : Optional[int]
            Maximum number of iterations to standardize conjugated systems.

            .. deprecated:: 2.10.0
                Will be removed in 3.0, use
                ``inferrer=MDAnalysisInferrer(max_iter=...)`` instead.

        Returns
        -------
        mol : rdkit.Chem.rdchem.Mol
            The standardized molecule

        Notes
        -----
        The following functional groups are transformed in this order:

        +---------------+------------------------------------------------------------------------------+
        | Name          | Reaction                                                                     |
        +===============+==============================================================================+
        | conjugated    | ``[*-:1]-[*:2]=[*:3]-[*-:4]>>[*+0:1]=[*:2]-[*:3]=[*+0:4]``                   |
        +---------------+------------------------------------------------------------------------------+
        | conjugated N+ | ``[N;X3;v3:1]-[*:2]=[*:3]-[*-:4]>>[N+:1]=[*:2]-[*:3]=[*+0:4]``               |
        +---------------+------------------------------------------------------------------------------+
        | conjugated O- | ``[O:1]=[#6+0,#7+:2]-[*:3]=[*:4]-[*-:5]>>[O-:1]-[*:2]=[*:3]-[*:4]=[*+0:5]``  |
        +---------------+------------------------------------------------------------------------------+
        | conjug. S=O   | ``[O-:1]-[S;D4;v4:2]-[*:3]=[*:4]-[*-:5]>>[O+0:1]=[*:2]=[*:3]-[*:4]=[*+0:5]`` |
        +---------------+------------------------------------------------------------------------------+
        | Cterm         | ``[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]``                                        |
        +---------------+------------------------------------------------------------------------------+
        | Nterm         | ``[N-;X2;H1;$(N-[*^3]):1]>>[N+0:1]``                                         |
        +---------------+------------------------------------------------------------------------------+
        | keto-enolate  | ``[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]``                             |
        +---------------+------------------------------------------------------------------------------+
        | arginine      | ``[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]``                               |
        +---------------+------------------------------------------------------------------------------+
        | histidine     | ``[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]>>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]``|
        +---------------+------------------------------------------------------------------------------+
        | sulfone       | ``[S;D4;!v6:1]-[*-:2]>>[S;v6:1]=[*+0:2]``                                    |
        +---------------+------------------------------------------------------------------------------+
        | charged N     | ``[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]``                                      |
        +---------------+------------------------------------------------------------------------------+

        """
        if max_iter is None:
            max_iter = self.max_iter
        else:
            warnings.warn(
                "Specifying `max_iter` is deprecated and will be removed in a "
                "future update. Directly create an instance of "
                "`MDAnalysisInferrer` with `MDAnalysisInferrer(max_iter=...)` "
                "instead.",
                DeprecationWarning,
            )

        # standardize conjugated systems
        self._rebuild_conjugated_bonds(mol, max_iter)

        # list of sanitized reactions
        reactions = [
            ReactionFromSmarts(rxn) for rxn in self.STANDARDIZATION_REACTIONS
        ]

        # fragment mol (reactions must have single reactant and product)
        fragments = list(
            Chem.GetMolFrags(mol, asMols=True, sanitizeFrags=self.sanitize)
        )
        for reactant in fragments:
            self._apply_reactions(reactions, reactant)

        # recombine fragments
        newmol = fragments.pop(0)
        for fragment in fragments:
            newmol = Chem.CombineMols(newmol, fragment)

        return newmol



[docs]
    @staticmethod
    def _apply_reactions(
        reactions: List["ChemicalReaction"], reactant: "Chem.Mol"
    ) -> None:
        """Applies a series of unimolecular reactions to a molecule. The
        reactions must not add any atom to the product. The molecule is
        modified inplace.

        Parameters
        ----------
        reactions : List[rdkit.Chem.rdChemReactions.ChemicalReaction]
            Reactions from SMARTS. Each reaction is applied until no more
            transformations can be made.
        reactant : rdkit.Chem.rdchem.Mol
            The molecule to transform inplace

        """
        reactant.UpdatePropertyCache(strict=False)
        Chem.KekulizeIfPossible(reactant)
        for reaction in reactions:
            while reaction.RunReactantInPlace(reactant):
                reactant.UpdatePropertyCache(strict=False)
            reactant.UpdatePropertyCache(strict=False)
            Chem.KekulizeIfPossible(reactant)



[docs]
    def _rebuild_conjugated_bonds(
        self, mol: "Chem.Mol", max_iter: Optional[int] = None
    ) -> None:
        """Rebuild conjugated bonds without negatively charged atoms at the
        beginning and end of the conjugated system

        Depending on the order in which atoms are read during the conversion,
        the :meth:`~MDAnalysisInferrer._infer_bo_and_charges` function might
        write conjugated systems with a double bond less and both edges of the
        system as anions instead of the usual alternating single and double
        bonds. This function corrects this behaviour by using an iterative
        procedure.
        The problematic molecules always follow the same pattern:
        ``anion[-*=*]n-anion`` instead of ``*=[*-*=]n*``, where ``n`` is the
        number of successive single and double bonds. The goal of the
        iterative procedure is to make ``n`` as small as possible by
        consecutively transforming ``anion-*=*`` into ``*=*-anion`` until it
        reaches the smallest pattern with ``n=1``. This last pattern is then
        transformed ``anion-*=*-anion`` to ``*=*-*=*``.
        Since ``anion-*=*`` is the same as ``*=*-anion`` in terms of SMARTS,
        we can control that we don't transform the same triplet of atoms back
        and forth by adding their indices to a list.
        This function also handles conjugated systems with triple bonds.
        The molecule needs to be kekulized first to also cover systems
        with aromatic rings.

        Parameters
        ----------
        mol : rdkit.Chem.rdchem.RWMol
            The molecule to transform, modified inplace
        max_iter : Optional[int]
            Maximum number of iterations to standardize conjugated systems.

            .. deprecated:: 2.10.0
                Will be removed in 3.0, use
                ``inferrer=MDAnalysisInferrer(max_iter=...)`` instead.

        Notes
        -----
        The molecule is modified inplace
        """
        max_iter = self.max_iter if max_iter is None else max_iter
        mol.UpdatePropertyCache(strict=False)
        Chem.KekulizeIfPossible(mol)
        # pattern used to find problematic conjugated bonds
        # there's usually an even number of matches for this
        pattern = Chem.MolFromSmarts("[*-{1-2}]-,=[*+0]=,#[*+0]")
        # pattern used to finish fixing a series of conjugated bonds
        base_end_pattern = Chem.MolFromSmarts(
            "[*-{1-2}]-,=[*+0]=,#[*+0]-,=[*-{1-2}]"
        )
        # used when there's an odd number of matches for `pattern`
        odd_end_pattern = Chem.MolFromSmarts(
            "[*-]-[*+0]=[*+0]-[*-,$([#7;X3;v3]),$([#6+0,#7+1]=O),$([S;D4;v4]-[O-])]"
        )
        # number of unique matches with the pattern
        n_matches = len(
            {match[0] for match in mol.GetSubstructMatches(pattern)}
        )
        # nothing to standardize
        if n_matches == 0:
            return
        # single match (unusual)
        if n_matches == 1:
            # as a last resort, the only way to standardize is to find a
            # nitrogen that can accept a double bond and a positive charge
            # or a carbonyl that will become an enolate
            end_pattern = odd_end_pattern
        # at least 2 matches
        else:
            # give priority to base end pattern and then deal with the odd one
            # if necessary
            end_pattern = base_end_pattern
        backtrack = []
        backtrack_cycles = 0
        for _ in range(max_iter):
            # check for ending pattern
            end_match = mol.GetSubstructMatch(end_pattern)
            if end_match:
                # index of each atom
                anion1, a1, a2, anion2 = end_match
                term_atom = mol.GetAtomWithIdx(anion2)
                # edge-case 1: C-[O-] or [N+]-[O-]
                # [*-]-*=*-[C,N+]=O --> *=*-*=[C,N+]-[O-]
                # transform the =O to -[O-]
                if (
                    term_atom.GetAtomicNum() == 6
                    and term_atom.GetFormalCharge() == 0
                ) or (
                    term_atom.GetAtomicNum() == 7
                    and term_atom.GetFormalCharge() == 1
                ):
                    for neighbor in term_atom.GetNeighbors():
                        bond = mol.GetBondBetweenAtoms(
                            anion2, neighbor.GetIdx()
                        )
                        if (
                            neighbor.GetAtomicNum() == 8
                            and bond.GetBondTypeAsDouble() == 2
                        ):
                            bond.SetBondType(Chem.BondType.SINGLE)
                            neighbor.SetFormalCharge(-1)
                            break
                # edge-case 2: S=O
                # [*-]-*=*-[Sv4]-[O-] --> *=*-*=[Sv6]=O
                # transform -[O-] to =O
                elif (
                    term_atom.GetAtomicNum() == 16
                    and term_atom.GetFormalCharge() == 0
                ):
                    for neighbor in term_atom.GetNeighbors():
                        bond = mol.GetBondBetweenAtoms(
                            anion2, neighbor.GetIdx()
                        )
                        if (
                            neighbor.GetAtomicNum() == 8
                            and neighbor.GetFormalCharge() == -1
                            and bond.GetBondTypeAsDouble() == 1
                        ):
                            bond.SetBondType(Chem.BondType.DOUBLE)
                            neighbor.SetFormalCharge(0)
                            break
                # not an edge case:
                # increment the charge
                else:
                    term_atom.SetFormalCharge(term_atom.GetFormalCharge() + 1)
                # common to all cases:
                # [*-]-*=*-[R] --> *=*-*=[R]
                # increment the charge and switch single and double bonds
                a = mol.GetAtomWithIdx(anion1)
                a.SetFormalCharge(a.GetFormalCharge() + 1)
                b = mol.GetBondBetweenAtoms(anion1, a1)
                b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1])
                b = mol.GetBondBetweenAtoms(a1, a2)
                b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() - 1])
                b = mol.GetBondBetweenAtoms(a2, anion2)
                b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1])
                mol.UpdatePropertyCache(strict=False)
                continue

            # switch the position of the charge and the double bond
            matches = mol.GetSubstructMatches(pattern)
            if matches:
                # check if we haven't already transformed this triplet
                for match in matches:
                    # store order-independent atom indices
                    g = set(match)
                    # already transformed --> try the next one
                    if g in backtrack:
                        continue
                    # add to backtracking and start the switch
                    anion, a1, a2 = match
                    backtrack.append(g)
                    break
                # already performed all changes
                else:
                    if backtrack_cycles == 1:
                        # might be stuck because there were 2 "odd" end
                        # patterns  misqualified as a single base one
                        end_pattern = odd_end_pattern
                    elif backtrack_cycles > 1:
                        # likely stuck changing the same bonds over and over so
                        # let's stop here
                        mol.UpdatePropertyCache(strict=False)
                        return
                    match = matches[0]
                    anion, a1, a2 = match
                    backtrack = [set(match)]
                    backtrack_cycles += 1

                # switch charges
                a = mol.GetAtomWithIdx(anion)
                a.SetFormalCharge(a.GetFormalCharge() + 1)
                a = mol.GetAtomWithIdx(a2)
                a.SetFormalCharge(a.GetFormalCharge() - 1)
                # switch bond orders
                b = mol.GetBondBetweenAtoms(anion, a1)
                b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1])
                b = mol.GetBondBetweenAtoms(a1, a2)
                b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() - 1])
                mol.UpdatePropertyCache(strict=False)
                # update number of matches for the end pattern
                n_matches = len({match[0] for match in matches})
                if n_matches == 1:
                    end_pattern = odd_end_pattern
                # start new iteration
                continue

            # no more changes to apply
            mol.UpdatePropertyCache(strict=False)
            return

        # reached max_iter
        warnings.warn(
            "The standardization could not be completed within a "
            "reasonable number of iterations"
        )





[docs]
@dataclass(frozen=True)
class TemplateInferrer:
    """Infer bond orders and charges by matching the molecule with a template
    molecule containing bond orders and charges.

    Attributes
    ----------
    template : rdkit.Chem.rdchem.Mol
        Molecule containing the bond orders and charges.
    adjust_hydrogens: bool, default = True
        If ``True``, temporarily removes hydrogens from the molecule to
        assign bond orders and charges from the template, then adds them
        back. Useful to avoid adding explicit hydrogens on the template which
        can prevent RDKit from finding a match between the template and the
        molecule. Setting this to ``False`` can be useful to speed things up
        for inorganic molecules that don't have any hydrogens.

    .. versionadded:: 2.10.0
    """

    template: "Chem.Mol"
    adjust_hydrogens: bool = True

    def __call__(self, mol: "Chem.Mol") -> "Chem.Mol":
        if self.adjust_hydrogens:
            return self.assign_from_template_with_adjusted_hydrogens(mol)
        return AssignBondOrdersFromTemplate(self.template, mol)


[docs]
    def assign_from_template_with_adjusted_hydrogens(
        self, mol: "Chem.Mol", index_field: str = "_MDAnalysis_index"
    ) -> "Chem.Mol":
        """Temporarily removes hydrogens from the molecule before assigning
        bond orders and charges from the template, then adds them back.

        Parameters
        ----------
        mol : rdkit.Chem.rdchem.Mol
            Molecule to assign bond orders and charges to.
        index_field : str, default = "_MDAnalysis_index"
            Atom property corresponding to a unique integer that can used to
            put back the hydrogen atoms that were removed before matching,
            and sort them back to the original order. Must be accessible
            through ``atom.GetIntProp``.
        """
        # remove explicit hydrogens and assign BO from template
        mol_no_h = Chem.RemoveAllHs(mol, sanitize=False)
        new = AssignBondOrdersFromTemplate(self.template, mol_no_h)
        # mapping between AtomGroup index of atom bearing H --> H atom(s)
        atoms_with_hydrogens = defaultdict(list)
        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() == 1:
                atoms_with_hydrogens[
                    atom.GetNeighbors()[0].GetIntProp(index_field)
                ].append(atom)
        # mapping between atom that should be bearing a H in RWMol and
        # corresponding H(s)
        reverse_mapping = {}
        for atom in new.GetAtoms():
            if (idx := atom.GetIntProp(index_field)) in atoms_with_hydrogens:
                reverse_mapping[atom.GetIdx()] = atoms_with_hydrogens[idx]
                # Set ExplicitH to 0 to avoid valence errors when adding Hs
                atom.SetNumExplicitHs(0)
            # let UpdatePropertyCache recalculate the number of radicals later
            atom.SetNumRadicalElectrons(0)
        # add missing Hs
        rwmol = Chem.RWMol(new)
        for atom_idx, hydrogens in reverse_mapping.items():
            for hydrogen in hydrogens:
                h_idx = rwmol.AddAtom(hydrogen)
                rwmol.AddBond(atom_idx, h_idx, Chem.BondType.SINGLE)
        new = rwmol.GetMol()
        # sanitize
        new.UpdatePropertyCache()
        sanitize_mol(new)
        # reorder atoms as input atomgroup (through _MDAnalysis_index)
        return reorder_atoms(new, field=index_field)





[docs]
@dataclass(frozen=True)
class RDKitInferrer:
    """Uses RDKit's :func:`~rdkit.Chem.rdDetermineBonds.DetermineBondOrders`
    to infer bond orders and formal charges. This is the same algorithm used
    by the :ref:`xyz2mol <https://github.com/jensengroup/xyz2mol>` package.

    .. versionadded:: 2.10.0
    """

    charge: int = 0

    def __call__(self, mol: "Chem.Mol") -> "Chem.Mol":
        new = Chem.Mol(mol)
        DetermineBondOrders(new, charge=self.charge)
        return new