Source code for MDAnalysis.converters.RDKitInferring

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Lesser General Public License, v2 or higher
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

"""RDKit bond order inferring --- :mod:`MDAnalysis.converters.RDKitInferring`
=============================================================================
Bond order and formal charge inferring for RDKit molecules. Because most MD
file formats don't preserve bond order information directly (or formal charges
to some extent), the classes provided here give users different options to
either provide or infer this information to the RDKit molecule constructed from
the topology. Having bond orders and formal charges properly defined is a
requirement for almost all cheminformatics-related task, hence the different
approaches proposed here to cover most use cases. You can also defined your own
function if need be, see the :mod:`~MDAnalysis.converters.RDKit` module for an
example.

These classes are meant to be passed directly to the RDKit converter::

    >>> import MDAnalysis as mda
    >>> from rdkit import Chem
    >>> u = mda.Universe("aspirin.pdb")
    >>> template = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O")
    >>> inferrer = mda.converters.RDKitInferring.TemplateInferrer(template)
    >>> rdkit_mol = u.atoms.convert_to.rdkit(inferrer=inferrer)

Classes
-------

.. autoclass:: MDAnalysisInferrer
   :members:
   :private-members:

.. autoclass:: TemplateInferrer
   :members:

.. autoclass:: RDKitInferrer
   :members:

.. autofunction:: sanitize_mol

.. autofunction:: reorder_atoms

"""

import warnings
from collections import defaultdict
from contextlib import suppress
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional, Tuple

import numpy as np

RDBONDORDER = {}
with suppress(ImportError):
    from rdkit import Chem
    from rdkit.Chem.AllChem import AssignBondOrdersFromTemplate
    from rdkit.Chem.rdChemReactions import ChemicalReaction, ReactionFromSmarts

    RDBONDORDER = {
        1: Chem.BondType.SINGLE,
        1.5: Chem.BondType.AROMATIC,
        "ar": Chem.BondType.AROMATIC,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE,
    }
    # add string version of the key for each bond
    RDBONDORDER.update({str(key): value for key, value in RDBONDORDER.items()})
    PERIODIC_TABLE = Chem.GetPeriodicTable()

with suppress(ImportError):
    from rdkit.Chem.rdDetermineBonds import (
        DetermineBondOrders,
    )  # available since 2022.09.1


[docs] def reorder_atoms( mol: "Chem.Mol", field: str = "_MDAnalysis_index" ) -> "Chem.Mol": """Reorder atoms based on the given field. Defaults to sorting in the same order as the input AtomGroup. Notes ----- If the field is not found on the molecule, skips reordering. """ with suppress(KeyError): order = np.argsort([atom.GetIntProp(field) for atom in mol.GetAtoms()]) return Chem.RenumberAtoms(mol, order.astype(int).tolist()) # not a molecule converted by MDAnalysis warnings.warn( f"{field!r} not available on the input mol atoms, skipping reordering of atoms." ) return mol
[docs] def sanitize_mol(mol: "Chem.Mol") -> None: """Sanitizes the molecule.""" Chem.SanitizeMol(mol)
[docs] @dataclass(frozen=True) class MDAnalysisInferrer: """Bond order and formal charge inferring as originally implemented for the RDKit converter. This algorithm only relies on the topology with explicit hydrogens to assign bond orders and formal charges. Attributes ---------- max_iter : int Maximum number of iterations to standardize conjugated systems. See :meth:`~MDAnalysisInferrer._rebuild_conjugated_bonds` sanitize : bool Whether to sanitize the molecule or not. MONATOMIC_CATION_CHARGES : ClassVar[Dict[int, int]] Charges that should be assigned to monatomic cations. Maps atomic number to their formal charge. Anion charges are directly handled by the code using the typical valence of the atom. STANDARDIZATION_REACTIONS : ClassVar[List[str]] Reactions uses by :meth:`~MDAnalysisInferrer._standardize_patterns` to fix challenging cases must have single reactant and product, and cannot add any atom. Notes ----- There are some molecules containing specific substructures that this inferrer cannot currently tackle correctly. See `Issue #3339 <https://github.com/MDAnalysis/mdanalysis/issues/3339>`__ for more info. .. versionadded:: 2.10.0 """ MONATOMIC_CATION_CHARGES: ClassVar[Dict[int, int]] = { 3: 1, 11: 1, 19: 1, 37: 1, 47: 1, 55: 1, 12: 2, 20: 2, 29: 2, 30: 2, 38: 2, 56: 2, 26: 2, # Fe could also be +3 13: 3, } STANDARDIZATION_REACTIONS: ClassVar[List[str]] = [ "[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]", # Cterm "[N-;X2;H1;$(N-[*^3]):1]>>[N+0:1]", # Nterm "[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]", # keto-enolate "[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]", # ARG "[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]" ">>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]", # HIP "[S;D4;!v6:1]-[*-:2]>>[S;v6:1]=[*+0:2]", # sulfone "[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]", # charged-N ] max_iter: int = 200 sanitize: bool = True def __call__(self, mol: "Chem.Mol") -> "Chem.Mol": """Infer bond orders and formal charges in the molecule. Parameters ---------- mol : Chem.Mol The molecule to infer bond orders and charges for. Returns ------- A new molecule with proper bond orders and charges. """ self._infer_bo_and_charges(mol) mol = self._standardize_patterns(mol) mol = reorder_atoms(mol) if self.sanitize: sanitize_mol(mol) return mol
[docs] @classmethod def _atom_sorter(cls, atom: "Chem.Atom") -> Tuple[int, int]: """Sorts atoms in the molecule in a way that makes it easy for the bond order and charge infering code to get the correct state on the first try. Currently sorts by number of unpaired electrons, then by number of heavy atom neighbors (i.e. atoms at the edge first).""" num_heavy_neighbors = len( [ neighbor for neighbor in atom.GetNeighbors() if neighbor.GetAtomicNum() > 1 ] ) return (-cls._get_nb_unpaired_electrons(atom)[0], num_heavy_neighbors)
[docs] @classmethod def _infer_bo_and_charges(cls, mol: "Chem.Mol") -> None: """Infer bond orders and formal charges from a molecule. Since most MD topology files don't explicitly retain information on bond orders or charges, it has to be guessed from the topology. This is done by looping over each atom and comparing its expected valence to the current valence to get the Number of Unpaired Electrons (NUE). If an atom has a negative NUE, it needs a positive formal charge (-NUE). If two neighbouring atoms have UEs, the bond between them most likely has to be increased by the value of the smallest NUE. If after this process, an atom still has UEs, it needs a negative formal charge of -NUE. Parameters ---------- mol : rdkit.Chem.rdchem.RWMol The molecule is modified inplace and must have all hydrogens added Notes ----- This algorithm is order dependant. For example, for a carboxylate group ``R-C(-O)-O`` the first oxygen read will receive a double bond and the other one will be charged. It will also affect more complex conjugated systems. """ # heavy atoms sorted by number of heavy atom neighbors (lower first) # then NUE (higher first) atoms = sorted( [atom for atom in mol.GetAtoms() if atom.GetAtomicNum() > 1], key=cls._atom_sorter, ) for atom in atoms: # monatomic ions if atom.GetDegree() == 0: atom.SetFormalCharge( cls.MONATOMIC_CATION_CHARGES.get( atom.GetAtomicNum(), -cls._get_nb_unpaired_electrons(atom)[0], ) ) mol.UpdatePropertyCache(strict=False) continue # get NUE for each possible valence nue = cls._get_nb_unpaired_electrons(atom) # if there's only one possible valence state and the corresponding # NUE is negative, it means we can only add a positive charge to # the atom if (len(nue) == 1) and (nue[0] < 0): atom.SetFormalCharge(-nue[0]) mol.UpdatePropertyCache(strict=False) # go to next atom if above case or atom has no unpaired electron if (len(nue) == 1) and (nue[0] <= 0): continue neighbors = sorted( atom.GetNeighbors(), reverse=True, key=lambda a: cls._get_nb_unpaired_electrons(a)[0], ) # check if one of the neighbors has a common NUE for na in neighbors: # get NUE for the neighbor na_nue = cls._get_nb_unpaired_electrons(na) # smallest common NUE common_nue = min( [i for i in [*nue, *na_nue] if i >= 0], default=0, ) # a common NUE of 0 means we don't need to do anything if common_nue != 0: # increase bond order bond = mol.GetBondBetweenAtoms(atom.GetIdx(), na.GetIdx()) order = common_nue + 1 bond.SetBondType(RDBONDORDER[order]) mol.UpdatePropertyCache(strict=False) # go to next atom if one of the valences is complete nue = cls._get_nb_unpaired_electrons(atom) if any(n == 0 for n in nue): break # if atom valence is still not filled nue = cls._get_nb_unpaired_electrons(atom) if not any(n == 0 for n in nue): # transform nue to charge atom.SetFormalCharge(-nue[0]) atom.SetNumRadicalElectrons(0) mol.UpdatePropertyCache(strict=False)
[docs] @staticmethod def _get_nb_unpaired_electrons(atom: "Chem.Atom") -> List[int]: """Calculate the number of unpaired electrons (NUE) of an atom Parameters ---------- atom: rdkit.Chem.rdchem.Atom The atom for which the NUE will be computed Returns ------- nue : List[int] The NUE for each possible valence of the atom """ expected_vs = PERIODIC_TABLE.GetValenceList(atom.GetAtomicNum()) current_v = atom.GetTotalValence() - atom.GetFormalCharge() return [v - current_v for v in expected_vs]
[docs] def _standardize_patterns( self, mol: "Chem.Mol", max_iter: Optional[int] = None ) -> "Chem.Mol": """Standardizes functional groups Uses :meth:`~MDAnalysisInferrer._rebuild_conjugated_bonds` to standardize conjugated systems, and SMARTS reactions for other functional groups. Due to the way reactions work, we first have to split the molecule by fragments. Then, for each fragment, we apply the standardization reactions. Finally, the fragments are recombined. Parameters ---------- mol : rdkit.Chem.rdchem.RWMol The molecule to standardize max_iter : Optional[int] Maximum number of iterations to standardize conjugated systems. .. deprecated:: 2.10.0 Will be removed in 3.0, use ``inferrer=MDAnalysisInferrer(max_iter=...)`` instead. Returns ------- mol : rdkit.Chem.rdchem.Mol The standardized molecule Notes ----- The following functional groups are transformed in this order: +---------------+------------------------------------------------------------------------------+ | Name | Reaction | +===============+==============================================================================+ | conjugated | ``[*-:1]-[*:2]=[*:3]-[*-:4]>>[*+0:1]=[*:2]-[*:3]=[*+0:4]`` | +---------------+------------------------------------------------------------------------------+ | conjugated N+ | ``[N;X3;v3:1]-[*:2]=[*:3]-[*-:4]>>[N+:1]=[*:2]-[*:3]=[*+0:4]`` | +---------------+------------------------------------------------------------------------------+ | conjugated O- | ``[O:1]=[#6+0,#7+:2]-[*:3]=[*:4]-[*-:5]>>[O-:1]-[*:2]=[*:3]-[*:4]=[*+0:5]`` | +---------------+------------------------------------------------------------------------------+ | conjug. S=O | ``[O-:1]-[S;D4;v4:2]-[*:3]=[*:4]-[*-:5]>>[O+0:1]=[*:2]=[*:3]-[*:4]=[*+0:5]`` | +---------------+------------------------------------------------------------------------------+ | Cterm | ``[C-;X2;H0:1]=[O:2]>>[C+0:1]=[O:2]`` | +---------------+------------------------------------------------------------------------------+ | Nterm | ``[N-;X2;H1;$(N-[*^3]):1]>>[N+0:1]`` | +---------------+------------------------------------------------------------------------------+ | keto-enolate | ``[#6-:1]-[#6:2]=[O:3]>>[#6+0:1]=[#6:2]-[O-:3]`` | +---------------+------------------------------------------------------------------------------+ | arginine | ``[C-;v3:1]-[#7+0;v3;H2:2]>>[#6+0:1]=[#7+:2]`` | +---------------+------------------------------------------------------------------------------+ | histidine | ``[#6+0;H0:1]=[#6+0:2]-[#7;X3:3]-[#6-;X3:4]>>[#6:1]=[#6:2]-[#7+:3]=[#6+0:4]``| +---------------+------------------------------------------------------------------------------+ | sulfone | ``[S;D4;!v6:1]-[*-:2]>>[S;v6:1]=[*+0:2]`` | +---------------+------------------------------------------------------------------------------+ | charged N | ``[#7+0;X3:1]-[*-:2]>>[#7+:1]=[*+0:2]`` | +---------------+------------------------------------------------------------------------------+ """ if max_iter is None: max_iter = self.max_iter else: warnings.warn( "Specifying `max_iter` is deprecated and will be removed in a " "future update. Directly create an instance of " "`MDAnalysisInferrer` with `MDAnalysisInferrer(max_iter=...)` " "instead.", DeprecationWarning, ) # standardize conjugated systems self._rebuild_conjugated_bonds(mol, max_iter) # list of sanitized reactions reactions = [ ReactionFromSmarts(rxn) for rxn in self.STANDARDIZATION_REACTIONS ] # fragment mol (reactions must have single reactant and product) fragments = list( Chem.GetMolFrags(mol, asMols=True, sanitizeFrags=self.sanitize) ) for reactant in fragments: self._apply_reactions(reactions, reactant) # recombine fragments newmol = fragments.pop(0) for fragment in fragments: newmol = Chem.CombineMols(newmol, fragment) return newmol
[docs] @staticmethod def _apply_reactions( reactions: List["ChemicalReaction"], reactant: "Chem.Mol" ) -> None: """Applies a series of unimolecular reactions to a molecule. The reactions must not add any atom to the product. The molecule is modified inplace. Parameters ---------- reactions : List[rdkit.Chem.rdChemReactions.ChemicalReaction] Reactions from SMARTS. Each reaction is applied until no more transformations can be made. reactant : rdkit.Chem.rdchem.Mol The molecule to transform inplace """ reactant.UpdatePropertyCache(strict=False) Chem.KekulizeIfPossible(reactant) for reaction in reactions: while reaction.RunReactantInPlace(reactant): reactant.UpdatePropertyCache(strict=False) reactant.UpdatePropertyCache(strict=False) Chem.KekulizeIfPossible(reactant)
[docs] def _rebuild_conjugated_bonds( self, mol: "Chem.Mol", max_iter: Optional[int] = None ) -> None: """Rebuild conjugated bonds without negatively charged atoms at the beginning and end of the conjugated system Depending on the order in which atoms are read during the conversion, the :meth:`~MDAnalysisInferrer._infer_bo_and_charges` function might write conjugated systems with a double bond less and both edges of the system as anions instead of the usual alternating single and double bonds. This function corrects this behaviour by using an iterative procedure. The problematic molecules always follow the same pattern: ``anion[-*=*]n-anion`` instead of ``*=[*-*=]n*``, where ``n`` is the number of successive single and double bonds. The goal of the iterative procedure is to make ``n`` as small as possible by consecutively transforming ``anion-*=*`` into ``*=*-anion`` until it reaches the smallest pattern with ``n=1``. This last pattern is then transformed ``anion-*=*-anion`` to ``*=*-*=*``. Since ``anion-*=*`` is the same as ``*=*-anion`` in terms of SMARTS, we can control that we don't transform the same triplet of atoms back and forth by adding their indices to a list. This function also handles conjugated systems with triple bonds. The molecule needs to be kekulized first to also cover systems with aromatic rings. Parameters ---------- mol : rdkit.Chem.rdchem.RWMol The molecule to transform, modified inplace max_iter : Optional[int] Maximum number of iterations to standardize conjugated systems. .. deprecated:: 2.10.0 Will be removed in 3.0, use ``inferrer=MDAnalysisInferrer(max_iter=...)`` instead. Notes ----- The molecule is modified inplace """ max_iter = self.max_iter if max_iter is None else max_iter mol.UpdatePropertyCache(strict=False) Chem.KekulizeIfPossible(mol) # pattern used to find problematic conjugated bonds # there's usually an even number of matches for this pattern = Chem.MolFromSmarts("[*-{1-2}]-,=[*+0]=,#[*+0]") # pattern used to finish fixing a series of conjugated bonds base_end_pattern = Chem.MolFromSmarts( "[*-{1-2}]-,=[*+0]=,#[*+0]-,=[*-{1-2}]" ) # used when there's an odd number of matches for `pattern` odd_end_pattern = Chem.MolFromSmarts( "[*-]-[*+0]=[*+0]-[*-,$([#7;X3;v3]),$([#6+0,#7+1]=O),$([S;D4;v4]-[O-])]" ) # number of unique matches with the pattern n_matches = len( {match[0] for match in mol.GetSubstructMatches(pattern)} ) # nothing to standardize if n_matches == 0: return # single match (unusual) if n_matches == 1: # as a last resort, the only way to standardize is to find a # nitrogen that can accept a double bond and a positive charge # or a carbonyl that will become an enolate end_pattern = odd_end_pattern # at least 2 matches else: # give priority to base end pattern and then deal with the odd one # if necessary end_pattern = base_end_pattern backtrack = [] backtrack_cycles = 0 for _ in range(max_iter): # check for ending pattern end_match = mol.GetSubstructMatch(end_pattern) if end_match: # index of each atom anion1, a1, a2, anion2 = end_match term_atom = mol.GetAtomWithIdx(anion2) # edge-case 1: C-[O-] or [N+]-[O-] # [*-]-*=*-[C,N+]=O --> *=*-*=[C,N+]-[O-] # transform the =O to -[O-] if ( term_atom.GetAtomicNum() == 6 and term_atom.GetFormalCharge() == 0 ) or ( term_atom.GetAtomicNum() == 7 and term_atom.GetFormalCharge() == 1 ): for neighbor in term_atom.GetNeighbors(): bond = mol.GetBondBetweenAtoms( anion2, neighbor.GetIdx() ) if ( neighbor.GetAtomicNum() == 8 and bond.GetBondTypeAsDouble() == 2 ): bond.SetBondType(Chem.BondType.SINGLE) neighbor.SetFormalCharge(-1) break # edge-case 2: S=O # [*-]-*=*-[Sv4]-[O-] --> *=*-*=[Sv6]=O # transform -[O-] to =O elif ( term_atom.GetAtomicNum() == 16 and term_atom.GetFormalCharge() == 0 ): for neighbor in term_atom.GetNeighbors(): bond = mol.GetBondBetweenAtoms( anion2, neighbor.GetIdx() ) if ( neighbor.GetAtomicNum() == 8 and neighbor.GetFormalCharge() == -1 and bond.GetBondTypeAsDouble() == 1 ): bond.SetBondType(Chem.BondType.DOUBLE) neighbor.SetFormalCharge(0) break # not an edge case: # increment the charge else: term_atom.SetFormalCharge(term_atom.GetFormalCharge() + 1) # common to all cases: # [*-]-*=*-[R] --> *=*-*=[R] # increment the charge and switch single and double bonds a = mol.GetAtomWithIdx(anion1) a.SetFormalCharge(a.GetFormalCharge() + 1) b = mol.GetBondBetweenAtoms(anion1, a1) b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1]) b = mol.GetBondBetweenAtoms(a1, a2) b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() - 1]) b = mol.GetBondBetweenAtoms(a2, anion2) b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1]) mol.UpdatePropertyCache(strict=False) continue # switch the position of the charge and the double bond matches = mol.GetSubstructMatches(pattern) if matches: # check if we haven't already transformed this triplet for match in matches: # store order-independent atom indices g = set(match) # already transformed --> try the next one if g in backtrack: continue # add to backtracking and start the switch anion, a1, a2 = match backtrack.append(g) break # already performed all changes else: if backtrack_cycles == 1: # might be stuck because there were 2 "odd" end # patterns misqualified as a single base one end_pattern = odd_end_pattern elif backtrack_cycles > 1: # likely stuck changing the same bonds over and over so # let's stop here mol.UpdatePropertyCache(strict=False) return match = matches[0] anion, a1, a2 = match backtrack = [set(match)] backtrack_cycles += 1 # switch charges a = mol.GetAtomWithIdx(anion) a.SetFormalCharge(a.GetFormalCharge() + 1) a = mol.GetAtomWithIdx(a2) a.SetFormalCharge(a.GetFormalCharge() - 1) # switch bond orders b = mol.GetBondBetweenAtoms(anion, a1) b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() + 1]) b = mol.GetBondBetweenAtoms(a1, a2) b.SetBondType(RDBONDORDER[b.GetBondTypeAsDouble() - 1]) mol.UpdatePropertyCache(strict=False) # update number of matches for the end pattern n_matches = len({match[0] for match in matches}) if n_matches == 1: end_pattern = odd_end_pattern # start new iteration continue # no more changes to apply mol.UpdatePropertyCache(strict=False) return # reached max_iter warnings.warn( "The standardization could not be completed within a " "reasonable number of iterations" )
[docs] @dataclass(frozen=True) class TemplateInferrer: """Infer bond orders and charges by matching the molecule with a template molecule containing bond orders and charges. Attributes ---------- template : rdkit.Chem.rdchem.Mol Molecule containing the bond orders and charges. adjust_hydrogens: bool, default = True If ``True``, temporarily removes hydrogens from the molecule to assign bond orders and charges from the template, then adds them back. Useful to avoid adding explicit hydrogens on the template which can prevent RDKit from finding a match between the template and the molecule. Setting this to ``False`` can be useful to speed things up for inorganic molecules that don't have any hydrogens. .. versionadded:: 2.10.0 """ template: "Chem.Mol" adjust_hydrogens: bool = True def __call__(self, mol: "Chem.Mol") -> "Chem.Mol": if self.adjust_hydrogens: return self.assign_from_template_with_adjusted_hydrogens(mol) return AssignBondOrdersFromTemplate(self.template, mol)
[docs] def assign_from_template_with_adjusted_hydrogens( self, mol: "Chem.Mol", index_field: str = "_MDAnalysis_index" ) -> "Chem.Mol": """Temporarily removes hydrogens from the molecule before assigning bond orders and charges from the template, then adds them back. Parameters ---------- mol : rdkit.Chem.rdchem.Mol Molecule to assign bond orders and charges to. index_field : str, default = "_MDAnalysis_index" Atom property corresponding to a unique integer that can used to put back the hydrogen atoms that were removed before matching, and sort them back to the original order. Must be accessible through ``atom.GetIntProp``. """ # remove explicit hydrogens and assign BO from template mol_no_h = Chem.RemoveAllHs(mol, sanitize=False) new = AssignBondOrdersFromTemplate(self.template, mol_no_h) # mapping between AtomGroup index of atom bearing H --> H atom(s) atoms_with_hydrogens = defaultdict(list) for atom in mol.GetAtoms(): if atom.GetAtomicNum() == 1: atoms_with_hydrogens[ atom.GetNeighbors()[0].GetIntProp(index_field) ].append(atom) # mapping between atom that should be bearing a H in RWMol and # corresponding H(s) reverse_mapping = {} for atom in new.GetAtoms(): if (idx := atom.GetIntProp(index_field)) in atoms_with_hydrogens: reverse_mapping[atom.GetIdx()] = atoms_with_hydrogens[idx] # Set ExplicitH to 0 to avoid valence errors when adding Hs atom.SetNumExplicitHs(0) # let UpdatePropertyCache recalculate the number of radicals later atom.SetNumRadicalElectrons(0) # add missing Hs rwmol = Chem.RWMol(new) for atom_idx, hydrogens in reverse_mapping.items(): for hydrogen in hydrogens: h_idx = rwmol.AddAtom(hydrogen) rwmol.AddBond(atom_idx, h_idx, Chem.BondType.SINGLE) new = rwmol.GetMol() # sanitize new.UpdatePropertyCache() sanitize_mol(new) # reorder atoms as input atomgroup (through _MDAnalysis_index) return reorder_atoms(new, field=index_field)
[docs] @dataclass(frozen=True) class RDKitInferrer: """Uses RDKit's :func:`~rdkit.Chem.rdDetermineBonds.DetermineBondOrders` to infer bond orders and formal charges. This is the same algorithm used by the :ref:`xyz2mol <https://github.com/jensengroup/xyz2mol>` package. .. versionadded:: 2.10.0 """ charge: int = 0 def __call__(self, mol: "Chem.Mol") -> "Chem.Mol": new = Chem.Mol(mol) DetermineBondOrders(new, charge=self.charge) return new