Source code for MDAnalysis.converters.RDKitParser

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

"""
RDKit topology parser --- :mod:`MDAnalysis.converters.RDKitParser`
==================================================================

Converts an `RDKit <https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.Mol>`_ :class:`rdkit.Chem.rdchem.Mol` into a :class:`MDAnalysis.core.Topology`.


See Also
--------
:mod:`MDAnalysis.converters.RDKit`


Classes
-------

.. autoclass:: RDKitParser
   :members:
   :inherited-members:

"""

import logging
import warnings
import numpy as np

from ..topology.base import TopologyReaderBase, change_squash
from ..topology import guessers
from ..core.topologyattrs import (
    Atomids,
    Atomnames,
    Atomtypes,
    Elements,
    Masses,
    Charges,
    Aromaticities,
    Bonds,
    Resids,
    Resnums,
    Resnames,
    RSChirality,
    Segids,
    AltLocs,
    ChainIDs,
    ICodes,
    Occupancies,
    Tempfactors,
)
from ..core.topology import Topology

logger = logging.getLogger("MDAnalysis.converters.RDKitParser")


def _rdkit_atom_to_RS(atom):
    """Fetches RDKit chiral tags"""
    try:
        return atom.GetProp("_CIPCode")
    except KeyError:
        return ""


[docs]class RDKitParser(TopologyReaderBase): """ For RDKit structures Creates the following Attributes: - Atomids - Atomnames - Aromaticities - Elements - Masses - Bonds - Resids - Resnums - RSChirality - Segids Guesses the following: - Atomtypes Depending on RDKit's input, the following Attributes might be present: - Charges - Resnames - AltLocs - ChainIDs - ICodes - Occupancies - Tempfactors Attributes table: +---------------------------------------------+-------------------------+ | RDKit attribute | MDAnalysis equivalent | +=============================================+=========================+ | atom.GetMonomerInfo().GetAltLoc() | altLocs | +---------------------------------------------+-------------------------+ | atom.GetIsAromatic() | aromaticities | +---------------------------------------------+-------------------------+ | atom.GetMonomerInfo().GetChainId() | chainIDs | +---------------------------------------------+-------------------------+ | atom.GetDoubleProp('_GasteigerCharge') | charges | | atom.GetDoubleProp('_TriposPartialCharge') | | +---------------------------------------------+-------------------------+ | atom.GetSymbol() | elements | +---------------------------------------------+-------------------------+ | atom.GetMonomerInfo().GetInsertionCode() | icodes | +---------------------------------------------+-------------------------+ | atom.GetIdx() | indices | +---------------------------------------------+-------------------------+ | atom.GetMass() | masses | +---------------------------------------------+-------------------------+ | atom.GetMonomerInfo().GetName() | names | | atom.GetProp('_TriposAtomName') | | +---------------------------------------------+-------------------------+ | atom.GetProp('_CIPCode') | chiralities | +---------------------------------------------+-------------------------+ | atom.GetMonomerInfo().GetOccupancy() | occupancies | +---------------------------------------------+-------------------------+ | atom.GetMonomerInfo().GetResidueName() | resnames | +---------------------------------------------+-------------------------+ | atom.GetMonomerInfo().GetResidueNumber() | resnums | +---------------------------------------------+-------------------------+ | atom.GetMonomerInfo().GetTempFactor() | tempfactors | +---------------------------------------------+-------------------------+ | atom.GetProp('_TriposAtomType') | types | +---------------------------------------------+-------------------------+ Raises ------ ValueError If only part of the atoms have MonomerInfo available .. versionadded:: 2.0.0 .. versionchanged:: 2.1.0 Added R/S chirality support """ format = 'RDKIT' @staticmethod def _format_hint(thing): """Can this Parser read object *thing*?""" try: from rdkit import Chem except ImportError: # if no rdkit, probably not rdkit return False else: return isinstance(thing, Chem.Mol)
[docs] def parse(self, **kwargs): """Parse RDKit into Topology Returns ------- MDAnalysis Topology object """ mol = self.filename # Atoms names = [] chiralities = [] resnums = [] resnames = [] elements = [] masses = [] charges = [] aromatics = [] ids = [] atomtypes = [] segids = [] altlocs = [] chainids = [] icodes = [] occupancies = [] tempfactors = [] try: atom = mol.GetAtomWithIdx(0) except RuntimeError: top = Topology(n_atoms=0, n_res=0, n_seg=0, attrs=None, atom_resindex=None, residue_segindex=None) return top # check if multiple charges present if atom.HasProp('_GasteigerCharge') and ( atom.HasProp('_TriposPartialCharge') ): warnings.warn( 'Both _GasteigerCharge and _TriposPartialCharge properties ' 'are present. Using Gasteiger charges by default.') for atom in mol.GetAtoms(): ids.append(atom.GetIdx()) elements.append(atom.GetSymbol()) masses.append(atom.GetMass()) aromatics.append(atom.GetIsAromatic()) chiralities.append(_rdkit_atom_to_RS(atom)) mi = atom.GetMonomerInfo() if mi: # atom name and residue info are present names.append(mi.GetName().strip()) resnums.append(mi.GetResidueNumber()) resnames.append(mi.GetResidueName()) segids.append(mi.GetSegmentNumber()) altlocs.append(mi.GetAltLoc().strip()) chainids.append(mi.GetChainId().strip()) icodes.append(mi.GetInsertionCode().strip()) occupancies.append(mi.GetOccupancy()) tempfactors.append(mi.GetTempFactor()) else: # atom name (MOL2 only) try: names.append(atom.GetProp('_TriposAtomName')) except KeyError: pass # atom type (MOL2 only) try: atomtypes.append(atom.GetProp('_TriposAtomType')) except KeyError: pass # gasteiger charge (computed): # if the user took the time to compute them, make it a priority # over charges read from a MOL2 file try: charges.append(atom.GetDoubleProp('_GasteigerCharge')) except KeyError: # partial charge (MOL2 only) try: charges.append(atom.GetDoubleProp('_TriposPartialCharge')) except KeyError: pass # make Topology attributes attrs = [] n_atoms = len(ids) if resnums and (len(resnums) != n_atoms): raise ValueError( "ResidueInfo is only partially available in the molecule. " "If you have added hydrogens to the input RDKit molecule with " "`Chem.AddHs(mol)`, consider using " "`Chem.AddHs(mol, addResidueInfo=True)` instead" ) # * Attributes always present * # Atom attributes for vals, Attr, dtype in ( (ids, Atomids, np.int32), (elements, Elements, object), (masses, Masses, np.float32), (aromatics, Aromaticities, bool), (chiralities, RSChirality, 'U1'), ): attrs.append(Attr(np.array(vals, dtype=dtype))) # Bonds bonds = [] bond_types = [] bond_orders = [] for bond in mol.GetBonds(): bonds.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())) bond_orders.append(bond.GetBondTypeAsDouble()) bond_types.append(str(bond.GetBondType())) attrs.append(Bonds(bonds, types=bond_types, order=bond_orders)) # * Optional attributes * # Atom name if names: attrs.append(Atomnames(np.array(names, dtype=object))) else: for atom in mol.GetAtoms(): name = "%s%d" % (atom.GetSymbol(), atom.GetIdx()) names.append(name) attrs.append(Atomnames(np.array(names, dtype=object))) # Atom type if atomtypes: attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) else: atomtypes = guessers.guess_types(names) attrs.append(Atomtypes(atomtypes, guessed=True)) # Partial charges if charges: attrs.append(Charges(np.array(charges, dtype=np.float32))) else: pass # no guesser yet # PDB only for vals, Attr, dtype in ( (altlocs, AltLocs, object), (chainids, ChainIDs, object), (occupancies, Occupancies, np.float32), (tempfactors, Tempfactors, np.float32), ): if vals: attrs.append(Attr(np.array(vals, dtype=dtype))) # Residue if any(resnums) and not any(val is None for val in resnums): resnums = np.array(resnums, dtype=np.int32) resnames = np.array(resnames, dtype=object) segids = np.array(segids, dtype=object) icodes = np.array(icodes, dtype=object) residx, (resnums, resnames, icodes, segids) = change_squash( (resnums, resnames, icodes, segids), (resnums, resnames, icodes, segids)) n_residues = len(resnums) for vals, Attr, dtype in ( (resnums, Resids, np.int32), (resnums.copy(), Resnums, np.int32), (resnames, Resnames, object), (icodes, ICodes, object), ): attrs.append(Attr(np.array(vals, dtype=dtype))) else: attrs.append(Resids(np.array([1]))) attrs.append(Resnums(np.array([1]))) residx = None n_residues = 1 # Segment if any(segids) and not any(val is None for val in segids): segidx, (segids,) = change_squash((segids,), (segids,)) n_segments = len(segids) attrs.append(Segids(segids)) else: n_segments = 1 attrs.append(Segids(np.array(['SYSTEM'], dtype=object))) segidx = None # create topology top = Topology(n_atoms, n_residues, n_segments, attrs=attrs, atom_resindex=residx, residue_segindex=segidx) return top