Source code for MDAnalysis.converters.RDKitParser
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
RDKit topology parser --- :mod:`MDAnalysis.converters.RDKitParser`
==================================================================
Converts an `RDKit <https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html#rdkit.Chem.rdchem.Mol>`_ :class:`rdkit.Chem.rdchem.Mol` into a :class:`MDAnalysis.core.Topology`.
See Also
--------
:mod:`MDAnalysis.converters.RDKit`
Classes
-------
.. autoclass:: RDKitParser
:members:
:inherited-members:
"""
import logging
import warnings
import numpy as np
from ..topology.base import TopologyReaderBase, change_squash
from ..topology import guessers
from ..core.topologyattrs import (
Atomids,
Atomnames,
Atomtypes,
Elements,
Masses,
Charges,
Aromaticities,
Bonds,
Resids,
Resnums,
Resnames,
RSChirality,
Segids,
AltLocs,
ChainIDs,
ICodes,
Occupancies,
Tempfactors,
)
from ..core.topology import Topology
logger = logging.getLogger("MDAnalysis.converters.RDKitParser")
def _rdkit_atom_to_RS(atom):
"""Fetches RDKit chiral tags"""
try:
return atom.GetProp("_CIPCode")
except KeyError:
return ""
[docs]
class RDKitParser(TopologyReaderBase):
"""
For RDKit structures
Creates the following Attributes:
- Atomids
- Atomnames
- Aromaticities
- Elements
- Masses
- Bonds
- Resids
- Resnums
- RSChirality
- Segids
Guesses the following:
- Atomtypes
Depending on RDKit's input, the following Attributes might be present:
- Charges
- Resnames
- AltLocs
- ChainIDs
- ICodes
- Occupancies
- Tempfactors
Attributes table:
+---------------------------------------------+-------------------------+
| RDKit attribute | MDAnalysis equivalent |
+=============================================+=========================+
| atom.GetMonomerInfo().GetAltLoc() | altLocs |
+---------------------------------------------+-------------------------+
| atom.GetIsAromatic() | aromaticities |
+---------------------------------------------+-------------------------+
| atom.GetMonomerInfo().GetChainId() | chainIDs |
+---------------------------------------------+-------------------------+
| atom.GetDoubleProp('_GasteigerCharge') | charges |
| atom.GetDoubleProp('_TriposPartialCharge') | |
+---------------------------------------------+-------------------------+
| atom.GetSymbol() | elements |
+---------------------------------------------+-------------------------+
| atom.GetMonomerInfo().GetInsertionCode() | icodes |
+---------------------------------------------+-------------------------+
| atom.GetIdx() | indices |
+---------------------------------------------+-------------------------+
| atom.GetMass() | masses |
+---------------------------------------------+-------------------------+
| atom.GetMonomerInfo().GetName() | names |
| atom.GetProp('_TriposAtomName') | |
+---------------------------------------------+-------------------------+
| atom.GetProp('_CIPCode') | chiralities |
+---------------------------------------------+-------------------------+
| atom.GetMonomerInfo().GetOccupancy() | occupancies |
+---------------------------------------------+-------------------------+
| atom.GetMonomerInfo().GetResidueName() | resnames |
+---------------------------------------------+-------------------------+
| atom.GetMonomerInfo().GetResidueNumber() | resnums |
+---------------------------------------------+-------------------------+
| atom.GetMonomerInfo().GetTempFactor() | tempfactors |
+---------------------------------------------+-------------------------+
| atom.GetProp('_TriposAtomType') | types |
+---------------------------------------------+-------------------------+
Raises
------
ValueError
If only part of the atoms have MonomerInfo available
.. versionadded:: 2.0.0
.. versionchanged:: 2.1.0
Added R/S chirality support
"""
format = 'RDKIT'
@staticmethod
def _format_hint(thing):
"""Can this Parser read object *thing*?"""
try:
from rdkit import Chem
except ImportError: # if no rdkit, probably not rdkit
return False
else:
return isinstance(thing, Chem.Mol)
[docs]
def parse(self, **kwargs):
"""Parse RDKit into Topology
Returns
-------
MDAnalysis Topology object
"""
mol = self.filename
# Atoms
names = []
chiralities = []
resnums = []
resnames = []
elements = []
masses = []
charges = []
aromatics = []
ids = []
atomtypes = []
segids = []
altlocs = []
chainids = []
icodes = []
occupancies = []
tempfactors = []
try:
atom = mol.GetAtomWithIdx(0)
except RuntimeError:
top = Topology(n_atoms=0, n_res=0, n_seg=0,
attrs=None,
atom_resindex=None,
residue_segindex=None)
return top
# check if multiple charges present
if atom.HasProp('_GasteigerCharge') and (
atom.HasProp('_TriposPartialCharge')
):
warnings.warn(
'Both _GasteigerCharge and _TriposPartialCharge properties '
'are present. Using Gasteiger charges by default.')
for atom in mol.GetAtoms():
ids.append(atom.GetIdx())
elements.append(atom.GetSymbol())
masses.append(atom.GetMass())
aromatics.append(atom.GetIsAromatic())
chiralities.append(_rdkit_atom_to_RS(atom))
mi = atom.GetMonomerInfo()
if mi: # atom name and residue info are present
names.append(mi.GetName().strip())
resnums.append(mi.GetResidueNumber())
resnames.append(mi.GetResidueName())
segids.append(mi.GetSegmentNumber())
altlocs.append(mi.GetAltLoc().strip())
chainids.append(mi.GetChainId().strip())
icodes.append(mi.GetInsertionCode().strip())
occupancies.append(mi.GetOccupancy())
tempfactors.append(mi.GetTempFactor())
else:
# atom name (MOL2 only)
try:
names.append(atom.GetProp('_TriposAtomName'))
except KeyError:
pass
# atom type (MOL2 only)
try:
atomtypes.append(atom.GetProp('_TriposAtomType'))
except KeyError:
pass
# gasteiger charge (computed):
# if the user took the time to compute them, make it a priority
# over charges read from a MOL2 file
try:
charges.append(atom.GetDoubleProp('_GasteigerCharge'))
except KeyError:
# partial charge (MOL2 only)
try:
charges.append(atom.GetDoubleProp('_TriposPartialCharge'))
except KeyError:
pass
# make Topology attributes
attrs = []
n_atoms = len(ids)
if resnums and (len(resnums) != n_atoms):
raise ValueError(
"ResidueInfo is only partially available in the molecule. "
"If you have added hydrogens to the input RDKit molecule with "
"`Chem.AddHs(mol)`, consider using "
"`Chem.AddHs(mol, addResidueInfo=True)` instead"
)
# * Attributes always present *
# Atom attributes
for vals, Attr, dtype in (
(ids, Atomids, np.int32),
(elements, Elements, object),
(masses, Masses, np.float32),
(aromatics, Aromaticities, bool),
(chiralities, RSChirality, 'U1'),
):
attrs.append(Attr(np.array(vals, dtype=dtype)))
# Bonds
bonds = []
bond_types = []
bond_orders = []
for bond in mol.GetBonds():
bonds.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()))
bond_orders.append(bond.GetBondTypeAsDouble())
bond_types.append(str(bond.GetBondType()))
attrs.append(Bonds(bonds, types=bond_types, order=bond_orders))
# * Optional attributes *
# Atom name
if names:
attrs.append(Atomnames(np.array(names, dtype=object)))
else:
for atom in mol.GetAtoms():
name = "%s%d" % (atom.GetSymbol(), atom.GetIdx())
names.append(name)
attrs.append(Atomnames(np.array(names, dtype=object)))
# Atom type
if atomtypes:
attrs.append(Atomtypes(np.array(atomtypes, dtype=object)))
else:
atomtypes = guessers.guess_types(names)
attrs.append(Atomtypes(atomtypes, guessed=True))
# Partial charges
if charges:
attrs.append(Charges(np.array(charges, dtype=np.float32)))
else:
pass # no guesser yet
# PDB only
for vals, Attr, dtype in (
(altlocs, AltLocs, object),
(chainids, ChainIDs, object),
(occupancies, Occupancies, np.float32),
(tempfactors, Tempfactors, np.float32),
):
if vals:
attrs.append(Attr(np.array(vals, dtype=dtype)))
# Residue
if any(resnums) and not any(val is None for val in resnums):
resnums = np.array(resnums, dtype=np.int32)
resnames = np.array(resnames, dtype=object)
segids = np.array(segids, dtype=object)
icodes = np.array(icodes, dtype=object)
residx, (resnums, resnames, icodes, segids) = change_squash(
(resnums, resnames, icodes, segids),
(resnums, resnames, icodes, segids))
n_residues = len(resnums)
for vals, Attr, dtype in (
(resnums, Resids, np.int32),
(resnums.copy(), Resnums, np.int32),
(resnames, Resnames, object),
(icodes, ICodes, object),
):
attrs.append(Attr(np.array(vals, dtype=dtype)))
else:
attrs.append(Resids(np.array([1])))
attrs.append(Resnums(np.array([1])))
residx = None
n_residues = 1
# Segment
if any(segids) and not any(val is None for val in segids):
segidx, (segids,) = change_squash((segids,), (segids,))
n_segments = len(segids)
attrs.append(Segids(segids))
else:
n_segments = 1
attrs.append(Segids(np.array(['SYSTEM'], dtype=object)))
segidx = None
# create topology
top = Topology(n_atoms, n_residues, n_segments,
attrs=attrs,
atom_resindex=residx,
residue_segindex=segidx)
return top