Source code for MDAnalysis.guesser.default_guesser

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787

r"""
Default Guesser
================
.. _DefaultGuesser:

DefaultGuesser is a generic guesser class that has basic guessing methods.
This class is a general purpose guesser that can be used with most topologies,
but being generic makes it the least accurate among all guessers.


Guessing behavior
-----------------

This section describes how each attribute is guessed by the DefaultGuesser.

Masses
~~~~~~

We first attempt to look up the mass of an atom based on its element if the
element TopologyAttr is available. If not, we attempt to look up the mass based
on the atom type (``type``) TopologyAttr. If neither of these is available, we
attempt to guess the atom type based on the atom name (``name``) and then
look up the mass based on the guessed atom type.


Types
~~~~~

We attempt to guess the atom type based on the atom name (``name``).
The name is first stripped of any numbers and symbols, and then looked up in
the :data:`MDAnalysis.guesser.tables.atomelements` table. If the name is not
found, we continue checking variations of the name following the logic in
:meth:`DefaultGuesser.guess_atom_element`. Ultimately, if no match is found,
the first character of the stripped name is returned.

Elements
~~~~~~~~

This follows the same method as guessing atom types.


Bonds
~~~~~

Bonds are guessed based on the distance between atoms.
See :meth:`DefaultGuesser.guess_bonds` for more details.

Angles
~~~~~~

Angles are guessed based on the bonds between atoms.
See :meth:`DefaultGuesser.guess_angles` for more details.

Dihedrals
~~~~~~~~~

Dihedrals are guessed based on the angles between atoms.
See :meth:`DefaultGuesser.guess_dihedrals` for more details.

Improper Dihedrals
~~~~~~~~~~~~~~~~~~

Improper dihedrals are guessed based on the angles between atoms.
See :meth:`DefaultGuesser.guess_improper_dihedrals` for more details.

Aromaticities
~~~~~~~~~~~~~

Aromaticity is guessed using RDKit's GetIsAromatic method.
See :meth:`DefaultGuesser.guess_aromaticities` for more details.




Classes
-------

.. autoclass:: DefaultGuesser
   :members:
   :inherited-members:

"""
from .base import GuesserBase
import numpy as np
import warnings
import math

import re

from ..exceptions import NoDataError
from ..lib import distances
from . import tables


class DefaultGuesser(GuesserBase):
    """
    This guesser holds generic methods (not directed to specific contexts) for
    guessing different topology attributes. It has the same methods which were
    originally found in Topology.guesser.py. The attributes that can be
    guessed by this class are:

    - masses
    - types
    - elements
    - angles
    - dihedrals
    - bonds
    - improper dihedrals
    - aromaticities

    You can use this guesser either directly through an instance, or through
    the :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` method.

    Parameters
    ----------
    universe : Universe
        The Universe to apply the guesser on
    box : np.ndarray, optional
        The box of the Universe. This is used for bond guessing.
    vdwradii : dict, optional
        Dict relating atom types: vdw radii. This is used for bond guessing
    fudge_factor : float, optional
        The factor by which atoms must overlap each other to be considered
        a bond.  Larger values will increase the number of bonds found. [0.55]
    lower_bound : float, optional
        The minimum bond length. All bonds found shorter than this length
        will be ignored. This is useful for parsing PDB with altloc records
        where atoms with altloc A and B may be very close together and
        there should be no chemical bond between them. [0.1]

    Examples
    --------
    to guess bonds for a universe::

        import MDAnalysis as mda
        from MDAnalysisTests.datafiles import two_water_gro

        u = mda.Universe(two_water_gro, context='default', to_guess=['bonds'])

    .. versionadded:: 2.8.0

    """

    context = "default"

    def __init__(
        self,
        universe,
        box=None,
        vdwradii=None,
        fudge_factor=0.55,
        lower_bound=0.1,
        **kwargs,
    ):
        super().__init__(
            universe,
            box=box,
            vdwradii=vdwradii,
            fudge_factor=fudge_factor,
            lower_bound=lower_bound,
            **kwargs,
        )
        # Maps the name of a guessable attribute to the method producing it.
        # "elements" deliberately shares the implementation used for "types".
        self._guesser_methods = {
            "masses": self.guess_masses,
            "types": self.guess_types,
            "elements": self.guess_types,
            "bonds": self.guess_bonds,
            "angles": self.guess_angles,
            "dihedrals": self.guess_dihedrals,
            "impropers": self.guess_improper_dihedrals,
            "aromaticities": self.guess_aromaticities,
        }

    def guess_masses(self, atom_types=None, indices_to_guess=None):
        """Guess the mass of many atoms based upon their type.
        For guessing masses through
        :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs`:

        First try to guess masses from atom elements, if not available,
        try to guess masses from types and if not available, try to guess
        types from the atom names.

        Parameters
        ----------
        atom_types : Optional[np.ndarray]
            Atom types/elements to guess masses from
        indices_to_guess : Optional[np.ndarray]
            Mask array for partially guess masses for certain atoms

        Returns
        -------
        atom_masses : np.ndarray dtype float64

        Raises
        ------
        :exc:`~MDAnalysis.exceptions.NoDataError`
            If `atom_types` is not given and the universe has none of the
            elements, types or names attributes to guess masses from.

        """
        if atom_types is None:
            # Fallback chain: elements -> types -> types guessed from names.
            try:
                atom_types = self._universe.atoms.elements
            except NoDataError:
                try:
                    atom_types = self._universe.atoms.types
                except NoDataError:
                    try:
                        atom_types = self.guess_types(
                            atom_types=self._universe.atoms.names
                        )
                    except NoDataError:
                        raise NoDataError(
                            "there is no reference attributes"
                            " (elements, types, or names)"
                            " in this universe to guess mass from"
                        ) from None

        if indices_to_guess is not None:
            atom_types = atom_types[indices_to_guess]

        masses = np.array(
            [self.get_atom_mass(atom) for atom in atom_types],
            dtype=np.float64,
        )
        return masses

    def get_atom_mass(self, element):
        """Return the atomic mass in u for *element*.
        Masses are looked up in :data:`MDAnalysis.guesser.tables.masses`.
        The lookup is tried with *element* as given and then upper-cased.

        .. Warning:: Until version 3.0.0 unknown masses are set to 0.0

        """
        try:
            return tables.masses[element]
        except KeyError:
            try:
                return tables.masses[element.upper()]
            except KeyError:
                warnings.warn(
                    "Unknown masses are set to 0.0 for current version, "
                    "this will be deprecated in version 3.0.0 and replaced by"
                    " Masse's no_value_label (np.nan)",
                    PendingDeprecationWarning,
                )
                return 0.0

    def guess_atom_mass(self, atomname):
        """Guess a mass based on the atom name.

        :meth:`guess_atom_element` is used to determine the kind of atom.

        .. warning:: Until version 3.0.0 anything not recognized is simply
           set to 0.0; if you rely on the masses you might want to
           double-check.
        """
        return self.get_atom_mass(self.guess_atom_element(atomname))

    def guess_types(self, atom_types=None, indices_to_guess=None):
        """Guess the atom type of many atoms based on atom name

        Parameters
        ----------
        atom_types (optional)
            atoms names if types guessing is desired to be from names
        indices_to_guess (optional)
            Mask array for partially guess types for certain atoms

        Returns
        -------
        atom_types : np.ndarray dtype object

        Raises
        ------
        :exc:`~MDAnalysis.exceptions.NoDataError`
            If `atom_types` is not given and the universe has no names to
            guess types from.
        """
        if atom_types is None:
            try:
                atom_types = self._universe.atoms.names
            except NoDataError:
                raise NoDataError(
                    "there is no reference attributes in this universe "
                    "to guess types from"
                ) from None

        if indices_to_guess is not None:
            atom_types = atom_types[indices_to_guess]

        return np.array(
            [self.guess_atom_element(atom) for atom in atom_types],
            dtype=object,
        )

    def guess_atom_element(self, atomname):
        """Guess the element of the atom from the name.

        The upper-cased name is first looked up as-is in the
        :data:`MDAnalysis.guesser.tables.atomelements` table. If it is not
        found, all numbers and the symbols ``*``, ``+`` and ``-`` are stripped
        from the name and the lookup is repeated. If the name is still not
        found, we remove the last character or first character from the name
        and check the elements table for both, with a preference for removing
        the last character. If the name is still not found, we iteratively
        continue to remove the last character or first character until we
        find a match. If ultimately no match is found, the first character of
        the stripped name is returned.

        If the input name is an empty string, an empty string is returned.
        If nothing is left after stripping (e.g. the name held only digits),
        the name without symbols is returned.

        The table comes from CHARMM and AMBER atom types, where the first
        character is not sufficient to determine the atom type. Some GROMOS
        ions have also been added.

        .. Warning::

           The translation table is incomplete. This will probably result
           in some mistakes, but it still better than nothing!

        See Also
        --------
        :meth:`guess_types`
        :mod:`MDAnalysis.guesser.tables`
        """
        if atomname == "":
            return ""
        try:
            return tables.atomelements[atomname.upper()]
        except KeyError:
            # strip symbols (*, +, -) and then numbers
            no_symbols = re.sub(r"[*+-]", "", atomname)
            name = re.sub(r"[0-9]", "", no_symbols).upper()

            # the stripped name may be a known atom type after all
            if name in tables.atomelements:
                return tables.atomelements[name]

            while name:
                if name in tables.elements:
                    return name
                if name[:-1] in tables.elements:
                    return name[:-1]
                if name[1:] in tables.elements:
                    return name[1:]
                if len(name) <= 2:
                    return name[0]
                name = name[:-1]  # probably element is on left not right

            # nothing but numbers was left after removing the symbols
            return no_symbols

    def guess_bonds(self, atoms=None, coords=None):
        r"""Guess if bonds exist between two atoms based on their distance.

        Bond between two atoms is created, if the two atoms are within

        .. math::

              d < f \cdot (R_1 + R_2)

        of each other, where :math:`R_1` and :math:`R_2` are the VdW radii
        of the atoms and :math:`f` is an ad-hoc *fudge_factor*. This is
        the `same algorithm that VMD uses`_.

        Parameters
        ----------
        atoms : AtomGroup, optional
            atoms for which bonds should be guessed. Defaults to all atoms
            of the universe.
        coords : np.ndarray, optional
            coordinates of the atoms. If not provided, the coordinates
            of the ``atoms`` in the universe are used.

        Returns
        -------
        tuple
            Tuple of index pairs suitable for use in Universe topology
            building. Empty if ``atoms`` is empty.

        Warnings
        --------
        No check is done after the bonds are guessed to see if Lewis
        structure is correct. This is wrong and will burn somebody.

        Raises
        ------
        :exc:`ValueError`
            If inputs are malformed or `vdwradii` data is missing.


        .. _`same algorithm that VMD uses`:
           http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.1/ug/node26.html

        """
        if atoms is None:
            atoms = self._universe.atoms

        if coords is None:
            # Take the positions of the atoms being guessed rather than of the
            # whole universe, so that a subset AtomGroup works without
            # explicit coordinates.
            coords = atoms.positions

        if len(atoms) != len(coords):
            raise ValueError("'atoms' and 'coord' must be the same length")

        fudge_factor = self._kwargs.get("fudge_factor", 0.55)

        # work on a copy so the shared default table is never modified
        vdwradii = tables.vdwradii.copy()
        user_vdwradii = self._kwargs.get("vdwradii", None)
        # user supplied values take precedence over the defaults
        if user_vdwradii:
            vdwradii.update(user_vdwradii)

        # Use types when present, otherwise guess them from the atom names
        if hasattr(atoms, "types"):
            atomtypes = atoms.types
        else:
            atomtypes = self.guess_types(atom_types=atoms.names)

        # check that all types have a defined vdw radius
        missing = [t for t in set(atomtypes) if t not in vdwradii]
        if missing:
            raise ValueError(
                "vdw radii for types: "
                + ", ".join(sorted(str(t) for t in missing))
                + ". These can be defined manually using the"
                + " keyword 'vdwradii'"
            )

        # nothing to bond; also avoids calling max() on an empty sequence
        if len(atomtypes) == 0:
            return ()

        lower_bound = self._kwargs.get("lower_bound", 0.1)

        box = self._kwargs.get("box", None)
        if box is not None:
            box = np.asarray(box)

        # to speed up checking, calculate what the largest possible bond
        # atom that would warrant attention.
        # then use this to quickly mask distance results later
        max_vdw = max(vdwradii[t] for t in atomtypes)

        pairs, dist = distances.self_capped_distance(
            coords, max_cutoff=2.0 * max_vdw, min_cutoff=lower_bound, box=box
        )

        bonds = []
        for (i, j), separation in zip(pairs, dist):
            threshold = (
                vdwradii[atomtypes[i]] + vdwradii[atomtypes[j]]
            ) * fudge_factor
            if separation < threshold:
                bonds.append((atoms[i].index, atoms[j].index))
        return tuple(bonds)

    def _universe_with_guessed_bonds(self):
        """Return a temporary empty Universe holding bonds guessed for all atoms."""
        from ..core.universe import Universe

        all_atoms = self._universe.atoms
        temp_u = Universe.empty(n_atoms=len(all_atoms))
        temp_u.add_bonds(self.guess_bonds(all_atoms, all_atoms.positions))
        return temp_u

    def _angles_or_guessed_angles(self):
        """Return the universe's angles, guessing bonds and angles if absent."""
        if hasattr(self._universe.atoms, "angles"):
            return self._universe.atoms.angles

        temp_u = self._universe_with_guessed_bonds()
        temp_u.add_angles(self.guess_angles(temp_u.atoms.bonds))
        return temp_u.atoms.angles

    def guess_angles(self, bonds=None):
        """Given a list of Bonds, find all angles that exist between atoms.

        Works by assuming that if atoms 1 & 2 are bonded, and 2 & 3 are bonded,
        then (1,2,3) must be an angle.

        Parameters
        ----------
        bonds : Bonds, optional
            Bonds from which angles should be guessed. If not given, the
            bonds of the universe are used; if the universe has no bonds they
            are guessed with :meth:`guess_bonds` first.

        Returns
        -------
        tuple of tuples
            Tuple of index triplets defining the angles.
            Suitable for use in u._topology


        See Also
        --------
        :meth:`guess_bonds`

        """
        if bonds is None:
            if hasattr(self._universe.atoms, "bonds"):
                bonds = self._universe.atoms.bonds
            else:
                bonds = self._universe_with_guessed_bonds().atoms.bonds

        angles_found = set()
        for b in bonds:
            for atom in b:
                other_a = b.partner(atom)  # who's my friend currently in Bond
                for other_b in atom.bonds:
                    if other_b != b:  # if not the same bond I start as
                        third_a = other_b.partner(atom)
                        desc = (other_a.index, atom.index, third_a.index)
                        # first index always less than last
                        if desc[0] > desc[-1]:
                            desc = desc[::-1]
                        angles_found.add(desc)

        return tuple(angles_found)

    def guess_dihedrals(self, angles=None):
        """Given a list of Angles, find all dihedrals that exist between atoms.

        Works by assuming that if (1,2,3) is an angle, and 3 & 4 are bonded,
        then (1,2,3,4) must be a dihedral.

        Parameters
        ----------
        angles : Angles, optional
            Angles from which dihedrals should be guessed. If not given, the
            angles of the universe are used; if the universe has no angles
            they are guessed from guessed bonds first.

        Returns
        -------
        tuple of tuples
            Tuple of index quadruplets defining the dihedrals.
            Suitable for use in u._topology

        """
        if angles is None:
            angles = self._angles_or_guessed_angles()

        dihedrals_found = set()
        for b in angles:
            a_tup = tuple(a.index for a in b)  # angle as tuple of numbers
            # if searching with b[0], want tuple of (b[2], b[1], b[0], +new)
            # search the first and last atom of each angle
            for atom, prefix in zip(
                [b.atoms[0], b.atoms[-1]], [a_tup[::-1], a_tup]
            ):
                for other_b in atom.bonds:
                    third_a = other_b.partner(atom)
                    # only extend with atoms that are not already in the angle
                    if third_a not in b:
                        desc = prefix + (third_a.index,)
                        # first index always less than last
                        if desc[0] > desc[-1]:
                            desc = desc[::-1]
                        dihedrals_found.add(desc)

        return tuple(dihedrals_found)

    def guess_improper_dihedrals(self, angles=None):
        """Given a list of Angles, find all improper dihedrals
        that exist between atoms.

        Works by assuming that if (1,2,3) is an angle, and 2 & 4 are bonded,
        then 1, 2, 3 and 4 form an improper dihedral around the central
        atom 2. Each result is built as (2, 3, 1, 4), i.e. central atom
        first, and is reversed when its first index is larger than its last.

        Parameters
        ----------
        angles : Angles, optional
            Angles from which improper dihedrals should be guessed. If not
            given, the angles of the universe are used; if the universe has
            no angles they are guessed from guessed bonds first.

        Returns
        -------
        tuple of tuples
            Tuple of index quadruplets defining the improper dihedrals.
            Suitable for use in u._topology

        """
        if angles is None:
            angles = self._angles_or_guessed_angles()

        dihedrals_found = set()
        for b in angles:
            atom = b[1]  # select middle atom in angle
            # start of improper tuple: (b[1], b[2], b[0])
            a_tup = tuple(b[a].index for a in (1, 2, 0))
            # extend with every other bonding partner of the middle atom
            for other_b in atom.bonds:
                other_atom = other_b.partner(atom)
                # if this atom isn't in the angle I started with
                if other_atom not in b:
                    desc = a_tup + (other_atom.index,)
                    if desc[0] > desc[-1]:
                        desc = desc[::-1]
                    dihedrals_found.add(desc)

        return tuple(dihedrals_found)

    def guess_atom_charge(self, atoms):
        """Guess atom charge from the name.

        .. Warning:: Not implemented; simply returns 0.0 regardless of input.
        """
        # TODO: do something slightly smarter, at least use name/element
        return 0.0

    def guess_aromaticities(self, atomgroup=None):
        """Guess aromaticity of atoms using RDKit

        Parameters
        ----------
        atomgroup : AtomGroup, optional
            Atoms for which the aromaticity will be guessed. Defaults to all
            atoms of the universe.

        Returns
        -------
        aromaticities : numpy.ndarray
            Array of boolean values for the aromaticity of each atom

        """
        if atomgroup is None:
            atomgroup = self._universe.atoms

        mol = atomgroup.convert_to("RDKIT")
        return np.array([atom.GetIsAromatic() for atom in mol.GetAtoms()])

    def guess_gasteiger_charges(self, atomgroup):
        """Guess Gasteiger partial charges using RDKit

        Parameters
        ----------
        atomgroup : mda.core.groups.AtomGroup
            Atoms for which the charges will be guessed

        Returns
        -------
        charges : numpy.ndarray
            Array of float32 values representing the charge of each atom

        """

        mol = atomgroup.convert_to("RDKIT")
        from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges

        ComputeGasteigerCharges(mol, throwOnParamFailure=True)
        return np.array(
            [
                atom.GetDoubleProp("_GasteigerCharge")
                for atom in mol.GetAtoms()
            ],
            dtype=np.float32,
        )