Source code for MDAnalysis.guesser.default_guesser
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# MDAnalysis ---
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
# Released under the Lesser GNU Public Licence, v2 or any higher version
# Please cite your use of MDAnalysis in published work:
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
Default Guesser
.. _DefaultGuesser:
DefaultGuesser is a generic guesser class that has basic guessing methods.
This class is a general purpose guesser that can be used with most topologies,
but being generic makes it the least accurate among all guessers.
Guessing behavior
This section describes how each attribute is guessed by the DefaultGuesser.
We first attempt to look up the mass of an atom based on its element if the
element TopologyAttr is available. If not, we attempt to look up the mass based
on the atom type (``type``) TopologyAttr. If neither of these is available, we
attempt to guess the atom type based on the atom name (``name``) and then
look up the mass based on the guessed atom type.
We attempt to guess the atom type based on the atom name (``name``).
The name is first stripped of any numbers and symbols, and then looked up in
the :data:`MDAnalysis.guesser.tables.atomelements` table. If the name is not
found, we continue checking variations of the name following the logic in
:meth:`DefaultGuesser.guess_atom_element`. Ultimately, if no match is found,
the first character of the stripped name is returned.
Elements are guessed using the same method as atom types.
Bonds are guessed based on the distance between atoms.
See :meth:`DefaultGuesser.guess_bonds` for more details.
Angles are guessed based on the bonds between atoms.
See :meth:`DefaultGuesser.guess_angles` for more details.
Dihedrals are guessed based on the angles between atoms.
See :meth:`DefaultGuesser.guess_dihedrals` for more details.
Improper Dihedrals
Improper dihedrals are guessed based on the angles between atoms.
See :meth:`DefaultGuesser.guess_improper_dihedrals` for more details.
Aromaticity is guessed using RDKit's GetIsAromatic method.
See :meth:`DefaultGuesser.guess_aromaticities` for more details.
.. autoclass:: DefaultGuesser
from .base import GuesserBase
import numpy as np
import warnings
import math
import re
from ..exceptions import NoDataError
from ..lib import distances
from . import tables
class DefaultGuesser(GuesserBase):
This guesser holds generic methods (not directed to specific contexts) for
guessing different topology attributes. It has the same methods that were
originally found in the legacy ``MDAnalysis.topology.guessers`` module. The
attributes that can be guessed by this class are:
- masses
- types
- elements
- angles
- dihedrals
- bonds
- improper dihedrals
- aromaticities
You can use this guesser either directly through an instance, or through
the :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` method.
universe : Universe
The Universe to apply the guesser on
box : np.ndarray, optional
The box of the Universe. This is used for bond guessing.
vdwradii : dict, optional
Dict relating atom types: vdw radii. This is used for bond guessing
fudge_factor : float, optional
The factor by which atoms must overlap each other to be considered
a bond. Larger values will increase the number of bonds found. [0.55]
lower_bound : float, optional
The minimum bond length. All bonds found shorter than this length
will be ignored. This is useful for parsing PDB with altloc records
where atoms with altloc A and B may be very close together and
there should be no chemical bond between them. [0.1]
to guess bonds for a universe::
import MDAnalysis as mda
from MDAnalysisTests.datafiles import two_water_gro
u = mda.Universe(two_water_gro, context='default', to_guess=['bonds'])
.. versionadded:: 2.8.0
context = "default"
def __init__(
self._guesser_methods = {
"masses": self.guess_masses,
"types": self.guess_types,
"elements": self.guess_types,
"bonds": self.guess_bonds,
"angles": self.guess_angles,
"dihedrals": self.guess_dihedrals,
"impropers": self.guess_improper_dihedrals,
"aromaticities": self.guess_aromaticities,
def guess_masses(self, atom_types=None, indices_to_guess=None):
    """Guess the mass of many atoms based upon their type.

    For guessing masses through
    :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs`:

    First try to guess masses from atom elements, if not available,
    try to guess masses from types and if not available, try to guess
    the types from the atom names and use those.

    Parameters
    ----------
    atom_types : Optional[np.ndarray]
        Atom types/elements to guess masses from
    indices_to_guess : Optional[np.ndarray]
        Mask array for partially guess masses for certain atoms

    Returns
    -------
    atom_masses : np.ndarray dtype float64

    Raises
    ------
    :exc:`NoDataError`
        If there are no atom types or elements to guess mass from.
    """
    if atom_types is None:
        atoms = self._universe.atoms
        # Preferred reference attributes, best first.
        for attrname in ("elements", "types"):
            try:
                atom_types = getattr(atoms, attrname)
            except NoDataError:
                continue
            break
        else:
            # Neither elements nor types exist: fall back to guessing the
            # types from the atom names.
            try:
                atom_types = self.guess_types(atom_types=atoms.names)
            except NoDataError:
                raise NoDataError(
                    "there is no reference attributes"
                    " (elements, types, or names)"
                    " in this universe to guess mass from"
                ) from None

    if indices_to_guess is not None:
        atom_types = atom_types[indices_to_guess]

    masses = np.array(
        [self.get_atom_mass(atom) for atom in atom_types], dtype=np.float64
    )
    return masses
def get_atom_mass(self, element):
    """Return the atomic mass in u for *element*.

    Masses are looked up in :data:`MDAnalysis.guesser.tables.masses`,
    first with *element* exactly as given and then upper-cased.

    .. Warning:: Until version 3.0.0 unknown masses are set to 0.0

    Parameters
    ----------
    element : str
        Element (or atom type) to look up.

    Returns
    -------
    float
        The tabulated mass, or 0.0 (with a warning) if *element* is
        not in the table.
    """
    try:
        return tables.masses[element]
    except KeyError:
        try:
            return tables.masses[element.upper()]
        except KeyError:
            warnings.warn(
                "Unknown masses are set to 0.0 for current version, "
                "this will be deprecated in version 3.0.0 and replaced by"
                " Masse's no_value_label (np.nan)",
                PendingDeprecationWarning,
            )
            return 0.0
def guess_atom_mass(self, atomname):
    """Guess a mass based on the atom name.

    :meth:`guess_atom_element` is used to determine the kind of atom,
    and :meth:`get_atom_mass` to look up its mass.

    .. warning:: Until version 3.0.0 anything not recognized is simply
       set to 0.0; if you rely on the masses you might want to double-check.

    Parameters
    ----------
    atomname : str
        Name of the atom.

    Returns
    -------
    float
        Mass of the element guessed from *atomname*.
    """
    element = self.guess_atom_element(atomname)
    return self.get_atom_mass(element)
def guess_types(self, atom_types=None, indices_to_guess=None):
    """Guess the atom type of many atoms based on atom name

    Parameters
    ----------
    atom_types (optional)
        atoms names if types guessing is desired to be from names
    indices_to_guess (optional)
        Mask array for partially guess types for certain atoms

    Returns
    -------
    atom_types : np.ndarray dtype object

    Raises
    ------
    :exc:`NoDataError`
        If there is no names to guess types from.
    """
    if atom_types is None:
        try:
            atom_types = self._universe.atoms.names
        except NoDataError:
            raise NoDataError(
                "there is no reference attributes in this universe "
                "to guess types from"
            ) from None

    if indices_to_guess is not None:
        atom_types = atom_types[indices_to_guess]

    return np.array(
        [self.guess_atom_element(atom) for atom in atom_types],
        dtype=object,
    )
def guess_atom_element(self, atomname):
    """Guess the element of the atom from the name.

    First all numbers and symbols are stripped from the name.
    Then the name is looked up in the
    :data:`MDAnalysis.guesser.tables.atomelements` table.
    If the name is not found, we remove the last character or
    first character from the name and check the table for both,
    with a preference for removing the last character. If the name is
    still not found, we iteratively continue to remove the last character
    or first character until we find a match. If ultimately no match
    is found, the first character of the stripped name is returned.

    If the input name is an empty string, an empty string is returned.

    The table comes from CHARMM and AMBER atom
    types, where the first character is not sufficient to determine the
    atom type. Some GROMOS ions have also been added.

    .. Warning: The translation table is incomplete.
       This will probably result in some mistakes,
       but it still better than nothing!

    See Also
    --------
    :mod:`MDAnalysis.guesser.tables`
    """
    NUMBERS = re.compile(r"[0-9]")  # match numbers
    SYMBOLS = re.compile(r"[*+-]")  # match *, +, -

    if atomname == "":
        return ""

    try:
        # exact (upper-cased) name known to the translation table
        return tables.atomelements[atomname.upper()]
    except KeyError:
        # strip symbols and numbers
        no_symbols = re.sub(SYMBOLS, "", atomname)
        name = re.sub(NUMBERS, "", no_symbols).upper()

        # just in case
        if name in tables.atomelements:
            return tables.atomelements[name]

        while name:
            if name in tables.elements:
                return name
            if name[:-1] in tables.elements:
                return name[:-1]
            if name[1:] in tables.elements:
                return name[1:]
            if len(name) <= 2:
                return name[0]
            name = name[:-1]  # probably element is on left not right

        # if it's numbers
        return no_symbols
def guess_bonds(self, atoms=None, coords=None):
    r"""Guess if bonds exist between two atoms based on their distance.

    Bond between two atoms is created, if the two atoms are within

    .. math::

          d < f \cdot (R_1 + R_2)

    of each other, where :math:`R_1` and :math:`R_2` are the VdW radii
    of the atoms and :math:`f` is an ad-hoc *fudge_factor*. This is
    the same algorithm that VMD uses.

    Parameters
    ----------
    atoms: AtomGroup
        atoms for which bonds should be guessed. Defaults to all atoms
        of the universe.
    coords: np.ndarray, optional
        coordinates of the atoms. If not provided, the coordinates
        of ``atoms`` are used.

    Returns
    -------
    tuple
        Tuple of ``(index, index)`` tuples suitable for use in Universe
        topology building. Empty if ``atoms`` is empty.

    Warnings
    --------
    No check is done after the bonds are guessed to see if Lewis
    structure is correct. This is wrong and will burn somebody.

    Raises
    ------
    :exc:`ValueError`
        If inputs are malformed or `vdwradii` data is missing.
    """
    if atoms is None:
        atoms = self._universe.atoms

    if coords is None:
        # Take the positions of the atoms actually being processed; using
        # the whole universe here made every sub-selection fail the
        # length check below.
        coords = atoms.positions

    if len(atoms) != len(coords):
        raise ValueError("'atoms' and 'coord' must be the same length")

    if len(atoms) == 0:
        # nothing to bond; also avoids max() on an empty sequence below
        return ()

    fudge_factor = self._kwargs.get("fudge_factor", 0.55)

    # so I don't permanently change it
    vdwradii = tables.vdwradii.copy()
    user_vdwradii = self._kwargs.get("vdwradii", None)
    # this should make algo use their values over defaults
    if user_vdwradii:
        vdwradii.update(user_vdwradii)

    # Try using types, then elements
    if hasattr(atoms, "types"):
        atomtypes = atoms.types
    else:
        atomtypes = self.guess_types(atom_types=atoms.names)

    # check that all types have a defined vdw
    missing = [t for t in set(atomtypes) if t not in vdwradii]
    if missing:
        raise ValueError(
            "vdw radii for types: "
            + ", ".join(missing)
            + ". These can be defined manually using the"
            + " keyword 'vdwradii'"
        )

    lower_bound = self._kwargs.get("lower_bound", 0.1)

    box = self._kwargs.get("box", None)
    if box is not None:
        box = np.asarray(box)

    # to speed up checking, calculate what the largest possible bond
    # atom that would warrant attention.
    # then use this to quickly mask distance results later
    max_vdw = max(vdwradii[t] for t in atomtypes)

    bonds = []

    pairs, dist = distances.self_capped_distance(
        coords, max_cutoff=2.0 * max_vdw, min_cutoff=lower_bound, box=box
    )
    for idx, (i, j) in enumerate(pairs):
        # per-pair cutoff: scaled sum of the two vdW radii
        d = (vdwradii[atomtypes[i]] + vdwradii[atomtypes[j]]) * fudge_factor
        if dist[idx] < d:
            bonds.append((atoms[i].index, atoms[j].index))
    return tuple(bonds)
def guess_angles(self, bonds=None):
    """Given a list of Bonds, find all angles that exist between atoms.

    Works by assuming that if atoms 1 & 2 are bonded, and 2 & 3 are bonded,
    then (1,2,3) must be an angle.

    Parameters
    ----------
    bonds : Bonds
        from which angles should be guessed. If omitted, the bonds of the
        universe are used; if the universe has none, they are guessed
        with :meth:`guess_bonds` first.

    Returns
    -------
    tuple of tuples
        Tuples of three atom indices defining the angles, with the first
        index never larger than the last.
        Suitable for use in u._topology

    See Also
    --------
    :meth:`guess_bonds`
    """
    angles_found = set()

    if bonds is None:
        if hasattr(self._universe.atoms, "bonds"):
            bonds = self._universe.atoms.bonds
        else:
            # Only this fallback needs a Universe, so import it here.
            from ..core.universe import Universe

            # Guess the bonds on a scratch universe so the real one is
            # left untouched.
            temp_u = Universe.empty(n_atoms=len(self._universe.atoms))
            temp_u.add_bonds(
                self.guess_bonds(
                    self._universe.atoms, self._universe.atoms.positions
                )
            )
            bonds = temp_u.atoms.bonds

    for b in bonds:
        for atom in b:
            other_a = b.partner(atom)  # who's my friend currently in Bond
            for other_b in atom.bonds:
                if other_b != b:  # if not the same bond I start as
                    third_a = other_b.partner(atom)
                    desc = (other_a.index, atom.index, third_a.index)
                    # first index always less than last
                    if desc[0] > desc[-1]:
                        desc = desc[::-1]
                    angles_found.add(desc)

    return tuple(angles_found)
def guess_dihedrals(self, angles=None):
    """Given a list of Angles, find all dihedrals that exist between atoms.

    Works by assuming that if (1,2,3) is an angle, and 3 & 4 are bonded,
    then (1,2,3,4) must be a dihedral.

    Parameters
    ----------
    angles : Angles
        from which dihedrals should be guessed. If omitted, the angles of
        the universe are used; if the universe has none, bonds and angles
        are guessed first.

    Returns
    -------
    tuple of tuples
        Tuples of four atom indices defining the dihedrals, with the
        first index never larger than the last.
        Suitable for use in u._topology
    """
    if angles is None:
        if hasattr(self._universe.atoms, "angles"):
            angles = self._universe.atoms.angles
        else:
            # Only this fallback needs a Universe, so import it here.
            from ..core.universe import Universe

            # Build bonds, then angles, on a scratch universe so the real
            # one is left untouched.
            temp_u = Universe.empty(n_atoms=len(self._universe.atoms))
            temp_u.add_bonds(
                self.guess_bonds(
                    self._universe.atoms, self._universe.atoms.positions
                )
            )
            temp_u.add_angles(self.guess_angles(temp_u.atoms.bonds))
            angles = temp_u.atoms.angles

    dihedrals_found = set()

    for b in angles:
        a_tup = tuple(a.index for a in b)  # angle as tuple of numbers
        # if searching with b[0], want tuple of (b[2], b[1], b[0], +new)
        # search the first and last atom of each angle
        ends = (b.atoms[0], b.atoms[-1])
        prefixes = (a_tup[::-1], a_tup)
        for atom, prefix in zip(ends, prefixes):
            for other_b in atom.bonds:
                third_a = other_b.partner(atom)
                if third_a not in b:
                    desc = prefix + (third_a.index,)
                    # first index always less than last
                    if desc[0] > desc[-1]:
                        desc = desc[::-1]
                    dihedrals_found.add(desc)

    return tuple(dihedrals_found)
def guess_improper_dihedrals(self, angles=None):
    """Given a list of Angles, find all improper dihedrals
    that exist between atoms.

    Works by assuming that if (1,2,3) is an angle, and 2 & 4 are bonded,
    then (2, 3, 1, 4) must be an improper dihedral, i.e. the central atom
    of the angle comes first and the newly found neighbour last (the tuple
    is reversed if its first index is larger than its last).
    ie the improper dihedral is the angle between the planes formed by
    (1, 2, 3) and (1, 3, 4)

    Parameters
    ----------
    angles : Angles
        from which improper dihedrals should be guessed. If omitted, the
        angles of the universe are used; if the universe has none, bonds
        and angles are guessed first.

    Returns
    -------
    tuple of tuples
        Tuples of four atom indices defining the improper dihedrals.
        Suitable for use in u._topology
    """
    if angles is None:
        if hasattr(self._universe.atoms, "angles"):
            angles = self._universe.atoms.angles
        else:
            # Only this fallback needs a Universe, so import it here.
            from ..core.universe import Universe

            # Build bonds, then angles, on a scratch universe so the real
            # one is left untouched.
            temp_u = Universe.empty(n_atoms=len(self._universe.atoms))
            temp_u.add_bonds(
                self.guess_bonds(
                    self._universe.atoms, self._universe.atoms.positions
                )
            )
            temp_u.add_angles(self.guess_angles(temp_u.atoms.bonds))
            angles = temp_u.atoms.angles

    dihedrals_found = set()

    for b in angles:
        atom = b[1]  # select middle atom in angle
        # start of improper tuple: (b[1], b[2], b[0], +new)
        a_tup = tuple(b[a].index for a in (1, 2, 0))
        # search the bonds of the middle atom for a fourth partner
        for other_b in atom.bonds:
            other_atom = other_b.partner(atom)
            # if this atom isn't in the angle I started with
            if other_atom not in b:
                desc = a_tup + (other_atom.index,)
                if desc[0] > desc[-1]:
                    desc = desc[::-1]
                dihedrals_found.add(desc)

    return tuple(dihedrals_found)
def guess_atom_charge(self, atoms):
    """Guess atom charge from the name.

    .. Warning:: Not implemented; simply returns 0.

    Parameters
    ----------
    atoms
        Ignored by the current implementation.

    Returns
    -------
    float
        Always 0.0.
    """
    # TODO: do something slightly smarter, at least use name/element
    return 0.0
def guess_aromaticities(self, atomgroup=None):
    """Guess aromaticity of atoms using RDKit

    Parameters
    ----------
    atomgroup : AtomGroup, optional
        Atoms to inspect; defaults to all atoms of the universe.

    Returns
    -------
    aromaticities : numpy.ndarray
        Array of boolean values for the aromaticity of each atom
    """
    if atomgroup is None:
        atomgroup = self._universe.atoms

    mol = atomgroup.convert_to("RDKIT")
    # explicit dtype so an empty group still yields a boolean array
    return np.array(
        [atom.GetIsAromatic() for atom in mol.GetAtoms()], dtype=bool
    )
def guess_gasteiger_charges(self, atomgroup):
"""Guess Gasteiger partial charges using RDKit
atomgroup : mda.core.groups.AtomGroup
Atoms for which the charges will be guessed
charges : numpy.ndarray
Array of float values representing the charge of each atom
mol = atomgroup.convert_to("RDKIT")
from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges
ComputeGasteigerCharges(mol, throwOnParamFailure=True)
return np.array(
for atom in mol.GetAtoms()