Source code for MDAnalysis.topology.guessers
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
Guessing unknown Topology information --- :mod:`MDAnalysis.topology.guessers`
=============================================================================
In general `guess_atom_X` returns the guessed value for a single value,
while `guess_Xs` will work on an array of many atoms.
Example uses of guessers
------------------------
Guessing elements from atom names
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Currently, it is possible to guess elements from atom names using
:func:`guess_atom_element` (or the synonymous :func:`guess_atom_type`). This can
be done in the following manner::
  import MDAnalysis as mda
  from MDAnalysis.topology.guessers import guess_atom_element
  from MDAnalysisTests.datafiles import PRM7
  u = mda.Universe(PRM7)
  print(u.atoms.names[1])  # returns the atom name H1
  element = guess_atom_element(u.atoms.names[1])
  print(element)  # returns element H
In the above example, we take an atom named H1 and use
:func:`guess_atom_element` to guess the element hydrogen (i.e. H). It is
important to note that element guessing is not always accurate. Indeed in cases
where the atom type is not recognised, we may end up with the wrong element.
For example::
  import MDAnalysis as mda
  from MDAnalysis.topology.guessers import guess_atom_element
  from MDAnalysisTests.datafiles import PRM19SBOPC
  u = mda.Universe(PRM19SBOPC)
  print(u.atoms.names[-1])  # returns the atom name EPW
  element = guess_atom_element(u.atoms.names[-1])
  print(element)  # returns element P
Here we find that virtual site atom 'EPW' was given the element P, which
would not be an expected result. We therefore always recommend that users
carefully check the outcomes of any guessers.
In some cases, one may want to guess elements for an entire universe and add
this guess as a topology attribute. This can be done using :func:`guess_types`
in the following manner::
  import MDAnalysis as mda
  from MDAnalysis.topology.guessers import guess_types
  from MDAnalysisTests.datafiles import PRM7
  u = mda.Universe(PRM7)
  guessed_elements = guess_types(u.atoms.names)
  u.add_TopologyAttr('elements', guessed_elements)
  print(u.atoms.elements)  # returns an array of guessed elements
More information on adding topology attributes can found in the `user guide`_.
.. Links
.. _user guide: https://www.mdanalysis.org/UserGuide/examples/constructing_universe.html#Adding-topology-attributes
"""
import numpy as np
import warnings
import re
from ..lib import distances
from . import tables
[docs]def guess_masses(atom_types):
    """Guess the mass of many atoms based upon their type
    Parameters
    ----------
    atom_types
      Type of each atom
    Returns
    -------
    atom_masses : np.ndarray dtype float64
    """
    validate_atom_types(atom_types)
    masses = np.array([get_atom_mass(atom_t) for atom_t in atom_types], dtype=np.float64)
    return masses
[docs]def validate_atom_types(atom_types):
    """Vaildates the atom types based on whether they are available in our tables
    Parameters
    ----------
    atom_types
      Type of each atom
    Returns
    -------
    None
    .. versionchanged:: 0.20.0
       Try uppercase atom type name as well
    """
    for atom_type in np.unique(atom_types):
        try:
            tables.masses[atom_type]
        except KeyError:
            try:
                tables.masses[atom_type.upper()]
            except KeyError:
                warnings.warn("Failed to guess the mass for the following atom types: {}".format(atom_type))
[docs]def guess_types(atom_names):
    """Guess the atom type of many atoms based on atom name
    Parameters
    ----------
    atom_names
      Name of each atom
    Returns
    -------
    atom_types : np.ndarray dtype object
    """
    return np.array([guess_atom_element(name) for name in atom_names], dtype=object)
[docs]def guess_atom_type(atomname):
    """Guess atom type from the name.
    At the moment, this function simply returns the element, as
    guessed by :func:`guess_atom_element`.
    See Also
    --------
    :func:`guess_atom_element`
    :mod:`MDAnalysis.topology.tables`
    """
    return guess_atom_element(atomname)
NUMBERS = re.compile(r'[0-9]') # match numbers
SYMBOLS = re.compile(r'[*+-]')  # match *, +, -
[docs]def guess_atom_element(atomname):
    """Guess the element of the atom from the name.
    Looks in dict to see if element is found, otherwise it uses the first
    character in the atomname. The table comes from CHARMM and AMBER atom
    types, where the first character is not sufficient to determine the atom
    type. Some GROMOS ions have also been added.
    .. Warning: The translation table is incomplete. This will probably result
                in some mistakes, but it still better than nothing!
    See Also
    --------
    :func:`guess_atom_type`
    :mod:`MDAnalysis.topology.tables`
    """
    if atomname == '':
        return ''
    try:
        return tables.atomelements[atomname.upper()]
    except KeyError:
        # strip symbols and numbers
        no_symbols = re.sub(SYMBOLS, '', atomname)
        name = re.sub(NUMBERS, '', no_symbols).upper()
        # just in case
        if name in tables.atomelements:
            return tables.atomelements[name]
        while name:
            if name in tables.elements:
                return name
            if name[:-1] in tables.elements:
                return name[:-1]
            if name[1:] in tables.elements:
                return name[1:]
            if len(name) <= 2:
                return name[0]
            name = name[:-1]  # probably element is on left not right
        # if it's numbers
        return no_symbols
[docs]def guess_bonds(atoms, coords, box=None, **kwargs):
    r"""Guess if bonds exist between two atoms based on their distance.
    Bond between two atoms is created, if the two atoms are within
    .. math::
          d < f \cdot (R_1 + R_2)
    of each other, where :math:`R_1` and :math:`R_2` are the VdW radii
    of the atoms and :math:`f` is an ad-hoc *fudge_factor*. This is
    the `same algorithm that VMD uses`_.
    Parameters
    ----------
    atoms : AtomGroup
         atoms for which bonds should be guessed
    coords : array
         coordinates of the atoms (i.e., `AtomGroup.positions)`)
    fudge_factor : float, optional
        The factor by which atoms must overlap eachother to be considered a
        bond.  Larger values will increase the number of bonds found. [0.55]
    vdwradii : dict, optional
        To supply custom vdwradii for atoms in the algorithm. Must be a dict
        of format {type:radii}. The default table of van der Waals radii is
        hard-coded as :data:`MDAnalysis.topology.tables.vdwradii`.  Any user
        defined vdwradii passed as an argument will supercede the table
        values. [``None``]
    lower_bound : float, optional
        The minimum bond length. All bonds found shorter than this length will
        be ignored. This is useful for parsing PDB with altloc records where
        atoms with altloc A and B maybe very close together and there should be
        no chemical bond between them. [0.1]
    box : array_like, optional
        Bonds are found using a distance search, if unit cell information is
        given, periodic boundary conditions will be considered in the distance
        search. [``None``]
    Returns
    -------
    list
        List of tuples suitable for use in Universe topology building.
    Warnings
    --------
    No check is done after the bonds are guessed to see if Lewis
    structure is correct. This is wrong and will burn somebody.
    Raises
    ------
    :exc:`ValueError` if inputs are malformed or `vdwradii` data is missing.
    .. _`same algorithm that VMD uses`:
       http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.1/ug/node26.html
    .. versionadded:: 0.7.7
    .. versionchanged:: 0.9.0
       Updated method internally to use more :mod:`numpy`, should work
       faster.  Should also use less memory, previously scaled as
       :math:`O(n^2)`.  *vdwradii* argument now augments table list
       rather than replacing entirely.
    """
    # why not just use atom.positions?
    if len(atoms) != len(coords):
        raise ValueError("'atoms' and 'coord' must be the same length")
    fudge_factor = kwargs.get('fudge_factor', 0.55)
    vdwradii = tables.vdwradii.copy()  # so I don't permanently change it
    user_vdwradii = kwargs.get('vdwradii', None)
    if user_vdwradii:  # this should make algo use their values over defaults
        vdwradii.update(user_vdwradii)
    # Try using types, then elements
    atomtypes = atoms.types
    # check that all types have a defined vdw
    if not all(val in vdwradii for val in set(atomtypes)):
        raise ValueError(("vdw radii for types: " +
                          ", ".join([t for t in set(atomtypes) if
                                     not t in vdwradii]) +
                          ". These can be defined manually using the" +
                          " keyword 'vdwradii'"))
    lower_bound = kwargs.get('lower_bound', 0.1)
    if box is not None:
        box = np.asarray(box)
    # to speed up checking, calculate what the largest possible bond
    # atom that would warrant attention.
    # then use this to quickly mask distance results later
    max_vdw = max([vdwradii[t] for t in atomtypes])
    bonds = []
    pairs, dist = distances.self_capped_distance(coords,
                                                 max_cutoff=2.0*max_vdw,
                                                 min_cutoff=lower_bound,
                                                 box=box)
    for idx, (i, j) in enumerate(pairs):
        d = (vdwradii[atomtypes[i]] + vdwradii[atomtypes[j]])*fudge_factor
        if (dist[idx] < d):
            bonds.append((atoms[i].index, atoms[j].index))
    return tuple(bonds)
[docs]def guess_angles(bonds):
    """Given a list of Bonds, find all angles that exist between atoms.
    Works by assuming that if atoms 1 & 2 are bonded, and 2 & 3 are bonded,
    then (1,2,3) must be an angle.
    Returns
    -------
    list of tuples
        List of tuples defining the angles.
        Suitable for use in u._topology
    See Also
    --------
    :meth:`guess_bonds`
    .. versionadded 0.9.0
    """
    angles_found = set()
    for b in bonds:
        for atom in b:
            other_a = b.partner(atom)  # who's my friend currently in Bond
            for other_b in atom.bonds:
                if other_b != b:  # if not the same bond I start as
                    third_a = other_b.partner(atom)
                    desc = tuple([other_a.index, atom.index, third_a.index])
                    if desc[0] > desc[-1]:  # first index always less than last
                        desc = desc[::-1]
                    angles_found.add(desc)
    return tuple(angles_found)
[docs]def guess_dihedrals(angles):
    """Given a list of Angles, find all dihedrals that exist between atoms.
    Works by assuming that if (1,2,3) is an angle, and 3 & 4 are bonded,
    then (1,2,3,4) must be a dihedral.
    Returns
    -------
    list of tuples
        List of tuples defining the dihedrals.
        Suitable for use in u._topology
    .. versionadded 0.9.0
    """
    dihedrals_found = set()
    for b in angles:
        a_tup = tuple([a.index for a in b])  # angle as tuple of numbers
        # if searching with b[0], want tuple of (b[2], b[1], b[0], +new)
        # search the first and last atom of each angle
        for atom, prefix in zip([b.atoms[0], b.atoms[-1]],
                                [a_tup[::-1], a_tup]):
            for other_b in atom.bonds:
                if not other_b.partner(atom) in b:
                    third_a = other_b.partner(atom)
                    desc = prefix + (third_a.index,)
                    if desc[0] > desc[-1]:
                        desc = desc[::-1]
                    dihedrals_found.add(desc)
    return tuple(dihedrals_found)
[docs]def guess_improper_dihedrals(angles):
    """Given a list of Angles, find all improper dihedrals that exist between
    atoms.
    Works by assuming that if (1,2,3) is an angle, and 2 & 4 are bonded,
    then (2, 1, 3, 4) must be an improper dihedral.
    ie the improper dihedral is the angle between the planes formed by
    (1, 2, 3) and (1, 3, 4)
    Returns
    -------
        List of tuples defining the improper dihedrals.
        Suitable for use in u._topology
    .. versionadded 0.9.0
    """
    dihedrals_found = set()
    for b in angles:
        atom = b[1]  # select middle atom in angle
        # start of improper tuple
        a_tup = tuple([b[a].index for a in [1, 2, 0]])
        # if searching with b[1], want tuple of (b[1], b[2], b[0], +new)
        # search the first and last atom of each angle
        for other_b in atom.bonds:
            other_atom = other_b.partner(atom)
            # if this atom isn't in the angle I started with
            if not other_atom in b:
                desc = a_tup + (other_atom.index,)
                if desc[0] > desc[-1]:
                    desc = desc[::-1]
                dihedrals_found.add(desc)
    return tuple(dihedrals_found)
[docs]def get_atom_mass(element):
    """Return the atomic mass in u for *element*.
    Masses are looked up in :data:`MDAnalysis.topology.tables.masses`.
    .. Warning:: Unknown masses are set to 0.0
    .. versionchanged:: 0.20.0
       Try uppercase atom type name as well
    """
    try:
        return tables.masses[element]
    except KeyError:
        try:
            return tables.masses[element.upper()]
        except KeyError:
            return 0.0
[docs]def guess_atom_mass(atomname):
    """Guess a mass based on the atom name.
    :func:`guess_atom_element` is used to determine the kind of atom.
    .. warning:: Anything not recognized is simply set to 0; if you rely on the
                 masses you might want to double check.
    """
    return get_atom_mass(guess_atom_element(atomname))
[docs]def guess_atom_charge(atomname):
    """Guess atom charge from the name.
    .. Warning:: Not implemented; simply returns 0.
    """
    # TODO: do something slightly smarter, at least use name/element
    return 0.0
[docs]def guess_aromaticities(atomgroup):
    """Guess aromaticity of atoms using RDKit
    Parameters
    ----------
    atomgroup : mda.core.groups.AtomGroup
        Atoms for which the aromaticity will be guessed
    Returns
    -------
    aromaticities : numpy.ndarray
        Array of boolean values for the aromaticity of each atom
    .. versionadded:: 2.0.0
    """
    mol = atomgroup.convert_to("RDKIT")
    atoms = sorted(mol.GetAtoms(),
                   key=lambda a: a.GetIntProp("_MDAnalysis_index"))
    return np.array([atom.GetIsAromatic() for atom in atoms])
[docs]def guess_gasteiger_charges(atomgroup):
    """Guess Gasteiger partial charges using RDKit
    Parameters
    ----------
    atomgroup : mda.core.groups.AtomGroup
        Atoms for which the charges will be guessed
    Returns
    -------
    charges : numpy.ndarray
        Array of float values representing the charge of each atom
    .. versionadded:: 2.0.0
    """
    mol = atomgroup.convert_to("RDKIT")
    from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges
    ComputeGasteigerCharges(mol, throwOnParamFailure=True)
    atoms = sorted(mol.GetAtoms(),
                   key=lambda a: a.GetIntProp("_MDAnalysis_index"))
    return np.array([atom.GetDoubleProp("_GasteigerCharge") for atom in atoms],
                    dtype=np.float32)