Source code for MDAnalysis.core.topology

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

"""\
Core Topology object --- :mod:`MDAnalysis.core.topology`
========================================================

.. versionadded:: 0.16.0

:class:`Topology` is the core object that holds all topology information.

TODO: Add in-depth discussion.

Notes
-----
For developers: In MDAnalysis 0.16.0 this new topology system was
introduced and discussed as issue `#363`_; this issue contains key
information and discussions on the new system. The issue number *363*
is also being used as a short-hand in discussions to refer to the new
topology system.


.. _`#363`: https://github.com/MDAnalysis/mdanalysis/issues/363

Classes
-------

.. autoclass:: Topology
   :members:
.. autoclass:: TransTable
   :members:

Helper functions
----------------

.. autofunction:: make_downshift_arrays

"""
import contextlib

import numpy as np
import typing

from .topologyattrs import Atomindices, Resindices, Segindices
from ..exceptions import NoDataError


[docs]def make_downshift_arrays(upshift, nparents):
    """From an upwards translation table, create the opposite direction

    Turns a many to one mapping (eg atoms to residues) to a one to many mapping
    (residues to atoms)

    Parameters
    ----------
    upshift : array_like
        Array of integers describing which parent each item belongs to
    nparents : integer
        Total number of parents that exist.

    Returns
    -------
    downshift : array_like (dtype object)
        An array of arrays, each containing the indices of the children
        of each parent.  Length `nparents` + 1

    Examples
    --------

    To find the residue to atom mappings for a given atom to residue mapping:

    >>> import numpy as np
    >>> import MDAnalysis as mda
    >>> from MDAnalysis.core.topology import make_downshift_arrays
    >>> atom2res = np.array([0, 1, 0, 2, 2, 0, 2])
    >>> make_downshift_arrays(atom2res, 3)
    array([array([0, 2, 5]), array([1]), array([3, 4, 6]), None], dtype=object)

    Entry 0 corresponds to residue 0 and says that this contains atoms 0, 2 & 5

    Notes
    -----
    The final entry in the return array will be ``None`` to ensure that the
    dtype of the array is :class:`object`.

    .. warning:: This means negative indexing should **never**
                 be used with these arrays.
    """
    if not len(upshift):
        return np.array([], dtype=object)

    # mergesort for a stable ordered array for the same value.
    order = np.argsort(upshift, kind="mergesort")

    upshift_sorted = upshift[order]
    u_values, indices = np.unique(upshift_sorted, return_index=True)

    # reset nparents to the larger one between input and heuristic from data
    # This is useful for creating empty Universe where default value is 1.
    nparents = np.max([nparents, u_values.max()+1])
    residue_indices = np.zeros(nparents, dtype=int)
    missing_resids = np.sort(np.setdiff1d(np.arange(nparents), u_values))
    indices = np.append(indices, upshift_sorted.shape[0])

    residue_indices[u_values] = indices[1:]

    for missing_resid in missing_resids:
        if missing_resid == 0:
            residue_indices[missing_resid] = 0
        else:
            residue_indices[missing_resid] = residue_indices[missing_resid-1]

    downshift = np.split(order, residue_indices[:-1])
    # Add None to end of array to force it to be of type Object
    # Without this, a rectangular array gets squashed into a single array
    downshift.append(None)
    return np.array(downshift, dtype=object)


[docs]class TransTable(object):
    """Membership tables with methods to translate indices across levels.

    There are three levels; Atom, Residue and Segment.  Each Atom **must**
    belong in a Residue, each Residue **must** belong to a Segment.

    When translating upwards, eg finding which Segment a Residue belongs in,
    a single numpy array is returned.  When translating downwards, two options
    are available; a concatenated result (suffix `_1`) or a list for each parent
    object (suffix `_2d`).

    Parameters
    ----------
    n_atoms : int
        number of atoms in topology
    n_residues : int
        number of residues in topology
    n_segments : int
        number of segments in topology
    atom_resindex : 1-D array
        resindex for each atom in the topology; the number of unique values in
        this array must be <= `n_residues`, and the array must be length
        `n_atoms`; giving None defaults to placing all atoms in residue 0
    residue_segindex : 1-D array
        segindex for each residue in the topology; the number of unique values
        in this array must be <= `n_segments`, and the array must be length
        `n_residues`; giving None defaults to placing all residues in segment 0


    Attributes
    ----------
    n_atoms : int
        number of atoms in topology
    n_residues : int
        number of residues in topology
    n_segments : int
        number of segments in topology
    size : tuple
        tuple ``(n_atoms, n_residues, n_segments)`` describing the shape of
        the TransTable

    .. versionchanged:: 2.3.0
        Lazy building RA and SR.
    """
    def __init__(self,
                 n_atoms, n_residues, n_segments,  # Size of tables
                 atom_resindex=None, residue_segindex=None,  # Contents of tables
                 ):
        self.n_atoms = n_atoms
        self.n_residues = n_residues
        self.n_segments = n_segments

        # built atom-to-residue mapping, and vice-versa
        if atom_resindex is None:
            self._AR = np.zeros(n_atoms, dtype=np.intp)
        else:
            self._AR = np.asarray(atom_resindex, dtype=np.intp).copy()
            if not len(self._AR) == n_atoms:
                raise ValueError("atom_resindex must be len n_atoms")
        self._RA = None

        # built residue-to-segment mapping, and vice-versa
        if residue_segindex is None:
            self._RS = np.zeros(n_residues, dtype=np.intp)
        else:
            self._RS = np.asarray(residue_segindex, dtype=np.intp).copy()
            if not len(self._RS) == n_residues:
                raise ValueError("residue_segindex must be len n_residues")
        self._SR = None

[docs]    def copy(self):
        """Return a deepcopy of this Transtable"""
        return self.__class__(self.n_atoms, self.n_residues, self.n_segments,
                              atom_resindex=self._AR, residue_segindex=self._RS)

    @property
    def RA(self):
        if self._RA is None:
            self._RA = make_downshift_arrays(self._AR,
                                             self.n_residues)
        return self._RA

    @property
    def SR(self):
        if self._SR is None:
            self._SR = make_downshift_arrays(self._RS,
                                             self.n_segments)
        return self._SR

    @property
    def size(self):
        """The shape of the table, ``(n_atoms, n_residues, n_segments)``.

        :meta private:
        """
        return (self.n_atoms, self.n_residues, self.n_segments)

[docs]    def atoms2residues(self, aix):
        """Get residue indices for each atom.

        Parameters
        ----------
        aix : array
            atom indices

        Returns
        -------
        rix : array
            residue index for each atom

        """
        return self._AR[aix]

[docs]    def residues2atoms_1d(self, rix):
        """Get atom indices collectively represented by given residue indices.

        Parameters
        ----------
        rix : array
            residue indices

        Returns
        -------
        aix : array
            indices of atoms present in residues, collectively

        """
        RA = self.RA
        try:
            return np.concatenate(RA[rix])
        except ValueError:  # rix is not iterable or empty
            # don't accidentally return a view!
            return RA[rix].astype(np.intp, copy=True)

[docs]    def residues2atoms_2d(self, rix):
        """Get atom indices represented by each residue index.

        Parameters
        ----------
        rix : array
            residue indices

        Returns
        -------
        raix : list
            each element corresponds to a residue index, in order given in
            `rix`, with each element being an array of the atom indices present
            in that residue

        """
        RA = self.RA
        try:
            return [RA[r].copy() for r in rix]
        except TypeError:
            return [RA[rix].copy()]  # why would this be singular for 2d?

[docs]    def residues2segments(self, rix):
        """Get segment indices for each residue.

        Parameters
        ----------
        rix : array
            residue indices

        Returns
        -------
        six : array
            segment index for each residue

        """
        return self._RS[rix]

[docs]    def segments2residues_1d(self, six):
        """Get residue indices collectively represented by given segment indices

        Parameters
        ----------
        six : array
            segment indices

        Returns
        -------
        rix : array
            sorted indices of residues present in segments, collectively

        """
        SR = self.SR
        try:
            return np.concatenate(SR[six])
        except ValueError:  # six is not iterable or empty
            # don't accidentally return a view!
            return SR[six].astype(np.intp, copy=True)

[docs]    def segments2residues_2d(self, six):
        """Get residue indices represented by each segment index.

        Parameters
        ----------
        six : array
            residue indices

        Returns
        -------
        srix : list
            each element corresponds to a segment index, in order given in
            `six`, with each element being an array of the residue indices
            present in that segment

        """
        SR = self.SR
        try:
            return [SR[s].copy() for s in six]
        except TypeError:
            return [SR[six].copy()]

    # Compound moves, does 2 translations
[docs]    def atoms2segments(self, aix):
        """Get segment indices for each atom.

        Parameters
        ----------
        aix : array
            atom indices

        Returns
        -------
        rix : array
            segment index for each atom

        """
        rix = self.atoms2residues(aix)
        return self.residues2segments(rix)

[docs]    def segments2atoms_1d(self, six):
        """Get atom indices collectively represented by given segment indices.

        Parameters
        ----------
        six : array
            segment indices

        Returns
        -------
        aix : array
            sorted indices of atoms present in segments, collectively

        """
        rix = self.segments2residues_1d(six)
        return self.residues2atoms_1d(rix)

[docs]    def segments2atoms_2d(self, six):
        """Get atom indices represented by each segment index.

        Parameters
        ----------
        six : array
            residue indices

        Returns
        -------
        saix : list
            each element corresponds to a segment index, in order given in
            `six`, with each element being an array of the atom indices present
            in that segment

        """
        # residues in EACH
        rixs = self.segments2residues_2d(six)
        return [self.residues2atoms_1d(rix) for rix in rixs]

    # Move between different groups.
[docs]    def move_atom(self, aix, rix):
        """Move aix to be in rix"""
        self._AR[aix] = rix
        self._RA = None

[docs]    def move_residue(self, rix, six):
        """Move rix to be in six"""
        self._RS[rix] = six
        self._SR = None

    def add_Residue(self, segidx):
        # segidx - index of parent
        self.n_residues += 1
        self._RA = None
        self._RS = np.concatenate([self._RS, np.array([segidx])])
        self._SR = None


        return self.n_residues - 1

    def add_Segment(self):
        self.n_segments += 1
        self._SR = None
        return self.n_segments - 1

    def __getstate__(self):
        # don't serialize _RA and _SR for performance.
        attrs = self.__dict__
        attrs['_RA'] = None
        attrs['_SR'] = None
        return attrs


[docs]class Topology(object):
    """In-memory, array-based topology database.

    The topology model of MDanalysis features atoms, which must each be a
    member of one residue. Each residue, in turn, must be a member of one
    segment. The details of maintaining this heirarchy, and mappings of atoms
    to residues, residues to segments, and vice-versa, are handled internally
    by this object.

    """

    def __init__(self, n_atoms=1, n_res=1, n_seg=1,
                 attrs=None,
                 atom_resindex=None,
                 residue_segindex=None):
        """
        Parameters
        ----------
        n_atoms : int
            number of atoms in topology. Must be larger then 1 at each level
        n_residues : int
            number of residues in topology. Must be larger then 1 at each level
        n_segments : int
            number of segments in topology. Must be larger then 1 at each level
        attrs : TopologyAttr objects
            components of the topology to be included
        atom_resindex : array
            1-D array giving the resindex of each atom in the system
        residue_segindex : array
            1-D array giving the segindex of each residue in the system

        """
        self.tt = TransTable(n_atoms, n_res, n_seg,
                             atom_resindex=atom_resindex,
                             residue_segindex=residue_segindex)

        if attrs is None:
            attrs = []
        # add core TopologyAttrs that give access to indices
        attrs.extend((Atomindices(), Resindices(), Segindices()))

        # attach the TopologyAttrs
        self.attrs = []
        for topologyattr in attrs:
            self.add_TopologyAttr(topologyattr)

[docs]    def copy(self):
        """Return a deepcopy of this Topology"""
        new = self.__class__(1, 1, 1)
        # copy the tt
        new.tt = self.tt.copy()
        # remove indices
        for attr in self.attrs:
            if isinstance(attr, (Atomindices, Resindices, Segindices)):
                continue
            new.add_TopologyAttr(attr.copy())
        return new

    @property
    def n_atoms(self):
        return self.tt.n_atoms

    @property
    def n_residues(self):
        return self.tt.n_residues

    @property
    def n_segments(self):
        return self.tt.n_segments

[docs]    def add_TopologyAttr(self, topologyattr):
        """Add a new TopologyAttr to the Topology.

        Parameters
        ----------
        topologyattr : TopologyAttr

        """
        self.attrs.append(topologyattr)
        topologyattr.top = self
        self.__setattr__(topologyattr.attrname, topologyattr)

[docs]    def del_TopologyAttr(self, topologyattr):
        """Remove a TopologyAttr from the Topology.

        If it is not present, nothing happens.

        Parameters
        ----------
        topologyattr : TopologyAttr


        .. versionadded:: 2.0.0
        """
        self.__delattr__(topologyattr.attrname)
        self.attrs.remove(topologyattr)

    @property
    def guessed_attributes(self):
        """A list of the guessed attributes in this topology"""
        return filter(lambda x: x.is_guessed
                      if(not isinstance(x.is_guessed, typing.Container))
                      else True in x.is_guessed, self.attrs)

    @property
    def read_attributes(self):
        """A list of the attributes read from the topology"""
        return filter(lambda x: not x.is_guessed
                      if(not isinstance(x.is_guessed, typing.Container))
                      else False in x.is_guessed, self.attrs)

[docs]    def add_Residue(self, segment, **new_attrs):
        """
        Returns
        -------
        residx of the new Residue

        Raises
        ------
        NoDataError
          If not all data was provided.  This error is raised before any changes


        .. versionchanged:: 2.1.0
           Added use of _add_new to TopologyAttr resize
        """
        # Check that all data is here before making any changes
        for attr in self.attrs:
            if not attr.per_object == 'residue':
                continue
            if attr.singular not in new_attrs:
                missing = (attr.singular for attr in self.attrs
                           if (attr.per_object == 'residue' and
                               attr.singular not in new_attrs))
                raise NoDataError("Missing the following attributes for the new"
                                  " Residue: {}".format(', '.join(missing)))

        # Resize topology table
        residx = self.tt.add_Residue(segment.segindex)

        # Add new value to each attribute
        for attr in self.attrs:
            if not attr.per_object == 'residue':
                continue
            newval = new_attrs[attr.singular]
            attr._add_new(newval)

        return residx

[docs]    def add_Segment(self, **new_attrs):
        """Adds a new Segment to the Topology

        Parameters
        ----------
        new_attrs : dict
          the new attributes for the new segment, eg {'segid': 'B'}

        Raises
        -------
        NoDataError
          if an attribute wasn't specified.

        Returns
        -------
        ix : int
          the idx of the new segment


        .. versionchanged:: 2.1.0
           Added use of _add_new to resize topology attrs
        """
        for attr in self.attrs:
            if attr.per_object == 'segment':
                if attr.singular not in new_attrs:
                    missing = (attr.singular for attr in self.attrs
                               if (attr.per_object == 'segment' and
                                   attr.singular not in new_attrs))
                    raise NoDataError("Missing the following attributes for the"
                                      " new Segment: {}"
                                      "".format(', '.join(missing)))

        segidx = self.tt.add_Segment()

        for attr in self.attrs:
            if not attr.per_object == 'segment':
                continue
            newval = new_attrs[attr.singular]
            attr._add_new(newval)

        return segidx