Source code for MDAnalysis.core.topology
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""\
Core Topology object --- :mod:`MDAnalysis.core.topology`
========================================================
.. versionadded:: 0.16.0
:class:`Topology` is the core object that holds all topology information.
TODO: Add in-depth discussion.
Notes
-----
For developers: In MDAnalysis 0.16.0 this new topology system was
introduced and discussed as issue `#363`_; this issue contains key
information and discussions on the new system. The issue number *363*
is also being used as a short-hand in discussions to refer to the new
topology system.
.. _`#363`: https://github.com/MDAnalysis/mdanalysis/issues/363
Classes
-------
.. autoclass:: Topology
   :members:
.. autoclass:: TransTable
   :members:
Helper functions
----------------
.. autofunction:: make_downshift_arrays
"""
import contextlib
import numpy as np
import typing
from .topologyattrs import Atomindices, Resindices, Segindices
from ..exceptions import NoDataError
[docs]
def make_downshift_arrays(upshift, nparents):
    """From an upwards translation table, create the opposite direction
    Turns a many to one mapping (eg atoms to residues) to a one to many mapping
    (residues to atoms)
    Parameters
    ----------
    upshift : array_like
        Array of integers describing which parent each item belongs to
    nparents : integer
        Total number of parents that exist.
    Returns
    -------
    downshift : array_like (dtype object)
        An array of arrays, each containing the indices of the children
        of each parent.  Length `nparents` + 1
    Examples
    --------
    To find the residue to atom mappings for a given atom to residue mapping:
    >>> import numpy as np
    >>> import MDAnalysis as mda
    >>> from MDAnalysis.core.topology import make_downshift_arrays
    >>> atom2res = np.array([0, 1, 0, 2, 2, 0, 2])
    >>> make_downshift_arrays(atom2res, 3)
    array([array([0, 2, 5]), array([1]), array([3, 4, 6]), None], dtype=object)
    Entry 0 corresponds to residue 0 and says that this contains atoms 0, 2 & 5
    Notes
    -----
    The final entry in the return array will be ``None`` to ensure that the
    dtype of the array is :class:`object`.
    .. warning:: This means negative indexing should **never**
                 be used with these arrays.
    """
    if not len(upshift):
        return np.array([], dtype=object)
    # mergesort for a stable ordered array for the same value.
    order = np.argsort(upshift, kind="mergesort")
    upshift_sorted = upshift[order]
    u_values, indices = np.unique(upshift_sorted, return_index=True)
    # reset nparents to the larger one between input and heuristic from data
    # This is useful for creating empty Universe where default value is 1.
    nparents = np.max([nparents, u_values.max() + 1])
    residue_indices = np.zeros(nparents, dtype=int)
    missing_resids = np.sort(np.setdiff1d(np.arange(nparents), u_values))
    indices = np.append(indices, upshift_sorted.shape[0])
    residue_indices[u_values] = indices[1:]
    for missing_resid in missing_resids:
        if missing_resid == 0:
            residue_indices[missing_resid] = 0
        else:
            residue_indices[missing_resid] = residue_indices[missing_resid - 1]
    downshift = np.split(order, residue_indices[:-1])
    # Add None to end of array to force it to be of type Object
    # Without this, a rectangular array gets squashed into a single array
    downshift.append(None)
    return np.array(downshift, dtype=object)
[docs]
class TransTable(object):
    """Membership tables with methods to translate indices across levels.
    There are three levels; Atom, Residue and Segment.  Each Atom **must**
    belong in a Residue, each Residue **must** belong to a Segment.
    When translating upwards, eg finding which Segment a Residue belongs in,
    a single numpy array is returned.  When translating downwards, two options
    are available; a concatenated result (suffix `_1`) or a list for each parent
    object (suffix `_2d`).
    Parameters
    ----------
    n_atoms : int
        number of atoms in topology
    n_residues : int
        number of residues in topology
    n_segments : int
        number of segments in topology
    atom_resindex : 1-D array
        resindex for each atom in the topology; the number of unique values in
        this array must be <= `n_residues`, and the array must be length
        `n_atoms`; giving None defaults to placing all atoms in residue 0
    residue_segindex : 1-D array
        segindex for each residue in the topology; the number of unique values
        in this array must be <= `n_segments`, and the array must be length
        `n_residues`; giving None defaults to placing all residues in segment 0
    Attributes
    ----------
    n_atoms : int
        number of atoms in topology
    n_residues : int
        number of residues in topology
    n_segments : int
        number of segments in topology
    size : tuple
        tuple ``(n_atoms, n_residues, n_segments)`` describing the shape of
        the TransTable
    .. versionchanged:: 2.3.0
        Lazy building RA and SR.
    """
    def __init__(
        self,
        n_atoms,
        n_residues,
        n_segments,  # Size of tables
        atom_resindex=None,
        residue_segindex=None,  # Contents of tables
    ):
        self.n_atoms = n_atoms
        self.n_residues = n_residues
        self.n_segments = n_segments
        # built atom-to-residue mapping, and vice-versa
        if atom_resindex is None:
            self._AR = np.zeros(n_atoms, dtype=np.intp)
        else:
            self._AR = np.asarray(atom_resindex, dtype=np.intp).copy()
            if not len(self._AR) == n_atoms:
                raise ValueError("atom_resindex must be len n_atoms")
        self._RA = None
        # built residue-to-segment mapping, and vice-versa
        if residue_segindex is None:
            self._RS = np.zeros(n_residues, dtype=np.intp)
        else:
            self._RS = np.asarray(residue_segindex, dtype=np.intp).copy()
            if not len(self._RS) == n_residues:
                raise ValueError("residue_segindex must be len n_residues")
        self._SR = None
[docs]
    def copy(self):
        """Return a deepcopy of this Transtable"""
        return self.__class__(
            self.n_atoms,
            self.n_residues,
            self.n_segments,
            atom_resindex=self._AR,
            residue_segindex=self._RS,
        )
    @property
    def RA(self):
        if self._RA is None:
            self._RA = make_downshift_arrays(self._AR, self.n_residues)
        return self._RA
    @property
    def SR(self):
        if self._SR is None:
            self._SR = make_downshift_arrays(self._RS, self.n_segments)
        return self._SR
    @property
    def size(self):
        """The shape of the table, ``(n_atoms, n_residues, n_segments)``.
        :meta private:
        """
        return (self.n_atoms, self.n_residues, self.n_segments)
[docs]
    def atoms2residues(self, aix):
        """Get residue indices for each atom.
        Parameters
        ----------
        aix : array
            atom indices
        Returns
        -------
        rix : array
            residue index for each atom
        """
        return self._AR[aix]
[docs]
    def residues2atoms_1d(self, rix):
        """Get atom indices collectively represented by given residue indices.
        Parameters
        ----------
        rix : array
            residue indices
        Returns
        -------
        aix : array
            indices of atoms present in residues, collectively
        """
        RA = self.RA
        try:
            return np.concatenate(RA[rix])
        except ValueError:  # rix is not iterable or empty
            # don't accidentally return a view!
            return RA[rix].astype(np.intp, copy=True)
[docs]
    def residues2atoms_2d(self, rix):
        """Get atom indices represented by each residue index.
        Parameters
        ----------
        rix : array
            residue indices
        Returns
        -------
        raix : list
            each element corresponds to a residue index, in order given in
            `rix`, with each element being an array of the atom indices present
            in that residue
        """
        RA = self.RA
        try:
            return [RA[r].copy() for r in rix]
        except TypeError:
            return [RA[rix].copy()]  # why would this be singular for 2d?
[docs]
    def residues2segments(self, rix):
        """Get segment indices for each residue.
        Parameters
        ----------
        rix : array
            residue indices
        Returns
        -------
        six : array
            segment index for each residue
        """
        return self._RS[rix]
[docs]
    def segments2residues_1d(self, six):
        """Get residue indices collectively represented by given segment indices
        Parameters
        ----------
        six : array
            segment indices
        Returns
        -------
        rix : array
            sorted indices of residues present in segments, collectively
        """
        SR = self.SR
        try:
            return np.concatenate(SR[six])
        except ValueError:  # six is not iterable or empty
            # don't accidentally return a view!
            return SR[six].astype(np.intp, copy=True)
[docs]
    def segments2residues_2d(self, six):
        """Get residue indices represented by each segment index.
        Parameters
        ----------
        six : array
            residue indices
        Returns
        -------
        srix : list
            each element corresponds to a segment index, in order given in
            `six`, with each element being an array of the residue indices
            present in that segment
        """
        SR = self.SR
        try:
            return [SR[s].copy() for s in six]
        except TypeError:
            return [SR[six].copy()]
    # Compound moves, does 2 translations
[docs]
    def atoms2segments(self, aix):
        """Get segment indices for each atom.
        Parameters
        ----------
        aix : array
            atom indices
        Returns
        -------
        rix : array
            segment index for each atom
        """
        rix = self.atoms2residues(aix)
        return self.residues2segments(rix)
[docs]
    def segments2atoms_1d(self, six):
        """Get atom indices collectively represented by given segment indices.
        Parameters
        ----------
        six : array
            segment indices
        Returns
        -------
        aix : array
            sorted indices of atoms present in segments, collectively
        """
        rix = self.segments2residues_1d(six)
        return self.residues2atoms_1d(rix)
[docs]
    def segments2atoms_2d(self, six):
        """Get atom indices represented by each segment index.
        Parameters
        ----------
        six : array
            residue indices
        Returns
        -------
        saix : list
            each element corresponds to a segment index, in order given in
            `six`, with each element being an array of the atom indices present
            in that segment
        """
        # residues in EACH
        rixs = self.segments2residues_2d(six)
        return [self.residues2atoms_1d(rix) for rix in rixs]
    # Move between different groups.
[docs]
    def move_atom(self, aix, rix):
        """Move aix to be in rix"""
        self._AR[aix] = rix
        self._RA = None
[docs]
    def move_residue(self, rix, six):
        """Move rix to be in six"""
        self._RS[rix] = six
        self._SR = None
    def add_Residue(self, segidx):
        # segidx - index of parent
        self.n_residues += 1
        self._RA = None
        self._RS = np.concatenate([self._RS, np.array([segidx])])
        self._SR = None
        return self.n_residues - 1
    def add_Segment(self):
        self.n_segments += 1
        self._SR = None
        return self.n_segments - 1
    def __getstate__(self):
        # don't serialize _RA and _SR for performance.
        attrs = self.__dict__
        attrs["_RA"] = None
        attrs["_SR"] = None
        return attrs
[docs]
class Topology(object):
    """In-memory, array-based topology database.
    The topology model of MDanalysis features atoms, which must each be a
    member of one residue. Each residue, in turn, must be a member of one
    segment. The details of maintaining this heirarchy, and mappings of atoms
    to residues, residues to segments, and vice-versa, are handled internally
    by this object.
    """
    def __init__(
        self,
        n_atoms=1,
        n_res=1,
        n_seg=1,
        attrs=None,
        atom_resindex=None,
        residue_segindex=None,
    ):
        """
        Parameters
        ----------
        n_atoms : int
            number of atoms in topology. Must be larger then 1 at each level
        n_residues : int
            number of residues in topology. Must be larger then 1 at each level
        n_segments : int
            number of segments in topology. Must be larger then 1 at each level
        attrs : TopologyAttr objects
            components of the topology to be included
        atom_resindex : array
            1-D array giving the resindex of each atom in the system
        residue_segindex : array
            1-D array giving the segindex of each residue in the system
        """
        self.tt = TransTable(
            n_atoms,
            n_res,
            n_seg,
            atom_resindex=atom_resindex,
            residue_segindex=residue_segindex,
        )
        if attrs is None:
            attrs = []
        # add core TopologyAttrs that give access to indices
        attrs.extend((Atomindices(), Resindices(), Segindices()))
        # attach the TopologyAttrs
        self.attrs = []
        for topologyattr in attrs:
            self.add_TopologyAttr(topologyattr)
[docs]
    def copy(self):
        """Return a deepcopy of this Topology"""
        new = self.__class__(1, 1, 1)
        # copy the tt
        new.tt = self.tt.copy()
        # remove indices
        for attr in self.attrs:
            if isinstance(attr, (Atomindices, Resindices, Segindices)):
                continue
            new.add_TopologyAttr(attr.copy())
        return new
    @property
    def n_atoms(self):
        return self.tt.n_atoms
    @property
    def n_residues(self):
        return self.tt.n_residues
    @property
    def n_segments(self):
        return self.tt.n_segments
[docs]
    def add_TopologyAttr(self, topologyattr):
        """Add a new TopologyAttr to the Topology.
        Parameters
        ----------
        topologyattr : TopologyAttr
        """
        self.attrs.append(topologyattr)
        topologyattr.top = self
        self.__setattr__(topologyattr.attrname, topologyattr)
[docs]
    def del_TopologyAttr(self, topologyattr):
        """Remove a TopologyAttr from the Topology.
        If it is not present, nothing happens.
        Parameters
        ----------
        topologyattr : TopologyAttr
        .. versionadded:: 2.0.0
        """
        self.__delattr__(topologyattr.attrname)
        self.attrs.remove(topologyattr)
    @property
    def guessed_attributes(self):
        """A list of the guessed attributes in this topology"""
        return filter(
            lambda x: (
                x.is_guessed
                if (not isinstance(x.is_guessed, typing.Container))
                else True in x.is_guessed
            ),
            self.attrs,
        )
    @property
    def read_attributes(self):
        """A list of the attributes read from the topology"""
        return filter(
            lambda x: (
                not x.is_guessed
                if (not isinstance(x.is_guessed, typing.Container))
                else False in x.is_guessed
            ),
            self.attrs,
        )
[docs]
    def add_Residue(self, segment, **new_attrs):
        """
        Returns
        -------
        residx of the new Residue
        Raises
        ------
        NoDataError
          If not all data was provided.  This error is raised before any changes
        .. versionchanged:: 2.1.0
           Added use of _add_new to TopologyAttr resize
        """
        # Check that all data is here before making any changes
        for attr in self.attrs:
            if not attr.per_object == "residue":
                continue
            if attr.singular not in new_attrs:
                missing = (
                    attr.singular
                    for attr in self.attrs
                    if (
                        attr.per_object == "residue"
                        and attr.singular not in new_attrs
                    )
                )
                raise NoDataError(
                    "Missing the following attributes for the new"
                    " Residue: {}".format(", ".join(missing))
                )
        # Resize topology table
        residx = self.tt.add_Residue(segment.segindex)
        # Add new value to each attribute
        for attr in self.attrs:
            if not attr.per_object == "residue":
                continue
            newval = new_attrs[attr.singular]
            attr._add_new(newval)
        return residx
[docs]
    def add_Segment(self, **new_attrs):
        """Adds a new Segment to the Topology
        Parameters
        ----------
        new_attrs : dict
          the new attributes for the new segment, eg {'segid': 'B'}
        Raises
        -------
        NoDataError
          if an attribute wasn't specified.
        Returns
        -------
        ix : int
          the idx of the new segment
        .. versionchanged:: 2.1.0
           Added use of _add_new to resize topology attrs
        """
        for attr in self.attrs:
            if attr.per_object == "segment":
                if attr.singular not in new_attrs:
                    missing = (
                        attr.singular
                        for attr in self.attrs
                        if (
                            attr.per_object == "segment"
                            and attr.singular not in new_attrs
                        )
                    )
                    raise NoDataError(
                        "Missing the following attributes for the"
                        " new Segment: {}"
                        "".format(", ".join(missing))
                    )
        segidx = self.tt.add_Segment()
        for attr in self.attrs:
            if not attr.per_object == "segment":
                continue
            newval = new_attrs[attr.singular]
            attr._add_new(newval)
        return segidx