Source code for MDAnalysis.core.topology
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""\
Core Topology object --- :mod:`MDAnalysis.core.topology`
========================================================
.. versionadded:: 0.16.0
:class:`Topology` is the core object that holds all topology information.
TODO: Add in-depth discussion.
Notes
-----
For developers: In MDAnalysis 0.16.0 this new topology system was
introduced and discussed as issue `#363`_; this issue contains key
information and discussions on the new system. The issue number *363*
is also being used as a short-hand in discussions to refer to the new
topology system.
.. _`#363`: https://github.com/MDAnalysis/mdanalysis/issues/363
Classes
-------
.. autoclass:: Topology
:members:
.. autoclass:: TransTable
:members:
Helper functions
----------------
.. autofunction:: make_downshift_arrays
"""
import contextlib
import numpy as np
import typing
from .topologyattrs import Atomindices, Resindices, Segindices
from ..exceptions import NoDataError
[docs]
def make_downshift_arrays(upshift, nparents):
"""From an upwards translation table, create the opposite direction
Turns a many to one mapping (eg atoms to residues) to a one to many mapping
(residues to atoms)
Parameters
----------
upshift : array_like
Array of integers describing which parent each item belongs to
nparents : integer
Total number of parents that exist.
Returns
-------
downshift : array_like (dtype object)
An array of arrays, each containing the indices of the children
of each parent. Length `nparents` + 1
Examples
--------
To find the residue to atom mappings for a given atom to residue mapping:
>>> import numpy as np
>>> import MDAnalysis as mda
>>> from MDAnalysis.core.topology import make_downshift_arrays
>>> atom2res = np.array([0, 1, 0, 2, 2, 0, 2])
>>> make_downshift_arrays(atom2res, 3)
array([array([0, 2, 5]), array([1]), array([3, 4, 6]), None], dtype=object)
Entry 0 corresponds to residue 0 and says that this contains atoms 0, 2 & 5
Notes
-----
The final entry in the return array will be ``None`` to ensure that the
dtype of the array is :class:`object`.
.. warning:: This means negative indexing should **never**
be used with these arrays.
"""
if not len(upshift):
return np.array([], dtype=object)
# mergesort for a stable ordered array for the same value.
order = np.argsort(upshift, kind="mergesort")
upshift_sorted = upshift[order]
u_values, indices = np.unique(upshift_sorted, return_index=True)
# reset nparents to the larger one between input and heuristic from data
# This is useful for creating empty Universe where default value is 1.
nparents = np.max([nparents, u_values.max()+1])
residue_indices = np.zeros(nparents, dtype=int)
missing_resids = np.sort(np.setdiff1d(np.arange(nparents), u_values))
indices = np.append(indices, upshift_sorted.shape[0])
residue_indices[u_values] = indices[1:]
for missing_resid in missing_resids:
if missing_resid == 0:
residue_indices[missing_resid] = 0
else:
residue_indices[missing_resid] = residue_indices[missing_resid-1]
downshift = np.split(order, residue_indices[:-1])
# Add None to end of array to force it to be of type Object
# Without this, a rectangular array gets squashed into a single array
downshift.append(None)
return np.array(downshift, dtype=object)
[docs]
class TransTable(object):
"""Membership tables with methods to translate indices across levels.
There are three levels; Atom, Residue and Segment. Each Atom **must**
belong in a Residue, each Residue **must** belong to a Segment.
When translating upwards, eg finding which Segment a Residue belongs in,
a single numpy array is returned. When translating downwards, two options
are available; a concatenated result (suffix `_1`) or a list for each parent
object (suffix `_2d`).
Parameters
----------
n_atoms : int
number of atoms in topology
n_residues : int
number of residues in topology
n_segments : int
number of segments in topology
atom_resindex : 1-D array
resindex for each atom in the topology; the number of unique values in
this array must be <= `n_residues`, and the array must be length
`n_atoms`; giving None defaults to placing all atoms in residue 0
residue_segindex : 1-D array
segindex for each residue in the topology; the number of unique values
in this array must be <= `n_segments`, and the array must be length
`n_residues`; giving None defaults to placing all residues in segment 0
Attributes
----------
n_atoms : int
number of atoms in topology
n_residues : int
number of residues in topology
n_segments : int
number of segments in topology
size : tuple
tuple ``(n_atoms, n_residues, n_segments)`` describing the shape of
the TransTable
.. versionchanged:: 2.3.0
Lazy building RA and SR.
"""
def __init__(self,
n_atoms, n_residues, n_segments, # Size of tables
atom_resindex=None, residue_segindex=None, # Contents of tables
):
self.n_atoms = n_atoms
self.n_residues = n_residues
self.n_segments = n_segments
# built atom-to-residue mapping, and vice-versa
if atom_resindex is None:
self._AR = np.zeros(n_atoms, dtype=np.intp)
else:
self._AR = np.asarray(atom_resindex, dtype=np.intp).copy()
if not len(self._AR) == n_atoms:
raise ValueError("atom_resindex must be len n_atoms")
self._RA = None
# built residue-to-segment mapping, and vice-versa
if residue_segindex is None:
self._RS = np.zeros(n_residues, dtype=np.intp)
else:
self._RS = np.asarray(residue_segindex, dtype=np.intp).copy()
if not len(self._RS) == n_residues:
raise ValueError("residue_segindex must be len n_residues")
self._SR = None
[docs]
def copy(self):
"""Return a deepcopy of this Transtable"""
return self.__class__(self.n_atoms, self.n_residues, self.n_segments,
atom_resindex=self._AR, residue_segindex=self._RS)
@property
def RA(self):
if self._RA is None:
self._RA = make_downshift_arrays(self._AR,
self.n_residues)
return self._RA
@property
def SR(self):
if self._SR is None:
self._SR = make_downshift_arrays(self._RS,
self.n_segments)
return self._SR
@property
def size(self):
"""The shape of the table, ``(n_atoms, n_residues, n_segments)``.
:meta private:
"""
return (self.n_atoms, self.n_residues, self.n_segments)
[docs]
def atoms2residues(self, aix):
"""Get residue indices for each atom.
Parameters
----------
aix : array
atom indices
Returns
-------
rix : array
residue index for each atom
"""
return self._AR[aix]
[docs]
def residues2atoms_1d(self, rix):
"""Get atom indices collectively represented by given residue indices.
Parameters
----------
rix : array
residue indices
Returns
-------
aix : array
indices of atoms present in residues, collectively
"""
RA = self.RA
try:
return np.concatenate(RA[rix])
except ValueError: # rix is not iterable or empty
# don't accidentally return a view!
return RA[rix].astype(np.intp, copy=True)
[docs]
def residues2atoms_2d(self, rix):
"""Get atom indices represented by each residue index.
Parameters
----------
rix : array
residue indices
Returns
-------
raix : list
each element corresponds to a residue index, in order given in
`rix`, with each element being an array of the atom indices present
in that residue
"""
RA = self.RA
try:
return [RA[r].copy() for r in rix]
except TypeError:
return [RA[rix].copy()] # why would this be singular for 2d?
[docs]
def residues2segments(self, rix):
"""Get segment indices for each residue.
Parameters
----------
rix : array
residue indices
Returns
-------
six : array
segment index for each residue
"""
return self._RS[rix]
[docs]
def segments2residues_1d(self, six):
"""Get residue indices collectively represented by given segment indices
Parameters
----------
six : array
segment indices
Returns
-------
rix : array
sorted indices of residues present in segments, collectively
"""
SR = self.SR
try:
return np.concatenate(SR[six])
except ValueError: # six is not iterable or empty
# don't accidentally return a view!
return SR[six].astype(np.intp, copy=True)
[docs]
def segments2residues_2d(self, six):
"""Get residue indices represented by each segment index.
Parameters
----------
six : array
residue indices
Returns
-------
srix : list
each element corresponds to a segment index, in order given in
`six`, with each element being an array of the residue indices
present in that segment
"""
SR = self.SR
try:
return [SR[s].copy() for s in six]
except TypeError:
return [SR[six].copy()]
# Compound moves, does 2 translations
[docs]
def atoms2segments(self, aix):
"""Get segment indices for each atom.
Parameters
----------
aix : array
atom indices
Returns
-------
rix : array
segment index for each atom
"""
rix = self.atoms2residues(aix)
return self.residues2segments(rix)
[docs]
def segments2atoms_1d(self, six):
"""Get atom indices collectively represented by given segment indices.
Parameters
----------
six : array
segment indices
Returns
-------
aix : array
sorted indices of atoms present in segments, collectively
"""
rix = self.segments2residues_1d(six)
return self.residues2atoms_1d(rix)
[docs]
def segments2atoms_2d(self, six):
"""Get atom indices represented by each segment index.
Parameters
----------
six : array
residue indices
Returns
-------
saix : list
each element corresponds to a segment index, in order given in
`six`, with each element being an array of the atom indices present
in that segment
"""
# residues in EACH
rixs = self.segments2residues_2d(six)
return [self.residues2atoms_1d(rix) for rix in rixs]
# Move between different groups.
[docs]
def move_atom(self, aix, rix):
"""Move aix to be in rix"""
self._AR[aix] = rix
self._RA = None
[docs]
def move_residue(self, rix, six):
"""Move rix to be in six"""
self._RS[rix] = six
self._SR = None
def add_Residue(self, segidx):
# segidx - index of parent
self.n_residues += 1
self._RA = None
self._RS = np.concatenate([self._RS, np.array([segidx])])
self._SR = None
return self.n_residues - 1
def add_Segment(self):
self.n_segments += 1
self._SR = None
return self.n_segments - 1
def __getstate__(self):
# don't serialize _RA and _SR for performance.
attrs = self.__dict__
attrs['_RA'] = None
attrs['_SR'] = None
return attrs
[docs]
class Topology(object):
"""In-memory, array-based topology database.
The topology model of MDanalysis features atoms, which must each be a
member of one residue. Each residue, in turn, must be a member of one
segment. The details of maintaining this heirarchy, and mappings of atoms
to residues, residues to segments, and vice-versa, are handled internally
by this object.
"""
def __init__(self, n_atoms=1, n_res=1, n_seg=1,
attrs=None,
atom_resindex=None,
residue_segindex=None):
"""
Parameters
----------
n_atoms : int
number of atoms in topology. Must be larger then 1 at each level
n_residues : int
number of residues in topology. Must be larger then 1 at each level
n_segments : int
number of segments in topology. Must be larger then 1 at each level
attrs : TopologyAttr objects
components of the topology to be included
atom_resindex : array
1-D array giving the resindex of each atom in the system
residue_segindex : array
1-D array giving the segindex of each residue in the system
"""
self.tt = TransTable(n_atoms, n_res, n_seg,
atom_resindex=atom_resindex,
residue_segindex=residue_segindex)
if attrs is None:
attrs = []
# add core TopologyAttrs that give access to indices
attrs.extend((Atomindices(), Resindices(), Segindices()))
# attach the TopologyAttrs
self.attrs = []
for topologyattr in attrs:
self.add_TopologyAttr(topologyattr)
[docs]
def copy(self):
"""Return a deepcopy of this Topology"""
new = self.__class__(1, 1, 1)
# copy the tt
new.tt = self.tt.copy()
# remove indices
for attr in self.attrs:
if isinstance(attr, (Atomindices, Resindices, Segindices)):
continue
new.add_TopologyAttr(attr.copy())
return new
@property
def n_atoms(self):
return self.tt.n_atoms
@property
def n_residues(self):
return self.tt.n_residues
@property
def n_segments(self):
return self.tt.n_segments
[docs]
def add_TopologyAttr(self, topologyattr):
"""Add a new TopologyAttr to the Topology.
Parameters
----------
topologyattr : TopologyAttr
"""
self.attrs.append(topologyattr)
topologyattr.top = self
self.__setattr__(topologyattr.attrname, topologyattr)
[docs]
def del_TopologyAttr(self, topologyattr):
"""Remove a TopologyAttr from the Topology.
If it is not present, nothing happens.
Parameters
----------
topologyattr : TopologyAttr
.. versionadded:: 2.0.0
"""
self.__delattr__(topologyattr.attrname)
self.attrs.remove(topologyattr)
@property
def guessed_attributes(self):
"""A list of the guessed attributes in this topology"""
return filter(lambda x: x.is_guessed
if(not isinstance(x.is_guessed, typing.Container))
else True in x.is_guessed, self.attrs)
@property
def read_attributes(self):
"""A list of the attributes read from the topology"""
return filter(lambda x: not x.is_guessed
if(not isinstance(x.is_guessed, typing.Container))
else False in x.is_guessed, self.attrs)
[docs]
def add_Residue(self, segment, **new_attrs):
"""
Returns
-------
residx of the new Residue
Raises
------
NoDataError
If not all data was provided. This error is raised before any changes
.. versionchanged:: 2.1.0
Added use of _add_new to TopologyAttr resize
"""
# Check that all data is here before making any changes
for attr in self.attrs:
if not attr.per_object == 'residue':
continue
if attr.singular not in new_attrs:
missing = (attr.singular for attr in self.attrs
if (attr.per_object == 'residue' and
attr.singular not in new_attrs))
raise NoDataError("Missing the following attributes for the new"
" Residue: {}".format(', '.join(missing)))
# Resize topology table
residx = self.tt.add_Residue(segment.segindex)
# Add new value to each attribute
for attr in self.attrs:
if not attr.per_object == 'residue':
continue
newval = new_attrs[attr.singular]
attr._add_new(newval)
return residx
[docs]
def add_Segment(self, **new_attrs):
"""Adds a new Segment to the Topology
Parameters
----------
new_attrs : dict
the new attributes for the new segment, eg {'segid': 'B'}
Raises
-------
NoDataError
if an attribute wasn't specified.
Returns
-------
ix : int
the idx of the new segment
.. versionchanged:: 2.1.0
Added use of _add_new to resize topology attrs
"""
for attr in self.attrs:
if attr.per_object == 'segment':
if attr.singular not in new_attrs:
missing = (attr.singular for attr in self.attrs
if (attr.per_object == 'segment' and
attr.singular not in new_attrs))
raise NoDataError("Missing the following attributes for the"
" new Segment: {}"
"".format(', '.join(missing)))
segidx = self.tt.add_Segment()
for attr in self.attrs:
if not attr.per_object == 'segment':
continue
newval = new_attrs[attr.singular]
attr._add_new(newval)
return segidx