Source code for MDAnalysis.analysis.leaflet

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#


"""
Leaflet identification --- :mod:`MDAnalysis.analysis.leaflet`
==============================================================

This module implements the *LeafletFinder* algorithm, described in
[Michaud-Agrawal2011]_. It can identify the lipids in a bilayer of
arbitrary shape and topology, including planar and undulating bilayers
under periodic boundary conditions or vesicles.

One can use this information to identify

* the upper and lower leaflet of a *planar membrane* by comparing the
  the :meth:`~MDAnalysis.core.groups.AtomGroup.center_of_geometry` of
  the leaflet groups, or

* the outer and inner leaflet of a *vesicle* by comparing histograms
  of distances from the centre of geometry (or possibly simply the
  :meth:`~MDAnalysis.core.groups.AtomGroup.radius_of_gyration`).

See example scripts in the MDAnalysisCookbook_ on how to use
:class:`LeafletFinder`. The function :func:`optimize_cutoff` implements a
(slow) heuristic method to find the best cut off for the LeafletFinder
algorithm.

.. _MDAnalysisCookbook: https://github.com/MDAnalysis/MDAnalysisCookbook/tree/master/examples


Algorithm
---------

1. build a graph of all phosphate distances < cutoff
2. identify the largest connected subgraphs
3. analyse first and second largest graph, which correspond to the leaflets

For further details see [Michaud-Agrawal2011]_.


Classes and Functions
---------------------

.. autoclass:: LeafletFinder
   :members:

.. autofunction:: optimize_cutoff

"""
from __future__ import division, absolute_import

from six.moves import range

import warnings

import numpy as np
import networkx as NX

from .. import core
from . import distances
from .. import selections

from ..due import due, Doi

due.cite(Doi("10.1002/jcc.21787"),
         description="LeafletFinder algorithm",
         path="MDAnalysis.analysis.leaflet",
         cite_module=True)
del Doi


[docs]class LeafletFinder(object): """Identify atoms in the same leaflet of a lipid bilayer. This class implements the *LeafletFinder* algorithm [Michaud-Agrawal2011]_. Parameters ---------- universe : Universe or str :class:`MDAnalysis.Universe` or a file name (e.g., in PDB or GRO format) select : AtomGroup or str A AtomGroup instance or a :meth:`Universe.select_atoms` selection string for atoms that define the lipid head groups, e.g. universe.atoms.PO4 or "name PO4" or "name P*" cutoff : float (optional) head group-defining atoms within a distance of `cutoff` Angstroms are deemed to be in the same leaflet [15.0] pbc : bool (optional) take periodic boundary conditions into account [``False``] sparse : bool (optional) ``None``: use fastest possible routine; ``True``: use slow sparse matrix implementation (for large systems); ``False``: use fast :func:`~MDAnalysis.lib.distances.distance_array` implementation [``None``]. Example ------- The components of the graph are stored in the list :attr:`LeafletFinder.components`; the atoms in each component are numbered consecutively, starting at 0. To obtain the atoms in the input structure use :meth:`LeafletFinder.groups`:: L = LeafletFinder(PDB, 'name P*') leaflet0 = L.groups(0) leaflet1 = L.groups(1) The residues can be accessed through the standard MDAnalysis mechanism:: leaflet0.residues provides a :class:`~MDAnalysis.core.groups.ResidueGroup` instance. Similarly, all atoms in the first leaflet are then :: leaflet0.residues.atoms .. versionchanged:: 1.0.0 Changed `selection` keyword to `select` """ def __init__(self, universe, select, cutoff=15.0, pbc=False, sparse=None): universe = core.universe.as_Universe(universe) self.universe = universe self.selectionstring = select if isinstance(self.selectionstring, core.groups.AtomGroup): self.selection = self.selectionstring else: self.selection = universe.select_atoms(self.selectionstring) self.pbc = pbc self.sparse = sparse self._init_graph(cutoff) def _init_graph(self, cutoff): self.cutoff = cutoff self.graph = self._get_graph() self.components = self._get_components() # The last two calls in _get_graph() and the single line in # _get_components() are all that are needed to make the leaflet # detection work. def _get_graph(self): """Build graph from adjacency matrix at the given cutoff. Automatically select between high and low memory usage versions of contact_matrix.""" # could use self_distance_array to speed up but then need to deal with the sparse indexing if self.pbc: box = self.universe.trajectory.ts.dimensions else: box = None coord = self.selection.positions if self.sparse is False: # only try distance array try: adj = distances.contact_matrix(coord, cutoff=self.cutoff, returntype="numpy", box=box) except ValueError: # pragma: no cover warnings.warn('N x N matrix too big, use sparse=True or sparse=None', category=UserWarning, stacklevel=2) raise elif self.sparse is True: # only try sparse adj = distances.contact_matrix(coord, cutoff=self.cutoff, returntype="sparse", box=box) else: # use distance_array and fall back to sparse matrix try: # this works for small-ish systems and depends on system memory adj = distances.contact_matrix(coord, cutoff=self.cutoff, returntype="numpy", box=box) except ValueError: # pragma: no cover # but use a sparse matrix method for larger systems for memory reasons warnings.warn( 'N x N matrix too big - switching to sparse matrix method (works fine, but is currently rather ' 'slow)', category=UserWarning, stacklevel=2) adj = distances.contact_matrix(coord, cutoff=self.cutoff, returntype="sparse", box=box) return NX.Graph(adj) def _get_components(self): """Return connected components (as sorted numpy arrays), sorted by size.""" return [np.sort(list(component)) for component in NX.connected_components(self.graph)]
[docs] def update(self, cutoff=None): """Update components, possibly with a different *cutoff*""" if cutoff is None: cutoff = self.cutoff self._init_graph(cutoff)
[docs] def sizes(self): """Dict of component index with size of component.""" return dict(((idx, len(component)) for idx, component in enumerate(self.components)))
[docs] def groups(self, component_index=None): """Return a :class:`MDAnalysis.core.groups.AtomGroup` for *component_index*. If no argument is supplied, then a list of all leaflet groups is returned. See Also -------- :meth:`LeafletFinder.group` :meth:`LeafletFinder.groups_iter` """ if component_index is None: return list(self.groups_iter()) else: return self.group(component_index)
[docs] def group(self, component_index): """Return a :class:`MDAnalysis.core.groups.AtomGroup` for *component_index*.""" # maybe cache this? indices = [i for i in self.components[component_index]] return self.selection[indices]
[docs] def groups_iter(self): """Iterator over all leaflet :meth:`groups`""" for component_index in range(len(self.components)): yield self.group(component_index)
[docs] def write_selection(self, filename, **kwargs): """Write selections for the leaflets to *filename*. The format is typically determined by the extension of *filename* (e.g. "vmd", "pml", or "ndx" for VMD, PyMol, or Gromacs). See :class:`MDAnalysis.selections.base.SelectionWriter` for all options. """ sw = selections.get_writer(filename, kwargs.pop('format', None)) with sw(filename, mode=kwargs.pop('mode', 'w'), preamble="leaflets based on select={selectionstring!r} cutoff={cutoff:f}\n".format( **vars(self)), **kwargs) as writer: for i, ag in enumerate(self.groups_iter()): name = "leaflet_{0:d}".format((i + 1)) writer.write(ag, name=name)
def __repr__(self): return "<LeafletFinder({0!r}, cutoff={1:.1f} A) with {2:d} atoms in {3:d} groups>".format( self.selectionstring, self.cutoff, self.selection.n_atoms, len(self.components))
[docs]def optimize_cutoff(universe, select, dmin=10.0, dmax=20.0, step=0.5, max_imbalance=0.2, **kwargs): r"""Find cutoff that minimizes number of disconnected groups. Applies heuristics to find best groups: 1. at least two groups (assumes that there are at least 2 leaflets) 2. reject any solutions for which: .. math:: \frac{|N_0 - N_1|}{|N_0 + N_1|} > \mathrm{max_imbalance} with :math:`N_i` being the number of lipids in group :math:`i`. This heuristic picks groups with balanced numbers of lipids. Parameters ---------- universe : Universe :class:`MDAnalysis.Universe` instance select : AtomGroup or str AtomGroup or selection string as used for :class:`LeafletFinder` dmin : float (optional) dmax : float (optional) step : float (optional) scan cutoffs from `dmin` to `dmax` at stepsize `step` (in Angstroms) max_imbalance : float (optional) tuning parameter for the balancing heuristic [0.2] kwargs : other keyword arguments other arguments for :class:`LeafletFinder` Returns ------- (cutoff, N) optimum cutoff and number of groups found .. Note:: This function can die in various ways if really no appropriate number of groups can be found; it ought to be made more robust. .. versionchanged:: 1.0.0 Changed `selection` keyword to `select` """ kwargs.pop('cutoff', None) # not used, so we filter it _sizes = [] for cutoff in np.arange(dmin, dmax, step): LF = LeafletFinder(universe, select, cutoff=cutoff, **kwargs) # heuristic: # 1) N > 1 # 2) no imbalance between large groups: sizes = LF.sizes() if len(sizes) < 2: continue n0 = float(sizes[0]) # sizes of two biggest groups ... n1 = float(sizes[1]) # ... assumed to be the leaflets imbalance = np.abs(n0 - n1) / (n0 + n1) # print "sizes: %(sizes)r; imbalance=%(imbalance)f" % vars() if imbalance > max_imbalance: continue _sizes.append((cutoff, len(LF.sizes()))) results = np.rec.fromrecords(_sizes, names="cutoff,N") del _sizes results.sort(order=["N", "cutoff"]) # sort ascending by N, then cutoff return results[0] # (cutoff,N) with N>1 and shortest cutoff