Source code for MDAnalysis.analysis.leaflet
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
Leaflet identification --- :mod:`MDAnalysis.analysis.leaflet`
==============================================================
This module implements the *LeafletFinder* algorithm, described in
[Michaud-Agrawal2011]_. It can identify the lipids in a bilayer of
arbitrary shape and topology, including planar and undulating bilayers
under periodic boundary conditions or vesicles.
One can use this information to identify
* the upper and lower leaflet of a *planar membrane* by comparing the
the :meth:`~MDAnalysis.core.groups.AtomGroup.center_of_geometry` of
the leaflet groups, or
* the outer and inner leaflet of a *vesicle* by comparing histograms
of distances from the centre of geometry (or possibly simply the
:meth:`~MDAnalysis.core.groups.AtomGroup.radius_of_gyration`).
See example scripts in the MDAnalysisCookbook_ on how to use
:class:`LeafletFinder`. The function :func:`optimize_cutoff` implements a
(slow) heuristic method to find the best cut off for the LeafletFinder
algorithm.
.. _MDAnalysisCookbook: https://github.com/MDAnalysis/MDAnalysisCookbook/tree/master/examples
Algorithm
---------
1. build a graph of all phosphate distances < cutoff
2. identify the largest connected subgraphs
3. analyse first and second largest graph, which correspond to the leaflets
For further details see [Michaud-Agrawal2011]_.
Classes and Functions
---------------------
.. autoclass:: LeafletFinder
:members:
.. autofunction:: optimize_cutoff
"""
from __future__ import division, absolute_import
from six.moves import range
import warnings
import numpy as np
import networkx as NX
from .. import core
from . import distances
from .. import selections
from ..due import due, Doi
due.cite(Doi("10.1002/jcc.21787"),
description="LeafletFinder algorithm",
path="MDAnalysis.analysis.leaflet",
cite_module=True)
del Doi
[docs]class LeafletFinder(object):
"""Identify atoms in the same leaflet of a lipid bilayer.
This class implements the *LeafletFinder* algorithm [Michaud-Agrawal2011]_.
Parameters
----------
universe : Universe or str
:class:`MDAnalysis.Universe` or a file name (e.g., in PDB or
GRO format)
select : AtomGroup or str
A AtomGroup instance or a
:meth:`Universe.select_atoms` selection string
for atoms that define the lipid head groups, e.g.
universe.atoms.PO4 or "name PO4" or "name P*"
cutoff : float (optional)
head group-defining atoms within a distance of `cutoff`
Angstroms are deemed to be in the same leaflet [15.0]
pbc : bool (optional)
take periodic boundary conditions into account [``False``]
sparse : bool (optional)
``None``: use fastest possible routine; ``True``: use slow
sparse matrix implementation (for large systems); ``False``:
use fast :func:`~MDAnalysis.lib.distances.distance_array`
implementation [``None``].
Example
-------
The components of the graph are stored in the list
:attr:`LeafletFinder.components`; the atoms in each component are numbered
consecutively, starting at 0. To obtain the atoms in the input structure
use :meth:`LeafletFinder.groups`::
u = mda.Universe(PDB)
L = LeafletFinder(u, 'name P*')
leaflet0 = L.groups(0)
leaflet1 = L.groups(1)
The residues can be accessed through the standard MDAnalysis mechanism::
leaflet0.residues
provides a :class:`~MDAnalysis.core.groups.ResidueGroup`
instance. Similarly, all atoms in the first leaflet are then ::
leaflet0.residues.atoms
.. versionchanged:: 1.0.0
Changed `selection` keyword to `select`
.. deprecated:: 1.0.1
The ability to pass a filename (str) in the `universe` argument
will be removed in 2.0.0.
"""
def __init__(self, universe, select, cutoff=15.0, pbc=False, sparse=None):
universe = core.universe.as_Universe(universe)
self.universe = universe
self.selectionstring = select
if isinstance(self.selectionstring, core.groups.AtomGroup):
self.selection = self.selectionstring
else:
self.selection = universe.select_atoms(self.selectionstring)
self.pbc = pbc
self.sparse = sparse
self._init_graph(cutoff)
def _init_graph(self, cutoff):
self.cutoff = cutoff
self.graph = self._get_graph()
self.components = self._get_components()
# The last two calls in _get_graph() and the single line in
# _get_components() are all that are needed to make the leaflet
# detection work.
def _get_graph(self):
"""Build graph from adjacency matrix at the given cutoff.
Automatically select between high and low memory usage versions of
contact_matrix."""
# could use self_distance_array to speed up but then need to deal with the sparse indexing
if self.pbc:
box = self.universe.trajectory.ts.dimensions
else:
box = None
coord = self.selection.positions
if self.sparse is False:
# only try distance array
try:
adj = distances.contact_matrix(coord, cutoff=self.cutoff, returntype="numpy", box=box)
except ValueError: # pragma: no cover
warnings.warn('N x N matrix too big, use sparse=True or sparse=None', category=UserWarning,
stacklevel=2)
raise
elif self.sparse is True:
# only try sparse
adj = distances.contact_matrix(coord, cutoff=self.cutoff, returntype="sparse", box=box)
else:
# use distance_array and fall back to sparse matrix
try:
# this works for small-ish systems and depends on system memory
adj = distances.contact_matrix(coord, cutoff=self.cutoff, returntype="numpy", box=box)
except ValueError: # pragma: no cover
# but use a sparse matrix method for larger systems for memory reasons
warnings.warn(
'N x N matrix too big - switching to sparse matrix method (works fine, but is currently rather '
'slow)',
category=UserWarning, stacklevel=2)
adj = distances.contact_matrix(coord, cutoff=self.cutoff, returntype="sparse", box=box)
return NX.Graph(adj)
def _get_components(self):
"""Return connected components (as sorted numpy arrays), sorted by size."""
return [np.sort(list(component)) for component in NX.connected_components(self.graph)]
[docs] def update(self, cutoff=None):
"""Update components, possibly with a different *cutoff*"""
if cutoff is None:
cutoff = self.cutoff
self._init_graph(cutoff)
[docs] def sizes(self):
"""Dict of component index with size of component."""
return dict(((idx, len(component)) for idx, component in enumerate(self.components)))
[docs] def groups(self, component_index=None):
"""Return a :class:`MDAnalysis.core.groups.AtomGroup` for *component_index*.
If no argument is supplied, then a list of all leaflet groups is returned.
See Also
--------
:meth:`LeafletFinder.group`
:meth:`LeafletFinder.groups_iter`
"""
if component_index is None:
return list(self.groups_iter())
else:
return self.group(component_index)
[docs] def group(self, component_index):
"""Return a :class:`MDAnalysis.core.groups.AtomGroup` for *component_index*."""
# maybe cache this?
indices = [i for i in self.components[component_index]]
return self.selection[indices]
[docs] def groups_iter(self):
"""Iterator over all leaflet :meth:`groups`"""
for component_index in range(len(self.components)):
yield self.group(component_index)
[docs] def write_selection(self, filename, **kwargs):
"""Write selections for the leaflets to *filename*.
The format is typically determined by the extension of *filename*
(e.g. "vmd", "pml", or "ndx" for VMD, PyMol, or Gromacs).
See :class:`MDAnalysis.selections.base.SelectionWriter` for all
options.
"""
sw = selections.get_writer(filename, kwargs.pop('format', None))
with sw(filename, mode=kwargs.pop('mode', 'w'),
preamble="leaflets based on select={selectionstring!r} cutoff={cutoff:f}\n".format(
**vars(self)),
**kwargs) as writer:
for i, ag in enumerate(self.groups_iter()):
name = "leaflet_{0:d}".format((i + 1))
writer.write(ag, name=name)
def __repr__(self):
return "<LeafletFinder({0!r}, cutoff={1:.1f} A) with {2:d} atoms in {3:d} groups>".format(
self.selectionstring, self.cutoff, self.selection.n_atoms,
len(self.components))
[docs]def optimize_cutoff(universe, select, dmin=10.0, dmax=20.0, step=0.5,
max_imbalance=0.2, **kwargs):
r"""Find cutoff that minimizes number of disconnected groups.
Applies heuristics to find best groups:
1. at least two groups (assumes that there are at least 2 leaflets)
2. reject any solutions for which:
.. math::
\frac{|N_0 - N_1|}{|N_0 + N_1|} > \mathrm{max_imbalance}
with :math:`N_i` being the number of lipids in group
:math:`i`. This heuristic picks groups with balanced numbers of
lipids.
Parameters
----------
universe : Universe
:class:`MDAnalysis.Universe` instance
select : AtomGroup or str
AtomGroup or selection string as used for :class:`LeafletFinder`
dmin : float (optional)
dmax : float (optional)
step : float (optional)
scan cutoffs from `dmin` to `dmax` at stepsize `step` (in Angstroms)
max_imbalance : float (optional)
tuning parameter for the balancing heuristic [0.2]
kwargs : other keyword arguments
other arguments for :class:`LeafletFinder`
Returns
-------
(cutoff, N)
optimum cutoff and number of groups found
.. Note:: This function can die in various ways if really no
appropriate number of groups can be found; it ought to be
made more robust.
.. versionchanged:: 1.0.0
Changed `selection` keyword to `select`
"""
kwargs.pop('cutoff', None) # not used, so we filter it
_sizes = []
for cutoff in np.arange(dmin, dmax, step):
LF = LeafletFinder(universe, select, cutoff=cutoff, **kwargs)
# heuristic:
# 1) N > 1
# 2) no imbalance between large groups:
sizes = LF.sizes()
if len(sizes) < 2:
continue
n0 = float(sizes[0]) # sizes of two biggest groups ...
n1 = float(sizes[1]) # ... assumed to be the leaflets
imbalance = np.abs(n0 - n1) / (n0 + n1)
# print "sizes: %(sizes)r; imbalance=%(imbalance)f" % vars()
if imbalance > max_imbalance:
continue
_sizes.append((cutoff, len(LF.sizes())))
results = np.rec.fromrecords(_sizes, names="cutoff,N")
del _sizes
results.sort(order=["N", "cutoff"]) # sort ascending by N, then cutoff
return results[0] # (cutoff,N) with N>1 and shortest cutoff