Source code for MDAnalysis.analysis.leaflet
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
Leaflet identification --- :mod:`MDAnalysis.analysis.leaflet`
==============================================================
This module implements the *LeafletFinder* algorithm, described in
[Michaud-Agrawal2011]_. It can identify the lipids in a bilayer of
arbitrary shape and topology, including planar and undulating bilayers
under periodic boundary conditions or vesicles.
One can use this information to identify
* the upper and lower leaflet of a *planar membrane* by comparing the
the :meth:`~MDAnalysis.core.groups.AtomGroup.center_of_geometry` of
the leaflet groups, or
* the outer and inner leaflet of a *vesicle* by comparing histograms
of distances from the centre of geometry (or possibly simply the
:meth:`~MDAnalysis.core.groups.AtomGroup.radius_of_gyration`).
See example scripts in the MDAnalysisCookbook_ on how to use
:class:`LeafletFinder`. The function :func:`optimize_cutoff` implements a
(slow) heuristic method to find the best cut off for the LeafletFinder
algorithm.
.. _MDAnalysisCookbook: https://github.com/MDAnalysis/MDAnalysisCookbook/tree/master/examples
Algorithm
---------
1. build a graph of all phosphate distances < cutoff
2. identify the largest connected subgraphs
3. analyse first and second largest graph, which correspond to the leaflets
For further details see [Michaud-Agrawal2011]_.
Classes and Functions
---------------------
.. autoclass:: LeafletFinder
:members:
.. autofunction:: optimize_cutoff
"""
import warnings
import numpy as np
from .. import core
from . import distances
from .. import selections
from ..due import due, Doi
# networkx is an optional import
try:
import networkx as NX
except ImportError:
HAS_NX = False
else:
HAS_NX = True
due.cite(
Doi("10.1002/jcc.21787"),
description="LeafletFinder algorithm",
path="MDAnalysis.analysis.leaflet",
cite_module=True,
)
del Doi
[docs]
class LeafletFinder(object):
"""Identify atoms in the same leaflet of a lipid bilayer.
This class implements the *LeafletFinder* algorithm [Michaud-Agrawal2011]_.
Parameters
----------
universe : Universe
:class:`~MDAnalysis.core.universe.Universe` object.
select : AtomGroup or str
A AtomGroup instance or a
:meth:`Universe.select_atoms` selection string
for atoms that define the lipid head groups, e.g.
universe.atoms.PO4 or "name PO4" or "name P*"
cutoff : float (optional)
head group-defining atoms within a distance of `cutoff`
Angstroms are deemed to be in the same leaflet [15.0]
pbc : bool (optional)
take periodic boundary conditions into account [``False``]
sparse : bool (optional)
``None``: use fastest possible routine; ``True``: use slow
sparse matrix implementation (for large systems); ``False``:
use fast :func:`~MDAnalysis.lib.distances.distance_array`
implementation [``None``].
Raises
------
ImportError
This class requires the optional package `networkx`. If not found
an ImportError is raised.
Example
-------
The components of the graph are stored in the list
:attr:`LeafletFinder.components`; the atoms in each component are numbered
consecutively, starting at 0. To obtain the atoms in the input structure
use :meth:`LeafletFinder.groups`::
u = mda.Universe(PDB)
L = LeafletFinder(u, 'name P*')
leaflet0 = L.groups(0)
leaflet1 = L.groups(1)
The residues can be accessed through the standard MDAnalysis mechanism::
leaflet0.residues
provides a :class:`~MDAnalysis.core.groups.ResidueGroup`
instance. Similarly, all atoms in the first leaflet are then ::
leaflet0.residues.atoms
.. versionchanged:: 1.0.0
Changed `selection` keyword to `select`
.. versionchanged:: 2.0.0
The universe keyword no longer accepts non-Universe arguments. Please
create a :class:`~MDAnalysis.core.universe.Universe` first.
"""
def __init__(self, universe, select, cutoff=15.0, pbc=False, sparse=None):
# Raise an error if networkx is not installed
if not HAS_NX:
errmsg = (
"The LeafletFinder class requires an installation of "
"networkx. Please install networkx "
"https://networkx.org/documentation/stable/install.html"
)
raise ImportError(errmsg)
self.universe = universe
self.selectionstring = select
if isinstance(self.selectionstring, core.groups.AtomGroup):
self.selection = self.selectionstring
else:
self.selection = universe.select_atoms(self.selectionstring)
self.pbc = pbc
self.sparse = sparse
self._init_graph(cutoff)
def _init_graph(self, cutoff):
self.cutoff = cutoff
self.graph = self._get_graph()
self.components = self._get_components()
# The last two calls in _get_graph() and the single line in
# _get_components() are all that are needed to make the leaflet
# detection work.
def _get_graph(self):
"""Build graph from adjacency matrix at the given cutoff.
Automatically select between high and low memory usage versions of
contact_matrix."""
# could use self_distance_array to speed up but then need to deal with the sparse indexing
if self.pbc:
box = self.universe.trajectory.ts.dimensions
else:
box = None
coord = self.selection.positions
if self.sparse is False:
# only try distance array
try:
adj = distances.contact_matrix(
coord, cutoff=self.cutoff, returntype="numpy", box=box
)
except ValueError: # pragma: no cover
warnings.warn(
"N x N matrix too big, use sparse=True or sparse=None",
category=UserWarning,
stacklevel=2,
)
raise
elif self.sparse is True:
# only try sparse
adj = distances.contact_matrix(
coord, cutoff=self.cutoff, returntype="sparse", box=box
)
else:
# use distance_array and fall back to sparse matrix
try:
# this works for small-ish systems and depends on system memory
adj = distances.contact_matrix(
coord, cutoff=self.cutoff, returntype="numpy", box=box
)
except ValueError: # pragma: no cover
# but use a sparse matrix method for larger systems for memory reasons
warnings.warn(
"N x N matrix too big - switching to sparse matrix method (works fine, but is currently rather "
"slow)",
category=UserWarning,
stacklevel=2,
)
adj = distances.contact_matrix(
coord, cutoff=self.cutoff, returntype="sparse", box=box
)
return NX.Graph(adj)
def _get_components(self):
"""Return connected components (as sorted numpy arrays), sorted by size."""
return [
np.sort(list(component))
for component in NX.connected_components(self.graph)
]
[docs]
def update(self, cutoff=None):
"""Update components, possibly with a different *cutoff*"""
if cutoff is None:
cutoff = self.cutoff
self._init_graph(cutoff)
[docs]
def sizes(self):
"""Dict of component index with size of component."""
return dict(
(
(idx, len(component))
for idx, component in enumerate(self.components)
)
)
[docs]
def groups(self, component_index=None):
"""Return a :class:`MDAnalysis.core.groups.AtomGroup` for *component_index*.
If no argument is supplied, then a list of all leaflet groups is returned.
See Also
--------
:meth:`LeafletFinder.group`
:meth:`LeafletFinder.groups_iter`
"""
if component_index is None:
return list(self.groups_iter())
else:
return self.group(component_index)
[docs]
def group(self, component_index):
"""Return a :class:`MDAnalysis.core.groups.AtomGroup` for *component_index*."""
# maybe cache this?
indices = [i for i in self.components[component_index]]
return self.selection[indices]
[docs]
def groups_iter(self):
"""Iterator over all leaflet :meth:`groups`"""
for component_index in range(len(self.components)):
yield self.group(component_index)
[docs]
def write_selection(self, filename, **kwargs):
"""Write selections for the leaflets to *filename*.
The format is typically determined by the extension of *filename*
(e.g. "vmd", "pml", or "ndx" for VMD, PyMol, or Gromacs).
See :class:`MDAnalysis.selections.base.SelectionWriter` for all
options.
"""
sw = selections.get_writer(filename, kwargs.pop("format", None))
with sw(
filename,
mode=kwargs.pop("mode", "w"),
preamble="leaflets based on select={selectionstring!r} cutoff={cutoff:f}\n".format(
**vars(self)
),
**kwargs,
) as writer:
for i, ag in enumerate(self.groups_iter()):
name = "leaflet_{0:d}".format((i + 1))
writer.write(ag, name=name)
def __repr__(self):
return "<LeafletFinder({0!r}, cutoff={1:.1f} A) with {2:d} atoms in {3:d} groups>".format(
self.selectionstring,
self.cutoff,
self.selection.n_atoms,
len(self.components),
)
[docs]
def optimize_cutoff(
universe,
select,
dmin=10.0,
dmax=20.0,
step=0.5,
max_imbalance=0.2,
**kwargs,
):
r"""Find cutoff that minimizes number of disconnected groups.
Applies heuristics to find best groups:
1. at least two groups (assumes that there are at least 2 leaflets)
2. reject any solutions for which:
.. math::
\frac{|N_0 - N_1|}{|N_0 + N_1|} > \mathrm{max_imbalance}
with :math:`N_i` being the number of lipids in group
:math:`i`. This heuristic picks groups with balanced numbers of
lipids.
Parameters
----------
universe : Universe
:class:`MDAnalysis.Universe` instance
select : AtomGroup or str
AtomGroup or selection string as used for :class:`LeafletFinder`
dmin : float (optional)
dmax : float (optional)
step : float (optional)
scan cutoffs from `dmin` to `dmax` at stepsize `step` (in Angstroms)
max_imbalance : float (optional)
tuning parameter for the balancing heuristic [0.2]
kwargs : other keyword arguments
other arguments for :class:`LeafletFinder`
Returns
-------
(cutoff, N)
optimum cutoff and number of groups found
.. Note:: This function can die in various ways if really no
appropriate number of groups can be found; it ought to be
made more robust.
.. versionchanged:: 1.0.0
Changed `selection` keyword to `select`
"""
kwargs.pop("cutoff", None) # not used, so we filter it
_sizes = []
for cutoff in np.arange(dmin, dmax, step):
LF = LeafletFinder(universe, select, cutoff=cutoff, **kwargs)
# heuristic:
# 1) N > 1
# 2) no imbalance between large groups:
sizes = LF.sizes()
if len(sizes) < 2:
continue
n0 = float(sizes[0]) # sizes of two biggest groups ...
n1 = float(sizes[1]) # ... assumed to be the leaflets
imbalance = np.abs(n0 - n1) / (n0 + n1)
# print "sizes: %(sizes)r; imbalance=%(imbalance)f" % vars()
if imbalance > max_imbalance:
continue
_sizes.append((cutoff, len(LF.sizes())))
results = np.rec.fromrecords(_sizes, names="cutoff,N")
del _sizes
results.sort(order=["N", "cutoff"]) # sort ascending by N, then cutoff
return results[0] # (cutoff,N) with N>1 and shortest cutoff