Source code for MDAnalysis.analysis.rdf
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
r"""Radial Distribution Functions --- :mod:`MDAnalysis.analysis.rdf`
====================================================================
This module contains two classes to calculate radial
`pair distribution functions`_ (`radial distribution functions`_ or "RDF").
The RDF :math:`g_{ab}(r)` between types of particles :math:`a` and :math:`b` is
.. _equation-gab:
.. math::
g_{ab}(r) = \frac{1}{N_{a}} \frac{1}{N_{b}/V} \sum_{i=1}^{N_a} \sum_{j=1}^{N_b}
\langle \delta(|\mathbf{r}_i - \mathbf{r}_j| - r) \rangle
which is normalized so that the RDF becomes 1 for large separations in a
homogenous system. The RDF effectively counts the average number of :math:`b`
neighbours in a shell at distance :math:`r` around a :math:`a` particle and
represents it as a density.
The radial cumulative distribution function is
.. math::
G_{ab}(r) = \int_0^r \!\!dr' 4\pi r'^2 g_{ab}(r')
and the average number of :math:`b` particles within radius :math:`r`
.. _equation-countab:
.. math::
N_{ab}(r) = \rho G_{ab}(r)
(with the appropriate density :math:`\rho`). The latter function can be used to
compute, for instance, coordination numbers such as the number of neighbors in
the first solvation shell :math:`N(r_1)` where :math:`r_1` is the position of
the first minimum in :math:`g(r)`.
We provide options for calculating the density of particle :math:`b`
in a shell at distance :math:`r` around a :math:`a` particle, which is
.. _equation-nab:
.. math::
n_{ab}(r) = \rho g_{ab}(r)
.. _`pair distribution functions`:
https://en.wikipedia.org/wiki/Pair_distribution_function
.. _`radial distribution functions`:
https://en.wikipedia.org/wiki/Radial_distribution_function
.. Not Implemented yet:
.. - Structure factor?
.. - Coordination number
"""
import warnings
import numpy as np
from ..lib import distances
from .base import AnalysisBase
[docs]
class InterRDF(AnalysisBase):
r"""Radial distribution function
:class:`InterRDF` is a tool to calculate average radial distribution
functions between two groups of atoms. Suppose we have two AtomGroups ``A``
and ``B``. ``A`` contains atom ``A1``, ``A2``, and ``B`` contains ``B1``,
``B2``. Given ``A`` and ``B`` to :class:`InterRDF`, the output will be the
average of RDFs between ``A1`` and ``B1``, ``A1`` and ``B2``, ``A2`` and
``B1``, ``A2`` and ``B2``. A typical application is to calculate the RDF of
solvent with itself or with another solute.
The :ref:`radial distribution function<equation-gab>` is calculated by
histogramming distances between all particles in `g1` and `g2` while taking
periodic boundary conditions into account via the minimum image
convention.
The `exclusion_block` keyword may be used to exclude a set of distances
from the calculations.
Results are available in the attributes :attr:`results.rdf`
and :attr:`results.count`.
Parameters
----------
g1 : AtomGroup
First AtomGroup
g2 : AtomGroup
Second AtomGroup
nbins : int
Number of bins in the histogram
range : tuple or list
The size of the RDF
norm : str, {'rdf', 'density', 'none'}
For 'rdf' calculate :math:`g_{ab}(r)`. For
'density' the :ref:`single particle density<equation-nab>`
:math:`n_{ab}(r)` is computed. 'none' computes the number of
particles occurences in each spherical shell.
.. versionadded:: 2.3.0
exclusion_block : tuple
A tuple representing the tile to exclude from the distance array.
exclude_same : str
Will exclude pairs of atoms that share the same "residue", "segment", or "chain".
Those are the only valid values. This is intended to remove atoms that are
spatially correlated due to direct bonded connections.
verbose : bool
Show detailed progress of the calculation if set to `True`
Attributes
----------
results.bins : numpy.ndarray
:class:`numpy.ndarray` of the centers of the `nbins` histogram
bins.
.. versionadded:: 2.0.0
bins : numpy.ndarray
Alias to the :attr:`results.bins` attribute.
.. deprecated:: 2.0.0
This attribute will be removed in 3.0.0.
Use :attr:`results.bins` instead.
results.edges : numpy.ndarray
:class:`numpy.ndarray` of the `nbins + 1` edges of the histogram
bins.
.. versionadded:: 2.0.0
edges : numpy.ndarray
Alias to the :attr:`results.edges` attribute.
.. deprecated:: 2.0.0
This attribute will be removed in 3.0.0.
Use :attr:`results.edges` instead.
results.rdf : numpy.ndarray
:class:`numpy.ndarray` of the :ref:`radial distribution
function<equation-gab>` values for the :attr:`results.bins`.
.. versionadded:: 2.0.0
rdf : numpy.ndarray
Alias to the :attr:`results.rdf` attribute.
.. deprecated:: 2.0.0
This attribute will be removed in 3.0.0.
Use :attr:`results.rdf` instead.
results.count : numpy.ndarray
:class:`numpy.ndarray` representing the radial histogram, i.e.,
the raw counts, for all :attr:`results.bins`.
.. versionadded:: 2.0.0
count : numpy.ndarray
Alias to the :attr:`results.count` attribute.
.. deprecated:: 2.0.0
This attribute will be removed in 3.0.0.
Use :attr:`results.count` instead.
Example
-------
First create the :class:`InterRDF` object, by supplying two
AtomGroups then use the :meth:`run` method ::
rdf = InterRDF(ag1, ag2)
rdf.run()
Results are available through the :attr:`results.bins` and
:attr:`results.rdf` attributes::
plt.plot(rdf.results.bins, rdf.results.rdf)
The `exclusion_block` keyword allows the masking of pairs from
within the same molecule. For example, if there are 7 of each
atom in each molecule, the exclusion mask ``(7, 7)`` can be used.
.. versionadded:: 0.13.0
.. versionchanged:: 1.0.0
Support for the `start`, `stop`, and `step` keywords has been
removed. These should instead be passed to :meth:`InterRDF.run`.
.. versionchanged:: 2.0.0
Store results as attributes `bins`, `edges`, `rdf` and `count`
of the `results` attribute of
:class:`~MDAnalysis.analysis.AnalysisBase`.
"""
def __init__(
self,
g1,
g2,
nbins=75,
range=(0.0, 15.0),
norm="rdf",
exclusion_block=None,
exclude_same=None,
**kwargs,
):
super(InterRDF, self).__init__(g1.universe.trajectory, **kwargs)
self.g1 = g1
self.g2 = g2
self.norm = str(norm).lower()
self.rdf_settings = {"bins": nbins, "range": range}
self._exclusion_block = exclusion_block
if exclude_same is not None and exclude_same not in [
"residue",
"segment",
"chain",
]:
raise ValueError(
"The exclude_same argument to InterRDF must be None, 'residue', 'segment' "
"or 'chain'."
)
if exclude_same is not None and exclusion_block is not None:
raise ValueError(
"The exclude_same argument to InterRDF cannot be used with exclusion_block."
)
name_to_attr = {
"residue": "resindices",
"segment": "segindices",
"chain": "chainIDs",
}
self.exclude_same = name_to_attr.get(exclude_same)
if self.norm not in ["rdf", "density", "none"]:
raise ValueError(
f"'{self.norm}' is an invalid norm. "
"Use 'rdf', 'density' or 'none'."
)
def _prepare(self):
# Empty histogram to store the RDF
count, edges = np.histogram([-1], **self.rdf_settings)
count = count.astype(np.float64)
count *= 0.0
self.results.count = count
self.results.edges = edges
self.results.bins = 0.5 * (edges[:-1] + edges[1:])
if self.norm == "rdf":
# Cumulative volume for rdf normalization
self.volume_cum = 0
# Set the max range to filter the search radius
self._maxrange = self.rdf_settings["range"][1]
def _single_frame(self):
pairs, dist = distances.capped_distance(
self.g1.positions,
self.g2.positions,
self._maxrange,
box=self._ts.dimensions,
)
# Maybe exclude same molecule distances
if self._exclusion_block is not None:
idxA = pairs[:, 0] // self._exclusion_block[0]
idxB = pairs[:, 1] // self._exclusion_block[1]
mask = np.where(idxA != idxB)[0]
dist = dist[mask]
if self.exclude_same is not None:
# Ignore distances between atoms in the same attribute
attr_ix_a = getattr(self.g1, self.exclude_same)[pairs[:, 0]]
attr_ix_b = getattr(self.g2, self.exclude_same)[pairs[:, 1]]
mask = np.where(attr_ix_a != attr_ix_b)[0]
dist = dist[mask]
count, _ = np.histogram(dist, **self.rdf_settings)
self.results.count += count
if self.norm == "rdf":
self.volume_cum += self._ts.volume
def _conclude(self):
norm = self.n_frames
if self.norm in ["rdf", "density"]:
# Volume in each radial shell
vols = np.power(self.results.edges, 3)
norm *= 4 / 3 * np.pi * np.diff(vols)
if self.norm == "rdf":
# Number of each selection
nA = len(self.g1)
nB = len(self.g2)
N = nA * nB
# If we had exclusions, take these into account
if self._exclusion_block:
xA, xB = self._exclusion_block
nblocks = nA / xA
N -= xA * xB * nblocks
# Average number density
box_vol = self.volume_cum / self.n_frames
norm *= N / box_vol
self.results.rdf = self.results.count / norm
@property
def edges(self):
wmsg = (
"The `edges` attribute was deprecated in MDAnalysis 2.0.0 "
"and will be removed in MDAnalysis 3.0.0. Please use "
"`results.bins` instead"
)
warnings.warn(wmsg, DeprecationWarning)
return self.results.edges
@property
def count(self):
wmsg = (
"The `count` attribute was deprecated in MDAnalysis 2.0.0 "
"and will be removed in MDAnalysis 3.0.0. Please use "
"`results.bins` instead"
)
warnings.warn(wmsg, DeprecationWarning)
return self.results.count
@property
def bins(self):
wmsg = (
"The `bins` attribute was deprecated in MDAnalysis 2.0.0 "
"and will be removed in MDAnalysis 3.0.0. Please use "
"`results.bins` instead"
)
warnings.warn(wmsg, DeprecationWarning)
return self.results.bins
@property
def rdf(self):
wmsg = (
"The `rdf` attribute was deprecated in MDAnalysis 2.0.0 "
"and will be removed in MDAnalysis 3.0.0. Please use "
"`results.rdf` instead"
)
warnings.warn(wmsg, DeprecationWarning)
return self.results.rdf
[docs]
class InterRDF_s(AnalysisBase):
r"""Site-specific radial distribution function
Calculates site-specific radial distribution
functions. Instead of two groups of atoms it takes as input a list of
pairs of AtomGroup, ``[[A, B], [C, D], ...]``. Given the same ``A`` and
``B`` to
:class:`InterRDF_s`, the output will be a list of individual RDFs between
``A1`` and ``B1``, ``A1`` and ``B2``, ``A2`` and ``B1``, ``A2`` and ``B2``
(and
similarly for ``C`` and ``D``). These site-specific radial distribution
functions are typically calculated if one is interested in the solvation
shells of a ligand in a binding site or the solvation of specific residues
in a protein.
Parameters
----------
u : Universe
a Universe that contains atoms in `ags`
.. deprecated:: 2.3.0
This parameter is superflous and will be removed in
MDAnalysis 3.0.0.
ags : list
a list of pairs of :class:`~MDAnalysis.core.groups.AtomGroup`
instances
nbins : int
Number of bins in the histogram
range : tuple or list
The size of the RDF
norm : str, {'rdf', 'density', 'none'}
For 'rdf' calculate :math:`g_{ab}(r)`. For
'density' the :ref:`single particle density<equation-nab>`
:math:`n_{ab}(r)` is computed. 'none' computes the number of
particles occurences in each spherical shell.
.. versionadded:: 2.3.0
density : bool
`False`: calculate :math:`g_{ab}(r)`; `True`: calculate
the true :ref:`single particle density<equation-nab>`
:math:`n_{ab}(r)`. `density` overwrites the `norm` parameter.
.. versionadded:: 1.0.1
This keyword was available since 0.19.0 but was not
documented. Furthermore, it had the opposite
meaning. Since 1.0.1 it is officially supported as
documented.
.. deprecated:: 2.3.0
Instead of `density=True` use `norm='density'`
Attributes
----------
results.bins : numpy.ndarray
:class:`numpy.ndarray` of the centers of the `nbins` histogram
bins; all individual site-specific RDFs have the same bins.
.. versionadded:: 2.0.0
bins : numpy.ndarray
Alias to the :attr:`results.bins` attribute.
.. deprecated:: 2.0.0
This attribute will be removed in 3.0.0.
Use :attr:`results.bins` instead.
results.edges : numpy.ndarray
array of the ``nbins + 1`` edges of the histogram
bins; all individual site-specific RDFs have the same bins.
.. versionadded:: 2.0.0
edges : numpy.ndarray
Alias to the :attr:`results.edges` attribute.
.. deprecated:: 2.0.0
This attribute will be removed in 3.0.0.
Use :attr:`results.edges` instead.
results.rdf : list
:class:`list` of the site-specific :ref:`radial distribution
functions<equation-gab>` if `norm='rdf'` or :ref:`density
functions<equation-nab>` for the :attr:`bins`
if `norm='density'`. The list contains
``len(ags)`` entries. Each entry for the ``i``-th pair `[A, B]
= ags[i]` in `ags` is a :class:`numpy.ndarray` with shape
``(len(A), len(B))``, i.e., a stack of RDFs. For example,
``results.rdf[i][0, 2]`` is the RDF between atoms ``A[0]``
and ``B[2]``.
.. versionadded:: 2.0.0
rdf : list
Alias to the :attr:`results.rdf` attribute.
.. deprecated:: 2.0.0
This attribute will be removed in 3.0.0.
Use :attr:`results.rdf` instead.
results.count : list
:class:`list` of the site-specific radial histograms, i.e., the
raw counts, for all :attr:`results.bins`. The data have the same
structure as :attr:`results.rdf` except that the arrays contain
the raw counts.
.. versionadded:: 2.0.0
count : list
Alias to the :attr:`results.count` attribute.
.. deprecated:: 2.0.0
This attribute will be removed in 3.0.0.
Use :attr:`results.count` instead.
results.cdf : list
:class:`list` of the site-specific :ref:`cumulative
counts<equation-countab>`, for all :attr:`results.bins`. The data
have the same structure as :attr:`results.rdf` except that the arrays
contain the cumulative counts.
This attribute only exists after :meth:`get_cdf` has been run.
.. versionadded:: 2.0.0
cdf : list
Alias to the :attr:`results.cdf` attribute.
.. deprecated:: 2.0.0
This attribute will be removed in 3.0.0.
Use :attr:`results.cdf` instead.
Example
-------
First create the :class:`InterRDF_s` object, by supplying one Universe and
one list of pairs of AtomGroups, then use the :meth:`~InterRDF_s.run`
method::
from MDAnalysisTests.datafiles import GRO_MEMPROT, XTC_MEMPROT
u = mda.Universe(GRO_MEMPROT, XTC_MEMPROT)
s1 = u.select_atoms('name ZND and resid 289')
s2 = u.select_atoms('(name OD1 or name OD2) and resid 51 and sphzone 5.0 (resid 289)')
s3 = u.select_atoms('name ZND and (resid 291 or resid 292)')
s4 = u.select_atoms('(name OD1 or name OD2) and sphzone 5.0 (resid 291)')
ags = [[s1, s2], [s3, s4]]
rdf = InterRDF_s(u, ags)
rdf.run()
Results are available through the :attr:`results.bins`
and :attr:`results.rdf` attributes::
plt.plot(rdf.results.bins, rdf.results.rdf[0][0, 0])
(Which plots the rdf between the first atom in ``s1`` and the first atom in
``s2``)
To generate the *cumulative distribution function* (cdf) in the sense of
"particles within radius :math:`r`", i.e., :math:`N_{ab}(r)`, use the
:meth:`~InterRDF_s.get_cdf` method ::
cdf = rdf.get_cdf()
Results are available through the :attr:`results.cdf` attribute::
plt.plot(rdf.results.bins, rdf.results.cdf[0][0, 0])
(Which plots the cdf between the first atom in ``s1`` and the first atom in
``s2``)
.. versionadded:: 0.19.0
.. versionchanged:: 1.0.0
Support for the `start`, `stop`, and `step` keywords has been
removed. These should instead be passed to :meth:`InterRDF_s.run`.
.. versionchanged:: 2.0.0
Store results as attributes `bins`, `edges`, `rdf`, `count`
and `cdf` of the `results` attribute
of :class:`~MDAnalysis.analysis.AnalysisBase`.
.. versionchanged:: 2.3.0
Introduce `norm` and `exclusion_blocks` attributes.
.. deprecated:: 2.3.0
Instead of `density=True` use `norm='density'`
.. deprecated:: 2.3.0
The `universe` parameter is superflous.
"""
def __init__(
self,
u,
ags,
nbins=75,
range=(0.0, 15.0),
norm="rdf",
density=False,
**kwargs,
):
super(InterRDF_s, self).__init__(
ags[0][0].universe.trajectory, **kwargs
)
warnings.warn(
"The `u` attribute is superflous and will be removed "
"in MDAnalysis 3.0.0.",
DeprecationWarning,
)
self.ags = ags
self.norm = str(norm).lower()
self.rdf_settings = {"bins": nbins, "range": range}
if self.norm not in ["rdf", "density", "none"]:
raise ValueError(
f"'{self.norm}' is an invalid norm. "
"Use 'rdf', 'density' or 'none'."
)
if density:
warnings.warn(
"The `density` attribute was deprecated in "
"MDAnalysis 2.3.0 and will be removed in "
"MDAnalysis 3.0.0. Please use `norm=density` "
"instead.",
DeprecationWarning,
)
self.norm = "density"
def _prepare(self):
count, edges = np.histogram([-1], **self.rdf_settings)
self.results.count = [
np.zeros((ag1.n_atoms, ag2.n_atoms, len(count)), dtype=np.float64)
for ag1, ag2 in self.ags
]
self.results.edges = edges
self.results.bins = 0.5 * (edges[:-1] + edges[1:])
if self.norm == "rdf":
# Cumulative volume for rdf normalization
self.volume_cum = 0
self._maxrange = self.rdf_settings["range"][1]
def _single_frame(self):
for i, (ag1, ag2) in enumerate(self.ags):
pairs, dist = distances.capped_distance(
ag1.positions,
ag2.positions,
self._maxrange,
box=self._ts.dimensions,
)
for j, (idx1, idx2) in enumerate(pairs):
count, _ = np.histogram(dist[j], **self.rdf_settings)
self.results.count[i][idx1, idx2, :] += count
if self.norm == "rdf":
self.volume_cum += self._ts.volume
def _conclude(self):
norm = self.n_frames
if self.norm in ["rdf", "density"]:
# Volume in each radial shell
vols = np.power(self.results.edges, 3)
norm *= 4 / 3 * np.pi * np.diff(vols)
if self.norm == "rdf":
# Average number density
norm *= 1 / (self.volume_cum / self.n_frames)
# Empty lists to restore indices, RDF
self.results.indices = []
self.results.rdf = []
for i, (ag1, ag2) in enumerate(self.ags):
# Number of each selection
self.results.indices.append([ag1.indices, ag2.indices])
self.results.rdf.append(self.results.count[i] / norm)
[docs]
def get_cdf(self):
r"""Calculate the cumulative counts for all sites.
This is the :ref:`cumulative count<equation-countab>` within a given
radius, i.e., :math:`N_{ab}(r)`.
The result is returned and also stored in the attribute
:attr:`results.cdf`.
Returns
-------
cdf : list
list of arrays with the same structure as :attr:`results.rdf`
"""
self.results.cdf = []
for count in self.results.count:
self.results.cdf.append(np.cumsum(count, axis=2) / self.n_frames)
return self.results.cdf
@property
def edges(self):
wmsg = (
"The `edges` attribute was deprecated in MDAnalysis 2.0.0 "
"and will be removed in MDAnalysis 3.0.0. Please use "
"`results.bins` instead"
)
warnings.warn(wmsg, DeprecationWarning)
return self.results.edges
@property
def count(self):
wmsg = (
"The `count` attribute was deprecated in MDAnalysis 2.0.0 "
"and will be removed in MDAnalysis 3.0.0. Please use "
"`results.bins` instead"
)
warnings.warn(wmsg, DeprecationWarning)
return self.results.count
@property
def bins(self):
wmsg = (
"The `bins` attribute was deprecated in MDAnalysis 2.0.0 "
"and will be removed in MDAnalysis 3.0.0. Please use "
"`results.bins` instead"
)
warnings.warn(wmsg, DeprecationWarning)
return self.results.bins
@property
def rdf(self):
wmsg = (
"The `rdf` attribute was deprecated in MDAnalysis 2.0.0 "
"and will be removed in MDAnalysis 3.0.0. Please use "
"`results.rdf` instead"
)
warnings.warn(wmsg, DeprecationWarning)
return self.results.rdf
@property
def cdf(self):
wmsg = (
"The `cdf` attribute was deprecated in MDAnalysis 2.0.0 "
"and will be removed in MDAnalysis 3.0.0. Please use "
"`results.cdf` instead"
)
warnings.warn(wmsg, DeprecationWarning)
return self.results.cdf