Source code for MDAnalysis.analysis.encore.covariance
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
Covariance calculation --- :mod:`encore.covariance`
=====================================================================
The module contains functions to estimate the covariance matrix of
an ensemble of structures.
:Author: Matteo Tiberti, Wouter Boomsma, Tone Bengtsen
.. versionadded:: 0.16.0
.. deprecated:: 2.8.0
This module is deprecated in favour of the
MDAKit `mdaencore <https://mdanalysis.org/mdaencore/>`_ and will be removed
in MDAnalysis 3.0.0.
"""
import numpy as np
[docs]
def ml_covariance_estimator(coordinates, reference_coordinates=None):
"""
Standard maximum likelihood estimator of the covariance matrix.
Parameters
----------
coordinates : numpy.array
Flattened array of coordiantes
reference_coordinates : numpy.array
Optional reference to use instead of mean
Returns
-------
cov_mat : numpy.array
Estimate of covariance matrix
"""
if reference_coordinates is not None:
# Offset from reference
coordinates_offset = coordinates - reference_coordinates
else:
# Normal covariance calculation: distance to the average
coordinates_offset = coordinates - np.average(coordinates, axis=0)
# Calculate covariance manually
coordinates_cov = np.zeros((coordinates.shape[1], coordinates.shape[1]))
for frame in coordinates_offset:
coordinates_cov += np.outer(frame, frame)
coordinates_cov /= coordinates.shape[0]
return coordinates_cov
[docs]
def shrinkage_covariance_estimator(
coordinates, reference_coordinates=None, shrinkage_parameter=None
):
"""
Shrinkage estimator of the covariance matrix using the method described in
Improved Estimation of the Covariance Matrix of Stock Returns With an
Application to Portfolio Selection. Ledoit, O.; Wolf, M., Journal of
Empirical Finance, 10, 5, 2003
This implementation is based on the matlab code made available by Olivier
Ledoit on his website:
http://www.ledoit.net/ole2_abstract.htm
Parameters
----------
coordinates : numpy.array
Flattened array of coordinates
reference_coordinates: numpy.array
Optional reference to use instead of mean
shrinkage_parameter: None or float
Optional shrinkage parameter
Returns
--------
cov_mat : nump.array
Covariance matrix
"""
x = coordinates
t = x.shape[0]
n = x.shape[1]
mean_x = np.average(x, axis=0)
# Use provided coordinates as "mean" if provided
if reference_coordinates is not None:
mean_x = reference_coordinates
x = x - mean_x
xmkt = np.average(x, axis=1)
# Call maximum likelihood estimator (note the additional column)
sample = (
ml_covariance_estimator(np.hstack([x, xmkt[:, np.newaxis]]), 0)
* (t - 1)
/ float(t)
)
# Split covariance matrix into components
covmkt = sample[0:n, n]
varmkt = sample[n, n]
sample = sample[:n, :n]
# Prior
prior = np.outer(covmkt, covmkt) / varmkt
prior[np.ma.make_mask(np.eye(n))] = np.diag(sample)
# If shrinkage parameter is not set, estimate it
if shrinkage_parameter is None:
# Frobenius norm
c = np.linalg.norm(sample - prior, ord="fro") ** 2
y = x**2
p = 1 / float(t) * np.sum(np.dot(np.transpose(y), y)) - np.sum(
np.sum(sample**2)
)
rdiag = 1 / float(t) * np.sum(np.sum(y**2)) - np.sum(
np.diag(sample) ** 2
)
z = x * np.repeat(xmkt[:, np.newaxis], n, axis=1)
v1 = (
1 / float(t) * np.dot(np.transpose(y), z)
- np.repeat(covmkt[:, np.newaxis], n, axis=1) * sample
)
roff1 = (
np.sum(
v1 * np.transpose(np.repeat(covmkt[:, np.newaxis], n, axis=1))
)
/ varmkt
- np.sum(np.diag(v1) * covmkt) / varmkt
)
v3 = 1 / float(t) * np.dot(np.transpose(z), z) - varmkt * sample
roff3 = (
np.sum(v3 * np.outer(covmkt, covmkt)) / varmkt**2
- np.sum(np.diag(v3) * covmkt**2) / varmkt**2
)
roff = 2 * roff1 - roff3
r = rdiag + roff
# Shrinkage constant
k = (p - r) / c
shrinkage_parameter = max(0, min(1, k / float(t)))
# calculate covariance matrix
sigma = shrinkage_parameter * prior + (1 - shrinkage_parameter) * sample
return sigma
[docs]
def covariance_matrix(
ensemble,
select="name CA",
estimator=shrinkage_covariance_estimator,
weights="mass",
reference=None,
):
"""
Calculates (optionally mass weighted) covariance matrix
Parameters
----------
ensemble : Universe object
The structural ensemble
select : str (optional)
Atom selection string in the MDAnalysis format.
estimator : function (optional)
Function that estimates the covariance matrix. It requires at least
a "coordinates" numpy array (of shape (N,M,3), where N is the number
of frames and M the number of atoms). See ml_covariance_estimator and
shrinkage_covariance_estimator for reference.
weights : str/array_like (optional)
specify weights. If ``'mass'`` then chose masses of ensemble atoms, if ``None`` chose uniform weights
reference : MDAnalysis.Universe object (optional)
Use the distances to a specific reference structure rather than the
distance to the mean.
Returns
-------
cov_mat : numpy.array
Covariance matrix
"""
# Extract coordinates from ensemble
coordinates = ensemble.trajectory.timeseries(
ensemble.select_atoms(select), order="fac"
)
# Flatten coordinate matrix into n_frame x n_coordinates
coordinates = np.reshape(coordinates, (coordinates.shape[0], -1))
# Extract coordinates from reference structure, if specified
reference_coordinates = None
if reference is not None:
# Select the same atoms in reference structure
reference_atom_selection = reference.select_atoms(select)
reference_coordinates = reference_atom_selection.atoms.positions
# Flatten reference coordinates
reference_coordinates = reference_coordinates.flatten()
sigma = estimator(coordinates, reference_coordinates)
# Optionally correct with weights
if weights is not None:
# Calculate mass-weighted covariance matrix
if (
not isinstance(weights, (list, tuple, np.ndarray))
and weights == "mass"
):
if select:
weights = ensemble.select_atoms(select).masses
else:
weights = ensemble.atoms.masses
else:
if select:
req_len = ensemble.select_atoms(select).n_atoms
else:
req_len = ensemble.atoms.n_atoms
if req_len != len(weights):
raise ValueError(
"number of weights is unequal to number of "
"atoms in ensemble"
)
# broadcast to a (len(weights), 3) array
weights = np.repeat(weights, 3)
weight_matrix = np.sqrt(np.identity(len(weights)) * weights)
sigma = np.dot(weight_matrix, np.dot(sigma, weight_matrix))
return sigma