Source code for MDAnalysis.analysis.encore.covariance

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
Covariance calculation --- :mod:`encore.covariance`
=====================================================================

The module contains functions to estimate the covariance matrix of
an ensemble of structures.

:Author: Matteo Tiberti, Wouter Boomsma, Tone Bengtsen

.. versionadded:: 0.16.0
"""
import numpy as np

[docs]def ml_covariance_estimator(coordinates, reference_coordinates=None): """ Standard maximum likelihood estimator of the covariance matrix. Parameters ---------- coordinates : numpy.array Flattened array of coordiantes reference_coordinates : numpy.array Optional reference to use instead of mean Returns ------- cov_mat : numpy.array Estimate of covariance matrix """ if reference_coordinates is not None: # Offset from reference coordinates_offset = coordinates - reference_coordinates else: # Normal covariance calculation: distance to the average coordinates_offset = coordinates - np.average(coordinates, axis=0) # Calculate covariance manually coordinates_cov = np.zeros((coordinates.shape[1], coordinates.shape[1])) for frame in coordinates_offset: coordinates_cov += np.outer(frame, frame) coordinates_cov /= coordinates.shape[0] return coordinates_cov
[docs]def shrinkage_covariance_estimator( coordinates, reference_coordinates=None, shrinkage_parameter=None): """ Shrinkage estimator of the covariance matrix using the method described in Improved Estimation of the Covariance Matrix of Stock Returns With an Application to Portfolio Selection. Ledoit, O.; Wolf, M., Journal of Empirical Finance, 10, 5, 2003 This implementation is based on the matlab code made available by Olivier Ledoit on his website: http://www.ledoit.net/ole2_abstract.htm Parameters ---------- coordinates : numpy.array Flattened array of coordinates reference_coordinates: numpy.array Optional reference to use instead of mean shrinkage_parameter: None or float Optional shrinkage parameter Returns -------- cov_mat : nump.array Covariance matrix """ x = coordinates t = x.shape[0] n = x.shape[1] mean_x = np.average(x, axis=0) # Use provided coordinates as "mean" if provided if reference_coordinates is not None: mean_x = reference_coordinates x = x - mean_x xmkt = np.average(x, axis=1) # Call maximum likelihood estimator (note the additional column) sample = ml_covariance_estimator(np.hstack([x, xmkt[:, np.newaxis]]), 0)\ * (t-1)/float(t) # Split covariance matrix into components covmkt = sample[0:n, n] varmkt = sample[n, n] sample = sample[:n, :n] # Prior prior = np.outer(covmkt, covmkt)/varmkt prior[np.ma.make_mask(np.eye(n))] = np.diag(sample) # If shrinkage parameter is not set, estimate it if shrinkage_parameter is None: # Frobenius norm c = np.linalg.norm(sample - prior, ord='fro')**2 y = x**2 p = 1/float(t)*np.sum(np.dot(np.transpose(y), y))\ - np.sum(np.sum(sample**2)) rdiag = 1/float(t)*np.sum(np.sum(y**2))\ - np.sum(np.diag(sample)**2) z = x * np.repeat(xmkt[:, np.newaxis], n, axis=1) v1 = 1/float(t) * np.dot(np.transpose(y), z) \ - np.repeat(covmkt[:, np.newaxis], n, axis=1)*sample roff1 = (np.sum( v1*np.transpose( np.repeat( covmkt[:, np.newaxis], n, axis=1) ) )/varmkt - np.sum(np.diag(v1)*covmkt)/varmkt) v3 = 1/float(t)*np.dot(np.transpose(z), z) - varmkt*sample roff3 = (np.sum(v3*np.outer(covmkt, covmkt))/varmkt**2 - np.sum(np.diag(v3)*covmkt**2)/varmkt**2) roff = 2*roff1-roff3 r = rdiag+roff # Shrinkage constant k = (p-r)/c shrinkage_parameter = max(0, min(1, k/float(t))) # calculate covariance matrix sigma = shrinkage_parameter*prior+(1-shrinkage_parameter)*sample return sigma
[docs]def covariance_matrix(ensemble, select="name CA", estimator=shrinkage_covariance_estimator, weights='mass', reference=None): """ Calculates (optionally mass weighted) covariance matrix Parameters ---------- ensemble : Universe object The structural ensemble select : str (optional) Atom selection string in the MDAnalysis format. estimator : function (optional) Function that estimates the covariance matrix. It requires at least a "coordinates" numpy array (of shape (N,M,3), where N is the number of frames and M the number of atoms). See ml_covariance_estimator and shrinkage_covariance_estimator for reference. weights : str/array_like (optional) specify weights. If ``'mass'`` then chose masses of ensemble atoms, if ``None`` chose uniform weights reference : MDAnalysis.Universe object (optional) Use the distances to a specific reference structure rather than the distance to the mean. Returns ------- cov_mat : numpy.array Covariance matrix """ # Extract coordinates from ensemble coordinates = ensemble.trajectory.timeseries( ensemble.select_atoms(select), order='fac') # Flatten coordinate matrix into n_frame x n_coordinates coordinates = np.reshape(coordinates, (coordinates.shape[0], -1)) # Extract coordinates from reference structure, if specified reference_coordinates = None if reference is not None: # Select the same atoms in reference structure reference_atom_selection = reference.select_atoms(select) reference_coordinates = reference_atom_selection.atoms.positions # Flatten reference coordinates reference_coordinates = reference_coordinates.flatten() sigma = estimator(coordinates, reference_coordinates) # Optionally correct with weights if weights is not None: # Calculate mass-weighted covariance matrix if not isinstance(weights, (list, tuple, np.ndarray)) and weights == 'mass': if select: weights = ensemble.select_atoms(select).masses else: weights = ensemble.atoms.masses else: if select: req_len = ensemble.select_atoms(select).n_atoms else: req_len = ensemble.atoms.n_atoms if req_len != len(weights): raise ValueError("number of weights is unequal to number of " "atoms in ensemble") # broadcast to a (len(weights), 3) array weights = np.repeat(weights, 3) weight_matrix = np.sqrt(np.identity(len(weights))*weights) sigma = np.dot(weight_matrix, np.dot(sigma, weight_matrix)) return sigma