Source code for MDAnalysis.analysis.encore.clustering.ClusterCollection

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
Cluster representation --- :mod:`MDAnalysis.analysis.encore.clustering.ClusterCollection`
=========================================================================================

The module contains the Cluster and ClusterCollection classes which are
designed to store results from clustering algorithms.

:Author: Matteo Tiberti, Wouter Boomsma, Tone Bengtsen

.. versionadded:: 0.16.0

"""
import numpy as np


[docs]class Cluster(object): """ Generic Cluster class for clusters with centroids. Attributes ---------- id : int Cluster ID number. Useful for the ClustersCollection class metadata : iterable dict of lists or numpy.array, containing metadata for the cluster elements. The iterable must return the same number of elements as those that belong to the cluster. size : int number of elements. centroid : element object cluster centroid. elements : numpy.array array containing the cluster elements. """ def __init__(self, elem_list=None, centroid=None, idn=None, metadata=None): """Class constructor. If elem_list is None, an empty cluster is created and the remaining arguments ignored. Parameters ---------- elem_list : numpy.array or None numpy array of cluster elements centroid : None or element object centroid idn : int cluster ID metadata : iterable metadata, one value for each cluster element. The iterable must have the same length as the elements array. """ self.id = idn if elem_list is None: self.size = 0 self.elements = np.array([]) self.centroid = None self.metadata = {} return self.metadata = {} self.elements = elem_list if centroid not in self.elements: raise LookupError("Centroid of cluster not found in the element list") self.centroid = centroid self.size = self.elements.shape[0] if metadata: for name, data in metadata.items(): if len(data) != self.size: raise TypeError("Size of metadata having label \"{0}\"\ is not equal to the number of cluster elmements".format(name)) self.add_metadata(name, data) def __iter__(self): """ Iterate over elements in cluster """ return iter(self.elements) def __len__(self): """ Size of cluster """ return len(self.elements) def add_metadata(self, name, data): if len(data) != self.size: raise TypeError("Size of metadata is not equal to the number of\ cluster elmements") self.metadata[name] = np.array(data) def __repr__(self): """ Textual representation """ if self.size == 0: return "<Cluster with no elements>" else: return "<Cluster with {0} elements, centroid={1}, id={2}>".format( self.size, self.centroid, self.id)
[docs]class ClusterCollection(object): """Clusters collection class; this class represents the results of a full clustering run. It stores a group of clusters defined as encore.clustering.Cluster objects. Attributes ---------- clusters : list list of of Cluster objects which are part of the Cluster collection """ def __init__(self, elements=None, metadata=None): """Class constructor. If elements is None, an empty cluster collection will be created. Otherwise, the constructor takes as input an iterable of ints, for instance: [ a, a, a, a, b, b, b, c, c, ... , z, z ] the variables a,b,c,...,z are cluster centroids, here as cluster element numbers (i.e. 3 means the 4th element of the ordered input for clustering). The array maps a correspondence between cluster elements (which are implicitly associated with the position in the array) with centroids, i. e. defines clusters. For instance: [ 1, 1, 1, 4, 4, 5 ] means that elements 0, 1, 2 form a cluster which has 1 as centroid, elements 3 and 4 form a cluster which has 4 as centroid, and element 5 has its own cluster. Parameters ---------- elements : iterable of ints or None clustering results. See the previous description for details metadata : {str:list, str:list,...} or None metadata for the data elements. The list must be of the same size as the elements array, with one value per element. """ idn = 0 if elements is None: self.clusters = None return if not len(set((type(el) for el in elements))) == 1: raise TypeError("all the elements must have the same type") self.clusters = [] elements_array = np.array(elements) centroids = np.unique(elements_array) for i in centroids: if elements[i] != i: raise ValueError("element {0}, which is a centroid, doesn't \ belong to its own cluster".format(elements[i])) for c in centroids: this_metadata = {} this_array = np.where(elements_array == c) if metadata: for k, v in metadata.items(): this_metadata[k] = np.asarray(v)[this_array] self.clusters.append( Cluster(elem_list=this_array[0], idn=idn, centroid=c, metadata=this_metadata)) idn += 1
[docs] def get_ids(self): """ Get the ID numbers of the clusters Returns ------- ids : list of int list of cluster ids """ return [v.id for v in self.clusters]
[docs] def get_centroids(self): """ Get the centroids of the clusters Returns ------- centroids : list of cluster element objects list of cluster centroids """ return [v.centroid for v in self.clusters]
def __iter__(self): """ Iterate over clusters """ return iter(self.clusters) def __len__(self): """ Length of clustering collection """ return len(self.clusters) def __repr__(self): """ Textual representation """ if self.clusters is None: return "<ClusterCollection with no clusters>" else: return "<ClusterCollection with {0} clusters>".format( len(self.clusters))