# Source code for MDAnalysis.analysis.encore.clustering.ClusterCollection

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- https://www.mdanalysis.org
# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
# (see the file AUTHORS for the full list of names)
#
# Released under the Lesser GNU Public Licence, v2.1 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
# doi: 10.25080/majora-629e541a-00e
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""
Cluster representation --- :mod:`MDAnalysis.analysis.encore.clustering.ClusterCollection`
=========================================================================================

The module contains the Cluster and ClusterCollection classes which are
designed to store results from clustering algorithms.

:Author: Matteo Tiberti, Wouter Boomsma, Tone Bengtsen

.. versionadded:: 0.16.0

.. deprecated:: 2.8.0
   This module is deprecated in favour of the 
   MDAKit `mdaencore <https://mdanalysis.org/mdaencore/>`_ and will be removed
   in MDAnalysis 3.0.0.

"""
import numpy as np



# [docs]
class Cluster(object):
    """
    Generic Cluster class for clusters with centroids.

    Attributes
    ----------

    id : int
        Cluster ID number. Useful for the ClusterCollection class

    metadata : dict
        maps each metadata label to a numpy.array holding one value per
        cluster element.

    size : int
        number of elements.

    centroid : element object
        cluster centroid.

    elements : numpy.array
        array containing the cluster elements.
    """

    def __init__(self, elem_list=None, centroid=None, idn=None, metadata=None):
        """Class constructor. If elem_list is None, an empty cluster is created
            and the remaining arguments (except ``idn``) are ignored.

        Parameters
        ----------

        elem_list : numpy.array or None
            numpy array of cluster elements

        centroid : None or element object
            centroid; must be one of the values in ``elem_list``

        idn : int
            cluster ID

        metadata : dict or None
            metadata, given as ``{label: values}``. Each ``values`` iterable
            must have the same length as the elements array.

        Raises
        ------

        LookupError
            if ``centroid`` is not contained in ``elem_list``

        TypeError
            if a metadata entry does not hold exactly one value per element

        """

        self.id = idn
        self.metadata = {}

        if elem_list is None:
            # Empty cluster: centroid and metadata arguments are ignored.
            self.size = 0
            self.elements = np.array([])
            self.centroid = None
            return

        self.elements = elem_list
        if centroid not in self.elements:
            raise LookupError(
                "Centroid of cluster not found in the element list"
            )

        self.centroid = centroid
        self.size = self.elements.shape[0]
        if metadata:
            for name, data in metadata.items():
                # Checked here (as well as in add_metadata) so that the error
                # message can name the offending label.
                if len(data) != self.size:
                    raise TypeError(
                        'Size of metadata having label "{0}" '
                        "is not equal to the number of cluster "
                        "elements".format(name)
                    )
                # Must run once per label; storing only after the loop would
                # keep just the last entry.
                self.add_metadata(name, data)

    def __iter__(self):
        """
        Iterate over elements in cluster
        """
        return iter(self.elements)

    def __len__(self):
        """
        Size of cluster
        """
        return len(self.elements)

    def add_metadata(self, name, data):
        """
        Attach one metadata entry to the cluster.

        Parameters
        ----------

        name : hashable
            label under which the values are stored in ``self.metadata``;
            an existing entry with the same label is replaced

        data : sized iterable
            one value per cluster element; stored as a numpy.array

        Raises
        ------

        TypeError
            if ``len(data)`` differs from the number of cluster elements
        """
        if len(data) != self.size:
            raise TypeError(
                "Size of metadata is not equal to the number of "
                "cluster elements"
            )
        self.metadata[name] = np.array(data)

    def __repr__(self):
        """
        Textual representation
        """
        if self.size == 0:
            return "<Cluster with no elements>"
        else:
            return "<Cluster with {0} elements, centroid={1}, id={2}>".format(
                self.size, self.centroid, self.id
            )




# [docs]
class ClusterCollection(object):
    """Clusters collection class; this class represents the results of a full
    clustering run. It stores a group of clusters defined as
    encore.clustering.Cluster objects.

    Attributes
    ----------

    clusters : list or None
        list of Cluster objects which are part of the Cluster collection;
        None when the collection was created without elements

    """

    def __init__(self, elements=None, metadata=None):
        """Class constructor. If elements is None, an empty cluster collection
        will be created. Otherwise, the constructor takes as input an
        iterable of ints, for instance:

        [ a, a, a, a, b, b, b, c, c, ... , z, z ]

        the variables a,b,c,...,z are cluster centroids, here as cluster
        element numbers (i.e. 3 means the 4th element of the ordered input
        for clustering). The array maps a correspondence between
        cluster elements (which are implicitly associated with the
        position in the array) with centroids, i. e. defines clusters.
        For instance:

        [ 1, 1, 1, 4, 4, 5 ]

        means that elements 0, 1, 2 form a cluster which has 1 as centroid,
        elements 3 and 4 form a cluster which has 4 as centroid, and
        element 5 has its own cluster.


        Parameters
        ----------

        elements : iterable of ints or None
            clustering results. See the previous description for details

        metadata : {str:list, str:list,...} or None
            metadata for the data elements. The list must be of the same
            size as the elements array, with one value per element.

        Raises
        ------

        TypeError
            if the elements are not all of the same type

        ValueError
            if a centroid is not assigned to its own cluster

        """
        if elements is None:
            self.clusters = None
            return

        if not len(set((type(el) for el in elements))) == 1:
            raise TypeError("all the elements must have the same type")
        self.clusters = []
        elements_array = np.array(elements)
        centroids = np.unique(elements_array)
        for i in centroids:
            # A centroid is itself an element, so the entry at its own
            # position must point back to it.
            if elements[i] != i:
                raise ValueError(
                    "element {0}, which is a centroid, doesn't "
                    "belong to its own cluster".format(i)
                )

        # Convert each metadata column once rather than once per centroid.
        if metadata:
            metadata_arrays = {k: np.asarray(v) for k, v in metadata.items()}
        else:
            metadata_arrays = {}

        # Cluster ids follow the sorted order of the centroids (np.unique).
        for idn, c in enumerate(centroids):
            members = np.where(elements_array == c)[0]
            this_metadata = {
                k: column[members] for k, column in metadata_arrays.items()
            }
            self.clusters.append(
                Cluster(
                    elem_list=members,
                    idn=idn,
                    centroid=c,
                    metadata=this_metadata,
                )
            )

    def get_ids(self):
        """
        Get the ID numbers of the clusters

        Returns
        -------

        ids : list of int
        list of cluster ids (empty for an empty collection)
        """
        return [v.id for v in self]

    def get_centroids(self):
        """
        Get the centroids of the clusters

        Returns
        -------

        centroids : list of cluster element objects
        list of cluster centroids (empty for an empty collection)
        """

        return [v.centroid for v in self]

    def __iter__(self):
        """
        Iterate over clusters

        """
        # ``clusters`` is None for an empty collection; iterate over nothing
        # instead of raising TypeError.
        return iter(self.clusters or ())

    def __len__(self):
        """
        Length of clustering collection
        """
        if self.clusters is None:
            return 0
        return len(self.clusters)

    def __repr__(self):
        """
        Textual representation
        """
        if self.clusters is None:
            return "<ClusterCollection with no clusters>"
        else:
            return "<ClusterCollection with {0} clusters>".format(
                len(self.clusters)
            )