AbstractClustering.h
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief Super class for clustering definitions.
6 * \file
7 *
8 *
9 * \author T. Glasmachers
10 * \date 2011
11 *
12 *
13 * \par Copyright 1995-2017 Shark Development Team
14 *
15 * <BR><HR>
16 * This file is part of Shark.
17 * <https://shark-ml.github.io/Shark/>
18 *
19 * Shark is free software: you can redistribute it and/or modify
20 * it under the terms of the GNU Lesser General Public License as published
21 * by the Free Software Foundation, either version 3 of the License, or
22 * (at your option) any later version.
23 *
24 * Shark is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU Lesser General Public License for more details.
28 *
29 * You should have received a copy of the GNU Lesser General Public License
30 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31 *
32 */
33//===========================================================================
34
35#ifndef SHARK_MODELS_CLUSTERING_ABSTRACTCLUSTERING_H
36#define SHARK_MODELS_CLUSTERING_ABSTRACTCLUSTERING_H
37
38
40#include <shark/Core/Flags.h>
44#include <shark/Core/Shape.h>
45
46namespace shark {
47
48///\defgroup clustering Clustering Algorithms
49///
50/// A variety of models and algorithms for clustering
51/// \ingroup models
52
53///
54/// \brief Base class for clustering.
55///
56/// \par
57/// Clustering algorithms vary widely in the data structures
58/// on which they operate. For example, simple centroid-based
59/// approaches such as k-means are mutually incompatible with
60/// tree-based hierarchical clustering. This interface class
61/// is the attempt to cast different cluster descriptions into
62/// a minimal common interface that is useful for prediction.
63///
64/// \par
65/// There are at least two common types of predictions made
66/// with clusterings. The first one is the assignment of the
67/// best matching cluster to a patters, such as in vector
68/// quantization or unsupervised clustering. This is often
69/// referred to as "hard clustering". The second one is the
70/// computation of a membership function ("soft clustering").
71/// The membership of a pattern to a cluster is non-negative,
72/// and the total membership should add to one.
73///
74/// \par
75/// This interface makes minimal assumptions to allow for
76/// these types of predictions. It assumes that clusters can
77/// be enumbered (identified by an index), and that at least
78/// a membership test can be made (hard clustering). It is
79/// optional to provide a membership function. Only one of
80/// the two interfaces for best matching cluster or membership
81/// function need to be implemented, since the best matching
82/// cluster can be deduced from the membership function.
83/// However, often the best matching cluster can be computed
84/// more efficiently than the membership function. In these
85/// cases both interface functions should be implemented.
86///
87/// \par
88/// The purpose of this interface is to act as a common
89/// super class to different data structures describing the
90/// outcome of a clustering operation. The interface allows
91/// all of these data structures to be used in the two
92/// clustering model classes: HardClusteringModel and
93/// SoftClusteringModel.
94///
95/// \ingroup clustering
96template <class InputT>
98{
99public:
100 typedef InputT InputType;
101 typedef unsigned int OutputType;
104
109
110 /// Tests whether the clustering can compute a (soft)
111 /// member ship function, describing the membership
112 /// of a sample to the different clusters.
116
117 /// return the number of clusters
118 virtual std::size_t numberOfClusters() const = 0;
119
120 virtual Shape inputShape() const = 0;
121
122 /// \brief Compute best matching cluster.
123 ///
124 /// \par
125 /// This function should be overriden by sub-classes to
126 /// compute the cluster best matching the input pattern.
127 /// The (typically slow) default implementation is to
128 /// create a batch of size 1 and return the result of the batch call to hardMembership
129 virtual unsigned int hardMembership(InputType const& pattern) const{
131 getBatchElement(b,0) = pattern;
132 return hardMembership(b)(0);
133 }
134
135 /// \brief Compute best matching cluster for a batch of inputs.
136 ///
137 /// \par
138 /// This function should be overriden by sub-classes to
139 /// compute the cluster best matching the input pattern.
140 /// The (typically slow) default implementation is to
141 /// return the arg max of the soft membership function for every pattern.
142 virtual BatchOutputType hardMembership(BatchInputType const& patterns) const{
143 std::size_t numPatterns = batchSize(patterns);
144 RealMatrix f = softMembership(patterns);
145 SHARK_ASSERT(f.size2() > 0);
146 SHARK_ASSERT(f.size1() == numPatterns);
147 BatchOutputType outputs(numPatterns);
148 for(std::size_t i = 0; i != numPatterns;++i){
149 auto membership = row(f,i);
150 outputs(i) = (unsigned int)(std::max_element(membership.begin(),membership.end())-membership.begin());
151 }
152 return outputs;
153 }
154
155 /// \brief Compute cluster membership function.
156 ///
157 /// \par
158 /// This function should be overriden by sub-classes to
159 /// compute the membership of a pattern to the clusters.
160 /// The default implementation creates a batch of size 1
161 /// and calls the batched version. If this is not overriden, an xception is thrown.
162 virtual RealVector softMembership(InputType const& pattern) const{
163 auto output = softMembership(Batch<InputType>::createBatch(pattern));
164 return row(output,0);
165 }
166
167 /// \brief Compute cluster membership function.
168 ///
169 /// \par
170 /// This function should be overriden by sub-classes to
171 /// compute the membership of a pattern to the clusters.
172 /// This default implementation throws an exception.
173 virtual RealMatrix softMembership(BatchInputType const& patterns) const{
175 }
176
177 /// empty default implementation of ISerializable::read
178 void read(InArchive& archive) {}
179
180 /// empty default implementation of ISerializable::write
181 void write(OutArchive& archive) const {}
182};
183
184
185}
186#endif