Shark machine learning library
Installation
Tutorials
Benchmarks
Documentation
Quick references
Class list
Global functions
include
shark
Models
Clustering
AbstractClustering.h
Go to the documentation of this file.
1
//===========================================================================
2
/*!
3
*
4
*
5
* \brief Super class for clustering definitions.
6
* \file
7
*
8
*
9
* \author T. Glasmachers
10
* \date 2011
11
*
12
*
13
* \par Copyright 1995-2017 Shark Development Team
14
*
15
* <BR><HR>
16
* This file is part of Shark.
17
* <https://shark-ml.github.io/Shark/>
18
*
19
* Shark is free software: you can redistribute it and/or modify
20
* it under the terms of the GNU Lesser General Public License as published
21
* by the Free Software Foundation, either version 3 of the License, or
22
* (at your option) any later version.
23
*
24
* Shark is distributed in the hope that it will be useful,
25
* but WITHOUT ANY WARRANTY; without even the implied warranty of
26
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27
* GNU Lesser General Public License for more details.
28
*
29
* You should have received a copy of the GNU Lesser General Public License
30
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
31
*
32
*/
33
//===========================================================================
34
35
#ifndef SHARK_MODELS_CLUSTERING_ABSTRACTCLUSTERING_H
36
#define SHARK_MODELS_CLUSTERING_ABSTRACTCLUSTERING_H
37
38
39
#include <
shark/Data/BatchInterface.h
>
40
#include <
shark/Core/Flags.h
>
41
#include <
shark/Core/INameable.h
>
42
#include <
shark/Core/IParameterizable.h
>
43
#include <
shark/Core/ISerializable.h
>
44
#include <
shark/Core/Shape.h
>
45
46
namespace
shark
{
47
48
///\defgroup clustering Clustering Algorithms
49
///
50
/// A variety of models and algorithms for clustering
51
/// \ingroup models
52
53
///
54
/// \brief Base class for clustering.
55
///
56
/// \par
57
/// Clustering algorithms vary widely in the data structures
58
/// on which they operate. For example, simple centroid-based
59
/// approaches such as k-means are mutually incompatible with
60
/// tree-based hierarchical clustering. This interface class
61
/// is the attempt to cast different cluster descriptions into
62
/// a minimal common interface that is useful for prediction.
63
///
64
/// \par
65
/// There are at least two common types of predictions made
66
/// with clusterings. The first one is the assignment of the
67
/// best matching cluster to a patters, such as in vector
68
/// quantization or unsupervised clustering. This is often
69
/// referred to as "hard clustering". The second one is the
70
/// computation of a membership function ("soft clustering").
71
/// The membership of a pattern to a cluster is non-negative,
72
/// and the total membership should add to one.
73
///
74
/// \par
75
/// This interface makes minimal assumptions to allow for
76
/// these types of predictions. It assumes that clusters can
77
/// be enumbered (identified by an index), and that at least
78
/// a membership test can be made (hard clustering). It is
79
/// optional to provide a membership function. Only one of
80
/// the two interfaces for best matching cluster or membership
81
/// function need to be implemented, since the best matching
82
/// cluster can be deduced from the membership function.
83
/// However, often the best matching cluster can be computed
84
/// more efficiently than the membership function. In these
85
/// cases both interface functions should be implemented.
86
///
87
/// \par
88
/// The purpose of this interface is to act as a common
89
/// super class to different data structures describing the
90
/// outcome of a clustering operation. The interface allows
91
/// all of these data structures to be used in the two
92
/// clustering model classes: HardClusteringModel and
93
/// SoftClusteringModel.
94
///
95
/// \ingroup clustering
96
template
<
class
InputT>
97
class
AbstractClustering
:
public
INameable
,
public
IParameterizable
<>,
public
ISerializable
98
{
99
public
:
100
typedef
InputT
InputType
;
101
typedef
unsigned
int
OutputType
;
102
typedef
typename
Batch<InputType>::type
BatchInputType
;
103
typedef
Batch<OutputType>::type
BatchOutputType
;
104
105
enum
Feature
{
106
HAS_SOFT_MEMBERSHIP
= 1,
107
};
108
SHARK_FEATURE_INTERFACE
;
109
110
/// Tests whether the clustering can compute a (soft)
111
/// member ship function, describing the membership
112
/// of a sample to the different clusters.
113
bool
hasSoftMembershipFunction
()
const
{
114
return
m_features
&
HAS_SOFT_MEMBERSHIP
;
115
}
116
117
/// return the number of clusters
118
virtual
std::size_t
numberOfClusters
()
const
= 0;
119
120
virtual
Shape
inputShape
()
const
= 0;
121
122
/// \brief Compute best matching cluster.
123
///
124
/// \par
125
/// This function should be overriden by sub-classes to
126
/// compute the cluster best matching the input pattern.
127
/// The (typically slow) default implementation is to
128
/// create a batch of size 1 and return the result of the batch call to hardMembership
129
virtual
unsigned
int
hardMembership
(
InputType
const
& pattern)
const
{
130
typename
Batch<InputType>::type
b =
Batch<InputType>::createBatch
(pattern);
131
getBatchElement
(b,0) = pattern;
132
return
hardMembership
(b)(0);
133
}
134
135
/// \brief Compute best matching cluster for a batch of inputs.
136
///
137
/// \par
138
/// This function should be overriden by sub-classes to
139
/// compute the cluster best matching the input pattern.
140
/// The (typically slow) default implementation is to
141
/// return the arg max of the soft membership function for every pattern.
142
virtual
BatchOutputType
hardMembership
(
BatchInputType
const
& patterns)
const
{
143
std::size_t numPatterns =
batchSize
(patterns);
144
RealMatrix f =
softMembership
(patterns);
145
SHARK_ASSERT
(f.size2() > 0);
146
SHARK_ASSERT
(f.size1() == numPatterns);
147
BatchOutputType
outputs(numPatterns);
148
for
(std::size_t i = 0; i != numPatterns;++i){
149
auto
membership = row(f,i);
150
outputs(i) = (
unsigned
int)(std::max_element(membership.begin(),membership.end())-membership.begin());
151
}
152
return
outputs;
153
}
154
155
/// \brief Compute cluster membership function.
156
///
157
/// \par
158
/// This function should be overriden by sub-classes to
159
/// compute the membership of a pattern to the clusters.
160
/// The default implementation creates a batch of size 1
161
/// and calls the batched version. If this is not overriden, an xception is thrown.
162
virtual
RealVector
softMembership
(
InputType
const
& pattern)
const
{
163
auto
output =
softMembership
(
Batch<InputType>::createBatch
(pattern));
164
return
row(output,0);
165
}
166
167
/// \brief Compute cluster membership function.
168
///
169
/// \par
170
/// This function should be overriden by sub-classes to
171
/// compute the membership of a pattern to the clusters.
172
/// This default implementation throws an exception.
173
virtual
RealMatrix
softMembership
(
BatchInputType
const
& patterns)
const
{
174
SHARK_FEATURE_EXCEPTION
(
HAS_SOFT_MEMBERSHIP
);
175
}
176
177
/// empty default implementation of ISerializable::read
178
void
read
(
InArchive
& archive) {}
179
180
/// empty default implementation of ISerializable::write
181
void
write
(
OutArchive
& archive)
const
{}
182
};
183
184
185
}
186
#endif