Shark machine learning library
Installation
Tutorials
Benchmarks
Documentation
Quick references
Class list
Global functions
include
shark
Models
Clustering
Centroids.h
Go to the documentation of this file.
1
//===========================================================================
2
/*!
3
*
4
*
5
* \brief Clusters defined by centroids.
6
*
7
*
8
*
9
* \author T. Glasmachers
10
* \date 2011
11
*
12
*
13
* \par Copyright 1995-2017 Shark Development Team
14
*
15
* <BR><HR>
16
* This file is part of Shark.
17
* <https://shark-ml.github.io/Shark/>
18
*
19
* Shark is free software: you can redistribute it and/or modify
20
* it under the terms of the GNU Lesser General Public License as published
21
* by the Free Software Foundation, either version 3 of the License, or
22
* (at your option) any later version.
23
*
24
* Shark is distributed in the hope that it will be useful,
25
* but WITHOUT ANY WARRANTY; without even the implied warranty of
26
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27
* GNU Lesser General Public License for more details.
28
*
29
* You should have received a copy of the GNU Lesser General Public License
30
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
31
*
32
*/
33
//===========================================================================
34
35
#ifndef SHARK_MODELS_CLUSTERING_CENTROIDS_H
36
#define SHARK_MODELS_CLUSTERING_CENTROIDS_H
37
38
#include <
shark/Core/DLLSupport.h
>
39
#include <
shark/Models/Clustering/AbstractClustering.h
>
40
#include <
shark/Data/Dataset.h
>
41
42
43
namespace
shark
{
44
45
46
/// \brief Clusters defined by centroids.
47
///
48
/// \par
49
/// Centroids are an elementary way to define clusters by means
50
/// of the one-nearest-neighbor rule. This rule defines a hard
51
/// clustering decision.
52
///
53
/// \par
54
/// The Centroids class uses inverse distances to compute soft
55
/// clustering memberships. This is arbitrary and can be changed
56
/// by overriding the membershipKernel function.
57
///
58
///\ingroup clustering
59
class
Centroids
:
public
AbstractClustering
<RealVector>
60
{
61
typedef
AbstractClustering<RealVector>
base_type
;
62
63
public
:
64
/// Default constructor
65
SHARK_EXPORT_SYMBOL
Centroids
();
66
67
/// Constructor
68
///
69
/// \param centroids number of centroids in the model (initially zero)
70
/// \param dimension dimension of the input space, and thus of the centroids
71
SHARK_EXPORT_SYMBOL
Centroids
(std::size_t
centroids
, std::size_t dimension);
72
73
/// Constructor
74
///
75
/// \param centroids centroid vectors
76
SHARK_EXPORT_SYMBOL
Centroids
(
Data<RealVector>
const
&
centroids
);
77
78
/// \brief From INameable: return the class name.
79
std::string
name
()
const
80
{
return
"Centroids"
; }
81
82
/// from IParameterizable
83
SHARK_EXPORT_SYMBOL
RealVector
parameterVector
()
const
;
84
85
/// from IParameterizable
86
SHARK_EXPORT_SYMBOL
void
setParameterVector
(RealVector
const
& newParameters);
87
88
/// from IParameterizable
89
SHARK_EXPORT_SYMBOL
std::size_t
numberOfParameters
()
const
;
90
91
/// return the dimension of the inputs
92
Shape
inputShape
()
const
{
93
return
dataDimension
(
m_centroids
);
94
}
95
96
/// return the number of centroids in the model
97
SHARK_EXPORT_SYMBOL
std::size_t
numberOfClusters
()
const
;
98
99
/// read access to the centroid vectors
100
Data<RealVector>
const
&
centroids
()
const
{
101
return
m_centroids
;
102
}
103
104
/// overwrite the centroid vectors
105
void
setCentroids
(
Data<RealVector>
const
& newCentroids){
106
m_centroids
= newCentroids;
107
}
108
109
/// from ISerializable
110
SHARK_EXPORT_SYMBOL
void
read
(
InArchive
& archive);
111
112
/// from ISerializable
113
SHARK_EXPORT_SYMBOL
void
write
(
OutArchive
& archive)
const
;
114
115
/// from AbstractClustering: Compute cluster memberships.
116
SHARK_EXPORT_SYMBOL
RealVector
softMembership
(RealVector
const
& pattern)
const
;
117
/// From AbstractClustering: Compute cluster memberships for a batch of patterns.
118
SHARK_EXPORT_SYMBOL
RealMatrix
softMembership
(
BatchInputType
const
& patterns)
const
;
119
120
/// Computes the distances of each pattern to all cluster centers
121
SHARK_EXPORT_SYMBOL
RealMatrix
distances
(
BatchInputType
const
& patterns)
const
;
122
123
124
/// initialize centroids from labeled data: take the first
125
/// data points with different labels; if there are more
126
/// centroids than classes, the remaining centroids are filled
127
/// with the first elements in the data set
128
///
129
/// \param data dataset from which to take the centroids
130
/// \param noClusters number of centroids in the model, default 0 is mapped to the number of classes in the data set
131
/// \param noClasses number of clases in the dataset, default 0 means that the number is computed
132
SHARK_EXPORT_SYMBOL
void
initFromData
(
ClassificationDataset
const
& data, std::size_t noClusters = 0, std::size_t noClasses = 0);
133
134
/// initialize centroids from unlabeled data:
135
/// take a random subset of data points
136
///
137
/// \param dataset dataset from which to take the centroids
138
/// \param noClusters number of centroids in the model
139
SHARK_EXPORT_SYMBOL
void
initFromData
(
Data<RealVector>
const
& dataset, std::size_t noClusters);
140
141
protected
:
142
/// Compute unnormalized membership from distance.
143
/// The default implementation is to return exp(-distance)
144
SHARK_EXPORT_SYMBOL
virtual
double
membershipKernel
(
double
dist)
const
;
145
146
/// centroid vectors
147
Data<RealVector>
m_centroids
;
148
};
149
150
151
}
152
#endif