Centroids.h
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief Clusters defined by centroids.
6 *
7 *
8 *
9 * \author T. Glasmachers
10 * \date 2011
11 *
12 *
13 * \par Copyright 1995-2017 Shark Development Team
14 *
15 * <BR><HR>
16 * This file is part of Shark.
17 * <https://shark-ml.github.io/Shark/>
18 *
19 * Shark is free software: you can redistribute it and/or modify
20 * it under the terms of the GNU Lesser General Public License as published
21 * by the Free Software Foundation, either version 3 of the License, or
22 * (at your option) any later version.
23 *
24 * Shark is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU Lesser General Public License for more details.
28 *
29 * You should have received a copy of the GNU Lesser General Public License
30 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31 *
32 */
33//===========================================================================
34
35#ifndef SHARK_MODELS_CLUSTERING_CENTROIDS_H
36#define SHARK_MODELS_CLUSTERING_CENTROIDS_H
37
40#include <shark/Data/Dataset.h>
41
42
43namespace shark {
44
45
46/// \brief Clusters defined by centroids.
47///
48/// \par
49/// Centroids are an elementary way to define clusters by means
50/// of the one-nearest-neighbor rule. This rule defines a hard
51/// clustering decision.
52///
53/// \par
54/// The Centroids class uses inverse distances to compute soft
55/// clustering memberships. This choice is arbitrary and can be changed
56/// by overriding the virtual membershipKernel function.
57///
/// \par
/// NOTE(review): in this listing several documented members have no
/// visible declaration (the line following the comment is blank), and
/// two inline bodies appear without their signature line. Presumably
/// those lines were lost when the listing was extracted; confirm
/// against the original header before editing any code here.
///
58///\ingroup clustering
59class Centroids : public AbstractClustering<RealVector>
60{
62
63public:
64 /// Default constructor
 /// NOTE(review): the declaration is not visible in this listing.
66
67 /// Constructor
68 ///
69 /// \param centroids number of centroids in the model (initially zero)
70 /// \param dimension dimension of the input space, and thus of the centroids
71 SHARK_EXPORT_SYMBOL Centroids(std::size_t centroids, std::size_t dimension);
72
73 /// Constructor
74 ///
75 /// \param centroids centroid vectors
 /// NOTE(review): the declaration is not visible in this listing.
77
78 /// \brief From INameable: return the class name.
 ///
 /// \return the string "Centroids"
79 std::string name() const
80 { return "Centroids"; }
81
82 /// from IParameterizable
 /// NOTE(review): the declaration is not visible in this listing.
84
85 /// from IParameterizable
 ///
 /// \param newParameters new parameter vector for the model
86 SHARK_EXPORT_SYMBOL void setParameterVector(RealVector const& newParameters);
87
88 /// from IParameterizable
 /// NOTE(review): the declaration is not visible in this listing.
90
91 /// return the dimension of the inputs
 /// NOTE(review): only the closing brace of this inline function is
 /// visible below; its signature and body are missing from this listing.
94 }
95
96 /// return the number of centroids in the model
 /// NOTE(review): the declaration is not visible in this listing.
98
99 /// read access to the centroid vectors
 ///
 /// \return the stored centroid container, m_centroids
 /// NOTE(review): the signature line of this accessor is missing from
 /// this listing; only its body is visible.
101 return m_centroids;
102 }
103
104 /// overwrite the centroid vectors
 ///
 /// \param newCentroids centroid vectors that are assigned to m_centroids
105 void setCentroids(Data<RealVector> const& newCentroids){
106 m_centroids = newCentroids;
107 }
108
109 /// from ISerializable
 /// NOTE(review): the declaration is not visible in this listing.
111
112 /// from ISerializable
 /// NOTE(review): the declaration is not visible in this listing.
114
115 /// From AbstractClustering: Compute cluster memberships.
 ///
 /// \param pattern single input pattern
116 SHARK_EXPORT_SYMBOL RealVector softMembership(RealVector const& pattern) const;
117 /// From AbstractClustering: Compute cluster memberships for a batch of patterns.
 ///
 /// \param patterns batch of input patterns
118 SHARK_EXPORT_SYMBOL RealMatrix softMembership(BatchInputType const& patterns) const;
119
120 /// Computes the distances of each pattern to all cluster centers.
 ///
 /// \param patterns batch of input patterns
 /// \return matrix of distances; NOTE(review): the row/column layout is
 /// not visible here, confirm against the implementation
121 SHARK_EXPORT_SYMBOL RealMatrix distances(BatchInputType const& patterns) const;
122
123
124 /// Initialize centroids from labeled data: take the first
125 /// data points with different labels; if there are more
126 /// centroids than classes, the remaining centroids are filled
127 /// with the first elements in the data set.
128 ///
129 /// \param data dataset from which to take the centroids
130 /// \param noClusters number of centroids in the model, default 0 is mapped to the number of classes in the data set
131 /// \param noClasses number of classes in the dataset, default 0 means that the number is computed
132 SHARK_EXPORT_SYMBOL void initFromData(ClassificationDataset const& data, std::size_t noClusters = 0, std::size_t noClasses = 0);
133
134 /// Initialize centroids from unlabeled data:
135 /// take a random subset of data points.
136 ///
137 /// \param dataset dataset from which to take the centroids
138 /// \param noClusters number of centroids in the model
139 SHARK_EXPORT_SYMBOL void initFromData(Data<RealVector> const& dataset, std::size_t noClusters);
140
141protected:
142 /// Compute unnormalized membership from distance.
143 /// The default implementation is to return exp(-distance)
 ///
 /// NOTE(review): the class-level documentation says memberships are
 /// based on inverse distances, which disagrees with the exp(-distance)
 /// statement above. The implementation is not visible in this header;
 /// confirm which description is correct and fix the other.
 ///
 /// \param dist distance value to convert into an unnormalized membership
144 SHARK_EXPORT_SYMBOL virtual double membershipKernel(double dist) const;
145
146 /// centroid vectors
 /// NOTE(review): the member declaration (m_centroids, used by the
 /// accessors above) is not visible in this listing.
148};
149
150
151}
152#endif