NormalizeComponentsUnitInterval.h
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief Data normalization to the unit interval
6 *
7 *
8 *
9 *
10 * \author T. Glasmachers
11 * \date 2010, 2013
12 *
13 *
14 * \par Copyright 1995-2017 Shark Development Team
15 *
16 * <BR><HR>
17 * This file is part of Shark.
18 * <https://shark-ml.github.io/Shark/>
19 *
20 * Shark is free software: you can redistribute it and/or modify
21 * it under the terms of the GNU Lesser General Public License as published
22 * by the Free Software Foundation, either version 3 of the License, or
23 * (at your option) any later version.
24 *
25 * Shark is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU Lesser General Public License for more details.
29 *
30 * You should have received a copy of the GNU Lesser General Public License
31 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32 *
33 */
34//===========================================================================
35
36
37#ifndef SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITINTERVAL_H
38#define SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITINTERVAL_H
39
40
43
44namespace shark{
45
46
47///
48/// \brief Train a model to normalize the components of a dataset to fit into the unit interval
49///
50/// \par
51/// Normalizing the components of a dataset works via
52/// training a LinearMap model. This model is then
53/// applied to the dataset in order to perform the
54/// normalization. The same model can be applied to
55/// different datasets.
56///
57/// \par
58/// The typical use case is that the AffineLinearMap
59/// model is trained on the training data. Later, as
60/// "test" data comes in, the same model is used, of
61/// course without being recalibrated. Thus, the model
62/// used for normalization must be independent of the
63/// dataset it was trained on.
64///
65/// \par
66/// Note that the transformation represented by this
67/// trainer destroys sparsity of the data. Therefore
68/// one may prefer NormalizeComponentsUnitVariance
69/// particularly on sparse data.
70/// \ingroup unsupervised_trainer
71template <class DataType = RealVector>
73{
74public:
76
79
80 /// \brief From INameable: return the class name.
81 std::string name() const
82 { return "NormalizeComponentsUnitInterval"; }
83
 // Training procedure: determine, for every component d, the smallest and
 // largest value over all input elements, then configure the model with a
 // per-component scale (diagonal) and shift (offset) chosen such that
 // diagonal(d) * x + offset(d) sends min(d) to 0 and max(d) to 1.
 // A runtime check rejects inputs with fewer than two elements.
85 std::size_t ic = input.numberOfElements();
86 SHARK_RUNTIME_CHECK(ic >= 2, "Input needs to consist of at least two points");
87 std::size_t dc = dataDimension(input);
88
 // Seed the running extrema with the first element, then scan the rest.
89 RealVector min = input.element(0);
90 RealVector max = input.element(0);
91 for(std::size_t i=1; i != ic; i++){
92 for(std::size_t d = 0; d != dc; d++){
93 double x = input.element(i)(d);
94 min(d) = std::min(min(d), x);
95 max(d) = std::max(max(d), x);
96 }
97 }
98
99 RealVector diagonal(dc);
100 RealVector offset(dc);
101
102 for (std::size_t d=0; d != dc; d++)
103 {
104 if (min(d) == max(d))
105 {
 // Constant component: there is no range to rescale, so the scale
 // is zero and the offset alone determines the output. Use the
 // centre of the unit interval. (The previous offset of
 // -min(d) + 0.5 combined with a zero scale produced 0.5 - min(d),
 // which lies outside [0, 1] whenever |min(d)| > 0.5.)
106 diagonal(d) = 0.0;
107 offset(d) = 0.5;
108 }
109 else
110 {
 // n * x - min(d) * n == (x - min(d)) / (max(d) - min(d))
111 double n = 1.0 / (max(d) - min(d));
112 diagonal(d) = n;
113 offset(d) = -min(d) * n;
114 }
115 }
116
 // Hand the per-component scale and shift over to the model.
117 model.setStructure(diagonal, offset);
118 }
119};
120
121
122}
123#endif