NormalizeComponentsUnitVariance.h
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief Data normalization to zero mean and unit variance
6 *
7 *
8 *
9 *
10 * \author T. Glasmachers
11 * \date 2010, 2013
12 *
13 *
14 * \par Copyright 1995-2017 Shark Development Team
15 *
16 * <BR><HR>
17 * This file is part of Shark.
18 * <https://shark-ml.github.io/Shark/>
19 *
20 * Shark is free software: you can redistribute it and/or modify
21 * it under the terms of the GNU Lesser General Public License as published
22 * by the Free Software Foundation, either version 3 of the License, or
23 * (at your option) any later version.
24 *
25 * Shark is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU Lesser General Public License for more details.
29 *
30 * You should have received a copy of the GNU Lesser General Public License
31 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32 *
33 */
34//===========================================================================
35
36
37#ifndef SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITVARIANCE_H
38#define SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITVARIANCE_H
39
40
44
45namespace shark {
46
47
48///
49/// \brief Train a linear model to normalize the components of a dataset to unit variance, and optionally to zero mean.
50///
51/// \par
52/// Normalizing the components of a dataset works via
53/// training a Normalizer model. This model is then
54/// applied to the dataset in order to perform the
55/// normalization. The same model can be applied to
56/// different datasets.
57///
58/// \par
59/// The typical use case is that the Normalizer
60/// model is trained on the training data. Later, as
61/// "test" data comes in, the same model is used, of
62/// course without being recalibrated. Thus, the model
63/// used for normalization must be independent of the
64/// dataset it was trained on.
65///
66/// \par
67/// Note that subtracting the mean destroys sparsity.
68/// Therefore this feature is turned off by default.
69/// If you have non-sparse data and you need to
70/// move data to zero mean, not only to unit variance,
71/// then enable the flag zeroMean in the constructor.
72/// \ingroup unsupervised_trainer
73template <class DataType = RealVector>
75{
76public:
78
79 /// \brief Constructor
80 ///
81 /// \par
82 /// The normalizer scales the data to unit variance.
83 /// It can also remove the mean of the data. This is usually
84 /// desired, e.g., for neural network training. Note however
85 /// that this feature is sometimes undesirable since it can
86 /// destroy sparsity.
87 ///
88 /// \param zeroMean if true, the trained model additionally receives an offset vector (-mean/stddev per component); if false, only the per-component scaling is set
90 : m_zeroMean(zeroMean){ } // only stores the flag; it is read when the model structure is set during training
91
92 /// \brief From INameable: return the class name, i.e. the constant string "NormalizeComponentsUnitVariance" (no object state is read).
93 std::string name() const
94 { return "NormalizeComponentsUnitVariance"; }
95
97 { // Computes a per-component scale (and optional offset) from `input` and stores it in `model`.
98 SHARK_RUNTIME_CHECK(input.numberOfElements() >= 2, "Input needs to consist of at least two points");
99 std::size_t dc = dataDimension(input); // used below as the length of both coefficient vectors and as the loop bound
100 // meanvar() fills `mean` and `variance` from `input` (presumably the per-component statistics - confirm against meanvar's documentation).
101 RealVector mean;
102 RealVector variance;
103 meanvar(input, mean, variance);
104 // `diagonal` holds the multiplicative factors, `vector` the additive offsets, one entry per component.
105 RealVector diagonal(dc);
106 RealVector vector(dc);
107
108 for (std::size_t d=0; d != dc; d++){
109 double stddev = std::sqrt(variance(d)); // NOTE(review): a negative variance(d) (e.g. from round-off) would give NaN here and skip the zero test below - confirm meanvar never returns negative values
110 if (stddev == 0.0) // component with zero standard deviation: dividing by stddev is impossible
111 {
112 diagonal(d) = 0.0; // use zero scale and zero offset instead of 1/0
113 vector(d) = 0.0;
114 }
115 else
116 {
117 diagonal(d) = 1.0 / stddev; // scaling by 1/stddev
118 vector(d) = -mean(d) / stddev; // together with the scale this is (x - mean) / stddev, assuming the model computes diagonal*x + vector - TODO confirm
119 }
120 }
121 // The offset vector is handed to the model only when mean removal was requested in the constructor.
122 if (m_zeroMean)
123 model.setStructure(diagonal, vector);
124 else
125 model.setStructure(diagonal); // scaling only; `vector` is computed above but not used in this case
127
128protected:
130};
131
132
133}
134#endif