Shark machine learning library
Installation
Tutorials
Benchmarks
Documentation
Quick references
Class list
Global functions
include
shark
Algorithms
Trainers
NormalizeComponentsUnitVariance.h
Go to the documentation of this file.
1
//===========================================================================
2
/*!
3
*
4
*
5
* \brief Data normalization to zero mean and unit variance
6
*
7
*
8
*
9
*
10
* \author T. Glasmachers
11
* \date 2010, 2013
12
*
13
*
14
* \par Copyright 1995-2017 Shark Development Team
15
*
16
* <BR><HR>
17
* This file is part of Shark.
18
* <https://shark-ml.github.io/Shark/>
19
*
20
* Shark is free software: you can redistribute it and/or modify
21
* it under the terms of the GNU Lesser General Public License as published
22
* by the Free Software Foundation, either version 3 of the License, or
23
* (at your option) any later version.
24
*
25
* Shark is distributed in the hope that it will be useful,
26
* but WITHOUT ANY WARRANTY; without even the implied warranty of
27
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28
* GNU Lesser General Public License for more details.
29
*
30
* You should have received a copy of the GNU Lesser General Public License
31
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
32
*
33
*/
34
//===========================================================================
35
36
37
#ifndef SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITVARIANCE_H
38
#define SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITVARIANCE_H
39
40
41
#include <
shark/Models/Normalizer.h
>
42
#include <
shark/Algorithms/Trainers/AbstractTrainer.h
>
43
#include <
shark/Data/Statistics.h
>
44
45
namespace
shark
{
46
47
48
///
49
/// \brief Train a linear model to normalize the components of a dataset to unit variance, and optionally to zero mean.
50
///
51
/// \par
52
/// Normalizing the components of a dataset works via
53
/// training a Normalizer model. This model is then
54
/// applied to the dataset in order to perform the
55
/// normalization. The same model can be applied to
56
/// different datasets.
57
///
58
/// \par
59
/// The typical use case is that the Normalizer
60
/// model is trained on the training data. Later, as
61
/// "test" data comes in, the same model is used, of
62
/// course without being recalibrated. Thus, the model
63
/// used for normalization must be independent of the
64
/// dataset it was trained on.
65
///
66
/// \par
67
/// Note that subtracting the mean destroys sparsity.
68
/// Therefore this feature is turned off by default.
69
/// If you have non-sparse data and you need to
70
/// move data to zero mean, not only to unit variance,
71
/// then enable the flag zeroMean in the constructor.
72
/// \ingroup unsupervised_trainer
73
template
<
class
DataType = RealVector>
74
class
NormalizeComponentsUnitVariance
:
public
AbstractUnsupervisedTrainer
< Normalizer<DataType> >
75
{
76
public
:
77
typedef
AbstractUnsupervisedTrainer< Normalizer<DataType>
>
base_type
;
78
79
/// \brief Constructor
80
///
81
/// \par
82
/// The normalizer scales the data to unit variance.
83
/// It can also remove the mean of the data. This is usually
84
/// desired, e.g., for neural network training. Note however
85
/// that this feature is sometimes undesirable since it can
86
/// destroy sparsity.
87
///
88
/// \param zeroMean enable or disable data mean removal
89
NormalizeComponentsUnitVariance
(
bool
zeroMean)
90
:
m_zeroMean
(zeroMean){ }
91
92
/// \brief From INameable: return the class name.
93
std::string
name
()
const
94
{
return
"NormalizeComponentsUnitVariance"
; }
95
96
void
train
(
Normalizer<DataType>
& model,
UnlabeledData<DataType>
const
& input)
97
{
98
SHARK_RUNTIME_CHECK
(input.
numberOfElements
() >= 2,
"Input needs to consist of at least two points"
);
99
std::size_t dc =
dataDimension
(input);
100
101
RealVector
mean
;
102
RealVector
variance
;
103
meanvar
(input,
mean
,
variance
);
104
105
RealVector diagonal(dc);
106
RealVector vector(dc);
107
108
for
(std::size_t d=0; d != dc; d++){
109
double
stddev = std::sqrt(
variance
(d));
110
if
(stddev == 0.0)
111
{
112
diagonal(d) = 0.0;
113
vector(d) = 0.0;
114
}
115
else
116
{
117
diagonal(d) = 1.0 / stddev;
118
vector(d) = -
mean
(d) / stddev;
119
}
120
}
121
122
if
(
m_zeroMean
)
123
model.
setStructure
(diagonal, vector);
124
else
125
model.
setStructure
(diagonal);
126
}
127
128
protected
:
129
bool
m_zeroMean
;
130
};
131
132
133
}
134
#endif