Shark machine learning library

include/shark/Algorithms/Trainers/PCA.h
//===========================================================================
/*!
 *
 * \brief Principal Component Analysis
 *
 * \author T. Glasmachers, C. Igel
 * \date 2010, 2011
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
#ifndef SHARK_ALGORITHMS_TRAINER_PCA_H
#define SHARK_ALGORITHMS_TRAINER_PCA_H

#include <shark/Core/DLLSupport.h>
#include <shark/Models/LinearModel.h>
#include <shark/Algorithms/Trainers/AbstractTrainer.h>

namespace shark {
/// \brief Principal Component Analysis
///
/// Principal Component Analysis, also known as the
/// Karhunen-Loeve transformation, takes a symmetric
/// \f$ n \times n \f$ matrix \f$ A \f$ and uses its decomposition
///
/// \f$
/// A = \Gamma \Lambda \Gamma^T,
/// \f$
///
/// where \f$ \Lambda \f$ is the diagonal matrix of eigenvalues
/// of \f$ A \f$ and \f$ \Gamma \f$ is the orthogonal matrix
/// with the corresponding eigenvectors as columns.
/// \f$ \Gamma \f$ then defines a successive orthogonal rotation
/// that maximizes the variances of the coordinates, i.e., the
/// coordinate system is rotated in such a way that the correlation
/// between the new axes becomes zero. If there are \f$ p \f$ axes,
/// the first axis is rotated so that the points projected onto it
/// have maximum variance. The remaining \f$ p - 1 \f$ axes are then
/// rotated such that the next axis covers a maximum part of the
/// residual variance not covered by the first axis. After the
/// rotation of \f$ p - 1 \f$ axes, the orientation of the last axis
/// \f$ p \f$ is fixed. A typical application of PCA is
/// dimensionality reduction, achieved by discarding the components
/// with the smallest corresponding eigenvalues/variances.
/// Furthermore, the eigenvalues may be rescaled to one, resulting
/// in a whitening of the data.
/// \ingroup unsupervised_trainer
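///
/// Example usage (a minimal sketch following the Shark PCA tutorial;
/// loading the data set into `data` is assumed to happen elsewhere,
/// e.g., via importCSV):
/// \code
/// UnlabeledData<RealVector> data;  // filled elsewhere
/// PCA pca(data);                   // computes the eigendecomposition of the data
/// LinearModel<> enc;
/// pca.encoder(enc, 2);             // encoder onto the first two principal components
/// Data<RealVector> encoded = enc(data);
/// \endcode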
class PCA : public AbstractUnsupervisedTrainer<LinearModel<> >
{
private:
    typedef AbstractUnsupervisedTrainer<LinearModel<> > base_type;
public:
    enum PCAAlgorithm { STANDARD, SMALL_SAMPLE, AUTO };
    /// Constructor.
    /// The parameter defines whether the model should also
    /// whiten the data.
    PCA(bool whitening = false) : m_whitening(whitening) {
        m_algorithm = AUTO;
    }

    /// Constructor.
    /// The parameter defines whether the model should also
    /// whiten the data.
    /// The eigendecomposition of the data is stored in the PCA object.
    PCA(UnlabeledData<RealVector> const& inputs, bool whitening = false)
    : m_whitening(whitening) {
        m_algorithm = AUTO;
        setData(inputs);
    }
    /// \brief From INameable: return the class name.
    std::string name() const
    { return "PCA"; }
    /// If set to true, the encoded data has unit variance along
    /// the new coordinates.
    void setWhitening(bool whitening) {
        m_whitening = whitening;
    }
    /// Train the model to perform PCA. The model must be a
    /// LinearModel object with offset, and its output dimension
    /// defines the number of principal components
    /// represented. The model returned is the one given by the
    /// encoder() function (i.e., mapping from the original input
    /// space to the PCA coordinate system).
    void train(LinearModel<>& model, UnlabeledData<RealVector> const& inputs) {
        std::size_t m = model.outputShape().numElements(); // reduced dimensionality
        setData(inputs);   // compute the principal components
        encoder(model, m); // define the model
    }
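    // Example (a hypothetical sketch; it assumes that
    // LinearModel::setStructure takes the input dimension, the output
    // dimension, and an offset flag, and that train() requires the offset):
    //
    //   PCA pca;
    //   LinearModel<> enc;
    //   enc.setStructure(n, m, true); // n inputs, m principal components, with offset
    //   pca.train(enc, data);         // same effect as setData(data); encoder(enc, m);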
    //! Sets the input data and performs the PCA. This is a
    //! computationally costly operation. The eigendecomposition
    //! of the data is stored in the PCA object.
    SHARK_EXPORT_SYMBOL void setData(UnlabeledData<RealVector> const& inputs);
    //! Returns a model mapping the original data to the
    //! m-dimensional PCA coordinate system.
    SHARK_EXPORT_SYMBOL void encoder(LinearModel<>& model, std::size_t m = 0);
    //! Returns a model mapping encoded data from the
    //! m-dimensional PCA coordinate system back to the
    //! n-dimensional original coordinate system.
    SHARK_EXPORT_SYMBOL void decoder(LinearModel<>& model, std::size_t m = 0);
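    // Example: reconstructing the inputs from their first m principal
    // components (a minimal sketch; it assumes a trained PCA object
    // `pca` and a data set `data`):
    //
    //   LinearModel<> enc, dec;
    //   pca.encoder(enc, m);  // project onto m PCA coordinates
    //   pca.decoder(dec, m);  // map back to the original n dimensions
    //   Data<RealVector> reconstructed = dec(enc(data));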
    //! Eigenvalues of last training. The number of eigenvalues
    //! is equal to the minimum of the input dimension (i.e., the
    //! number of attributes) and the number of data points used
    //! for training the PCA.
    RealVector const& eigenvalues() const {
        return m_eigenvalues;
    }
    /// Returns the i-th eigenvalue.
    double eigenvalue(std::size_t i) const {
        SIZE_CHECK(i < m_l);
        if (i < m_eigenvalues.size())
            return m_eigenvalues(i);
        return 0.;
    }
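    // Example: fraction of the total variance captured by the first m
    // components (a minimal sketch; it assumes a trained PCA object
    // `pca` and remora's sum() for reducing the eigenvalue vector):
    //
    //   double total = sum(pca.eigenvalues());
    //   double captured = 0.0;
    //   for (std::size_t i = 0; i != m; ++i) captured += pca.eigenvalue(i);
    //   double fraction = captured / total;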
    //! Eigenvectors of last training. The number of eigenvectors
    //! is equal to the minimum of the input dimension (i.e., the
    //! number of attributes) and the number of data points used
    //! for training the PCA.
    RealMatrix const& eigenvectors() const {
        return m_eigenvectors;
    }
    /// Mean of the last training.
    RealVector const& mean() const {
        return m_mean;
    }
protected:
    bool m_whitening;          ///< normalize variance yes/no
    RealMatrix m_eigenvectors; ///< eigenvectors
    RealVector m_eigenvalues;  ///< eigenvalues
    RealVector m_mean;         ///< mean value

    std::size_t m_n;           ///< number of attributes
    std::size_t m_l;           ///< number of training data points

    PCAAlgorithm m_algorithm;  ///< whether to use the design matrix or its transpose for building the covariance matrix
};


}
#endif // SHARK_ALGORITHMS_TRAINER_PCA_H