Shark machine learning library
Installation
Tutorials
Benchmarks
Documentation
Quick references
Class list
Global functions
Examples
Unsupervised
PCA.cpp
Go to the documentation of this file.
1
#include <
shark/Algorithms/Trainers/PCA.h
>
2
3
//header needed for data generation
4
#include <
shark/Statistics/Distributions/MultiVariateNormalDistribution.h
>
5
6
using namespace
shark
;
7
using namespace
std;
8
9
///In this test, we will use PCA to calculate the
10
///eigenvectors of a scatter matrix and do a
11
///reduction of the subspace to the space
12
///spanned by the two eigenvectors with the biggest
13
///eigenvalues.
14
15
///the principal components of our multivariate data distribution
16
///we will use them later for checking
17
/// Ground-truth principal components of the synthetic data
/// distribution, stored one component per row; used later to
/// check the result computed by the PCA.
double principalComponents[3][3] = {
	{5,  0, 0},
	{0,  2, 2},
	{0, -1, 1}
};

/// Number of samples drawn from the test distribution.
std::size_t numberOfExamples = 30'000;
25
26
///The test distribution is just a multivariate Gaussian.
27
UnlabeledData<RealVector>
createData
()
28
{
29
RealVector
mean
(3);
30
mean
(0) = 1;
31
mean
(1) = -1;
32
mean
(2) = 3;
33
34
// to create the covariance matrix we first put the
35
// copy the principal components in the matrix
36
// and than use an outer product
37
RealMatrix
covariance
(3,3);
38
for
(
int
i = 0; i != 3; ++i)
39
{
40
for
(
int
j = 0; j != 3; ++j)
41
{
42
covariance
(i,j) =
principalComponents
[i][j];
43
}
44
}
45
covariance
= prod(trans(
covariance
),
covariance
);
46
47
//now we can create the distribution
48
MultiVariateNormalDistribution
distribution(
covariance
);
49
50
//and we sample from it
51
std::vector<RealVector> data(
numberOfExamples
);
52
for
(
auto
& sample: data)
53
{
54
//first element is the sample, second is the underlying uniform gaussian
55
sample =
mean
+ distribution(
random::globalRng
).first;
56
}
57
return
createDataFromRange
(data);
58
}
59
60
61
int
main
(){
62
63
// We first create our problem. Since the PCA is a unsupervised Method,
64
// We use UnlabeledData instead of Datas.
65
UnlabeledData<RealVector>
data =
createData
();
66
67
// With the definition of the model, we declare, how many
68
// principal components we want. If we want all, we set
69
// inputs=outputs = 3, but since want to do a reduction, we
70
// use only 2 in the second argument. The third argument is
71
// the bias. pca needs a bias to work.
72
LinearModel<>
pcaModel(3,2,
true
);
73
74
// Now we can construct the PCA.
75
// We can decide whether we want a whitened output or not.
76
// For testing purposes, we don't want whitening in this
77
// example.
78
PCA
pca;
79
pca.
setWhitening
(
false
);
80
pca.
train
(pcaModel,data);
81
82
83
84
//Print the rescaled results.
85
//Should be the same as principalComponents, except for sign change
86
//and numerical errors.
87
cout <<
"RESULTS: "
<< std::endl;
88
cout <<
"======== "
<< std::endl << std::endl;
89
cout <<
"principal component 1: "
<< row(pcaModel.
matrix
(),0) * sqrt(pca.
eigenvalues
()(0)) << std::endl;
90
cout <<
"principal component 2: "
<< row(pcaModel.
matrix
(),1) * sqrt( pca.
eigenvalues
()(1) ) << std::endl;
91
92
}