PCA.cpp
Go to the documentation of this file.
2
3//header needed for data generation
5
6using namespace shark;
7using namespace std;
8
9///In this test, we will use PCA to calculate the
10///eigenvectors of a scatter matrix and do a
11///reduction of the subspace to the space
12///spanned by the two eigenvectors with the biggest
13///eigenvalues.
14
15///the principal components of our multivariate data distribution
16///we will use them later for checking
17double principalComponents[3][3] =
18{
19 { 5, 0, 0},
20 { 0, 2, 2},
21 { 0,-1, 1}
22};
23
24std::size_t numberOfExamples = 30000;
25
26///The test distribution is just a multivariate Gaussian.
28{
29 RealVector mean(3);
30 mean(0) = 1;
31 mean(1) = -1;
32 mean(2) = 3;
33
34 // to create the covariance matrix we first put the
35 // copy the principal components in the matrix
36 // and than use an outer product
37 RealMatrix covariance(3,3);
38 for(int i = 0; i != 3; ++i)
39 {
40 for(int j = 0; j != 3; ++j)
41 {
43 }
44 }
45 covariance = prod(trans(covariance),covariance);
46
47 //now we can create the distribution
49
50 //and we sample from it
51 std::vector<RealVector> data(numberOfExamples);
52 for (auto& sample: data)
53 {
54 //first element is the sample, second is the underlying uniform gaussian
55 sample = mean + distribution(random::globalRng).first;
56 }
57 return createDataFromRange(data);
58}
59
60
61int main(){
62
63 // We first create our problem. Since the PCA is a unsupervised Method,
64 // We use UnlabeledData instead of Datas.
66
67 // With the definition of the model, we declare, how many
68 // principal components we want. If we want all, we set
69 // inputs=outputs = 3, but since want to do a reduction, we
70 // use only 2 in the second argument. The third argument is
71 // the bias. pca needs a bias to work.
72 LinearModel<> pcaModel(3,2,true);
73
74 // Now we can construct the PCA.
75 // We can decide whether we want a whitened output or not.
76 // For testing purposes, we don't want whitening in this
77 // example.
78 PCA pca;
79 pca.setWhitening(false);
80 pca.train(pcaModel,data);
81
82
83
84 //Print the rescaled results.
85 //Should be the same as principalComponents, except for sign change
86 //and numerical errors.
87 cout << "RESULTS: " << std::endl;
88 cout << "======== " << std::endl << std::endl;
89 cout << "principal component 1: " << row(pcaModel.matrix(),0) * sqrt(pca.eigenvalues()(0)) << std::endl;
90 cout << "principal component 2: " << row(pcaModel.matrix(),1) * sqrt( pca.eigenvalues()(1) ) << std::endl;
91
92}