OneClassSvm.cpp
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief One-Class Support Vector Machine example program.
6 *
7 * \par
8 * This program generates a toy data set composed of Gaussian
9 * distributions. It then uses a one-class SVM to model the
10 * densest regions. It visualizes the result.
11 *
12 *
13 *
14 * \author T. Glasmachers
15 * \date 2013
16 *
17 *
18 * \par Copyright 1995-2017 Shark Development Team
19 *
20 * <BR><HR>
21 * This file is part of Shark.
22 * <https://shark-ml.github.io/Shark/>
23 *
24 * Shark is free software: you can redistribute it and/or modify
25 * it under the terms of the GNU Lesser General Public License as published
26 * by the Free Software Foundation, either version 3 of the License, or
27 * (at your option) any later version.
28 *
29 * Shark is distributed in the hope that it will be useful,
30 * but WITHOUT ANY WARRANTY; without even the implied warranty of
31 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 * GNU Lesser General Public License for more details.
33 *
34 * You should have received a copy of the GNU Lesser General Public License
35 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
36 *
37 */
38//===========================================================================
39
43
44using namespace shark;
45using namespace std;
46
47
48class Gaussians : public DataDistribution<RealVector>
49{
50public:
51 void draw(RealVector& point) const
52 {
53 point.resize(2);
54 size_t cluster = random::discrete(random::globalRng, 0, 4);
55 double alpha = 0.4 * M_PI * cluster;
56 point(0) = 3.0 * cos(alpha) + 0.75 * random::gauss(random::globalRng);
57 point(1) = 3.0 * sin(alpha) + 0.75 * random::gauss(random::globalRng);
58 }
59};
60
61
62int main(int argc, char** argv)
63{
64 // experiment settings
65 unsigned int ell = 100; // number of training data point
66 double nu = 0.5; // probability mass to be covered, must fulfill 0 < mu < 1
67 double gamma = 0.5; // kernel bandwidth parameter
68
69 GaussianRbfKernel<> kernel(gamma); // Gaussian kernel
70 KernelExpansion<RealVector> ke; // (affine) linear function in kernel-induced feature space
71
72 // generate artificial benchmark data
73 Gaussians problem;
74 UnlabeledData<RealVector> data = problem.generateDataset(ell);
75
76 // define the learner
77 OneClassSvmTrainer<RealVector> trainer(&kernel, nu);
78
79 // train the model
80 trainer.train(ke, data);
81
82 // evaluate the model
83 char output[35][71];
84 RealVector input(2);
85 for (std::size_t y=0; y<35; y++)
86 {
87 input(1) = 5.0 * (y - 17.0) / 17.0;
88 for (std::size_t x=0; x<70; x++)
89 {
90 input(0) = 5.0 * (x - 34.5) / 34.5;
91 double val = ke(input)(0);
92 output[y][x] = (val < 0.0) ? ' ' : ':';
93 }
94 output[y][70] = 0;
95 }
96
97 // mark the samples
99 for (UnlabeledData<RealVector>::const_element_range::const_iterator it = elements.begin(); it != elements.end(); ++it)
100 {
101 RealVector v = *it;
102 int x = (int)std::floor(34.5 * v(0) / 5.0 + 34.5 + 0.5);
103 int y = (int)std::floor(17.0 * v(1) / 5.0 + 17.0 + 0.5);
104 if (x >= 0 && y >= 0 && x < 70 && y < 35) output[y][x] = '*';
105 }
106
107 // output to the console
108 cout << endl
109 << "One-Class SVM example program." << endl
110 << "100 samples are drawn from a mixture of five Gaussians. Data samples" << endl
111 << "are marked with an asterisk '*'. The :::-shaded regions are the SVM's" << endl
112 << "estimate of the high-probability region of the distribution." << endl
113 << endl;
114 for (std::size_t y=0; y<35; y++) cout << output[y] << endl;
115 cout << endl;
116}