OneClassSvmTrainer.h
//===========================================================================
/*!
 *
 * \brief      Trainer for One-Class Support Vector Machines
 *
 * \author     T. Glasmachers
 * \date       2007-2012
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ALGORITHMS_ONECLASSSVMTRAINER_H
#define SHARK_ALGORITHMS_ONECLASSSVMTRAINER_H


// Shark headers for the trainer interface, the QP solver machinery, and
// the kernel expansion model used below (a reconstructed include set;
// the paths follow the Shark source tree layout).
#include <shark/Algorithms/Trainers/AbstractSvmTrainer.h>
#include <shark/Algorithms/QP/BoxConstrainedProblems.h>
#include <shark/Algorithms/QP/SvmProblems.h>
#include <shark/Algorithms/QP/QpSolver.h>
#include <shark/Models/Kernels/KernelExpansion.h>

namespace shark {


///
/// \brief Training of one-class SVMs.
///
/// The one-class support vector machine is an unsupervised
/// method for learning the high-probability region of a
/// distribution. Given are data points \f$ x_i, i \in \{1, \dots, m\} \f$,
/// a kernel function k(x, x'), and a regularization
/// parameter \f$ \nu \in (0, 1) \f$. Let H denote the
/// reproducing kernel Hilbert space induced by k, and let \f$ \phi \f$
/// denote the corresponding feature map.
/// Then an estimate of a high-probability region of the
/// distribution generating the sample points is described
/// by the set where the following function takes positive
/// values:
/// \f[
///     f(x) = \langle w, \phi(x) \rangle + b
/// \f]
/// with coefficients w and b given by the (primal)
/// optimization problem
/// \f[
///     \min \frac{1}{2} \|w\|^2 + \frac{1}{\nu m} \sum_{i=1}^m \xi_i - \rho
/// \f]
/// \f[
///     \text{s.t. } \langle w, \phi(x_i) \rangle + b \geq \rho - \xi_i; \quad \xi_i \geq 0 .
/// \f]
/// The parameter \f$ \nu \in (0, 1) \f$ controls the smoothness of the
/// solution and the fraction of probability mass covered; the threshold
/// \f$ \rho \f$ is determined by the optimization problem itself.
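///
/// For orientation, the dual of this problem as derived in the paper
/// cited below (a standard derivation, restated here as a sketch) is
/// \f[
///     \min_{\alpha} \frac{1}{2} \sum_{i,j=1}^m \alpha_i \alpha_j k(x_i, x_j)
/// \f]
/// \f[
///     \text{s.t. } 0 \leq \alpha_i \leq \frac{1}{\nu m}, \quad \sum_{i=1}^m \alpha_i = 1 .
/// \f]
/// The trainer solves a box-constrained quadratic program of this form
/// and recovers the offset b from the KKT conditions of the solution.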
///
/// For more details refer to the paper:<br/>
/// <p>Estimating the support of a high-dimensional distribution. B. Sch&ouml;lkopf, J. C. Platt, J. Shawe-Taylor, A. Smola, and R. C. Williamson, 1999.</p>
///
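/// A minimal usage sketch (the Gaussian kernel, its bandwidth, and the
/// value \f$ \nu = 0.1 \f$ are arbitrary choices for illustration):
/// \code
/// UnlabeledData<RealVector> inputset = ...;              // training points
/// GaussianRbfKernel<RealVector> kernel(1.0);             // bandwidth parameter gamma
/// OneClassSvmTrainer<RealVector> trainer(&kernel, 0.1);  // nu = 0.1
/// KernelExpansion<RealVector> svm;
/// trainer.train(svm, inputset);
/// // the trained decision function is positive on the estimated region
/// \endcode
///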
/// \ingroup unsupervised_trainer
template <class InputType, class CacheType = float>
class OneClassSvmTrainer : public AbstractUnsupervisedTrainer<KernelExpansion<InputType> >, public QpConfig, public IParameterizable<>
{
public:

	typedef CacheType QpFloatType;
	typedef AbstractKernelFunction<InputType> KernelType;
	typedef KernelExpansion<InputType> KernelExpansionType;
	typedef QpConfig base_type;

	typedef KernelMatrix<InputType, QpFloatType> KernelMatrixType;
	typedef CachedMatrix<KernelMatrixType> CachedMatrixType;
	typedef PrecomputedMatrix<KernelMatrixType> PrecomputedMatrixType;

	/// \brief Constructor.
	///
	/// \param kernel  kernel function to use for training and prediction
	/// \param nu      regularization parameter \f$ \nu \f$, see class documentation
	OneClassSvmTrainer(KernelType* kernel, double nu)
	: m_kernel(kernel)
	, m_nu(nu)
	, m_cacheSize(0x4000000)
	{ }

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "OneClassSvmTrainer"; }

	double nu() const
	{ return m_nu; }
	void setNu(double nu)
	{ m_nu = nu; }
	KernelType* kernel()
	{ return m_kernel; }
	const KernelType* kernel() const
	{ return m_kernel; }
	void setKernel(KernelType* kernel)
	{ m_kernel = kernel; }

	double CacheSize() const
	{ return m_cacheSize; }
	void setCacheSize( std::size_t size )
	{ m_cacheSize = size; }

	/// get the hyper-parameter vector
	RealVector parameterVector() const{
		size_t kp = m_kernel->numberOfParameters();
		RealVector ret(kp + 1);
		noalias(subrange(ret, 0, kp)) = m_kernel->parameterVector();
		ret(kp) = m_nu;
		return ret;
	}

	/// set the vector of hyper-parameters
	void setParameterVector(RealVector const& newParameters){
		size_t kp = m_kernel->numberOfParameters();
		SHARK_ASSERT(newParameters.size() == kp + 1);
		m_kernel->setParameterVector(subrange(newParameters, 0, kp));
		setNu(newParameters(kp));
	}

	/// return the number of hyper-parameters
	size_t numberOfParameters() const
	{ return (m_kernel->numberOfParameters() + 1); }

	/// \brief Train the one-class SVM on an unlabeled data set.
	void train(KernelExpansionType& svm, UnlabeledData<InputType> const& inputset)
	{
		SHARK_RUNTIME_CHECK(m_nu > 0.0 && m_nu < 1.0, "invalid setting of the parameter nu (must be 0 < nu < 1)");
		svm.setStructure(m_kernel, inputset, true);

		// solve the quadratic program
		if (QpConfig::precomputeKernel())
			trainSVM<PrecomputedMatrixType>(svm, inputset);
		else
			trainSVM<CachedMatrixType>(svm, inputset);

		if (base_type::sparsify())
			svm.sparsify();
	}

protected:
	KernelType* m_kernel;
	double m_nu;
	std::size_t m_cacheSize;

	template<class MatrixType>
	void trainSVM(KernelExpansionType& svm, UnlabeledData<InputType> const& inputset){
		typedef BoxedSVMProblem<MatrixType> SVMProblemType;
		typedef SvmShrinkingProblem<SVMProblemType> ProblemType;

		// Set up the problem: each coefficient alpha_i is box-constrained
		// to the interval [0, 1/(nu*m)], with m the number of data points.
		KernelMatrixType km(*m_kernel, inputset);
		MatrixType matrix(&km);
		std::size_t ic = matrix.size();
		double upper = 1.0 / (m_nu * ic);
		SVMProblemType svmProblem(matrix, blas::repeat(0.0, ic), 0.0, upper);
		ProblemType problem(svmProblem, base_type::m_shrinking);

		// solve it
		QpSolver< ProblemType > solver(problem);
		solver.solve(base_type::stoppingCondition(), &base_type::solutionProperties());
		column(svm.alpha(), 0) = problem.getUnpermutedAlpha();

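		// Rationale (standard KKT argument): a free variable
		// (0 < alpha_i < upper) corresponds to a training point lying
		// exactly on the decision boundary, so its gradient value
		// determines the offset exactly; variables at the box bounds
		// only yield one-sided bounds on the offset.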
		// compute the offset from the KKT conditions
		double lowerBound = -1e100;
		double upperBound = 1e100;
		double sum = 0.0;
		std::size_t freeVars = 0;
		for (std::size_t i=0; i != problem.dimensions(); i++)
		{
			double value = problem.gradient(i);
			if (problem.alpha(i) == 0.0)
				lowerBound = std::max(value, lowerBound);
			else if (problem.alpha(i) == upper)
				upperBound = std::min(value, upperBound);
			else
			{
				sum += value;
				freeVars++;
			}
		}
		if (freeVars > 0)
			svm.offset(0) = sum / freeVars;  // stabilized (averaged) exact value
		else
			svm.offset(0) = 0.5 * (lowerBound + upperBound);  // best estimate

		base_type::m_accessCount = km.getAccessCount();
	}
};


}
#endif