AbstractSvmTrainer.h
//===========================================================================
/*!
 *
 *
 * \brief Abstract Support Vector Machine Trainer, general and linear case
 *
 *
 * \par
 * This file provides: 1) the QpConfig class, which can configure and
 * provide information about an SVM training procedure; 2) a super-class
 * for general SVM trainers, namely the AbstractSvmTrainer; and 3) a
 * streamlined variant thereof for purely linear SVMs, namely the
 * AbstractLinearSvmTrainer. In general, the SvmTrainers hold as parameters
 * all hyperparameters of the underlying SVM, which includes the kernel
 * parameters for non-linear SVMs.
 *
 *
 *
 *
 * \author T. Glasmachers
 * \date -
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ALGORITHMS_TRAINERS_ABSTRACTSVMTRAINER_H
#define SHARK_ALGORITHMS_TRAINERS_ABSTRACTSVMTRAINER_H


#include <shark/LinAlg/Base.h>
// further Shark headers providing the types used below
#include <shark/Core/IParameterizable.h>
#include <shark/Models/Kernels/KernelExpansion.h>
#include <shark/Models/LinearModel.h>
#include <shark/Algorithms/Trainers/AbstractTrainer.h>
#include <shark/Algorithms/QP/QuadraticProgram.h>
namespace shark {


///
/// \brief Base class of all SVM trainers; it configures the underlying quadratic programming solver.
///
/// \par
/// The QpConfig class holds two structures describing
/// the stopping condition and the solution obtained by the underlying
/// quadratic programming solvers. It provides a uniform interface for
/// setting, e.g., the target solution accuracy and obtaining the
/// accuracy of the actual solution.
///
class QpConfig
{
public:
    /// Constructor
    QpConfig(bool precomputedFlag = false, bool sparsifyFlag = true)
    : m_precomputedKernelMatrix(precomputedFlag)
    , m_sparsify(sparsifyFlag)
    , m_shrinking(true)
    , m_s2do(true)
    , m_verbosity(0)
    , m_accessCount(0)
    { }

    /// Read/write access to the stopping condition
    QpStoppingCondition& stoppingCondition()
    { return m_stoppingcondition; }

    /// Read access to the stopping condition
    QpStoppingCondition const& stoppingCondition() const
    { return m_stoppingcondition; }

    /// Access to the solution properties
    QpSolutionProperties& solutionProperties()
    { return m_solutionproperties; }

    /// Flag for using a precomputed kernel matrix
    bool& precomputeKernel()
    { return m_precomputedKernelMatrix; }

    /// Flag for using a precomputed kernel matrix
    bool const& precomputeKernel() const
    { return m_precomputedKernelMatrix; }

    /// Flag for sparsifying the model after training
    bool& sparsify()
    { return m_sparsify; }

    /// Flag for sparsifying the model after training
    bool const& sparsify() const
    { return m_sparsify; }

    /// Flag for shrinking in the decomposition solver
    bool& shrinking()
    { return m_shrinking; }

    /// Flag for shrinking in the decomposition solver
    bool const& shrinking() const
    { return m_shrinking; }

    /// Flag for S2DO (instead of SMO)
    bool& s2do()
    { return m_s2do; }

    /// Flag for S2DO (instead of SMO)
    bool const& s2do() const
    { return m_s2do; }

    /// Verbosity level of the solver
    unsigned int& verbosity()
    { return m_verbosity; }

    /// Verbosity level of the solver
    unsigned int const& verbosity() const
    { return m_verbosity; }

    /// Number of kernel accesses
    unsigned long long const& accessCount() const
    { return m_accessCount; }

    /// Set the threshold for the minimum dual accuracy stopping condition
    void setMinAccuracy(double a) { m_stoppingcondition.minAccuracy = a; }
    /// Set the number of iterations for the maximum-number-of-iterations stopping condition
    void setMaxIterations(unsigned long long i) { m_stoppingcondition.maxIterations = i; }
    /// Set the target objective value for the target value stopping condition
    void setTargetValue(double v) { m_stoppingcondition.targetValue = v; }
    /// Set the maximum training time in seconds for the maximum seconds stopping condition
    void setMaxSeconds(double s) { m_stoppingcondition.maxSeconds = s; }

protected:
    /// conditions for when to stop the QP solver
    QpStoppingCondition m_stoppingcondition;
    /// properties of the approximate solution found by the solver
    QpSolutionProperties m_solutionproperties;
    /// should the solver use a precomputed kernel matrix?
    bool m_precomputedKernelMatrix;
    /// should the trainer sparsify the model after training?
    bool m_sparsify;
    /// should shrinking be used?
    bool m_shrinking;
    /// should S2DO be used instead of SMO?
    bool m_s2do;
    /// verbosity level (currently unused)
    unsigned int m_verbosity;
    /// kernel access count
    unsigned long long m_accessCount;
};
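
/// \par Example
/// A minimal usage sketch of the QpConfig interface as exposed through a
/// concrete trainer deriving from it. CSvmTrainer and GaussianRbfKernel are
/// assumed here purely as illustrations (any trainer inheriting QpConfig
/// offers the same calls), and the constructor arguments are assumptions:
/// \code
/// GaussianRbfKernel<> kernel(0.5);                       // some kernel function
/// KernelClassifier<RealVector> svm;                      // model to be trained
/// CSvmTrainer<RealVector> trainer(&kernel, 1.0, true);   // assumed signature: kernel, C, offset
///
/// trainer.setMaxIterations(200000);  // cap the number of QP solver iterations
/// trainer.setMaxSeconds(60.0);       // ... and the training time
/// trainer.sparsify() = true;         // drop non-support vectors after training
/// trainer.train(svm, data);          // 'data' is a ClassificationDataset with the training examples
///
/// // afterwards, solutionProperties() and accessCount() describe the solution process
/// \endcode
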


///
/// \brief Super class of all kernelized (non-linear) SVM trainers.
///
/// \par
/// This class holds general information shared by most if not
/// all SVM trainers. First of all, this includes the kernel and
/// the regularization parameter. The class also manages
/// meta-information of the training process, like the maximal
/// size of the kernel cache, the stopping criterion, as well
/// as information on the actual solution.
/// \ingroup supervised_trainer
template <
    class InputType, class LabelType,
    class Model = KernelClassifier<InputType>,
    class Trainer = AbstractTrainer<Model, LabelType>
>
class AbstractSvmTrainer
: public Trainer, public QpConfig, public IParameterizable<>
{
public:
    typedef AbstractKernelFunction<InputType> KernelType;

    //! Constructor
    //! \param kernel kernel function to use for training and prediction
    //! \param C regularization parameter - always the 'true' value of C, even when unconstrained is set
    //! \param offset train the SVM with an offset (bias) term - this is not supported by all SVM solvers.
    //! \param unconstrained when a value for C is given via setParameterVector, should it be passed through the exp-function before being used in the solver?
    AbstractSvmTrainer(KernelType* kernel, double C, bool offset, bool unconstrained = false)
    : m_kernel(kernel)
    , m_regularizers(1,C)
    , m_trainOffset(offset)
    , m_unconstrained(unconstrained)
    , m_cacheSize(0x4000000)
    {
        SHARK_RUNTIME_CHECK( C > 0, "C must be larger than 0" );
        SHARK_RUNTIME_CHECK( kernel != nullptr, "Kernel must not be NULL" );
    }

    //! Constructor featuring two regularization parameters
    //! \param kernel kernel function to use for training and prediction
    //! \param negativeC regularization parameter of the negative class (label 0)
    //! \param positiveC regularization parameter of the positive class (label 1)
    //! \param offset train the SVM with an offset (bias) term - this is not supported by all SVM solvers.
    //! \param unconstrained when a value for C is given via setParameterVector, should it be passed through the exp-function before being used in the solver?
    AbstractSvmTrainer(KernelType* kernel, double negativeC, double positiveC, bool offset, bool unconstrained = false)
    : m_kernel(kernel)
    , m_regularizers(2)
    , m_trainOffset(offset)
    , m_unconstrained(unconstrained)
    , m_cacheSize(0x4000000)
    {
        SHARK_RUNTIME_CHECK( positiveC > 0, "C must be larger than 0" );
        SHARK_RUNTIME_CHECK( negativeC > 0, "C must be larger than 0" );
        SHARK_RUNTIME_CHECK( kernel != nullptr, "Kernel must not be NULL" );
        m_regularizers[0] = negativeC;
        m_regularizers[1] = positiveC;
    }

    /// \brief Return the value of the regularization parameter C.
    double C() const
    {
        SIZE_CHECK(m_regularizers.size() == 1);
        return m_regularizers[0];
    }
    /// \brief Set the value of the regularization parameter C.
    void setC(double C) {
        SHARK_RUNTIME_CHECK( C > 0, "C must be larger than 0" );
        m_regularizers[0] = C;
    }

    /// \brief Return the vector of regularization parameters.
    RealVector const& regularizationParameters() const
    {
        return m_regularizers;
    }

    /// \brief Set the vector of regularization parameters, either a single shared value or one per class.
    void setRegularizationParameters(RealVector const& regularizers) {
        SHARK_RUNTIME_CHECK( min(regularizers) > 0, "regularization parameters must be larger than 0" );
        m_regularizers = regularizers;
    }

    /// Access to the kernel function
    KernelType* kernel()
    { return m_kernel; }
    /// Read access to the kernel function
    KernelType const* kernel() const
    { return m_kernel; }
    /// Set the kernel function; the pointer must not be NULL
    void setKernel(KernelType* kernel) {
        SHARK_RUNTIME_CHECK( kernel != nullptr, "Kernel must not be NULL" );
        m_kernel = kernel;
    }

    bool isUnconstrained() const
    { return m_unconstrained; }

    bool trainOffset() const
    { return m_trainOffset; }

    std::size_t cacheSize() const
    { return m_cacheSize; }
    void setCacheSize( std::size_t size )
    { m_cacheSize = size; }


    /// get the hyper-parameter vector
    RealVector parameterVector() const{
        if(m_unconstrained)
            return m_kernel->parameterVector() | log(m_regularizers);
        else
            return m_kernel->parameterVector() | m_regularizers;
    }

    /// set the vector of hyper-parameters
    void setParameterVector(RealVector const& newParameters){
        size_t kp = m_kernel->numberOfParameters();
        SHARK_ASSERT(newParameters.size() == kp + m_regularizers.size());
        m_kernel->setParameterVector(subrange(newParameters,0,kp));
        noalias(m_regularizers) = subrange(newParameters,kp,newParameters.size());
        if(m_unconstrained)
            m_regularizers = exp(m_regularizers);
    }

    /// return the number of hyper-parameters
    size_t numberOfParameters() const{
        return m_kernel->numberOfParameters() + m_regularizers.size();
    }


protected:
    KernelType* m_kernel;  ///< Kernel object.
    ///\brief Vector of regularization parameters.
    ///
    /// If the size of the vector is 1, there is only one regularization parameter for all classes;
    /// otherwise there must be one for every class in the dataset.
    /// The exact meaning depends on the sub-class, but the value is always positive,
    /// and higher implies a less regular solution.
    RealVector m_regularizers;
    bool m_trainOffset;    ///< Is the SVM trained with or without bias?
    bool m_unconstrained;  ///< Is log(C) stored internally as a parameter instead of C? If yes, then we get rid of the constraint C > 0 on the level of the parameter interface.
    std::size_t m_cacheSize;  ///< Number of values in the kernel cache. The size of the cache in bytes is the size of one entry (4 for float, 8 for double) times this number; the default of 0x4000000 entries thus corresponds to 512 MB for double precision.
};
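
/// \par Example
/// Sketch of the hyper-parameter handling defined above. The layout follows
/// directly from parameterVector(): the kernel parameters come first, followed
/// by the regularization parameter(s), which are stored as log(C) when the
/// trainer was constructed with unconstrained = true. GaussianRbfKernel and
/// CSvmTrainer are assumed here only as examples of a kernel and of a concrete
/// subclass, and the CSvmTrainer constructor arguments are assumptions:
/// \code
/// GaussianRbfKernel<> kernel(0.5);                            // one kernel parameter
/// CSvmTrainer<RealVector> trainer(&kernel, 1.0, true, true);  // C = 1, with offset, unconstrained
///
/// RealVector params = trainer.parameterVector();  // [kernel parameter, log(C)]
/// params(params.size() - 1) = std::log(10.0);     // request C = 10 through the encoded entry
/// trainer.setParameterVector(params);             // updates kernel and regularizer together
/// \endcode
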


///
/// \brief Super class of all linear SVM trainers.
///
/// \par
/// This class is analogous to the AbstractSvmTrainer class,
/// but for training of linear SVMs. It represents the
/// regularization parameter of the SVM. The class also manages
/// meta-information of the training process, like the stopping
/// criterion and information on the actual solution.
/// \ingroup supervised_trainer
template <class InputType>
class AbstractLinearSvmTrainer
: public AbstractTrainer<LinearClassifier<InputType>, unsigned int>
, public QpConfig
, public IParameterizable<>
{
public:

    //! Constructor
    //! \param C regularization parameter - always the 'true' value of C, even when unconstrained is set
    //! \param offset train the SVM with an offset (bias) term - this is not supported by all SVM solvers.
    //! \param unconstrained when a value for C is given via setParameterVector, should it be passed through the exp-function before being used in the solver?
    AbstractLinearSvmTrainer(double C, bool offset, bool unconstrained)
    : m_C(C)
    , m_trainOffset(offset)
    , m_unconstrained(unconstrained)
    { SHARK_RUNTIME_CHECK( C > 0, "C must be larger than 0" ); }

    /// \brief Return the value of the regularization parameter C.
    double C() const
    { return m_C; }

    /// \brief Set the value of the regularization parameter C.
    void setC(double C) {
        SHARK_RUNTIME_CHECK( C > 0, "C must be larger than 0" );
        m_C = C;
    }


    /// \brief Is the regularization parameter provided in logarithmic (unconstrained) form as a parameter?
    bool isUnconstrained() const
    { return m_unconstrained; }

    bool trainOffset() const
    { return m_trainOffset; }

    /// \brief Get the hyper-parameter vector.
    RealVector parameterVector() const
    {
        RealVector ret(1);
        ret(0) = (m_unconstrained ? std::log(m_C) : m_C);
        return ret;
    }

    /// \brief Set the vector of hyper-parameters.
    void setParameterVector(RealVector const& newParameters)
    {
        SHARK_ASSERT(newParameters.size() == 1);
        setC(m_unconstrained ? std::exp(newParameters(0)) : newParameters(0));
    }

    /// \brief Return the number of hyper-parameters.
    size_t numberOfParameters() const
    { return 1; }

protected:
    double m_C;            ///< Regularization parameter. The exact meaning depends on the sub-class, but the value is always positive, and higher implies a less regular solution.
    bool m_trainOffset;    ///< Is the SVM trained with or without bias?
    bool m_unconstrained;  ///< Is log(C) stored internally as a parameter instead of C? If yes, then we get rid of the constraint C > 0 on the level of the parameter interface.

};
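
/// \par Example
/// Sketch of the single hyper-parameter exposed by trainers derived from
/// AbstractLinearSvmTrainer. LinearCSvmTrainer is assumed here purely as an
/// illustration of such a subclass; its exact constructor signature may differ.
/// \code
/// LinearCSvmTrainer<RealVector> trainer(1.0, false);  // assumed: C = 1, no offset
///
/// // with unconstrained == false the parameter vector holds C directly
/// RealVector params = trainer.parameterVector();  // params(0) == 1.0
/// params(0) = 5.0;
/// trainer.setParameterVector(params);             // equivalent to trainer.setC(5.0)
/// \endcode
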


}
#endif