EpsilonSvmTrainer.h
//===========================================================================
/*!
 *
 * \brief       Trainer for the Epsilon-Support Vector Machine for Regression
 *
 * \author      T. Glasmachers
 * \date        2007-2012
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ALGORITHMS_EPSILONSVMTRAINER_H
#define SHARK_ALGORITHMS_EPSILONSVMTRAINER_H


#include <shark/Algorithms/Trainers/AbstractSvmTrainer.h>
#include <shark/Algorithms/QP/BoxConstrainedProblems.h>
#include <shark/Algorithms/QP/SvmProblems.h>
#include <shark/Algorithms/QP/QpSolver.h>
#include <shark/LinAlg/BlockMatrix2x2.h>
#include <shark/LinAlg/CachedMatrix.h>
#include <shark/LinAlg/KernelMatrix.h>
#include <shark/LinAlg/PrecomputedMatrix.h>

namespace shark {


///
/// \brief Training of Epsilon-SVMs for regression.
///
/// The Epsilon-SVM is a support vector machine variant
/// for regression problems. Given are data tuples
/// \f$ (x_i, y_i) \f$ with x-component denoting input and
/// y-component denoting a real-valued label (see the tutorial on
/// label conventions; the implementation uses RealVector),
/// a kernel function k(x, x'), a regularization constant C > 0,
/// and a loss insensitivity parameter \f$ \varepsilon \f$.
/// Let H denote the reproducing kernel Hilbert space induced
/// by k, and let \f$ \phi \f$ denote the corresponding
/// feature map. Then the SVM regression function is of the form
/// \f[
///     f(x) = \langle w, \phi(x) \rangle + b
/// \f]
/// with coefficients w and b given by the (primal)
/// optimization problem
/// \f[
///     \min \frac{1}{2} \|w\|^2 + C \sum_i L(y_i, f(x_i)),
/// \f]
/// where
/// \f[
///     L(y, f(x)) = \max\{0, |y - f(x)| - \varepsilon \}
/// \f]
/// is the \f$ \varepsilon \f$-insensitive absolute loss.
///
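/// \par Example
/// A minimal usage sketch. The Gaussian kernel, its bandwidth 0.5, and the
/// values C = 10 and epsilon = 0.1 are illustrative choices, not defaults:
/// \code
/// LabeledData<RealVector, RealVector> data = ...; // regression data, 1D labels (loading omitted)
/// GaussianRbfKernel<RealVector> kernel(0.5);      // kernel function k(x, x')
/// KernelExpansion<RealVector> svm;                // model f(x) = <w, phi(x)> + b
/// EpsilonSvmTrainer<RealVector> trainer(&kernel, 10.0, 0.1);
/// trainer.train(svm, data);
/// \endcode
///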
/// \ingroup supervised_trainer
template <class InputType, class CacheType = float>
class EpsilonSvmTrainer : public AbstractSvmTrainer<InputType, RealVector, KernelExpansion<InputType> >
{
public:

    typedef CacheType QpFloatType;

    typedef AbstractKernelFunction<InputType> KernelType;
    typedef AbstractSvmTrainer<InputType, RealVector, KernelExpansion<InputType> > base_type;

    // matrix types used by the QP solver; the 2x2 block structure doubles the
    // kernel matrix to cover the two coefficient halves of each data point
    typedef KernelMatrix<InputType, QpFloatType> KernelMatrixType;
    typedef BlockMatrix2x2<KernelMatrixType> BlockMatrixType;
    typedef CachedMatrix<BlockMatrixType> CachedBlockMatrixType;
    typedef PrecomputedMatrix<BlockMatrixType> PrecomputedBlockMatrixType;

    /// Constructor
    /// \param kernel         kernel function to use for training and prediction
    /// \param C              regularization parameter - always the 'true' value of C, even when unconstrained is set
    /// \param epsilon        loss insensitivity parameter
    /// \param unconstrained  when a C-value is given via setParameter, should it be piped through the exp-function before using it in the solver?
    EpsilonSvmTrainer(KernelType* kernel, double C, double epsilon, bool unconstrained = false)
    : base_type(kernel, C, true, unconstrained)
    , m_epsilon(epsilon)
    { }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "EpsilonSvmTrainer"; }

    double epsilon() const
    { return m_epsilon; }
    void setEpsilon(double epsilon)
    { m_epsilon = epsilon; }

    /// get the hyper-parameter vector
    RealVector parameterVector() const{
        double pEps = base_type::m_unconstrained ? std::log(m_epsilon) : m_epsilon;
        return base_type::parameterVector() | pEps;
    }

    /// set the vector of hyper-parameters
    void setParameterVector(RealVector const& newParameters){
        size_t sp = base_type::numberOfParameters();
        SHARK_ASSERT(newParameters.size() == sp + 1);
        base_type::setParameterVector(subrange(newParameters, 0, sp));
        setEpsilon(base_type::m_unconstrained ? std::exp(newParameters(sp)) : newParameters(sp));
    }
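
    // Encoding note: with unconstrained == true the last entry of the
    // parameter vector holds log(epsilon), so an unconstrained optimizer can
    // search over all of R while epsilon = exp(.) stays positive; for example,
    // a raw parameter value of 0 decodes to epsilon = 1.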

    /// return the number of hyper-parameters
    size_t numberOfParameters() const
    { return (base_type::numberOfParameters() + 1); }

    void train(KernelExpansion<InputType>& svm, LabeledData<InputType, RealVector> const& dataset){
        svm.setStructure(base_type::m_kernel, dataset.inputs(), true, 1);

        SHARK_RUNTIME_CHECK(labelDimension(dataset) == 1, "Can only train 1D labels");

        if (base_type::precomputeKernel())
            trainSVM<PrecomputedBlockMatrixType>(svm, dataset);
        else
            trainSVM<CachedBlockMatrixType>(svm, dataset);

        if (base_type::sparsify()) svm.sparsify();
    }
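
    // Backend choice (informal): the precomputed variant stores the full
    // doubled kernel matrix up front (O(n^2) memory, fastest access), while
    // the cached variant recomputes matrix rows on demand within a fixed
    // cache budget, trading time for memory on larger datasets.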

private:
    template<class MatrixType>
    void trainSVM(KernelExpansion<InputType>& svm, LabeledData<InputType, RealVector> const& dataset){
        typedef GeneralQuadraticProblem<MatrixType> SVMProblemType;
        typedef SvmShrinkingProblem<SVMProblemType> ProblemType;

        // Set up the problem. Each data point contributes two dual variables:
        // variable i with box [0, C] and variable i+ic with box [-C, 0]; the
        // final coefficient of point i is their sum.
        KernelMatrixType km(*base_type::m_kernel, dataset.inputs());
        std::size_t ic = km.size();
        BlockMatrixType blockkm(&km);
        MatrixType matrix(&blockkm);
        SVMProblemType svmProblem(matrix);
        for(std::size_t i = 0; i != ic; ++i){
            svmProblem.linear(i) = dataset.element(i).label(0) - m_epsilon;
            svmProblem.linear(i+ic) = dataset.element(i).label(0) + m_epsilon;
            svmProblem.boxMin(i) = 0;
            svmProblem.boxMax(i) = this->C();
            svmProblem.boxMin(i+ic) = -this->C();
            svmProblem.boxMax(i+ic) = 0;
        }
        ProblemType problem(svmProblem, base_type::m_shrinking);

        // solve it
        QpSolver<ProblemType> solver(problem);
        solver.solve(base_type::stoppingCondition(), &base_type::solutionProperties());
        RealVector alpha = problem.getUnpermutedAlpha();
        column(svm.alpha(), 0) = subrange(alpha, 0, ic) + subrange(alpha, ic, 2*ic);

        // Compute the offset b from the KKT conditions: for every free support
        // vector the gradient equals b exactly; gradients of variables at a
        // box bound only bracket it from below or above.
        double lowerBound = -1e100;
        double upperBound = 1e100;
        double sum = 0.0;

        std::size_t freeVars = 0;
        for (std::size_t i=0; i< ic; i++)
        {
            // positive half of the coefficient pair
            if (problem.alpha(i) > 0.0)
            {
                double value = problem.gradient(i);
                if (problem.alpha(i) < this->C())
                {
                    sum += value;
                    freeVars++;
                }
                else
                {
                    lowerBound = std::max(value, lowerBound);
                }
            }
            // negative half of the coefficient pair
            if (problem.alpha(i + ic) < 0.0)
            {
                double value = problem.gradient(i + ic);
                if (problem.alpha(i + ic) > -this->C())
                {
                    sum += value;
                    freeVars++;
                }
                else
                {
                    upperBound = std::min(value, upperBound);
                }
            }
        }
        if (freeVars > 0)
            svm.offset(0) = sum / freeVars; // stabilized (averaged) exact value
        else
            svm.offset(0) = 0.5 * (lowerBound + upperBound); // best estimate

        base_type::m_accessCount = km.getAccessCount();
    }
    double m_epsilon;
};


}
#endif