QpSolver.h
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief General and specialized quadratic program classes and a generic solver.
6 *
7 *
8 *
9 * \author T. Glasmachers, O.Krause
10 * \date 2007-2016
11 *
12 *
13 * \par Copyright 1995-2017 Shark Development Team
14 *
15 * <BR><HR>
16 * This file is part of Shark.
17 * <https://shark-ml.github.io/Shark/>
18 *
19 * Shark is free software: you can redistribute it and/or modify
20 * it under the terms of the GNU Lesser General Public License as published
21 * by the Free Software Foundation, either version 3 of the License, or
22 * (at your option) any later version.
23 *
24 * Shark is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU Lesser General Public License for more details.
28 *
29 * You should have received a copy of the GNU Lesser General Public License
30 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31 *
32 */
33//===========================================================================
34
35
36#ifndef SHARK_ALGORITHMS_QP_QPSOLVER_H
37#define SHARK_ALGORITHMS_QP_QPSOLVER_H
38
39#include <shark/Core/Timer.h>
41#include <shark/Data/Dataset.h>
42
43namespace shark{
44
45/// \brief Quadratic Problem with only Box-Constraints
46/// Let K the kernel matrix, than the problem has the form
47///
48/// \f$max_\alpha - 1/2 \alpha^T K \alpha + \alpha^Tv\f$
49/// under constraints:
50/// \f$l_i <= \alpha_i <= u_i\f$
51template<class MatrixT>
53public:
54 typedef MatrixT MatrixType;
55 typedef typename MatrixType::QpFloatType QpFloatType;
56
57 //Setup only using kernel matrix, labels and regularization parameter
60 , linear(quadratic.size(),0)
61 , alpha(quadratic.size(),0)
62 , diagonal(quadratic.size())
63 , permutation(quadratic.size())
64 , boxMin(quadratic.size(),0)
65 , boxMax(quadratic.size(),0)
66 {
67 for(std::size_t i = 0; i!= dimensions(); ++i){
68 permutation[i] = i;
69 diagonal(i) = quadratic.entry(i, i);
70 }
71 }
72
73 /// \brief constructor which initializes a C-SVM problem with weighted datapoints and different regularizers for every class
76 Data<unsigned int> const& labels,
77 Data<double> const& weights,
78 RealVector const& regularizers
80 , linear(quadratic.size())
81 , alpha(quadratic.size(),0)
82 , diagonal(quadratic.size())
83 , permutation(quadratic.size())
84 , boxMin(quadratic.size())
85 , boxMax(quadratic.size())
86 {
87 SIZE_CHECK(dimensions() == linear.size());
88 SIZE_CHECK(dimensions() == quadratic.size());
91 SIZE_CHECK(regularizers.size() > 0);
92 SIZE_CHECK(regularizers.size() <= 2);
93
94 double Cn = regularizers[0];
95 double Cp = regularizers[0];
96 if(regularizers.size() == 2)
97 Cp = regularizers[1];
98
99 for(std::size_t i = 0; i!= dimensions(); ++i){
100 unsigned int label = labels.element(i);
101 double weight = weights.element(i);
102 permutation[i] = i;
103 diagonal(i) = quadratic.entry(i, i);
104 linear(i) = label? 1.0:-1.0;
105 boxMin(i) = label? 0.0:-Cn*weight;
106 boxMax(i) = label? Cp*weight : 0.0;
107 }
108 }
109
110 std::size_t dimensions()const{
111 return quadratic.size();
112 }
113
114 /// exchange two variables via the permutation
115 void flipCoordinates(std::size_t i, std::size_t j)
116 {
117 if (i == j) return;
118
119 // notify the matrix cache
120 quadratic.flipColumnsAndRows(i, j);
121 std::swap( alpha[i], alpha[j]);
122 std::swap( linear[i], linear[j]);
123 std::swap( diagonal[i], diagonal[j]);
124 std::swap( boxMin[i], boxMin[j]);
125 std::swap( boxMax[i], boxMax[j]);
126 std::swap( permutation[i], permutation[j]);
127 }
128
129 /// \brief Scales all box constraints by a constant factor and adapts the solution by scaling it by the same factor.
130 void scaleBoxConstraints(double factor){
131 alpha *= factor;
132 boxMin *=factor;
133 boxMax *=factor;
134 }
135
136 /// representation of the quadratic part of the objective function
138
139 /// \brief Linear part of the problem
140 RealVector linear;
141
142 /// Solution candidate
143 RealVector alpha;
144
145 /// diagonal matrix entries
146 /// The diagonal array is of fixed size and not subject to shrinking.
147 RealVector diagonal;
148
149 /// permutation of the variables alpha, gradient, etc.
150 std::vector<std::size_t> permutation;
151
152 /// \brief component-wise lower bound
153 RealVector boxMin;
154
155 /// \brief component-wise upper bound
156 RealVector boxMax;
157};
158
159///\brief Boxed problem for alpha in [lower,upper]^n and equality constraints.
160///
161///It is assumed for the initial alpha value that there exists a sum to one constraint and lower <= 1/n <= upper
162template<class MatrixT>
164public:
165 typedef MatrixT MatrixType;
166 typedef typename MatrixType::QpFloatType QpFloatType;
167
168 //Setup only using kernel matrix, labels and regularization parameter
169 BoxedSVMProblem(MatrixType& quadratic, RealVector const& linear, double lower, double upper)
171 , linear(linear)
172 , alpha(quadratic.size(),1.0/quadratic.size())
173 , diagonal(quadratic.size())
174 , permutation(quadratic.size())
175 , m_lower(lower)
176 , m_upper(upper)
177 {
178 SIZE_CHECK(dimensions() == linear.size());
179 SIZE_CHECK(dimensions() == quadratic.size());
180
181 for(std::size_t i = 0; i!= dimensions(); ++i){
182 permutation[i] = i;
183 diagonal(i) = quadratic.entry(i, i);
184 }
185 }
186
187 std::size_t dimensions()const{
188 return quadratic.size();
189 }
190
191 double boxMin(std::size_t i)const{
192 return m_lower;
193 }
194 double boxMax(std::size_t i)const{
195 return m_upper;
196 }
197
198 /// representation of the quadratic part of the objective function
200
201 ///\brief Linear part of the problem
202 RealVector linear;
203
204 /// Solution candidate
205 RealVector alpha;
206
207 /// diagonal matrix entries
208 /// The diagonal array is of fixed size and not subject to shrinking.
209 RealVector diagonal;
210
211 /// exchange two variables via the permutation
212 void flipCoordinates(std::size_t i, std::size_t j)
213 {
214 if (i == j) return;
215
216 // notify the matrix cache
217 quadratic.flipColumnsAndRows(i, j);
218 std::swap( alpha[i], alpha[j]);
219 std::swap( linear[i], linear[j]);
220 std::swap( diagonal[i], diagonal[j]);
221 std::swap( permutation[i], permutation[j]);
222 }
223
224 /// \brief Scales all box constraints by a constant factor and adapts the solution by scaling it by the same factor.
225 void scaleBoxConstraints(double factor){
226 m_lower*=factor;
227 m_upper*=factor;
228 alpha *= factor;
229 }
230
231 /// permutation of the variables alpha, gradient, etc.
232 std::vector<std::size_t> permutation;
233private:
234 double m_lower;
235 double m_upper;
236};
237
238
239/// \brief Problem formulation for binary C-SVM problems
240///
241/// \f$max_\alpha - 1/2 \alpha^T K \alpha + \alpha^Ty\f$
242/// under constraints:
243/// \f$l_i <= \alpha_i <= u_i\f$
244/// \f$\sum_i \alpha_i = 0\f$
245template<class MatrixT>
247public:
248 typedef MatrixT MatrixType;
249 typedef typename MatrixType::QpFloatType QpFloatType;
250
251 /// \brief Setup only using kernel matrix, labels and regularization parameter
254 , linear(quadratic.size())
255 , alpha(quadratic.size(),0)
256 , diagonal(quadratic.size())
257 , permutation(quadratic.size())
258 , positive(quadratic.size())
259 , m_Cp(C)
260 , m_Cn(C)
261 {
262 SIZE_CHECK(dimensions() == linear.size());
263 SIZE_CHECK(dimensions() == quadratic.size());
265
266 for(std::size_t i = 0; i!= dimensions(); ++i){
267 permutation[i] = i;
268 diagonal(i) = quadratic.entry(i, i);
269 linear(i) = labels.element(i)? 1.0:-1.0;
270 positive[i] = linear(i) > 0;
271 }
272 }
273 ///\brief Setup using kernel matrix, labels and different regularization parameters for positive and negative classes
274 CSVMProblem(MatrixType& quadratic, Data<unsigned int> const& labels, RealVector const& regularizers)
276 , linear(quadratic.size())
277 , alpha(quadratic.size(),0)
278 , diagonal(quadratic.size())
279 , permutation(quadratic.size())
280 , positive(quadratic.size())
281 {
282 SIZE_CHECK(dimensions() == linear.size());
283 SIZE_CHECK(dimensions() == quadratic.size());
285
286 SIZE_CHECK(regularizers.size() > 0);
287 SIZE_CHECK(regularizers.size() <= 2);
288 m_Cp = m_Cn = regularizers[0];
289 if(regularizers.size() == 2)
290 m_Cp = regularizers[1];
291
292
293 for(std::size_t i = 0; i!= dimensions(); ++i){
294 permutation[i] = i;
295 diagonal(i) = quadratic.entry(i, i);
296 linear(i) = labels.element(i)? 1.0:-1.0;
297 positive[i] = linear(i) > 0;
298 }
299 }
300
301 //Setup with changed linear part
302 CSVMProblem(MatrixType& quadratic, RealVector linear, Data<unsigned int> const& labels, double C)
304 , linear(linear)
305 , alpha(quadratic.size(),0)
306 , diagonal(quadratic.size())
307 , permutation(quadratic.size())
308 , positive(quadratic.size())
309 , m_Cp(C)
310 , m_Cn(C)
311 {
312 SIZE_CHECK(dimensions() == quadratic.size());
313 SIZE_CHECK(dimensions() == linear.size());
315
316 for(std::size_t i = 0; i!= dimensions(); ++i){
317 permutation[i] = i;
318 diagonal(i) = quadratic.entry(i, i);
319 positive[i] = labels.element(i) ? 1: 0;
320 }
321 }
322
323 std::size_t dimensions()const{
324 return quadratic.size();
325 }
326
327 double boxMin(std::size_t i)const{
328 return positive[i] ? 0.0 : -m_Cn;
329 }
330 double boxMax(std::size_t i)const{
331 return positive[i] ? m_Cp : 0.0;
332 }
333
334 /// representation of the quadratic part of the objective function
336
337 ///\brief Linear part of the problem
338 RealVector linear;
339
340 /// Solution candidate
341 RealVector alpha;
342
343 /// diagonal matrix entries
344 /// The diagonal array is of fixed size and not subject to shrinking.
345 RealVector diagonal;
346
347 /// permutation of the variables alpha, gradient, etc.
348 std::vector<std::size_t> permutation;
349
350 /// exchange two variables via the permutation
351 void flipCoordinates(std::size_t i, std::size_t j)
352 {
353 if (i == j) return;
354
355 // notify the matrix cache
356 quadratic.flipColumnsAndRows(i, j);
357 std::swap( linear[i], linear[j]);
358 std::swap( alpha[i], alpha[j]);
359 std::swap( diagonal[i], diagonal[j]);
360 std::swap( permutation[i], permutation[j]);
361 std::swap( positive[i], positive[j]);
362 }
363
364 /// \brief Scales all box constraints by a constant factor and adapts the solution by scaling it by the same factor.
365 void scaleBoxConstraints(double factor, double variableScalingFactor){
366 bool sameFactor = factor == variableScalingFactor;
367 double newCp = m_Cp*factor;
368 double newCn = m_Cn*factor;
369 for(std::size_t i = 0; i != dimensions(); ++i){
370 if(sameFactor && alpha(i)== m_Cp)
371 alpha(i) = newCp;
372 else if(sameFactor && alpha(i) == -m_Cn)
373 alpha(i) = -newCn;
374 else
375 alpha(i) *= variableScalingFactor;
376 }
377 m_Cp = newCp;
378 m_Cn = newCn;
379 }
380
381private:
382 ///\brief whether the label of the point is positive
383 std::vector<char> positive;
384
385 ///\brief Regularization constant of the positive class
386 double m_Cp;
387 ///\brief Regularization constant of the negative class
388 double m_Cn;
389};
390
395 AlphaDeactivated = 3//also: AlphaUpperBound and AlphaLowerBound
397
398///
399/// \brief Quadratic program solver
400///
401/// todo: new documentation
402template<class Problem, class SelectionStrategy = typename Problem::PreferedSelectionStrategy >
404{
405public:
407 Problem& problem
408 ):m_problem(problem){}
409
410 /// \brief Solve the quadratic program.
411 ///
412 /// The function iteratively refines the solution by applying the
413 /// SMO (subset descent) algorithm until one of the stopping
414 /// criteria is fulfilled.
415 void solve(
417 QpSolutionProperties* prop = NULL
418 ){
419 double start_time = Timer::now();
420 unsigned long long iter = 0;
421 unsigned long long shrinkCounter = 0;
422
423 SelectionStrategy workingSet;
424
425 // decomposition loop
426 for(;;){
427 //stop if iterations exceed
428 if( iter == stop.maxIterations){
429 if (prop != NULL) prop->type = QpMaxIterationsReached;
430 break;
431 }
432 //stop if the maximum running time is exceeded
433 if (stop.maxSeconds < 1e100 && (iter+1) % 1000 == 0 ){
434 double current_time = Timer::now();
435 if (current_time - start_time > stop.maxSeconds){
436 if (prop != NULL) prop->type = QpTimeout;
437 break;
438 }
439 }
440
441 // select a working set and check for optimality
442 std::size_t i = 0, j = 0;
443 double acc = workingSet(m_problem,i, j);
444 if (acc < stop.minAccuracy) {
445 m_problem.unshrink();
446 if(m_problem.checkKKT() < stop.minAccuracy){
447 if (prop != NULL) prop->type = QpAccuracyReached;
448 break;
449 }
450 m_problem.shrink(stop.minAccuracy);
451 workingSet(m_problem,i,j);
452 workingSet.reset();
453 }
454
455 //update smo with the selected working set
456 m_problem.updateSMO(i,j);
457
458 //do a shrinking every 1000 iterations. if variables got shrink
459 //notify working set selection
460 if(shrinkCounter == 0 && m_problem.shrink(stop.minAccuracy)){
461 shrinkCounter = std::max<std::size_t>(1000,m_problem.dimensions());
462 workingSet.reset();
463 }
464 iter++;
465 shrinkCounter--;
466 }
467
468 if (prop != NULL)
469 {
470 double finish_time = Timer::now();
471
472 std::size_t i = 0, j = 0;
473 prop->accuracy = workingSet(m_problem,i, j);
474 prop->value = m_problem.functionValue();
475 prop->iterations = iter;
476 prop->seconds = finish_time - start_time;
477 }
478 }
479
480protected:
481 Problem& m_problem;
482};
483
484}
485#endif