SvmProblems.h
Go to the documentation of this file.
1/*!
2 *
3 *
4 * \brief -
5 *
6 * \author T. Glasmachers, O.Krause
7 * \date 2013
8 *
9 *
10 * \par Copyright 1995-2017 Shark Development Team
11 *
12 * <BR><HR>
13 * This file is part of Shark.
14 * <https://shark-ml.github.io/Shark/>
15 *
16 * Shark is free software: you can redistribute it and/or modify
17 * it under the terms of the GNU Lesser General Public License as published
18 * by the Free Software Foundation, either version 3 of the License, or
19 * (at your option) any later version.
20 *
21 * Shark is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU Lesser General Public License for more details.
25 *
26 * You should have received a copy of the GNU Lesser General Public License
27 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28 *
29 */
30#ifndef SHARK_ALGORITHMS_QP_SVMPROBLEMS_H
31#define SHARK_ALGORITHMS_QP_SVMPROBLEMS_H
32
34
35namespace shark{
36
37// Working-Set-Selection-Criteria are applied as follows:
38// Criterium crit;
39// value = crit(problem, i, j);
40
41
///\brief Computes the most violating pair of the problem
///
/// The MVP is defined as the pair of indices (i,j)
/// such that g(i) - g(j) is maximal, with alpha_i < max_i
/// and alpha_j > min_j.
48 /// \brief Select the most violating pair (MVP)
49 ///
50 /// \return maximal KKT violation
51 /// \param problem the SVM problem to select the working set for
52 /// \param i first working set component
53 /// \param j second working set component
54 template<class Problem>
55 double operator()(Problem& problem, std::size_t& i, std::size_t& j)
56 {
57 double largestUp = -1e100;
58 double smallestDown = 1e100;
59
60 for (std::size_t a=0; a < problem.active(); a++)
61 {
62 double ga = problem.gradient(a);
63 if (!problem.isUpperBound(a))
64 {
65 if (ga > largestUp)
66 {
67 largestUp = ga;
68 i = a;
69 }
70 }
71 if (!problem.isLowerBound(a))
72 {
73 if (ga < smallestDown)
74 {
75 smallestDown = ga;
76 j = a;
77 }
78 }
79 }
80
81 // MVP stopping condition
82 return largestUp - smallestDown;
83 }
84
85 void reset(){}
86};
87
88
///\brief Computes the maximum gain solution
90///
91/// The first index is chosen based on the maximum gradient (first index of MVP)
92/// and the second index is chosen such that the step of the corresponding alphas
93/// produces the largest gain.
95
	/// \brief Select a working set according to the second order algorithm of LIBSVM 2.8
	///
	/// The first index is the maximum-gradient (MVP) choice among variables that
	/// can still move up; the second index maximizes the second order gain of the
	/// joint two-variable step.
	///
	/// \return maximal KKT violation
	/// \param problem the svm problem to select the working set for
	/// \param i first working set component
	/// \param j second working set component
	template<class Problem>
	double operator()(Problem& problem, std::size_t& i, std::size_t& j)
	{
		i = 0;
		j = 1;

		double smallestDown = 1e100;
		double largestUp = -1e100;

		// first pass: largest gradient among variables not at the upper bound
		for (std::size_t a=0; a < problem.active(); a++)
		{
			double ga = problem.gradient(a);
			//if (aa < problem.boxMax(a))
			if (!problem.isUpperBound(a))
			{
				if (ga > largestUp)
				{
					largestUp = ga;
					i = a;
				}
			}
		}
		// every variable sits at its upper bound: no ascent direction exists
		if (largestUp == -1e100) return 0.0;

		// find the second index using second order information
		typename Problem::QpFloatType* q = problem.quadratic().row(i, 0, problem.active());
		double best = 0.0;
		for (std::size_t a = 0; a < problem.active(); a++){
			double ga = problem.gradient(a);
			if (!problem.isLowerBound(a))
			{
				// track the smallest "down" gradient for the KKT stopping value
				smallestDown=std::min(smallestDown,ga);

				// gain of the Newton step restricted to the (i,a) plane
				double gain = detail::maximumGainQuadratic2DOnLine(
					problem.diagonal(i),
					problem.diagonal(a),
					q[a],
					largestUp,ga
				);
				if (gain> best)
				{
					best = gain;
					j = a;
				}
			}
		}

		if (best == 0.0) return 0.0; // numerical accuracy reached :(

		// MVP stopping condition
		return largestUp - smallestDown;
	}

	void reset(){}
};
157
public:
	/// Start in LibSVM-selection mode until HMG has a previous working set to reuse.
	HMGSelectionCriterion():useLibSVM(true),smallProblem(false){}
161
	/// \brief Select a working set according to the hybrid maximum gain (HMG) algorithm
	///
	/// Falls back to the LibSVM 2.8 criterion on the first call, whenever the
	/// previous pair sits in a corner of its box, or once the problem is small
	/// enough that its full matrix fits the kernel cache; otherwise it reuses one
	/// variable of the previous working set and pairs it with the maximum-gain
	/// partner.
	///
	/// \return maximal KKT violation
	/// \param problem the svm problem to select the working set for
	/// \param i first working set component
	/// \param j second working set component
	template<class Problem>
	double operator()(Problem& problem, std::size_t& i, std::size_t& j)
	{
		if (smallProblem || useLibSVM || isInCorner(problem))
		{
			useLibSVM = false;
			// once the active part of the matrix fits the cache, second order
			// selection stays affordable in every iteration
			if(!smallProblem && sqr(problem.active()) < problem.quadratic().getMaxCacheSize())
				smallProblem = true;
			LibSVMSelectionCriterion libSVMSelection;
			double value = libSVMSelection(problem,i, j);
			last_i = i;
			last_j = j;
			return value;
		}
		//old HMG: try both variables of the previous pair as seed
		MGStep besti = selectMGVariable(problem,last_i);
		if(besti.violation == 0.0)
			return 0;
		MGStep bestj = selectMGVariable(problem,last_j);

		if(bestj.gain > besti.gain){
			i = last_j;
			j = bestj.index;
		}else{
			i = last_i;
			j = besti.index;
		}
		// order the pair so that i carries the larger gradient
		if (problem.gradient(i) < problem.gradient(j))
			std::swap(i, j);
		last_i = i;
		last_j = j;
		return besti.violation;
	}

	void reset(){
		useLibSVM = true;
		smallProblem = false;
	}
206
207private:
208 template<class Problem>
209 bool isInCorner(Problem const& problem)const{
210 double Li = problem.boxMin(last_i);
211 double Ui = problem.boxMax(last_i);
212 double Lj = problem.boxMin(last_j);
213 double Uj = problem.boxMax(last_j);
214 double eps_i = 1e-8 * (Ui - Li);
215 double eps_j = 1e-8 * (Uj - Lj);
216
217 if ((problem.alpha(last_i) <= Li + eps_i || problem.alpha(last_i) >= Ui - eps_i)
218 && ((problem.alpha(last_j) <= Lj + eps_j || problem.alpha(last_j) >= Uj - eps_j)))
219 {
220 return true;
221 }
222 return false;
223 }
	/// \brief Result of a single maximum-gain partner search.
	struct MGStep{
		std::size_t index;//index of the best partner variable found
		double violation;//KKT violation: largest "up" minus smallest "down" gradient
		double gain;//objective gain of the proposed step
	};
	/// \brief Search the best partner for variable i by maximizing the gain.
	///
	/// Scans all active variables, computes the box-constrained Newton step in
	/// the (a,i) plane for every candidate a != i, and returns the candidate
	/// with the largest objective gain together with the current KKT violation.
	template<class Problem>
	MGStep selectMGVariable(Problem& problem,std::size_t i) const{

		//best variable pair found so far
		std::size_t bestIndex = 0;//index of variable
		double bestGain = 0;

		double largestUp = -1e100;
		double smallestDown = 1e100;

		// try combinations with b = old_i
		typename Problem::QpFloatType* q = problem.quadratic().row(i, 0, problem.active());
		double ab = problem.alpha(i);
		double db = problem.diagonal(i);
		double Lb = problem.boxMin(i);
		double Ub = problem.boxMax(i);
		double gb = problem.gradient(i);
		for (std::size_t a = 0; a < problem.active(); a++)
		{
			double ga = problem.gradient(a);

			// keep the KKT violation up to date while scanning
			if (!problem.isUpperBound(a))
				largestUp = std::max(largestUp,ga);
			if (!problem.isLowerBound(a))
				smallestDown = std::min(smallestDown,ga);

			if (a == i) continue;
			//get maximum unconstrained step length
			// NOTE(review): unlike updateSMO, the denominator is not clamped to a
			// positive minimum here — presumably a strictly positive 2x2 curvature
			// is assumed; confirm against the kernel properties.
			double denominator = (problem.diagonal(a) + db - 2.0 * q[a]);
			double mu_max = (ga - gb) / denominator;

			//check whether a step > 0 is possible at all
			//~ if( mu_max > 0 && ( problem.isUpperBound(a) || problem.isLowerBound(b)))continue;
			//~ if( mu_max < 0 && ( problem.isLowerBound(a) || problem.isUpperBound(b)))continue;

			//constrain the step to the box in both coordinates
			double aa = problem.alpha(a);
			double La = problem.boxMin(a);
			double Ua = problem.boxMax(a);
			double mu_star = mu_max;
			if (aa + mu_star < La) mu_star = La - aa;
			else if (mu_star + aa > Ua) mu_star = Ua - aa;
			if (ab - mu_star < Lb) mu_star = ab - Lb;
			else if (ab - mu_star > Ub) mu_star = ab - Ub;

			// gain of the (possibly clipped) step along the search direction
			double gain = mu_star * (2.0 * mu_max - mu_star) * denominator;

			// select the largest gain
			if (gain > bestGain)
			{
				bestGain = gain;
				bestIndex = a;
			}
		}
		MGStep step;
		step.violation= largestUp-smallestDown;
		step.index = bestIndex;
		step.gain=bestGain;
		return step;

	}
290
	/// indices of the previous working set; seed candidates for HMG selection
	std::size_t last_i;
	std::size_t last_j;

	/// true while falling back to the LibSVM criterion (also the initial state)
	bool useLibSVM;
	/// true once the active problem fits the kernel cache; then the LibSVM
	/// criterion is used in every iteration
	bool smallProblem;
};
297
298
/// \brief Dual SVM quadratic program wrapper (gradient bookkeeping, SMO updates).
///
/// NOTE(review): the class head appears to be missing from this extract — the
/// visible text jumps from the template header straight to "public:".
template<class Problem>
public:
	typedef typename Problem::QpFloatType QpFloatType;
	typedef typename Problem::MatrixType MatrixType;
	/// \brief Construct the problem and initialize the dual gradient.
	///
	/// The gradient starts at the linear term and is corrected by -Q*alpha for
	/// every nonzero coefficient of the initial solution.
	///
	/// NOTE(review): the member-initializer list looks truncated in this extract
	/// (no opening brace / alpha-status initializer is visible) — confirm.
	SvmProblem(Problem& problem)
	: m_problem(problem)
	, m_gradient(problem.linear)
	, m_active(problem.dimensions())
		//compute the gradient if alpha != 0
		for (std::size_t i=0; i != dimensions(); i++){
			double v = alpha(i);
			if (v != 0.0){
				QpFloatType* q = quadratic().row(i, 0, dimensions());
				for (std::size_t a=0; a < dimensions(); a++)
					m_gradient(a) -= q[a] * v;
			}
		}
	}
	/// \brief Total number of variables of the problem.
	std::size_t dimensions()const{
		return m_problem.dimensions();
	}

	/// \brief Number of currently active (non-shrunk) variables.
	std::size_t active()const{
		return m_active;
	}

	/// \brief Lower box constraint; a deactivated variable is frozen at its current alpha.
	double boxMin(std::size_t i)const{
		return m_alphaStatus[i]==AlphaDeactivated? alpha(i): m_problem.boxMin(i);
	}
	/// \brief Upper box constraint; a deactivated variable is frozen at its current alpha.
	double boxMax(std::size_t i)const{
		return m_alphaStatus[i]==AlphaDeactivated? alpha(i): m_problem.boxMax(i);
	}
	/// \brief True when alpha(i) sits at the lower bound (status bitmask test).
	bool isLowerBound(std::size_t i)const{
		return m_alphaStatus[i] & AlphaLowerBound;
	}
	/// \brief True when alpha(i) sits at the upper bound (status bitmask test).
	bool isUpperBound(std::size_t i)const{
		return m_alphaStatus[i] & AlphaUpperBound;
	}

	/// representation of the quadratic part of the objective function
	/// NOTE(review): the signature line of this accessor appears to be missing
	/// from this extract.
		return m_problem.quadratic;
	}
347
	/// \brief Linear part of the objective for variable i.
	double linear(std::size_t i)const{
		return m_problem.linear(i);
	}

	/// \brief Current value of the dual variable i.
	double alpha(std::size_t i)const{
		return m_problem.alpha(i);
	}

	/// \brief Diagonal entry Q(i,i) of the quadratic matrix.
	double diagonal(std::size_t i)const{
		return m_problem.diagonal(i);
	}

	/// \brief Gradient of the dual objective with respect to alpha(i).
	double gradient(std::size_t i)const{
		return m_gradient(i);
	}

	/// \brief Maps internal index i to the original (caller-side) index.
	std::size_t permutation(std::size_t i)const{
		return m_problem.permutation[i];
	}
367
368 RealVector getUnpermutedAlpha()const{
369 RealVector alpha(dimensions());
370 for (std::size_t i=0; i<dimensions(); i++)
371 alpha(m_problem.permutation[i]) = m_problem.alpha(i);
372 return alpha;
373 }
374
	///\brief Does an update of SMO given a working set with indices i and j.
	///
	/// Solves the two-variable subproblem analytically, clips the step to the
	/// box constraints in a numerically stable way (the variable that reaches a
	/// bound is set to it exactly), and updates the stored gradient.
	void updateSMO(std::size_t i, std::size_t j){
		SIZE_CHECK(i < active());
		SIZE_CHECK(j < active());
		// get the matrix row corresponding to the first variable of the working set
		QpFloatType* qi = quadratic().row(i, 0, active());

		// solve the sub-problem defined by i and j
		double numerator = gradient(i) - gradient(j);
		double denominator = diagonal(i) + diagonal(j) - 2.0 * qi[j];
		// guard against a (numerically) non-positive curvature
		denominator = std::max(denominator,1.e-12);
		double step = numerator/denominator;

		//update alpha in a numerically stable way
		// do the update of the alpha values carefully - avoid numerical problems
		double Ui = boxMax(i);
		double Lj = boxMin(j);
		double aiOld = m_problem.alpha(i);
		double ajOld = m_problem.alpha(j);
		double& ai = m_problem.alpha(i);
		double& aj = m_problem.alpha(j);
		if (step >= std::min(Ui - ai, aj - Lj))
		{
			// the unconstrained step leaves the box: clip to the nearer bound
			if (Ui - ai > aj - Lj)
			{
				step = aj - Lj;
				ai += step;
				aj = Lj;
			}
			else if (Ui - ai < aj - Lj)
			{
				step = Ui - ai;
				ai = Ui;
				aj -= step;
			}
			else
			{
				// both variables hit their bounds with the same step
				step = Ui - ai;
				ai = Ui;
				aj = Lj;
			}
		}
		else
		{
			ai += step;
			aj -= step;
		}

		// nothing moved: gradient and status are already up to date
		if(ai == aiOld && aj == ajOld)return;

		//Update internal data structures (gradient and alpha status)
		QpFloatType* qj = quadratic().row(j, 0, active());
		for (std::size_t a = 0; a < active(); a++)
			m_gradient(a) -= step * qi[a] - step * qj[a];

		//update boundary status
		// NOTE(review): the status-update statements appear to be missing from
		// this extract (presumably updateAlphaStatus for i and j) — confirm.
	}
434
	///\brief Returns the current function value of the problem.
	///
	/// With m_gradient = linear - Q*alpha, the expression
	/// 0.5*(gradient + linear)^T alpha equals linear^T alpha - 0.5 alpha^T Q alpha,
	/// i.e. the dual objective value at the current alpha.
	double functionValue()const{
		return 0.5*inner_prod(m_gradient+m_problem.linear,m_problem.alpha);
	}
439
	// no-op shrinking interface: this problem never shrinks variables itself;
	// shrinking is layered on top by a wrapping strategy class.
	bool shrink(double){return false;}
	void reshrink(){}
	void unshrink(){}
443
	/// \brief Define the initial solution for the iterative solver.
	///
	/// This method can be used to warm-start the solver. It requires a
	/// feasible solution (alpha) and the corresponding gradient of the
	/// dual objective function.
	void setInitialSolution(RealVector const& alpha, RealVector const& gradient)
	{
		std::size_t n = dimensions();
		SIZE_CHECK(alpha.size() == n);
		SIZE_CHECK(gradient.size() == n);
		for (std::size_t i=0; i<n; i++)
		{
			// incoming vectors use the caller's original order; translate via the permutation
			std::size_t j = permutation(i);
			SHARK_ASSERT(alpha(j) >= boxMin(j) && alpha(j) <= boxMax(j));
			m_problem.alpha(i) = alpha(j);
			m_gradient(i) = gradient(j);
			// NOTE(review): a per-variable status update appears to be missing
			// from this extract — confirm against the repository.
		}
	}
463
	/// \brief Define the initial solution for the iterative solver.
	///
	/// This method can be used to warm-start the solver. It requires a
	/// feasible solution (alpha), for which it computes the gradient of
	/// the dual objective function. Note that this is a quadratic time
	/// operation in the number of non-zero coefficients.
	void setInitialSolution(RealVector const& alpha)
	{
		std::size_t n = dimensions();
		SIZE_CHECK(alpha.size() == n);
		RealVector gradient = m_problem.linear;
		blas::vector<QpFloatType> q(n);
		for (std::size_t i=0; i<n; i++)
		{
			double a = alpha(i);
			if (a == 0.0) continue;
			// gradient -= a * (i-th row of Q)
			m_problem.quadratic.row(i, 0, n, q.storage());
			noalias(gradient) -= a * q;
		}
		// NOTE(review): the delegation to the two-argument overload appears to
		// be missing from this extract — confirm.
	}
485
	///\brief Remove the i-th example from the problem while taking the equality constraint into account.
	///
	/// The i-th element is first set to zero and as well as an unspecified set corrected so
	/// that the constraint is fulfilled.
	void deactivateVariable(std::size_t i){
		SIZE_CHECK(i < dimensions());
		//we need to correct for the equality constraint
		//that means we have to move enough variables to satisfy the constraint again.
		for (std::size_t j=0; j<dimensions(); j++){
			if (j == i || m_alphaStatus[j] == AlphaDeactivated) continue;
			//propose the maximum step possible and let applyStep cut it down.
			applyStep(i,j, -alpha(i));
			// stop as soon as the variable has been driven to zero
			if(alpha(i) == 0.0) break;
		}
		// NOTE(review): marking the variable as AlphaDeactivated appears to be
		// missing from this extract — confirm.
	}
	///\brief Reactivate a previously deactivated variable.
	void activateVariable(std::size_t i){
		SIZE_CHECK(i < dimensions());
		// NOTE(review): the body (presumably resetting the alpha status) appears
		// to be missing from this extract — confirm against the repository.
	}
508
	/// exchange two variables via the permutation
	///
	/// Swaps problem data, gradient and alpha status so that all per-variable
	/// arrays stay consistent.
	void flipCoordinates(std::size_t i, std::size_t j)
	{
		SIZE_CHECK(i < dimensions());
		SIZE_CHECK(j < dimensions());
		if (i == j) return;

		m_problem.flipCoordinates(i, j);
		std::swap( m_gradient[i], m_gradient[j]);
		std::swap( m_alphaStatus[i], m_alphaStatus[j]);
	}
520
	/// \brief Scales all box constraints by a constant factor and adapts the solution using a separate scaling
	void scaleBoxConstraints(double factor, double variableScalingFactor){
		m_problem.scaleBoxConstraints(factor,variableScalingFactor);
		for(std::size_t i = 0; i != this->dimensions(); ++i){
			//don't change deactivated variables
			if(m_alphaStatus[i] == AlphaDeactivated) continue;
			// rescale only the quadratic contribution to the gradient;
			// the linear part is removed first and re-added unscaled
			m_gradient(i) -= linear(i);
			m_gradient(i) *= variableScalingFactor;
			m_gradient(i) += linear(i);
			// NOTE(review): a status update appears to be missing from this extract.
		}
	}
533
	/// \brief adapts the linear part of the problem and updates the internal data structures accordingly.
	virtual void setLinear(std::size_t i, double newValue){
		// the gradient contains the linear term additively, so exchange it in place
		m_gradient(i) -= linear(i);
		m_gradient(i) += newValue;
		m_problem.linear(i) = newValue;
	}
540
541 double checkKKT()const{
542 double largestUp = -1e100;
543 double smallestDown = 1e100;
544 for (std::size_t a = 0; a < active(); a++){
545 if (!isLowerBound(a))
546 smallestDown = std::min(smallestDown,gradient(a));
547 if (!isUpperBound(a))
548 largestUp = std::max(largestUp,gradient(a));
549 }
550 return largestUp - smallestDown;
551 }
552
protected:
	/// the wrapped problem storage (box constraints, alpha, kernel matrix access)
	Problem m_problem;

	/// gradient of the objective function at the current alpha
	RealVector m_gradient;

	/// number of active (non-shrunk) variables
	std::size_t m_active;

	/// \brief Stores the status, whether alpha is on the lower or upper bound, or whether it is free.
	std::vector<char> m_alphaStatus;
563
	///\brief Update the problem by a proposed step i taking the box constraints into account.
	///
	/// A step length 0<=lambda<=1 is found so that
	/// boxMin(i) <= alpha(i)+lambda*step <= boxMax(i)
	/// and
	/// boxMin(j) <= alpha(j)-lambda*step <= boxMax(j)
	/// the update is performed in a numerically stable way and the internal data structures
	/// are also updated.
	virtual void applyStep(std::size_t i, std::size_t j, double step){
		SIZE_CHECK(i < active());
		SIZE_CHECK(j < active());
		// do the update of the alpha values carefully - avoid numerical problems
		double Ui = boxMax(i);
		double Lj = boxMin(j);
		double aiOld = m_problem.alpha(i);
		double ajOld = m_problem.alpha(j);
		double& ai = m_problem.alpha(i);
		double& aj = m_problem.alpha(j);
		if (step >= std::min(Ui - ai, aj - Lj))
		{
			// the proposed step leaves the box: clip to the nearer bound
			if (Ui - ai > aj - Lj)
			{
				step = aj - Lj;
				ai += step;
				aj = Lj;
			}
			else if (Ui - ai < aj - Lj)
			{
				step = Ui - ai;
				ai = Ui;
				aj -= step;
			}
			else
			{
				// both variables reach their bounds with the same step
				step = Ui - ai;
				ai = Ui;
				aj = Lj;
			}
		}
		else
		{
			ai += step;
			aj -= step;
		}

		// nothing moved: nothing to update
		if(ai == aiOld && aj == ajOld)return;

		//Update internal data structures (gradient and alpha status)
		QpFloatType* qi = quadratic().row(i, 0, active());
		QpFloatType* qj = quadratic().row(j, 0, active());
		for (std::size_t a = 0; a < active(); a++)
			m_gradient(a) -= step * qi[a] - step * qj[a];

		//update boundary status
		// NOTE(review): the status-update statements appear to be missing from
		// this extract (presumably updateAlphaStatus for i and j) — confirm.
	}
621
622
	/// \brief Recompute the bound status of variable i from its current alpha.
	///
	/// NOTE(review): the statement bodies appear to have been lost in extraction
	/// (presumably resetting the status and OR-ing AlphaUpperBound/AlphaLowerBound
	/// when alpha sits exactly on a bound) — confirm against the repository.
	void updateAlphaStatus(std::size_t i){
		SIZE_CHECK(i < dimensions());
		if(m_problem.alpha(i) == boxMax(i))
		if(m_problem.alpha(i) == boxMin(i))
	}
631
632 bool testShrinkVariable(std::size_t a, double largestUp, double smallestDown)const{
633 if (
634 ( isLowerBound(a) && gradient(a) < smallestDown)
635 || ( isUpperBound(a) && gradient(a) >largestUp)
636 ){
637 // In this moment no feasible step including this variable
638 // can improve the objective. Thus deactivate the variable.
639 return true;
640 }
641 return false;
642 }
643};
644
template<class Problem>
class SvmShrinkingProblem: public BoxBasedShrinkingStrategy<SvmProblem<Problem> >{
public:
	/// \brief Wraps an SvmProblem with the box-based shrinking strategy.
	///
	/// \param problem the underlying problem storage
	/// \param shrink whether shrinking is enabled (default: true)
	SvmShrinkingProblem(Problem& problem, bool shrink=true)
	:BoxBasedShrinkingStrategy<SvmProblem<Problem> >(problem,shrink){}
};
651
652}
653#endif