QpMcBoxDecomp.h
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief Quadratic programming m_problem for multi-class SVMs
6 *
7 *
8 *
9 *
10 * \author T. Glasmachers
11 * \date 2007-2012
12 *
13 *
14 * \par Copyright 1995-2017 Shark Development Team
15 *
16 * <BR><HR>
17 * This file is part of Shark.
18 * <https://shark-ml.github.io/Shark/>
19 *
20 * Shark is free software: you can redistribute it and/or modify
21 * it under the terms of the GNU Lesser General Public License as published
22 * by the Free Software Foundation, either version 3 of the License, or
23 * (at your option) any later version.
24 *
25 * Shark is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU Lesser General Public License for more details.
29 *
30 * You should have received a copy of the GNU Lesser General Public License
31 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32 *
33 */
34//===========================================================================
35
36
37#ifndef SHARK_ALGORITHMS_QP_QPMCBOXDECOMP_H
38#define SHARK_ALGORITHMS_QP_QPMCBOXDECOMP_H
39
42#include <shark/Algorithms/QP/Impl/AnalyticProblems.h>
43#include <shark/Core/Timer.h>
44#include <shark/Data/Dataset.h>
45
46
47namespace shark {
48
49template <class Matrix>
51{
52public:
53 typedef typename Matrix::QpFloatType QpFloatType;
54 /// \brief Working set selection eturning th S2DO working set
55 ///
56 /// This selection operator picks the first variable by maximum gradient,
57 /// the second by maximum unconstrained gain.
59 template<class Problem>
60 double operator()(Problem& problem, std::size_t& i, std::size_t& j){
61 //todo move implementation here
62 return problem.selectWorkingSet(i,j);
63 }
64
65 void reset(){}
66 };
67
68 ///Constructor
69 ///\param kernel kernel matrix - cache or pre-computed matrix
70 ///\param M kernel modifiers in the format \f$ M_(y_i, p, y_j, q) = _M(classes*(y_i*|P|+p_i)+y_j, q) \f$
71 ///\param target the target labels for the variables
72 ///\param linearMat the linear part of the problem
73 ///\param C upper bound for all box variables, lower bound is 0.
75 Matrix& kernel,
77 Data<unsigned int> const& target,
78 RealMatrix const& linearMat,
79 double C
80 )
81 : bUnshrinked(false)
82 , m_kernelMatrix(kernel)
83 , m_M(M)
84 , m_C(C)
85 , m_classes(numberOfClasses(target))
86 , m_cardP(linearMat.size2())
87 , m_numExamples(kernel.size())
96 , m_useShrinking(true)
97 {
98 SHARK_RUNTIME_CHECK(target.numberOfElements() == kernel.size(), "Size of kernel matrix and target vector do not agree.");
99 SHARK_RUNTIME_CHECK(kernel.size() == linearMat.size1(), "Size of kernel matrix and linear factor to not agree.");
100
101 // prepare m_problem internal variables
104 for (std::size_t v=0, i=0; i<m_numExamples; i++)
105 {
106 unsigned int y = target.element(i);
107 m_examples[i].index = i;
108 m_examples[i].y = y;
109 m_examples[i].active = m_cardP;
110 m_examples[i].var = &m_storage1[m_cardP * i];
111 m_examples[i].avar = &m_storage2[m_cardP * i];
112 double k = m_kernelMatrix.entry(i, i);
113 for (unsigned int p=0; p<m_cardP; p++, v++)
114 {
115 m_variables[v].i = i;
116 m_variables[v].p = p;
117 m_variables[v].index = p;
118 double Q = m_M(m_classes * (y * m_cardP + p) + y, p) * k;
119 m_variables[v].diagonal = Q;
120 m_storage1[v] = v;
121 m_storage2[v] = v;
122
123 m_linear(v) = m_gradient(v) = linearMat(i,p);
124 }
125 }
126 }
127
128 ///enable/disable shrinking
129 void setShrinking(bool shrinking = true)
130 {
131 m_useShrinking = shrinking;
132 }
133
134 /// \brief Return the solution found.
135 RealMatrix solution() const{
136 RealMatrix solutionMatrix(m_numVariables,m_cardP,0);
137 for (std::size_t v=0; v<m_numVariables; v++)
138 {
139 solutionMatrix(originalIndex(v),m_variables[v].p) = m_alpha(v);
140 }
141 return solutionMatrix;
142 }
143
144 double alpha(std::size_t i, std::size_t p)const{
145 return m_alpha(m_cardP * i + p);
146 }
147 /// \brief Return the gradient of the solution.
148 RealMatrix solutionGradient() const{
149 RealMatrix solutionGradientMatrix(m_numVariables,m_cardP,0);
150 for (std::size_t v=0; v<m_numVariables; v++)
151 {
152 solutionGradientMatrix(originalIndex(v),m_variables[v].p) = m_gradient(v);
153 }
154 return solutionGradientMatrix;
155 }
156
157 /// \brief Compute the objective value of the current solution.
158 double functionValue()const{
159 return 0.5*inner_prod(m_gradient+m_linear,m_alpha);
160 }
161
162 unsigned int label(std::size_t i){
163 return m_examples[i].y;
164 }
165
166 std::size_t dimensions()const{
167 return m_numVariables;
168 }
169 std::size_t cardP()const{
170 return m_cardP;
171 }
172
173 std::size_t getNumExamples()const{
174 return m_numExamples;
175 }
176
177 ///return the largest KKT violation
178 double checkKKT()const
179 {
180 double maxViolation = 0.0;
181 for (std::size_t v=0; v<m_activeVar; v++)
182 {
183 double a = m_alpha(v);
184 double g = m_gradient(v);
185 if (a < m_C)
186 {
187 maxViolation = std::max(maxViolation,g);
188 }
189 if (a > 0.0)
190 {
191 maxViolation = std::max(maxViolation,-g);
192 }
193 }
194 return maxViolation;
195 }
196
197 /// \brief change the linear part of the problem by some delta
198 void addDeltaLinear(RealMatrix const& deltaLinear){
199 SIZE_CHECK(deltaLinear.size1() == m_numExamples);
200 SIZE_CHECK(deltaLinear.size2() == m_cardP);
201 for (std::size_t v=0; v<m_numVariables; v++)
202 {
203
204 std::size_t p = m_variables[v].p;
205 m_gradient(v) += deltaLinear(originalIndex(v),p);
206 m_linear(v) += deltaLinear(originalIndex(v),p);
207 }
208 }
209
210 void updateSMO(std::size_t v, std::size_t w){
213 // update
214 if (v == w)
215 {
216 // Limit case of a single variable;
217 // this means that there is only one
218 // non-optimal variables left.
219 std::size_t i = m_variables[v].i;
221 unsigned int p = m_variables[v].p;
222 unsigned int y = m_examples[i].y;
223 std::size_t r = m_cardP * y + p;
224 QpFloatType* q = m_kernelMatrix.row(i, 0, m_activeEx);
225 double Qvv = m_variables[v].diagonal;
226 double mu = -m_alpha(v);
227 detail::solveQuadraticEdge(m_alpha(v),m_gradient(v),Qvv,0,m_C);
228 mu+=m_alpha(v);
229 gradientUpdate(r, mu, q);
230 }
231 else
232 {
233 // S2DO
234 std::size_t iv = m_variables[v].i;
236 unsigned int pv = m_variables[v].p;
237 unsigned int yv = m_examples[iv].y;
238
239 std::size_t iw = m_variables[w].i;
241 unsigned int pw = m_variables[w].p;
242 unsigned int yw = m_examples[iw].y;
243
244 // get the matrix rows corresponding to the working set
245 QpFloatType* qv = m_kernelMatrix.row(iv, 0, m_activeEx);
246 QpFloatType* qw = m_kernelMatrix.row(iw, 0, m_activeEx);
247 std::size_t rv = m_cardP*yv+pv;
248 std::size_t rw = m_cardP*yw+pw;
249
250 // get the Q-matrix restricted to the working set
251 double Qvv = m_variables[v].diagonal;
252 double Qww = m_variables[w].diagonal;
253 double Qvw = m_M(m_classes * rv + yw, pw) * qv[iw];
254
255 // solve the sub-problem and update the gradient using the step sizes mu
256 double mu_v = -m_alpha(v);
257 double mu_w = -m_alpha(w);
258 detail::solveQuadratic2DBox(m_alpha(v), m_alpha(w),
259 m_gradient(v), m_gradient(w),
260 Qvv, Qvw, Qww,
261 0, m_C, 0, m_C
262 );
263 mu_v += m_alpha(v);
264 mu_w += m_alpha(w);
265
266 gradientUpdate(rv, mu_v, qv);
267 gradientUpdate(rw, mu_w, qw);
268 }
269 }
270
271 ///Shrink the problem
272 bool shrink(double epsilon)
273 {
274 if(! m_useShrinking)
275 return false;
276 if (! bUnshrinked)
277 {
278 double largest = 0.0;
279 for (std::size_t a = 0; a < m_activeVar; a++)
280 {
281 if (m_alpha(a) < m_C)
282 {
283 largest = std::max(largest,m_gradient(a));
284 }
285 if (m_alpha(a) > 0.0)
286 {
287 largest = std::max(largest,-m_gradient(a));
288 }
289 }
290 if (largest < 10.0 * epsilon)
291 {
292 // unshrink the problem at this accuracy level
293 unshrink();
294 bUnshrinked = true;
295 }
296 }
297
298 // shrink variables
299 bool se = false;
300 for (int a= (int)m_activeVar-1; a >= 0; a--)
301 {
302 double v = m_alpha(a);
303 double g = m_gradient(a);
304
305 if ((v == 0.0 && g <= 0.0) || (v == m_C && g >= 0.0))
306 {
307 // In this moment no feasible step including this variables
308 // can improve the objective. Thus deactivate the variables.
309 std::size_t e = m_variables[a].i;
311 if (m_examples[e].active == 0)
312 {
313 se = true;
314 }
315 }
316 }
317
318 if (se)
319 {
320 // exchange examples such that shrinked examples
321 // are moved to the ends of the lists
322 for (int a = (int)m_activeEx - 1; a >= 0; a--)
323 {
324 if (m_examples[a].active == 0)
326 }
327 }
328 return true;
329 }
330
331 ///Activate all variables
332 void unshrink()
333 {
334 if (m_activeVar == m_numVariables) return;
335
336 // compute the inactive m_gradient components (quadratic time complexity)
338 for (std::size_t v = 0; v != m_numVariables; v++)
339 {
340 double mu = m_alpha(v);
341 if (mu == 0.0) continue;
342
343 std::size_t iv = m_variables[v].i;
344 unsigned int pv = m_variables[v].p;
345 unsigned int yv = m_examples[iv].y;
346 std::size_t r = m_cardP * yv + pv;
347 std::vector<QpFloatType> q(m_numExamples);
348 m_kernelMatrix.row(iv, 0, m_numExamples, &q[0]);
349
350 for (std::size_t a = 0; a != m_numExamples; a++)
351 {
352 double k = q[a];
353 Example& ex = m_examples[a];
354 typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * r + ex.y);
355 QpFloatType def = row.defaultvalue;
356 for (std::size_t b=0; b<row.size; b++)
357 {
358 std::size_t f = ex.var[row.entry[b].index];
359 if (f >= m_activeVar)
360 m_gradient(f) -= mu * (row.entry[b].value - def) * k;
361 }
362 if (def != 0.0)
363 {
364 double upd = mu * def * k;
365 for (std::size_t b=ex.active; b<m_cardP; b++)
366 {
367 std::size_t f = ex.avar[b];
369 m_gradient(f) -= upd;
370 }
371 }
372 }
373 }
374
375 for (std::size_t i=0; i<m_numExamples; i++)
376 m_examples[i].active = m_cardP;
379 }
380
381 //!
382 ///\brief select the working set
383 //!
384 ///Select one or two numVariables for the sub-problem
385 ///and return the maximal KKT violation. The method
386 ///MAY select the same index for i and j. In that
387 ///case the working set consists of a single variables.
388 ///The working set may be invalid if the method reports
389 ///a KKT violation of zero, indicating optimality.
390 double selectWorkingSet(std::size_t& i, std::size_t& j)
391 {
392 // box case
393 double maxViolation = 0.0;
394
395 // first order selection
396 for (std::size_t a=0; a<m_activeVar; a++)
397 {
398 double aa = m_alpha(a);
399 double ga = m_gradient(a);
400 if (ga >maxViolation && aa < m_C)
401 {
402 maxViolation = ga;
403 i = a;
404 }
405 else if (-ga > maxViolation && aa > 0.0)
406 {
407 maxViolation = -ga;
408 i = a;
409 }
410 }
411 if (maxViolation == 0.0) return maxViolation;
412
413 // second order selection
414 Variable& vari = m_variables[i];
415 std::size_t ii = vari.i;
417 unsigned int pi = vari.p;
418 unsigned int yi = m_examples[ii].y;
419 double di = vari.diagonal;
420 double gi = m_gradient(i);
421 QpFloatType* k = m_kernelMatrix.row(ii, 0, m_activeEx);
422
423 j = i;
424 double bestgain = gi * gi / di;
425
426 for (std::size_t a=0; a<m_activeEx; a++)
427 {
428 Example const& exa = m_examples[a];
429 unsigned int ya = exa.y;
430 typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * (yi * m_cardP + pi) + ya);
431 QpFloatType def = row.defaultvalue;
432
433 for (std::size_t pf=0, b=0; pf < m_cardP; pf++)
434 {
435 std::size_t f = exa.var[pf];
436 double qif = def * k[a];
437 //check whether we are at an existing element of the sparse row
438 if( b != row.size && pf == row.entry[b].index){
439 qif = row.entry[b].value * k[a];
440 ++b;//move to next element
441 }
442 if(f >= m_activeVar || f == i)
443 continue;
444
445 double af = m_alpha(f);
446 double gf = m_gradient(f);
447 double df = m_variables[f].diagonal;
448
449 //check whether a step is possible at all.
450 if (!(af > 0.0 && gf < 0.0) && !(af < m_C && gf > 0.0))
451 continue;
452
453 double gain = detail::maximumGainQuadratic2D(di,df,qif,di,gi,gf);
454 if( gain > bestgain){
455 j = f;
456 bestgain = gain;
457 }
458 }
459 }
460
461 return maxViolation;
462 }
463
464protected:
465
466 void gradientUpdate(std::size_t r, double mu, QpFloatType* q)
467 {
468 for ( std::size_t a= 0; a< m_activeEx; a++)
469 {
470 double k = q[a];
471 Example& ex = m_examples[a];
472 typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * r + ex.y);
473 QpFloatType def = row.defaultvalue;
474 for (std::size_t b=0; b<row.size; b++){
475 std::size_t p = row.entry[b].index;
476 m_gradient(ex.var[p]) -= mu * (row.entry[b].value - def) * k;
477 }
478 if (def != 0.0){
479 double upd = mu* def * k;
480 for (std::size_t b=0; b<ex.active; b++)
481 m_gradient(ex.avar[b]) -= upd;
482 }
483 }
484 }
485
486 ///true if the problem has already been unshrinked
488
489 ///shrink a variable
490 void deactivateVariable(std::size_t v)
491 {
492 std::size_t ev = m_variables[v].i;
493 std::size_t iv = m_variables[v].index;
494 unsigned int pv = m_variables[v].p;
495 Example* exv = &m_examples[ev];
496
497 std::size_t ih = exv->active - 1;
498 std::size_t h = exv->avar[ih];
499 m_variables[v].index = ih;
500 m_variables[h].index = iv;
501 std::swap(exv->avar[iv], exv->avar[ih]);
502 iv = ih;
503 exv->active--;
504
505 std::size_t j = m_activeVar - 1;
506 std::size_t ej = m_variables[j].i;
507 std::size_t ij = m_variables[j].index;
508 unsigned int pj = m_variables[j].p;
509 Example* exj = &m_examples[ej];
510
511 // exchange entries in the lists
512 std::swap(m_alpha(v), m_alpha(j));
513 std::swap(m_gradient(v), m_gradient(j));
514 std::swap(m_linear(v), m_linear(j));
515 std::swap(m_variables[v], m_variables[j]);
516
517 m_variables[exv->avar[iv]].index = ij;
518 m_variables[exj->avar[ij]].index = iv;
519 exv->avar[iv] = j;
520 exv->var[pv] = j;
521 exj->avar[ij] = v;
522 exj->var[pj] = v;
523
524 m_activeVar--;
525 }
526
527 ///shrink an m_examples
528 void deactivateExample(std::size_t e)
529 {
531 std::size_t j = m_activeEx - 1;
532 m_activeEx--;
533 if(e == j) return;
534
535 std::swap(m_examples[e], m_examples[j]);
536
537 std::size_t* pe = m_examples[e].var;
538 std::size_t* pj = m_examples[j].var;
539 for (std::size_t v = 0; v < m_cardP; v++)
540 {
541 SHARK_ASSERT(pj[v] >= m_activeVar);
542 m_variables[pe[v]].i = e;
543 m_variables[pj[v]].i = j;
544 }
545
546 m_kernelMatrix.flipColumnsAndRows(e, j);
547 }
548
549 /// \brief Returns the original index of the example of a variable in the dataset before optimization.
550 ///
551 /// Shrinking is an internal detail so the communication with the outside world uses the original indizes.
552 std::size_t originalIndex(std::size_t v)const{
553 std::size_t i = m_variables[v].i;
554 return m_examples[i].index;//i before shrinking
555 }
556
557 /// data structure describing one m_variables of the problem
558 struct Variable
559 {
560 ///index into the example list
561 std::size_t i;
562 /// constraint corresponding to this m_variables
563 unsigned int p;
564 /// index into example->m_numVariables
565 std::size_t index;
566 /// diagonal entry of the big Q-matrix
567 double diagonal;
568 };
569
570 /// data structure describing one training example
571 struct Example
572 {
573 /// example index in the dataset, not the example vector!
574 std::size_t index;
575 /// label of this example
576 unsigned int y;
577 /// number of active m_numVariables
578 std::size_t active;
579 /// list of all m_cardP m_numVariables, in order of the p-index
580 std::size_t* var;
581 /// list of active m_numVariables
582 std::size_t* avar;
583 };
584
585 ///kernel matrix (precomputed matrix or matrix cache)
587
588 ///kernel modifiers
589 QpSparseArray<QpFloatType> const& m_M; // M(|P|*y_i+p, y_j, q)
590
591 ///complexity constant; upper bound on all variabless
592 double m_C;
593
594 ///number of m_classes in the problem
595 unsigned int m_classes;
596
597 ///number of dual m_numVariables per example
598 std::size_t m_cardP;
599
600 ///number of m_examples in the problem (size of the kernel matrix)
601 std::size_t m_numExamples;
602
603 ///number of m_numVariables in the problem = m_examples times m_cardP
604 std::size_t m_numVariables;
605
606 ///m_linear part of the objective function
607 RealVector m_linear;
608
609 ///solution candidate
610 RealVector m_alpha;
611
612 ///m_gradient of the objective function
613 ///The m_gradient array is of fixed size and not subject to shrinking.
614 RealVector m_gradient;
615
616 ///information about each training example
617 std::vector<Example> m_examples;
618
619 ///information about each m_variables of the problem
620 std::vector<Variable> m_variables;
621
622 ///space for the example[i].var pointers
623 std::vector<std::size_t> m_storage1;
624
625 ///space for the example[i].avar pointers
626 std::vector<std::size_t> m_storage2;
627
628 ///number of currently active m_examples
629 std::size_t m_activeEx;
630
631 ///number of currently active variabless
632 std::size_t m_activeVar;
633
634 ///should the m_problem use the shrinking heuristics?
636};
637
638
639template<class Matrix>
641public:
642 typedef typename Matrix::QpFloatType QpFloatType;
643 BiasSolver(QpMcBoxDecomp<Matrix>* problem) : m_problem(problem){}
644
645 void solve(
646 RealVector& bias,
649 bool sumToZero,
650 QpSolutionProperties* prop = NULL
651 ){
652 std::size_t classes = bias.size();
653 std::size_t numExamples = m_problem->getNumExamples();
654 std::size_t cardP = m_problem->cardP();
655 RealVector stepsize(classes, 0.01);
656 RealVector prev(classes,0);
657 RealVector step(classes);
658
659 double start_time = Timer::now();
660 unsigned long long iterations = 0;
661
662 do{
663 QpSolutionProperties propInner;
664 QpSolver<QpMcBoxDecomp<Matrix> > solver(*m_problem);
665 solver.solve(stop, &propInner);
666 iterations += propInner.iterations;
667
668 // Rprop loop to update the bias
669 while (true)
670 {
671 RealMatrix dualGradient = m_problem->solutionGradient();
672 // compute the primal m_gradient w.r.t. bias
673 RealVector grad(classes,0);
674
675 for (std::size_t i=0; i<numExamples; i++){
676 for (std::size_t p=0; p<cardP; p++){
677 double g = dualGradient(i,p);
678 if (g > 0.0)
679 {
680 unsigned int y = m_problem->label(i);
681 typename QpSparseArray<QpFloatType>::Row const& row = nu.row(y * cardP + p);
682 for (std::size_t b=0; b<row.size; b++)
683 grad(row.entry[b].index) -= row.entry[b].value;
684 }
685 }
686 }
687
688 //~ for (std::size_t i=0; i<numExamples; i++){
689 //~ unsigned int y = m_problem->label(i);
690 //~ for (std::size_t p=0; p<cardP; p++){
691 //~ double a = m_problem->alpha(i,p);
692 //~ if(a == 0) continue;
693 //~ typename QpSparseArray<QpFloatType>::Row const& row = nu.row(y * cardP + p);
694 //~ for (std::size_t b=0; b<row.size; b++)
695 //~ grad(row.entry[b].index) -= row.entry[b].value * a;
696 //~ }
697 //~ }
698
699 if (sumToZero)
700 {
701 // project the gradient
702 grad -= sum(grad) / classes;
703 }
704
705 // Rprop
706 for (std::size_t c=0; c<classes; c++)
707 {
708 double g = grad(c);
709 if (g > 0.0)
710 step(c) = -stepsize(c);
711 else if (g < 0.0)
712 step(c) = stepsize(c);
713
714 double gg = prev(c) * grad(c);
715 if (gg > 0.0)
716 stepsize(c) *= 1.2;
717 else
718 stepsize(c) *= 0.5;
719 }
720 prev = grad;
721
722 if (sumToZero)
723 {
724 // project the step
725 step -= sum(step) / classes;
726 }
727
728 // update the solution and the dual m_gradient
729 bias += step;
730 performBiasUpdate(step,nu);
731
732 if (max(stepsize) < 0.01 * stop.minAccuracy) break;
733 }
734 }while(m_problem->checkKKT()> stop.minAccuracy);
735
736 if (prop != NULL)
737 {
738 double finish_time = Timer::now();
739
740 prop->accuracy = m_problem->checkKKT();
741 prop->value = m_problem->functionValue();
742 prop->iterations = iterations;
743 prop->seconds = finish_time - start_time;
744 }
745 }
746private:
747 void performBiasUpdate(
748 RealVector const& step, QpSparseArray<QpFloatType> const& nu
749 ){
750 std::size_t numExamples = m_problem->getNumExamples();
751 std::size_t cardP = m_problem->cardP();
752 RealMatrix deltaLinear(numExamples,cardP,0.0);
753 for (std::size_t i=0; i<numExamples; i++){
754 for (std::size_t p=0; p<cardP; p++){
755 unsigned int y = m_problem->label(i);
756 typename QpSparseArray<QpFloatType>::Row const& row = nu.row(y * cardP +p);
757 for (std::size_t b=0; b<row.size; b++)
758 {
759 deltaLinear(i,p) -= row.entry[b].value * step(row.entry[b].index);
760 }
761 }
762 }
763 m_problem->addDeltaLinear(deltaLinear);
764
765 }
766 QpMcBoxDecomp<Matrix>* m_problem;
767};
768
769
770}
771#endif