QpMcSimplexDecomp.h
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief Quadratic programming problem for multi-class SVMs
6 *
7 *
8 *
9 *
10 * \author T. Glasmachers
11 * \date 2007-2012
12 *
13 *
14 * \par Copyright 1995-2017 Shark Development Team
15 *
16 * <BR><HR>
17 * This file is part of Shark.
18 * <https://shark-ml.github.io/Shark/>
19 *
20 * Shark is free software: you can redistribute it and/or modify
21 * it under the terms of the GNU Lesser General Public License as published
22 * by the Free Software Foundation, either version 3 of the License, or
23 * (at your option) any later version.
24 *
25 * Shark is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU Lesser General Public License for more details.
29 *
30 * You should have received a copy of the GNU Lesser General Public License
31 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32 *
33 */
34//===========================================================================
35
36
#ifndef SHARK_ALGORITHMS_QP_QPMCSIMPLEXDECOMP_H
#define SHARK_ALGORITHMS_QP_QPMCSIMPLEXDECOMP_H

#include <shark/Algorithms/QP/QpSolver.h>
#include <shark/Algorithms/QP/QpSparseArray.h>
#include <shark/Algorithms/QP/Impl/AnalyticProblems.h>
#include <shark/Core/Timer.h>
#include <shark/Data/Dataset.h>

46
47namespace shark {
48
49template <class Matrix>
51{
52public:
53 typedef typename Matrix::QpFloatType QpFloatType;
54 /// \brief Working set selection eturning th S2DO working set
55 ///
56 /// This selection operator picks the first variable by maximum gradient,
57 /// the second by maximum unconstrained gain.
59 template<class Problem>
60 double operator()(Problem& problem, std::size_t& i, std::size_t& j){
61 //todo move implementation here
62 return problem.selectWorkingSet(i,j);
63 }
64
65 void reset(){}
66 };
67
68 /// Constructor
69 /// \param kernel kernel matrix - cache or pre-computed matrix
70 /// \param M kernel modifiers in the format \f$ M_(y_i, p, y_j, q) = _M(classes*(y_i*|P|+p_i)+y_j, q) \f$
71 /// \param target the target labels for the variables
72 /// \param linearMat the linear part of the problem
73 /// \param C upper bound for all box variables, lower bound is 0.
75 Matrix& kernel,
77 Data<unsigned int> const& target,
78 RealMatrix const& linearMat,
79 double C
80 )
81 : m_kernelMatrix(kernel)
82 , m_M(M)
83 , m_C(C)
84 , m_classes(numberOfClasses(target))
85 , m_cardP(linearMat.size2())
86 , m_numExamples(kernel.size())
95 , m_useShrinking(true){
96 SHARK_RUNTIME_CHECK(target.numberOfElements() == kernel.size(), "size of kernel matrix and target vector do not agree");
97 SHARK_RUNTIME_CHECK(kernel.size() == linearMat.size1(), "size of kernel matrix and linear factor to not agree");
98
99 // prepare problem internal variables
102 for (std::size_t v=0, i=0; i<m_numExamples; i++)
103 {
104 unsigned int y = target.element(i);
105 m_examples[i].index = i;
106 m_examples[i].y = y;
107 m_examples[i].active = m_cardP;
108 m_examples[i].var = &m_storage1[m_cardP * i];
109 m_examples[i].avar = &m_storage2[m_cardP * i];
110 m_examples[i].varsum = 0;
111 double k = m_kernelMatrix.entry(i, i);
112 m_examples[i].diagonal = k;
113 for (std::size_t p=0; p<m_cardP; p++, v++)
114 {
115 m_variables[v].example = i;
116 m_variables[v].p = p;
117 m_variables[v].index = p;
118 double Q = m_M(m_classes * (y * m_cardP + p) + y, p) * k;
119 m_variables[v].diagonal = Q;
120 m_storage1[v] = v;
121 m_storage2[v] = v;
122
123 m_linear(v) = m_gradient(v) = linearMat(i,p);
124 }
125 }
126 // initialize unshrinking to make valgrind happy.
127 bUnshrinked = false;
128 }
129
130 /// enable/disable shrinking
131 void setShrinking(bool shrinking = true)
132 {
133 m_useShrinking = shrinking;
134 }
135
136 /// \brief Returns the solution found.
137 RealMatrix solution() const{
138 RealMatrix solutionMatrix(m_numVariables,m_cardP,0);
139 for (std::size_t v=0; v<m_numVariables; v++)
140 {
141 solutionMatrix(originalIndex(v),m_variables[v].p) = m_alpha(v);
142 }
143 return solutionMatrix;
144 }
145 /// \brief Returns the gradient of the solution.
146 RealMatrix solutionGradient() const{
147 RealMatrix solutionGradientMatrix(m_numVariables,m_cardP,0);
148 for (std::size_t v=0; v<m_numVariables; v++)
149 {
150 solutionGradientMatrix(originalIndex(v),m_variables[v].p) = m_gradient(v);
151 }
152 return solutionGradientMatrix;
153 }
154
155 /// \brief Compute the objective value of the current solution.
156 double functionValue()const{
157 return 0.5*inner_prod(m_gradient+m_linear,m_alpha);
158 }
159
160 unsigned int label(std::size_t i){
161 return m_examples[i].y;
162 }
163
164 std::size_t dimensions()const{
165 return m_numVariables;
166 }
167 std::size_t cardP()const{
168 return m_cardP;
169 }
170
171 std::size_t getNumExamples()const{
172 return m_numExamples;
173 }
174
175 /// \brief change the linear part of the problem by some delta
176 void addDeltaLinear(RealMatrix const& deltaLinear){
177 SIZE_CHECK(deltaLinear.size1() == m_numExamples);
178 SIZE_CHECK(deltaLinear.size2() == m_cardP);
179 for (std::size_t v=0; v<m_numVariables; v++)
180 {
181 std::size_t p = m_variables[v].p;
182 m_gradient(v) += deltaLinear(originalIndex(v),p);
183 m_linear(v) += deltaLinear(originalIndex(v),p);
184 }
185 }
186
187 void updateSMO(std::size_t v, std::size_t w){
190 // update
191 if (v == w)
192 {
193 // Limit case of a single variable;
194 std::size_t i = m_variables[v].example;
196 std::size_t p = m_variables[v].p;
197 unsigned int y = m_examples[i].y;
198 std::size_t r = m_cardP * y + p;
199 double& varsum = m_examples[i].varsum;
200 //the upper bound depends on the values of the variables of the other classes.
201 double upperBound = m_C-varsum+m_alpha(v);
202
203 QpFloatType* q = m_kernelMatrix.row(i, 0, m_activeEx);
204 double Qvv = m_variables[v].diagonal;
205 double mu = -m_alpha(v);
206 detail::solveQuadraticEdge(m_alpha(v),m_gradient(v),Qvv,0,upperBound);
207 mu += m_alpha(v);
208 updateVarsum(i,mu);
209 gradientUpdate(r, mu, q);
210 }
211 else
212 {
213 // S2DO
214 std::size_t iv = m_variables[v].example;
216 std::size_t pv = m_variables[v].p;
217 unsigned int yv = m_examples[iv].y;
218 double& varsumv = m_examples[iv].varsum;
219
220 std::size_t iw = m_variables[w].example;
222 std::size_t pw = m_variables[w].p;
223 unsigned int yw = m_examples[iw].y;
224 double& varsumw = m_examples[iw].varsum;
225
226 // get the matrix rows corresponding to the working set
227 QpFloatType* qv = m_kernelMatrix.row(iv, 0, m_activeEx);
228 QpFloatType* qw = m_kernelMatrix.row(iw, 0, m_activeEx);
229 std::size_t rv = m_cardP*yv+pv;
230 std::size_t rw = m_cardP*yw+pw;
231
232 // get the Q-matrix restricted to the working set
233 double Qvv = m_variables[v].diagonal;
234 double Qww = m_variables[w].diagonal;
235 double Qvw = m_M(m_classes * rv + yw, pw) * qv[iw];
236
237 //same sample - simplex case
238 double mu_v = -m_alpha(v);
239 double mu_w = -m_alpha(w);
240 if(iv == iw){
241
242 double upperBound = m_C-varsumv+m_alpha(v)+m_alpha(w);
243 // solve the sub-problem and update the gradient using the step sizes mu
244 detail::solveQuadratic2DTriangle(m_alpha(v), m_alpha(w),
245 m_gradient(v), m_gradient(w),
246 Qvv, Qvw, Qww,
247 upperBound
248 );
249 mu_v += m_alpha(v);
250 mu_w += m_alpha(w);
251 updateVarsum(iv,mu_v+mu_w);
252 }
253 else{
254 double Uv = m_C-varsumv+m_alpha(v);
255 double Uw = m_C-varsumw+m_alpha(w);
256 // solve the sub-problem and update the gradient using the step sizes mu
257 detail::solveQuadratic2DBox(m_alpha(v), m_alpha(w),
258 m_gradient(v), m_gradient(w),
259 Qvv, Qvw, Qww,
260 0, Uv, 0, Uw
261 );
262 mu_v += m_alpha(v);
263 mu_w += m_alpha(w);
264 updateVarsum(iv,mu_v);
265 updateVarsum(iw,mu_w);
266 }
267
268 double varsumvo = 0;
269 for(std::size_t p = 0; p != m_cardP; ++p){
270 std::size_t varIndex = m_examples[iv].var[p];
271 varsumvo += m_alpha[varIndex];
272 }
273 double varsumwo = 0;
274 for(std::size_t p = 0; p != m_cardP; ++p){
275 std::size_t varIndex = m_examples[iw].var[p];
276 varsumwo += m_alpha[varIndex];
277 }
278 gradientUpdate(rv, mu_v, qv);
279 gradientUpdate(rw, mu_w, qw);
280 }
281 }
282
283 /// Shrink the problem
284 bool shrink(double epsilon)
285 {
286 if(! m_useShrinking)
287 return false;
288 if (! bUnshrinked)
289 {
290 if (checkKKT() < 10.0 * epsilon)
291 {
292 // unshrink the problem at this accuracy level
293 unshrink();
294 bUnshrinked = true;
295 }
296 }
297
298 //iterate through all simplices.
299 for (std::size_t i = m_activeEx; i > 0; i--){
300 Example const& ex = m_examples[i-1];
301 std::pair<std::pair<double,std::size_t>,std::pair<double,std::size_t> > pair = getSimplexMVP(ex);
302 double up = pair.first.first;
303 double down = pair.second.first;
304
305 //check the simplex for possible search directions
306 //case 1: simplex is bounded and stays at the bound, in this case proceed as in MVP
307 if(down > 0 && ex.varsum == m_C && up-down > 0){
308 int pc = (int)ex.active;
309 for(int p = pc-1; p >= 0; --p){
310 double a = m_alpha(ex.avar[p]);
311 double g = m_gradient(ex.avar[p]);
312 //if we can't do a step along the simplex, we can shrink the variable.
313 if(a == 0 && g-down < 0){
315 }
316 else if (a == m_C && up-g < 0){
317 //shrinking this variable means, that the whole simplex can't move,
318 //so shrink every variable, even the ones that previously couldn't
319 //be shrinked
320 for(int q = (int)ex.active; q >= 0; --q){
322 }
323 p = 0;
324 }
325 }
326 }
327 //case 2: all variables are zero and pushed against the boundary
328 // -> shrink the example
329 else if(ex.varsum == 0 && up < 0){
330 int pc = (int)ex.active;
331 for(int p = pc-1; p >= 0; --p){
333 }
334 }
335 //case 3: the simplex is not bounded and there are free variables.
336 //in this case we currently do not shrink
337 //as a variable might get bounded at some point which means that all variables
338 //can be important again.
339 //else{
340 //}
341
342 }
343// std::cout<<"shrunk. remaining: "<<m_activeEx<<","<<m_activeVar<<std::endl;
344 return true;
345 }
346
347 /// Activate all m_numVariables
348 void unshrink()
349 {
350 if (m_activeVar == m_numVariables) return;
351
352 // compute the inactive m_gradient components (quadratic time complexity)
354 for (std::size_t v = 0; v != m_numVariables; v++)
355 {
356 double mu = m_alpha(v);
357 if (mu == 0.0) continue;
358
359 std::size_t iv = m_variables[v].example;
360 std::size_t pv = m_variables[v].p;
361 unsigned int yv = m_examples[iv].y;
362 std::size_t r = m_cardP * yv + pv;
363 std::vector<QpFloatType> q(m_numExamples);
364 m_kernelMatrix.row(iv, 0, m_numExamples, &q[0]);
365
366 for (std::size_t a = 0; a != m_numExamples; a++)
367 {
368 double k = q[a];
369 Example& ex = m_examples[a];
370 typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * r + ex.y);
371 QpFloatType def = row.defaultvalue;
372 for (std::size_t b=0; b<row.size; b++)
373 {
374 std::size_t f = ex.var[row.entry[b].index];
375 if (f >= m_activeVar)
376 m_gradient(f) -= mu * (row.entry[b].value - def) * k;
377 }
378 if (def != 0.0)
379 {
380 double upd = mu * def * k;
381 for (std::size_t b=ex.active; b<m_cardP; b++)
382 {
383 std::size_t f = ex.avar[b];
385 m_gradient(f) -= upd;
386 }
387 }
388 }
389 }
390
391 for (std::size_t i=0; i<m_numExamples; i++)
392 m_examples[i].active = m_cardP;
395 }
396
397 ///
398 /// \brief select the working set
399 ///
400 /// Select one or two numVariables for the sub-problem
401 /// and return the maximal KKT violation. The method
402 /// MAY select the same index for i and j. In that
403 /// case the working set consists of a single variables.
404 /// The working set may be invalid if the method reports
405 /// a KKT violation of zero, indicating optimality.
406 double selectWorkingSet(std::size_t& i, std::size_t& j)
407 {
408
409 //first order selection
410 //we select the best variable along the box constraint (for a step between samples)
411 //and the best gradient and example index for a step along the simplex (step inside a sample)
412 double maxGradient = 0;//max gradient for variables between samples (box constraints)
413 double maxSimplexGradient = 0;//max gradient along the simplex constraints
414 std::size_t maxSimplexExample = 0;//example with the maximum simplex constraint
415 i = j = 0;
416 // first order selection
417 for (std::size_t e=0; e< m_activeEx; e++)
418 {
419 Example& ex = m_examples[e];
420 bool canGrow = ex.varsum < m_C;
421
422 //calculate the maximum violationg pair for the example.
423 std::pair<std::pair<double,std::size_t>,std::pair<double,std::size_t> > pair = getSimplexMVP(ex);
424 double up = pair.first.first;
425 double down = pair.second.first;
426
427 if(!canGrow && up - down > maxSimplexGradient){
428 maxSimplexGradient = up-down;
429 maxSimplexExample = e;
430 }
431 if (canGrow && up > maxGradient) {
432 maxGradient = up;
433 i = pair.first.second;
434 }
435 if (-down > maxGradient) {
436 maxGradient = -down;
437 i = pair.second.second;
438 }
439 }
440
441 //find the best possible partner of the variable
442 //by searching every other sample
443 std::pair<std::pair<std::size_t,std::size_t> ,double > boxPair = maxGainBox(i);
444 double bestGain = boxPair.second;
445 std::pair<std::size_t, std::size_t> best = boxPair.first;
446
447 //always search the simplex of the variable
448 std::pair<std::pair<std::size_t,std::size_t> ,double > simplexPairi = maxGainSimplex(m_variables[i].example);
449 if(simplexPairi.second > bestGain){
450 best = simplexPairi.first;
451 bestGain = simplexPairi.second;
452 }
453 //finally search also in the best simplex
454 if(maxSimplexGradient > 0){
455 std::pair<std::pair<std::size_t,std::size_t> ,double > simplexPairBest = maxGainSimplex(maxSimplexExample);
456 if(simplexPairBest.second > bestGain){
457 best = simplexPairBest.first;
458 bestGain = simplexPairBest.second;
459 }
460 }
461 i = best.first;
462 j = best.second;
463 //return the mvp gradient
464 return std::max(maxGradient,maxSimplexGradient);
465 }
466
467 /// return the largest KKT violation
468 double checkKKT()const
469 {
470 double ret = 0.0;
471 for (std::size_t i=0; i<m_activeEx; i++){
472 Example const& ex = m_examples[i];
473 std::pair<std::pair<double,std::size_t>,std::pair<double,std::size_t> > pair = getSimplexMVP(ex);
474 double up = pair.first.first;
475 double down = pair.second.first;
476
477 //check all search directions
478 //case 1: decreasing the value of a variable
479 ret = std::max(-down, ret);
480 //case 2: if we are not at the boundary increasing the variable
481 if(ex.varsum < m_C)
482 ret = std::max(up, ret);
483 //case 3: along the plane \sum_i alpha_i = m_C
484 if(ex.varsum == m_C)
485 ret = std::max(up-down, ret);
486 }
487 return ret;
488 }
489
490protected:
491
492 /// data structure describing one variable of the problem
493 struct Variable
494 {
495 ///index into the example list
496 std::size_t example;
497 /// constraint corresponding to this variable
498 std::size_t p;
499 /// index into example->m_numVariables
500 std::size_t index;
501 /// diagonal entry of the big Q-matrix
502 double diagonal;
503 };
504
505 /// data structure describing one training example
506 struct Example
507 {
508 /// example index in the dataset, not the example vector!
509 std::size_t index;
510 /// label of this example
511 unsigned int y;
512 /// number of active variables
513 std::size_t active;
514 /// list of all m_cardP variables, in order of the p-index
515 std::size_t* var;
516 /// list of active variables
517 std::size_t* avar;
518 /// total sum of all variables of this example
519 double varsum;
520 /// diagonal entry of the kernel matrix k(x,x);
521 double diagonal;
522 };
523
524 /// \brief Finds the second variable of a working set using maximum gain and returns the pair and gain
525 ///
526 /// The variable is searched in-between samples. And not inside the simplex of i.
527 /// It returns the best pair (i,j) as well as the gain. If the first variable
528 /// can't make a step, gain 0 is returned with pair(i,i).
529 std::pair<std::pair<std::size_t,std::size_t>,double> maxGainBox(std::size_t i)const{
530 std::size_t e = m_variables[i].example;
532 std::size_t pi = m_variables[i].p;
533 unsigned int yi = m_examples[e].y;
534 double Qii = m_variables[i].diagonal;
535 double gi = m_gradient(i);
536 if(m_examples[e].varsum == m_C && gi > 0)
537 return std::make_pair(std::make_pair(i,i),0.0);
538
539
540 QpFloatType* k = m_kernelMatrix.row(e, 0, m_activeEx);
541
542 std::size_t bestj = i;
543 double bestGain = gi * gi / Qii;
544
545 for (std::size_t a=0; a<m_activeEx; a++)
546 {
547 //don't search the simplex of the first variable
548 if(a == e) continue;
549
550 Example const& exa = m_examples[a];
551 unsigned int ya = exa.y;
552 bool canGrow = exa.varsum != m_C;
553
554 typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * (yi * m_cardP + pi) + ya);
555 QpFloatType def = row.defaultvalue;
556
557 for (std::size_t p=0, b=0; p < m_cardP; p++)
558 {
559 std::size_t j = exa.var[p];
560
561 double Qjj = m_variables[j].diagonal;
562 double gj = m_gradient(j);
563 double Qij = def * k[a];
564 //check whether we are at an existing element of the sparse row
565 if( b != row.size && p == row.entry[b].index){
566 Qij = row.entry[b].value * k[a];
567 ++b;//move to next element
568 }
569
570 //don't check variables which are shrinked or bounded
571 if(j >= m_activeVar || (m_alpha(j) == 0 && gj <= 0)|| (!canGrow && gj >= 0))
572 continue;
573
574 double gain = detail::maximumGainQuadratic2D(Qii, Qjj, Qij, gi,gj);
575 if( bestGain < gain){
576 bestj = j;
577 bestGain = gain;
578 }
579 }
580 }
581 return std::make_pair(std::make_pair(i,bestj),bestGain);
582 }
583
584 ///\brief Returns the best variable pair (i,j) and gain for a given example.
585 ///
586 /// For a given example all possible pairs of variables are checkd and the one giving
587 /// the maximum gain is returned. This method has a special handling for the
588 /// simplex case.
589 std::pair<std::pair<std::size_t,std::size_t>,double> maxGainSimplex(std::size_t e)const{
590 Example const& ex = m_examples[e];
591 std::size_t pc = ex.active;//number of active variables for this example
592 unsigned int y = ex.y;//label of the example
593 bool canGrow = ex.varsum < m_C; //are we inside the simplex?
594 double Qee = m_examples[e].diagonal; //kernel entry of the example
595
596 double bestGain = -1e100;
597 std::size_t besti = 0;
598 std::size_t bestj = 0;
599
600 //search through all possible variable pairs
601 //for every pair we will build the quadratic subproblem
602 //and than decide whether we can do
603 // 1.a valid step in the inside of the simplex
604 // that is canGrow==true or the gradients of both variables point inside
605 // 2. a valid step along the simplex constraint,
606 // that is cangrow == true and both variables point outside)
607 // 3. a 1-D step
608 // that is canGrow == true or alpha(i) > 0 & gradient(i) < 0
609
610 //iterate over the active ones as the first variable
611 for(std::size_t p1 = 0; p1 != pc; ++p1){
612 std::size_t i = ex.avar[p1];
613 double gi = m_gradient(i);
614 double ai = m_alpha(i);
615 double Qii = m_variables[i].diagonal;
616
617 //check whether a 1-D gain is possible
618 if((gi < 0 && m_alpha(i) > 0.0) || (gi > 0 && canGrow)){
619 double gain = gi * gi / Qii;
620 if(gain > bestGain){
621 bestGain= gain;
622 besti = i;
623 bestj = i;
624 }
625 }
626
627 //now create the 2D problem for all possible variable pairs
628 //and than check for possible gain steps
629 //find first the row of coefficients for M(y,y,i,j) for all j
630 //question: is p1 == m_variables[ex.avar[p1]].p?
631 //otherwise: is p1 == m_variables[ex.var[p1]].p for *all* variables?
632 typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * (y * m_cardP + m_variables[i].p) + y);
633 QpFloatType def = row.defaultvalue;
634
635 //we need to iterate over all vars instead of only the active variables to be in sync with the matrix row
636 //we will still overstep all inactive variables
637 for(std::size_t p2 = 0, b=0; p2 != m_cardP; ++p2){
638 std::size_t j = ex.var[p2];
639 double gj = m_gradient(j);
640 double aj = m_alpha(j);
641 double Qjj = m_variables[j].diagonal;
642
643 //create the offdiagonal element of the 2D problem
644 double Qij = def * Qee;
645 //check whether we are at an existing element of the sparse row
646 if( b != row.size && p2 == row.entry[b].index){
647 Qij = row.entry[b].value * Qee;
648 ++b;//move to next element
649 }
650
651 //ignore inactive variables or variables we already checked
652 if(j >= m_activeVar || j <= i ){
653 continue;
654 }
655
656 double gain = 0;
657 //check whether we can move along the simplex constraint
658 if(!canGrow && gi > 0 && gj > 0){
659 double gainUp = 0;
660 double gainDown = 0;
661 //we need to check both search directions for ai
662 if(aj > 0 && gi-gj > 0){
663 gainUp = detail::maximumGainQuadratic2DOnLine(Qii, Qjj, Qij, gi,gj);
664 }
665 //also check whether a line search in the other direction works
666 if (ai > 0 &&gj-gi > 0){
667 gainDown = detail::maximumGainQuadratic2DOnLine(Qjj, Qii, Qij, gj,gi);
668 }
669 gain = std::max(gainUp,gainDown);
670 }
671 //else we are inside the simplex
672 //in this case only check that both variables can shrink if needed
673 else if(!(gi <= 0 && ai == 0) && !(gj<= 0 && aj == 0)){
674 gain = detail::maximumGainQuadratic2D(Qii, Qjj, Qij, gi,gj);
675 }
676
677 //accept only maximum gain
678 if(gain > bestGain){
679 bestGain= gain;
680 besti = i;
681 bestj = j;
682 }
683
684 }
685 }
686 //return best pair and possible gain
687 return std::make_pair(std::make_pair(besti,bestj),bestGain);
688 }
689
690 /// \brief For a given simplex returns the MVP indicies (max_up,min_down)
691 std::pair<
692 std::pair<double,std::size_t>,
693 std::pair<double,std::size_t>
694 > getSimplexMVP(Example const& ex)const{
695 std::size_t pc = ex.active;
696 double up = -1e100;
697 double down = 1e100;
698 std::size_t maxUp = ex.avar[0];
699 std::size_t minDown = ex.avar[0];
700 for (std::size_t p = 0; p != pc; p++)
701 {
702 std::size_t v = ex.avar[p];
704 double a = m_alpha(v);
705 double g = m_gradient(v);
706 if (g > up) {
707 maxUp = v;
708 up = g;
709 }
710 if (a > 0.0 && g < down){
711 minDown = v;
712 down = g;
713 }
714 }
715 return std::make_pair(std::make_pair(up,maxUp),std::make_pair(down,minDown));
716 }
717
718 void updateVarsum(std::size_t exampleId, double mu){
719 double& varsum = m_examples[exampleId].varsum;
720 varsum += mu;
721 if(varsum > 1.e-12 && m_C-varsum > 1.e-12*m_C)
722 return;
723 //recompute for numerical accuracy
724
725 varsum = 0;
726 for(std::size_t p = 0; p != m_cardP; ++p){
727 std::size_t varIndex = m_examples[exampleId].var[p];
728 varsum += m_alpha[varIndex];
729 }
730
731 if(varsum < 1.e-14)
732 varsum = 0;
733 if(m_C-varsum < 1.e-14*m_C)
734 varsum = m_C;
735 }
736
737 void gradientUpdate(std::size_t r, double mu, QpFloatType* q)
738 {
739 for ( std::size_t a= 0; a< m_activeEx; a++)
740 {
741 double k = q[a];
742 Example& ex = m_examples[a];
743 typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * r + ex.y);
744 QpFloatType def = row.defaultvalue;
745 for (std::size_t b=0; b<row.size; b++){
746 std::size_t p = row.entry[b].index;
747 m_gradient(ex.var[p]) -= mu * (row.entry[b].value - def) * k;
748 }
749 if (def != 0.0){
750 double upd = mu* def * k;
751 for (std::size_t b=0; b<ex.active; b++)
752 m_gradient(ex.avar[b]) -= upd;
753 }
754 }
755 }
756
757 /// shrink a variable
758 void deactivateVariable(std::size_t v)
759 {
760 std::size_t ev = m_variables[v].example;
761 std::size_t iv = m_variables[v].index;
762 std::size_t pv = m_variables[v].p;
763 Example* exv = &m_examples[ev];
764
765 std::size_t ih = exv->active - 1;
766 std::size_t h = exv->avar[ih];
767 m_variables[v].index = ih;
768 m_variables[h].index = iv;
769 std::swap(exv->avar[iv], exv->avar[ih]);
770 iv = ih;
771 exv->active--;
772
773 std::size_t j = m_activeVar - 1;
774 std::size_t ej = m_variables[j].example;
775 std::size_t ij = m_variables[j].index;
776 std::size_t pj = m_variables[j].p;
777 Example* exj = &m_examples[ej];
778
779 // exchange entries in the lists
780 std::swap(m_alpha(v), m_alpha(j));
781 std::swap(m_gradient(v), m_gradient(j));
782 std::swap(m_linear(v), m_linear(j));
783 std::swap(m_variables[v], m_variables[j]);
784
785 m_variables[exv->avar[iv]].index = ij;
786 m_variables[exj->avar[ij]].index = iv;
787 exv->avar[iv] = j;
788 exv->var[pv] = j;
789 exj->avar[ij] = v;
790 exj->var[pj] = v;
791
792 m_activeVar--;
793
794 //finally check if the example is needed anymore
795 if(exv->active == 0)
797 }
798
799 /// shrink an example
800 void deactivateExample(std::size_t e)
801 {
803 std::size_t j = m_activeEx - 1;
804 m_activeEx--;
805 if(e == j) return;
806
807 std::swap(m_examples[e], m_examples[j]);
808
809 std::size_t* pe = m_examples[e].var;
810 std::size_t* pj = m_examples[j].var;
811 for (std::size_t v = 0; v < m_cardP; v++)
812 {
813 SHARK_ASSERT(pj[v] >= m_activeVar);
814 m_variables[pe[v]].example = e;
815 m_variables[pj[v]].example = j;
816 }
817 m_kernelMatrix.flipColumnsAndRows(e, j);
818 }
819
820 /// \brief Returns the original index of the example of a variable in the dataset before optimization.
821 ///
822 /// Shrinking is an internal detail so the communication with the outside world uses the original indizes.
823 std::size_t originalIndex(std::size_t v)const{
824 std::size_t i = m_variables[v].example;
825 return m_examples[i].index;//i before shrinking
826 }
827
828 /// kernel matrix (precomputed matrix or matrix cache)
830
831 /// kernel modifiers
832 QpSparseArray<QpFloatType> const& m_M; // M(|P|*y_i+p, y_j, q)
833
834 /// complexity constant; upper bound on all variabless
835 double m_C;
836
837 /// number of classes in the problem
838 std::size_t m_classes;
839
840 /// number of dual variables per example
841 std::size_t m_cardP;
842
843 /// number of examples in the problem (size of the kernel matrix)
844 std::size_t m_numExamples;
845
846 /// number of variables in the problem = m_numExamples * m_cardP
847 std::size_t m_numVariables;
848
849 /// linear part of the objective function
850 RealVector m_linear;
851
852 /// solution candidate
853 RealVector m_alpha;
854
855 /// gradient of the objective function
856 /// The m_gradient array is of fixed size and not subject to shrinking.
857 RealVector m_gradient;
858
859 /// information about each training example
860 std::vector<Example> m_examples;
861
862 /// information about each variable of the problem
863 std::vector<Variable> m_variables;
864
865 /// space for the example[i].var pointers
866 std::vector<std::size_t> m_storage1;
867
868 /// space for the example[i].avar pointers
869 std::vector<std::size_t> m_storage2;
870
871 /// number of currently active examples
872 std::size_t m_activeEx;
873
874 /// number of currently active variables
875 std::size_t m_activeVar;
876
877 /// should the m_problem use the shrinking heuristics?
879
880 /// true if the problem has already been unshrinked
882};
883
884
885template<class Matrix>
887public:
888 typedef typename Matrix::QpFloatType QpFloatType;
889 BiasSolverSimplex(QpMcSimplexDecomp<Matrix>* problem) : m_problem(problem){}
890
891 void solve(
892 RealVector& bias,
895 bool sumToZero,
896 QpSolutionProperties* prop = NULL
897 ){
898 std::size_t classes = bias.size();
899 std::size_t numExamples = m_problem->getNumExamples();
900 std::size_t cardP = m_problem->cardP();
901 RealVector stepsize(classes, 0.01);
902 RealVector prev(classes,0);
903 RealVector step(classes);
904
905 double start_time = Timer::now();
906 unsigned long long iterations = 0;
907
908 do{
909 QpSolutionProperties propInner;
910 QpSolver<QpMcSimplexDecomp<Matrix> > solver(*m_problem);
911 solver.solve(stop, &propInner);
912 iterations += propInner.iterations;
913
914 // Rprop loop to update the bias
915 while (true)
916 {
917 RealMatrix dualGradient = m_problem->solutionGradient();
918 // compute the primal m_gradient w.r.t. bias
919 RealVector grad(classes,0);
920
921 for (std::size_t i=0; i<numExamples; i++){
922 std::size_t largestP = cardP;
923 double largest_value = 0.0;
924 for (std::size_t p=0; p<cardP; p++)
925 {
926 if (dualGradient(i,p) > largest_value)
927 {
928 largest_value = dualGradient(i,p);
929 largestP = p;
930 }
931 }
932 if (largestP < cardP)
933 {
934 unsigned int y = m_problem->label(i);
935 typename QpSparseArray<QpFloatType>::Row const& row = nu.row(y * cardP + largestP);
936 for (std::size_t b=0; b != row.size; b++)
937 grad(row.entry[b].index) -= row.entry[b].value;
938 }
939 }
940
941 if (sumToZero)
942 {
943 // project the m_gradient
944 grad -= sum(grad) / classes;
945 }
946
947 // Rprop
948 for (std::size_t c=0; c<classes; c++)
949 {
950 double g = grad(c);
951 if (g > 0.0)
952 step(c) = -stepsize(c);
953 else if (g < 0.0)
954 step(c) = stepsize(c);
955
956 double gg = prev(c) * grad(c);
957 if (gg > 0.0)
958 stepsize(c) *= 1.2;
959 else
960 stepsize(c) *= 0.5;
961 }
962 prev = grad;
963
964 if (sumToZero)
965 {
966 // project the step
967 step -= sum(step) / classes;
968 }
969
970 // update the solution and the dual m_gradient
971 bias += step;
972 performBiasUpdate(step,nu);
973
974 if (max(stepsize) < 0.01 * stop.minAccuracy) break;
975 }
976 }while(m_problem->checkKKT()> stop.minAccuracy);
977
978 if (prop != NULL)
979 {
980 double finish_time = Timer::now();
981
982 prop->accuracy = m_problem->checkKKT();
983 prop->value = m_problem->functionValue();
984 prop->iterations = iterations;
985 prop->seconds = finish_time - start_time;
986 }
987 }
988private:
989 void performBiasUpdate(
990 RealVector const& step, QpSparseArray<QpFloatType> const& nu
991 ){
992 std::size_t numExamples = m_problem->getNumExamples();
993 std::size_t cardP = m_problem->cardP();
994 RealMatrix deltaLinear(numExamples,cardP,0.0);
995 for (std::size_t i=0; i<numExamples; i++){
996 for (std::size_t p=0; p<cardP; p++){
997 unsigned int y = m_problem->label(i);
998 typename QpSparseArray<QpFloatType>::Row const& row = nu.row(y * cardP +p);
999 for (std::size_t b=0; b<row.size; b++)
1000 {
1001 deltaLinear(i,p) -= row.entry[b].value * step(row.entry[b].index);
1002 }
1003 }
1004 }
1005 m_problem->addDeltaLinear(deltaLinear);
1006
1007 }
1008 QpMcSimplexDecomp<Matrix>* m_problem;
1009};
1010
1011
1012}
1013#endif