QpMcLinear.h
//===========================================================================
/*!
 *
 *
 * \brief Quadratic programming solvers for linear multi-class SVM training without bias.
 *
 *
 *
 * \author T. Glasmachers
 * \date -
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ALGORITHMS_QP_QPMCLINEAR_H
#define SHARK_ALGORITHMS_QP_QPMCLINEAR_H

#include <shark/Core/Timer.h>
#include <shark/Algorithms/QP/QuadraticProgram.h>  // QpStoppingCondition, QpSolutionProperties
#include <shark/Data/Dataset.h>
#include <shark/Data/DataView.h>
#include <shark/LinAlg/Base.h>
#include <algorithm>  // std::shuffle
#include <cmath>
#include <iostream>
#include <vector>

namespace shark {


/// \brief Generic solver skeleton for linear multi-class SVM problems.
template <class InputT>
class QpMcLinear
{
public:
    typedef LabeledData<InputT, unsigned int> DatasetType;
    typedef typename Batch<InputT>::const_reference InputReferenceType;

    enum CoordinateSelectionStrategy {UNIFORM, ACF};

    ///
    /// \brief Constructor
    ///
    /// \param dataset    training data
    /// \param dim        problem dimension
    /// \param classes    number of classes in the problem
    /// \param strategy   coordinate selection strategy
    /// \param shrinking  flag turning shrinking on and off
    ///
    QpMcLinear(
            const DatasetType& dataset,
            std::size_t dim,
            std::size_t classes,
            std::size_t strategy = ACF,
            bool shrinking = false)
    : m_data(dataset)
    , m_xSquared(dataset.numberOfElements())
    , m_dim(dim)
    , m_classes(classes)
    , m_strategy(strategy)
    , m_shrinking(shrinking)
    {
        for (std::size_t i=0; i<m_data.size(); i++)
        {
            m_xSquared(i) = inner_prod(m_data[i].input, m_data[i].input);
        }
    }

    ///
    /// \brief Solve the SVM training problem.
    ///
    /// \param rng      random number generator used by the algorithm
    /// \param C        regularization constant of the SVM
    /// \param stop     stopping condition(s)
    /// \param prop     solution properties
    /// \param verbose  if true, the solver prints status information and solution statistics
    ///
    RealMatrix solve(
            random::rng_type& rng,
            double C,
            QpStoppingCondition& stop,
            QpSolutionProperties* prop = NULL,
            bool verbose = false)
    {
        // sanity checks
        SHARK_ASSERT(C > 0.0);

        // measure training time
        Timer timer;
        timer.start();

        // prepare dimensions and vectors
        std::size_t ell = m_data.size();            // number of training examples
        RealMatrix alpha(ell, m_classes + 1, 0.0);  // Lagrange multipliers (dual variables); reserve one extra column
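        // Note: the extra column alpha(i, m_classes) is used by the solvers with a
        // shared upper bound on each example's variables (the Crammer & Singer style
        // machines below); they cache the sum of the example's dual variables there.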
        RealMatrix w(m_classes, m_dim, 0.0);        // weight vectors (primal variables)

        // scheduling of steps, for ACF only
        RealVector pref(ell, 1.0);                  // example-wise measure of success
        double prefsum = (double)ell;               // normalization constant

        std::vector<std::size_t> schedule(ell);
        if (m_strategy == UNIFORM)
        {
            for (std::size_t i=0; i<ell; i++) schedule[i] = i;
        }

        // used for shrinking
        std::size_t active = ell;
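        // (examples whose KKT conditions are satisfied are swapped behind position
        // 'active' in the schedule and skipped until the next full sweep)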

        // prepare counters
        std::size_t epoch = 0;
        std::size_t steps = 0;

        // prepare performance monitoring
        double objective = 0.0;
        double max_violation = 0.0;

        // gain for ACF
        const double gain_learning_rate = 1.0 / ell;
        double average_gain = 0.0;


        // outer optimization loop (epochs)
        bool canstop = true;
        while (true)
        {
            if (m_strategy == ACF)
            {
                // define schedule
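                // (stochastic rounding: example i receives about
                // ell * pref(i) / prefsum slots in the schedule, so examples with
                // larger recent gains are visited more often)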
                double psum = prefsum;
                prefsum = 0.0;
                std::size_t pos = 0;
                for (std::size_t i=0; i<ell; i++)
                {
                    double p = pref(i);
                    double num = (psum < 1e-6) ? ell - pos : std::min((double)(ell - pos), (ell - pos) * p / psum);
                    std::size_t n = (std::size_t)std::floor(num);
                    double prob = num - n;
                    //~ if (random::coinToss(rng,prob)) n++;
                    if (random::uni(rng) < prob) n++;
                    for (std::size_t j=0; j<n; j++)
                    {
                        schedule[pos] = i;
                        pos++;
                    }
                    psum -= p;
                    prefsum += p;
                }
                SHARK_ASSERT(pos == ell);
            }

            if (m_shrinking == true)
            {
                //~ for (std::size_t i=0; i<active; i++)
                //~     std::swap(schedule[i], schedule[random::discrete(rng, std::size_t(0), active - 1)]);
                std::shuffle(schedule.begin(), schedule.begin() + active, rng);
            }
            else
            {
                //~ for (std::size_t i=0; i<ell; i++)
                //~     std::swap(schedule[i], schedule[random::discrete(rng, std::size_t(0), ell - 1)]);
                std::shuffle(schedule.begin(), schedule.end(), rng);
            }

            // inner loop (one epoch)
            max_violation = 0.0;
            std::size_t nPoints = ell;
            if (m_shrinking == true)
                nPoints = active;

            for (std::size_t j=0; j<nPoints; j++)
            {
                // active example
                double gain = 0.0;
                const std::size_t i = schedule[j];
                InputReferenceType x_i = m_data[i].input;
                const unsigned int y_i = m_data[i].label;
                const double q = m_xSquared(i);
                blas::dense_vector_adaptor<double> a = row(alpha, i);

                // compute gradient and KKT violation
                RealVector wx = prod(w, x_i);
                RealVector g(m_classes);
                double kkt = calcGradient(g, wx, a, C, y_i);

                if (kkt > 0.0)
                {
                    max_violation = std::max(max_violation, kkt);

                    // perform the step on alpha
                    RealVector mu(m_classes, 0.0);
                    gain = solveSub(0.1 * stop.minAccuracy, g, q, C, y_i, a, mu);
                    objective += gain;
                    steps++;

                    // update weight vectors
                    updateWeightVectors(w, mu, i);
                }
                else if (m_shrinking == true)
                {
                    active--;
                    std::swap(schedule[j], schedule[active]);
                    j--;
                }

                // update gain-based preferences
                if (m_strategy == ACF)
                {
                    if (epoch == 0) average_gain += gain / (double)ell;
                    else
                    {
                        // strategy constants
                        constexpr double CHANGE_RATE = 0.2;
                        constexpr double PREF_MIN = 0.05;
                        constexpr double PREF_MAX = 20.0;

                        double change = CHANGE_RATE * (gain / average_gain - 1.0);
                        double newpref = std::min(PREF_MAX, std::max(PREF_MIN, pref(i) * std::exp(change)));
                        prefsum += newpref - pref(i);
                        pref(i) = newpref;
                        average_gain = (1.0 - gain_learning_rate) * average_gain + gain_learning_rate * gain;
                    }
                }
            }

            epoch++;

            // stopping criteria
            if (stop.maxIterations > 0 && epoch * ell >= stop.maxIterations)
            {
                if (prop != NULL) prop->type = QpMaxIterationsReached;
                break;
            }

            if (timer.stop() >= stop.maxSeconds)
            {
                if (prop != NULL) prop->type = QpTimeout;
                break;
            }

            if (max_violation < stop.minAccuracy)
            {
                if (verbose)
                    std::cout << "#" << std::flush;
                if (canstop)
                {
                    if (prop != NULL) prop->type = QpAccuracyReached;
                    break;
                }
                else
                {
                    if (m_strategy == ACF)
                    {
                        // prepare a full sweep for reliable checking of the stopping criterion
                        canstop = true;
                        for (std::size_t i=0; i<ell; i++) pref(i) = 1.0;
                        prefsum = (double)ell;
                    }

                    if (m_shrinking == true)
                    {
                        // prepare a full sweep for reliable checking of the stopping criterion
                        active = ell;
                        canstop = true;
                    }
                }
            }
            else
            {
                if (verbose) std::cout << "." << std::flush;
                if (m_strategy == ACF)
                    canstop = false;
                if (m_shrinking == true)
                    canstop = (active == ell);
            }
        }
        timer.stop();

        // compute the dual objective value
        objective = 0.0;
        for (std::size_t j=0; j<m_classes; j++)
        {
            for (std::size_t d=0; d<m_dim; d++) objective -= w(j, d) * w(j, d);
        }
        objective *= 0.5;
        for (std::size_t i=0; i<ell; i++)
        {
            for (std::size_t j=0; j<m_classes; j++) objective += alpha(i, j);
        }

        // return solution statistics
        if (prop != NULL)
        {
            prop->accuracy = max_violation;   // approximate, but a good guess
            prop->iterations = ell * epoch;
            prop->value = objective;
            prop->seconds = timer.lastLap();
        }

        // output solution statistics
        if (verbose)
        {
            std::cout << std::endl;
            std::cout << "training time (seconds): " << timer.lastLap() << std::endl;
            std::cout << "number of epochs: " << epoch << std::endl;
            std::cout << "number of iterations: " << (ell * epoch) << std::endl;
            std::cout << "number of non-zero steps: " << steps << std::endl;
            std::cout << "dual accuracy: " << max_violation << std::endl;
            std::cout << "dual objective value: " << objective << std::endl;
        }

        // return the solution
        return w;
    }

protected:
    // for all c: row(w, c) += mu(c) * x
    void add_scaled(RealMatrix& w, RealVector const& mu, InputReferenceType x)
    {
        for (std::size_t c=0; c<m_classes; c++) noalias(row(w, c)) += mu(c) * x;
    }

    /// \brief Compute the gradient from the inner products of the weight vectors with the current sample.
    ///
    /// \param gradient  gradient vector to be filled in; the vector is correctly sized
    /// \param wx        inner products of weight vectors with the current sample; wx(c) = <w_c, x>
    /// \param alpha     variables corresponding to the current sample
    /// \param C         upper bound on the variables
    /// \param y         label of the current sample
    ///
    /// \return The function must return the violation of the KKT conditions.
    virtual double calcGradient(RealVector& gradient, RealVector wx, blas::dense_vector_adaptor<double const> const& alpha, double C, unsigned int y) = 0;

    /// \brief Update the weight vectors (primal variables) after a step on the dual variables.
    ///
    /// \param w      matrix of (dense) weight vectors (as rows)
    /// \param mu     dual step on the variables corresponding to the current sample
    /// \param index  current sample
    virtual void updateWeightVectors(RealMatrix& w, RealVector const& mu, std::size_t index) = 0;

    /// \brief Solve the sub-problem posed by a single training example.
    ///
    /// \param epsilon   accuracy (dual gradient) up to which the sub-problem should be solved
    /// \param gradient  gradient of the objective function w.r.t. alpha
    /// \param q         squared norm of the current sample
    /// \param C         upper bound on the variables
    /// \param y         label of the current sample
    /// \param alpha     input: initial point; output: (near) optimal point
    /// \param mu        step from the initial point to the final point
    ///
    /// \return The function must return the gain of the step, i.e., the improvement of the objective function.
    virtual double solveSub(double epsilon, RealVector& gradient, double q, double C, unsigned int y, blas::dense_vector_adaptor<double>& alpha, RealVector& mu) = 0;
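
    // Note: the concrete subclasses below implement solveSub as a small SMO loop.
    // Each iteration selects the coordinate with the largest KKT violation and
    // performs a Newton step m = gradient / Q on it, where Q is the diagonal
    // entry of the sub-problem's Hessian (a fixed multiple of q); the step is
    // then clipped so that alpha remains inside the box [0, C].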
378
379 DataView<const DatasetType> m_data; ///< view on training data
380 RealVector m_xSquared; ///< diagonal entries of the quadratic matrix
381 std::size_t m_dim; ///< input space dimension
382 std::size_t m_classes; ///< number of classes
383 std::size_t m_strategy; ///< strategy for coordinate selection
384 bool m_shrinking; ///< apply shrinking or not?
385};
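
// Example (sketch): training with one of the concrete solvers defined below.
// The exact QpStoppingCondition constructor signature is an assumption here;
// see QuadraticProgram.h for the authoritative definition.
//
//     LabeledData<RealVector, unsigned int> data = ...;   // inputs of dimension dim
//     QpMcLinearWW<RealVector> solver(data, dim, classes);
//     QpStoppingCondition stop(1e-3);                     // target KKT accuracy
//     QpSolutionProperties prop;
//     RealMatrix w = solver.solve(random::globalRng, C, stop, &prop);
//     // row c of w is the weight vector of class c; predict via arg max_c <w_c, x>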


/// \brief Solver for the multi-class SVM by Weston & Watkins.
template <class InputT>
class QpMcLinearWW : public QpMcLinear<InputT>
{
public:
    typedef LabeledData<InputT, unsigned int> DatasetType;

    /// \brief Constructor
    QpMcLinearWW(
            const DatasetType& dataset,
            std::size_t dim,
            std::size_t classes)
    : QpMcLinear<InputT>(dataset, dim, classes)
    { }

protected:
    /// \brief Compute the gradient from the inner products of the weight vectors with the current sample.
    virtual double calcGradient(RealVector& gradient, RealVector wx, blas::dense_vector_adaptor<double const> const& alpha, double C, unsigned int y)
    {
        double violation = 0.0;
        for (std::size_t c=0; c<wx.size(); c++)
        {
            if (c == y)
            {
                gradient(c) = 0.0;
            }
            else
            {
                const double g = 1.0 - 0.5 * (wx(y) - wx(c));
                gradient(c) = g;
                if (g > violation && alpha(c) < C) violation = g;
                else if (-g > violation && alpha(c) > 0.0) violation = -g;
            }
        }
        return violation;
    }

    /// \brief Update the weight vectors (primal variables) after a step on the dual variables.
    virtual void updateWeightVectors(RealMatrix& w, RealVector const& mu, std::size_t index)
    {
        double sum_mu = 0.0;
        for (std::size_t c=0; c<m_classes; c++) sum_mu += mu(c);
        unsigned int y = m_data[index].label;
        RealVector step(-0.5 * mu);
        step(y) = 0.5 * sum_mu;
        add_scaled(w, step, m_data[index].input);
    }

    /// \brief Solve the sub-problem posed by a single training example.
    virtual double solveSub(double epsilon, RealVector& gradient, double q, double C, unsigned int y, blas::dense_vector_adaptor<double>& alpha, RealVector& mu)
    {
        const double qq = 0.5 * q;
        double gain = 0.0;

        // SMO loop
        std::size_t iter, maxiter = 10 * m_classes;
        for (iter=0; iter<maxiter; iter++)
        {
            // select working set
            std::size_t idx = 0;
            double kkt = 0.0;
            for (std::size_t c=0; c<m_classes; c++)
            {
                if (c == y) continue;

                const double g = gradient(c);
                const double a = alpha(c);
                if (g > kkt && a < C) { kkt = g; idx = c; }
                else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
            }

            // check stopping criterion
            if (kkt < epsilon) break;

            // perform step
            const double a = alpha(idx);
            const double g = gradient(idx);
            double m = g / qq;
            double a_new = a + m;
            if (a_new <= 0.0)
            {
                m = -a;
                a_new = 0.0;
            }
            else if (a_new >= C)
            {
                m = C - a;
                a_new = C;
            }
            alpha(idx) = a_new;
            mu(idx) += m;

            // update gradient and total gain
            const double dg = 0.5 * m * qq;
            for (std::size_t c=0; c<m_classes; c++) gradient(c) -= dg;
            gradient(idx) -= dg;

            gain += m * (g - dg);
        }

        return gain;
    }

protected:
    using QpMcLinear<InputT>::add_scaled;
    using QpMcLinear<InputT>::m_data;
    using QpMcLinear<InputT>::m_classes;
};
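
// (In the Weston & Watkins machine every class c != y contributes its own slack
// variable; the gradient 1 - 0.5 * (wx(y) - wx(c)) used above is the derivative
// of a hinge penalty on the scaled margin difference between classes y and c.)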


/// \brief Solver for the multi-class SVM by Lee, Lin & Wahba.
template <class InputT>
class QpMcLinearLLW : public QpMcLinear<InputT>
{
public:
    typedef LabeledData<InputT, unsigned int> DatasetType;

    /// \brief Constructor
    QpMcLinearLLW(
            const DatasetType& dataset,
            std::size_t dim,
            std::size_t classes)
    : QpMcLinear<InputT>(dataset, dim, classes)
    { }

protected:
    /// \brief Compute the gradient from the inner products of the weight vectors with the current sample.
    virtual double calcGradient(RealVector& gradient, RealVector wx, blas::dense_vector_adaptor<double const> const& alpha, double C, unsigned int y)
    {
        double violation = 0.0;
        for (std::size_t c=0; c<m_classes; c++)
        {
            if (c == y)
            {
                gradient(c) = 0.0;
            }
            else
            {
                const double g = 1.0 + wx(c);
                gradient(c) = g;
                if (g > violation && alpha(c) < C) violation = g;
                else if (-g > violation && alpha(c) > 0.0) violation = -g;
            }
        }
        return violation;
    }

    /// \brief Update the weight vectors (primal variables) after a step on the dual variables.
    virtual void updateWeightVectors(RealMatrix& w, RealVector const& mu, std::size_t index)
    {
        double mean_mu = 0.0;
        for (std::size_t c=0; c<m_classes; c++) mean_mu += mu(c);
        mean_mu /= (double)m_classes;
        RealVector step(m_classes);
        for (std::size_t c=0; c<m_classes; c++) step(c) = mean_mu - mu(c);
        add_scaled(w, step, m_data[index].input);
    }

    /// \brief Solve the sub-problem posed by a single training example.
    virtual double solveSub(double epsilon, RealVector& gradient, double q, double C, unsigned int y, blas::dense_vector_adaptor<double>& alpha, RealVector& mu)
    {
        const double ood = 1.0 / m_classes;
        const double qq = (1.0 - ood) * q;
        double gain = 0.0;

        // SMO loop
        std::size_t iter, maxiter = 10 * m_classes;
        for (iter=0; iter<maxiter; iter++)
        {
            // select working set
            std::size_t idx = 0;
            double kkt = 0.0;
            for (std::size_t c=0; c<m_classes; c++)
            {
                if (c == y) continue;

                const double g = gradient(c);
                const double a = alpha(c);
                if (g > kkt && a < C) { kkt = g; idx = c; }
                else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
            }

            // check stopping criterion
            if (kkt < epsilon) break;

            // perform step
            const double a = alpha(idx);
            const double g = gradient(idx);
            double m = g / qq;
            double a_new = a + m;
            if (a_new <= 0.0)
            {
                m = -a;
                a_new = 0.0;
            }
            else if (a_new >= C)
            {
                m = C - a;
                a_new = C;
            }
            alpha(idx) = a_new;
            mu(idx) += m;

            // update gradient and total gain
            const double dg = m * q;
            const double dgc = dg / m_classes;
            for (std::size_t c=0; c<m_classes; c++) gradient(c) += dgc;
            gradient(idx) -= dg;

            gain += m * (g - 0.5 * (dg - dgc));
        }

        return gain;
    }

protected:
    using QpMcLinear<InputT>::add_scaled;
    using QpMcLinear<InputT>::m_data;
    using QpMcLinear<InputT>::m_classes;
};


/// \brief Solver for the multi-class SVM with absolute margin and total sum loss.
template <class InputT>
class QpMcLinearATS : public QpMcLinear<InputT>
{
public:
    typedef LabeledData<InputT, unsigned int> DatasetType;

    /// \brief Constructor
    QpMcLinearATS(
            const DatasetType& dataset,
            std::size_t dim,
            std::size_t classes)
    : QpMcLinear<InputT>(dataset, dim, classes)
    { }

protected:
    /// \brief Compute the gradient from the inner products of the weight vectors with the current sample.
    virtual double calcGradient(RealVector& gradient, RealVector wx, blas::dense_vector_adaptor<double const> const& alpha, double C, unsigned int y)
    {
        double violation = 0.0;
        for (std::size_t c=0; c<m_classes; c++)
        {
            const double g = (c == y) ? 1.0 - wx(y) : 1.0 + wx(c);
            gradient(c) = g;
            if (g > violation && alpha(c) < C) violation = g;
            else if (-g > violation && alpha(c) > 0.0) violation = -g;
        }
        return violation;
    }

    /// \brief Update the weight vectors (primal variables) after a step on the dual variables.
    virtual void updateWeightVectors(RealMatrix& w, RealVector const& mu, std::size_t index)
    {
        unsigned int y = m_data[index].label;
        double mean = -2.0 * mu(y);
        for (std::size_t c=0; c<m_classes; c++) mean += mu(c);
        mean /= (double)m_classes;
        RealVector step(m_classes);
        for (std::size_t c=0; c<m_classes; c++) step(c) = (c == y) ? (mu(c) + mean) : (mean - mu(c));
        add_scaled(w, step, m_data[index].input);
    }

    /// \brief Solve the sub-problem posed by a single training example.
    virtual double solveSub(double epsilon, RealVector& gradient, double q, double C, unsigned int y, blas::dense_vector_adaptor<double>& alpha, RealVector& mu)
    {
        const double ood = 1.0 / m_classes;
        const double qq = (1.0 - ood) * q;
        double gain = 0.0;

        // SMO loop
        std::size_t iter, maxiter = 10 * m_classes;
        for (iter=0; iter<maxiter; iter++)
        {
            // select working set
            std::size_t idx = 0;
            double kkt = 0.0;
            for (std::size_t c=0; c<m_classes; c++)
            {
                const double g = gradient(c);
                const double a = alpha(c);
                if (g > kkt && a < C) { kkt = g; idx = c; }
                else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
            }

            // check stopping criterion
            if (kkt < epsilon) break;

            // perform step
            const double a = alpha(idx);
            const double g = gradient(idx);
            double m = g / qq;
            double a_new = a + m;
            if (a_new <= 0.0)
            {
                m = -a;
                a_new = 0.0;
            }
            else if (a_new >= C)
            {
                m = C - a;
                a_new = C;
            }
            alpha(idx) = a_new;
            mu(idx) += m;

            // update gradient and total gain
            const double dg = m * q;
            const double dgc = dg / m_classes;
            if (idx == y)
            {
                for (std::size_t c=0; c<m_classes; c++) gradient(c) -= dgc;
                gradient(idx) -= dg - 2.0 * dgc;
            }
            else
            {
                for (std::size_t c=0; c<m_classes; c++) gradient(c) += (c == y) ? -dgc : dgc;
                gradient(idx) -= dg;
            }

            gain += m * (g - 0.5 * (dg - dgc));
        }

        return gain;
    }

protected:
    using QpMcLinear<InputT>::add_scaled;
    using QpMcLinear<InputT>::m_data;
    using QpMcLinear<InputT>::m_classes;
};


/// \brief Solver for the multi-class maximum margin regression SVM.
template <class InputT>
class QpMcLinearMMR : public QpMcLinear<InputT>
{
public:
    typedef LabeledData<InputT, unsigned int> DatasetType;

    /// \brief Constructor
    QpMcLinearMMR(
            const DatasetType& dataset,
            std::size_t dim,
            std::size_t classes)
    : QpMcLinear<InputT>(dataset, dim, classes)
    { }

protected:
    /// \brief Compute the gradient from the inner products of the weight vectors with the current sample.
    virtual double calcGradient(RealVector& gradient, RealVector wx, blas::dense_vector_adaptor<double const> const& alpha, double C, unsigned int y)
    {
        for (std::size_t c=0; c<m_classes; c++) gradient(c) = 0.0;
        const double g = 1.0 - wx(y);
        gradient(y) = g;
        const double a = alpha(0);
        if (g > 0.0)
        {
            if (a == C) return 0.0;
            else return g;
        }
        else
        {
            if (a == 0.0) return 0.0;
            else return -g;
        }
    }

    /// \brief Update the weight vectors (primal variables) after a step on the dual variables.
    virtual void updateWeightVectors(RealMatrix& w, RealVector const& mu, std::size_t index)
    {
        unsigned int y = m_data[index].label;
        double s = mu(0);
        double sc = -s / m_classes;
        double sy = s + sc;
        RealVector step(m_classes);
        for (std::size_t c=0; c<m_classes; c++) step(c) = (c == y) ? sy : sc;
        add_scaled(w, step, m_data[index].input);
    }

    /// \brief Solve the sub-problem posed by a single training example.
    virtual double solveSub(double epsilon, RealVector& gradient, double q, double C, unsigned int y, blas::dense_vector_adaptor<double>& alpha, RealVector& mu)
    {
        const double ood = 1.0 / m_classes;
        const double qq = (1.0 - ood) * q;

        double kkt = 0.0;
        const double g = gradient(y);
        const double a = alpha(0);
        if (g > kkt && a < C) kkt = g;
        else if (-g > kkt && a > 0.0) kkt = -g;

        // check stopping criterion
        if (kkt < epsilon) return 0.0;

        // perform step
        double m = g / qq;
        double a_new = a + m;
        if (a_new <= 0.0)
        {
            m = -a;
            a_new = 0.0;
        }
        else if (a_new >= C)
        {
            m = C - a;
            a_new = C;
        }
        alpha(0) = a_new;
        mu(0) = m;

        // return the gain
        return m * (g - 0.5 * m * qq);
    }

protected:
    using QpMcLinear<InputT>::add_scaled;
    using QpMcLinear<InputT>::m_data;
    using QpMcLinear<InputT>::m_classes;
};


/// \brief Solver for the multi-class SVM by Crammer & Singer.
template <class InputT>
class QpMcLinearCS : public QpMcLinear<InputT>
{
public:
    typedef LabeledData<InputT, unsigned int> DatasetType;

    /// \brief Constructor
    QpMcLinearCS(
            const DatasetType& dataset,
            std::size_t dim,
            std::size_t classes)
    : QpMcLinear<InputT>(dataset, dim, classes)
    { }

protected:
    /// \brief Compute the gradient from the inner products of the weight vectors with the current sample.
    virtual double calcGradient(RealVector& gradient, RealVector wx, blas::dense_vector_adaptor<double const> const& alpha, double C, unsigned int y)
    {
        if (alpha(m_classes) < C)
        {
            double violation = 0.0;
            for (std::size_t c=0; c<wx.size(); c++)
            {
                if (c == y)
                {
                    gradient(c) = 0.0;
                }
                else
                {
                    const double g = 1.0 - 0.5 * (wx(y) - wx(c));
                    gradient(c) = g;
                    if (g > violation) violation = g;
                    else if (-g > violation && alpha(c) > 0.0) violation = -g;
                }
            }
            return violation;
        }
        else
        {
            // double kkt_up = -1e100, kkt_down = 1e100;
            double kkt_up = 0.0, kkt_down = 1e100;
            for (std::size_t c=0; c<m_classes; c++)
            {
                if (c == y)
                {
                    gradient(c) = 0.0;
                }
                else
                {
                    const double g = 1.0 - 0.5 * (wx(y) - wx(c));
                    gradient(c) = g;
                    if (g > kkt_up && alpha(c) < C) kkt_up = g;
                    if (g < kkt_down && alpha(c) > 0.0) kkt_down = g;
                }
            }
            return std::max(0.0, kkt_up - kkt_down);
        }
    }

    /// \brief Update the weight vectors (primal variables) after a step on the dual variables.
    virtual void updateWeightVectors(RealMatrix& w, RealVector const& mu, std::size_t index)
    {
        unsigned int y = m_data[index].label;
        double sum_mu = 0.0;
        for (std::size_t c=0; c<m_classes; c++) if (c != y) sum_mu += mu(c);
        RealVector step(-0.5 * mu);
        step(y) = 0.5 * sum_mu;
        add_scaled(w, step, m_data[index].input);
    }

    /// \brief Solve the sub-problem posed by a single training example.
    virtual double solveSub(double epsilon, RealVector& gradient, double q, double C, unsigned int y, blas::dense_vector_adaptor<double>& alpha, RealVector& mu)
    {
        const double qq = 0.5 * q;
        double gain = 0.0;

        // SMO loop
        std::size_t iter, maxiter = 10 * m_classes;
        for (iter=0; iter<maxiter; iter++)
        {
            // select working set
            std::size_t idx = 0;
            std::size_t idx_up = 0, idx_down = 0;
            bool size2 = false;
            double kkt = 0.0;
            double grad = 0.0;
            if (alpha(m_classes) == C)
            {
                double kkt_up = -1e100, kkt_down = 1e100;
                for (std::size_t c=0; c<m_classes; c++)
                {
                    if (c == y) continue;

                    const double g = gradient(c);
                    const double a = alpha(c);
                    if (g > kkt_up && a < C) { kkt_up = g; idx_up = c; }
                    if (g < kkt_down && a > 0.0) { kkt_down = g; idx_down = c; }
                }

                if (kkt_up <= 0.0)
                {
                    idx = idx_down;
                    grad = kkt_down;
                    kkt = -kkt_down;
                }
                else
                {
                    grad = kkt_up - kkt_down;
                    kkt = grad;
                    size2 = true;
                }
            }
            else
            {
                for (std::size_t c=0; c<m_classes; c++)
                {
                    if (c == y) continue;

                    const double g = gradient(c);
                    const double a = alpha(c);
                    if (g > kkt) { kkt = g; idx = c; }
                    else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
                }
                grad = gradient(idx);
            }

            // check stopping criterion
            if (kkt < epsilon) return gain;

            if (size2)
            {
                // perform step
                const double a_up = alpha(idx_up);
                const double a_down = alpha(idx_down);
                double m = grad / qq;
                double a_up_new = a_up + m;
                double a_down_new = a_down - m;
                if (a_down_new <= 0.0)
                {
                    m = a_down;
                    a_up_new = a_up + m;
                    a_down_new = 0.0;
                }
                alpha(idx_up) = a_up_new;
                alpha(idx_down) = a_down_new;
                mu(idx_up) += m;
                mu(idx_down) -= m;

                // update gradient and total gain
                const double dg = 0.5 * m * qq;
                gradient(idx_up) -= dg;
                gradient(idx_down) += dg;
                gain += m * (grad - 2.0 * dg);
            }
            else
            {
                // perform step
                const double a = alpha(idx);
                const double a_sum = alpha(m_classes);
                double m = grad / qq;
                double a_new = a + m;
                double a_sum_new = a_sum + m;
                if (a_new <= 0.0)
                {
                    m = -a;
                    a_new = 0.0;
                    a_sum_new = a_sum + m;
                }
                else if (a_sum_new >= C)
                {
                    m = C - a_sum;
                    a_sum_new = C;
                    a_new = a + m;
                }
                alpha(idx) = a_new;
                alpha(m_classes) = a_sum_new;
                mu(idx) += m;

                // update gradient and total gain
                const double dg = 0.5 * m * qq;
                for (std::size_t c=0; c<m_classes; c++) gradient(c) -= dg;
                gradient(idx) -= dg;
                gain += m * (grad - dg);
            }
        }

        return gain;
    }

protected:
    using QpMcLinear<InputT>::add_scaled;
    using QpMcLinear<InputT>::m_data;
    using QpMcLinear<InputT>::m_classes;
};
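
// (The Crammer & Singer machine imposes a single shared bound C on each
// example's variables: alpha(i, m_classes) caches the per-example sum, and once
// that sum reaches C the solver switches to paired steps that keep it constant.)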


/// \brief Solver for the multi-class SVM with absolute margin and discriminative maximum loss.
template <class InputT>
class QpMcLinearADM : public QpMcLinear<InputT>
{
public:
    typedef LabeledData<InputT, unsigned int> DatasetType;

    /// \brief Constructor
    QpMcLinearADM(
            const DatasetType& dataset,
            std::size_t dim,
            std::size_t classes)
    : QpMcLinear<InputT>(dataset, dim, classes)
    { }

protected:
    /// \brief Compute the gradient from the inner products of the weight vectors with the current sample.
    virtual double calcGradient(RealVector& gradient, RealVector wx, blas::dense_vector_adaptor<double const> const& alpha, double C, unsigned int y)
    {
        if (alpha(m_classes) < C)
        {
            double violation = 0.0;
            for (std::size_t c=0; c<m_classes; c++)
            {
                if (c == y)
                {
                    gradient(c) = 0.0;
                }
                else
                {
                    const double g = 1.0 + wx(c);
                    gradient(c) = g;
                    if (g > violation) violation = g;
                    else if (-g > violation && alpha(c) > 0.0) violation = -g;
                }
            }
            return violation;
        }
        else
        {
            double kkt_up = 0.0, kkt_down = 1e100;
            for (std::size_t c=0; c<m_classes; c++)
            {
                if (c == y)
                {
                    gradient(c) = 0.0;
                }
                else
                {
                    const double g = 1.0 + wx(c);
                    gradient(c) = g;
                    if (g > kkt_up && alpha(c) < C) kkt_up = g;
                    if (g < kkt_down && alpha(c) > 0.0) kkt_down = g;
                }
            }
            return std::max(0.0, kkt_up - kkt_down);
        }
    }

    /// \brief Update the weight vectors (primal variables) after a step on the dual variables.
    virtual void updateWeightVectors(RealMatrix& w, RealVector const& mu, std::size_t index)
    {
        double mean_mu = 0.0;
        for (std::size_t c=0; c<m_classes; c++) mean_mu += mu(c);
        mean_mu /= (double)m_classes;
        RealVector step(m_classes);
        for (std::size_t c=0; c<m_classes; c++) step(c) = mean_mu - mu(c);
        add_scaled(w, step, m_data[index].input);
    }

    /// \brief Solve the sub-problem posed by a single training example.
    virtual double solveSub(double epsilon, RealVector& gradient, double q, double C, unsigned int y, blas::dense_vector_adaptor<double>& alpha, RealVector& mu)
    {
        const double ood = 1.0 / m_classes;
        const double qq = (1.0 - ood) * q;
        double gain = 0.0;

        // SMO loop
        std::size_t iter, maxiter = 10 * m_classes;
        for (iter=0; iter<maxiter; iter++)
        {
            // select working set
            std::size_t idx = 0;
            std::size_t idx_up = 0, idx_down = 0;
            bool size2 = false;
            double kkt = 0.0;
            double grad = 0.0;
            if (alpha(m_classes) == C)
            {
                double kkt_up = -1e100, kkt_down = 1e100;
                for (std::size_t c=0; c<m_classes; c++)
                {
                    if (c == y) continue;

                    const double g = gradient(c);
                    const double a = alpha(c);
                    if (g > kkt_up && a < C) { kkt_up = g; idx_up = c; }
                    if (g < kkt_down && a > 0.0) { kkt_down = g; idx_down = c; }
                }

                if (kkt_up <= 0.0)
                {
                    idx = idx_down;
                    grad = kkt_down;
                    kkt = -kkt_down;
                }
                else
                {
                    grad = kkt_up - kkt_down;
                    kkt = grad;
                    size2 = true;
                }
            }
            else
            {
                for (std::size_t c=0; c<m_classes; c++)
                {
                    if (c == y) continue;

                    const double g = gradient(c);
                    const double a = alpha(c);
                    if (g > kkt) { kkt = g; idx = c; }
                    else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
                }
                grad = gradient(idx);
            }

            // check stopping criterion
            if (kkt < epsilon) return gain;

            if (size2)
            {
                // perform step
                const double a_up = alpha(idx_up);
                const double a_down = alpha(idx_down);
                double m = grad / (2.0 * q);
                double a_up_new = a_up + m;
                double a_down_new = a_down - m;
                if (a_down_new <= 0.0)
                {
                    m = a_down;
                    a_up_new = a_up + m;
                    a_down_new = 0.0;
                }
                alpha(idx_up) = a_up_new;
                alpha(idx_down) = a_down_new;
                mu(idx_up) += m;
                mu(idx_down) -= m;

                // update gradient and total gain
                const double dg = m * q;
                const double dgc = dg / m_classes;
                gradient(idx_up) -= dg;
                gradient(idx_down) += dg;
                gain += m * (grad - (dg - dgc));
            }
            else
            {
                // perform step
                const double a = alpha(idx);
                const double a_sum = alpha(m_classes);
                double m = grad / qq;
                double a_new = a + m;
                double a_sum_new = a_sum + m;
                if (a_new <= 0.0)
                {
                    m = -a;
                    a_new = 0.0;
                    a_sum_new = a_sum + m;
                }
                else if (a_sum_new >= C)
                {
                    m = C - a_sum;
                    a_sum_new = C;
                    a_new = a + m;
                }
                alpha(idx) = a_new;
                alpha(m_classes) = a_sum_new;
                mu(idx) += m;

                // update gradient and total gain
                const double dg = m * q;
                const double dgc = dg / m_classes;
                for (std::size_t c=0; c<m_classes; c++) gradient(c) += dgc;
                gradient(idx) -= dg;
                gain += m * (grad - 0.5 * (dg - dgc));
            }
        }

        return gain;
    }

protected:
    using QpMcLinear<InputT>::add_scaled;
    using QpMcLinear<InputT>::m_data;
    using QpMcLinear<InputT>::m_classes;
};


/// \brief Solver for the multi-class SVM with absolute margin and total maximum loss.
template <class InputT>
class QpMcLinearATM : public QpMcLinear<InputT>
{
public:
    typedef LabeledData<InputT, unsigned int> DatasetType;

    /// \brief Constructor
    QpMcLinearATM(
            const DatasetType& dataset,
            std::size_t dim,
            std::size_t classes)
    : QpMcLinear<InputT>(dataset, dim, classes)
    { }

protected:
    /// \brief Compute the gradient from the inner products of the weight vectors with the current sample.
    virtual double calcGradient(RealVector& gradient, RealVector wx, blas::dense_vector_adaptor<double const> const& alpha, double C, unsigned int y)
    {
        if (alpha(m_classes) < C)
        {
            double violation = 0.0;
            for (std::size_t c=0; c<m_classes; c++)
            {
                const double g = (c == y) ? 1.0 - wx(y) : 1.0 + wx(c);
                gradient(c) = g;
                if (g > violation) violation = g;
                else if (-g > violation && alpha(c) > 0.0) violation = -g;
            }
            return violation;
        }
        else
        {
            double kkt_up = 0.0, kkt_down = 1e100;
            for (std::size_t c=0; c<m_classes; c++)
            {
                const double g = (c == y) ? 1.0 - wx(y) : 1.0 + wx(c);
                gradient(c) = g;
                if (g > kkt_up && alpha(c) < C) kkt_up = g;
                if (g < kkt_down && alpha(c) > 0.0) kkt_down = g;
            }
            return std::max(0.0, kkt_up - kkt_down);
        }
    }

    /// \brief Update the weight vectors (primal variables) after a step on the dual variables.
    virtual void updateWeightVectors(RealMatrix& w, RealVector const& mu, std::size_t index)
    {
        unsigned int y = m_data[index].label;
        double mean = -2.0 * mu(y);
        for (std::size_t c=0; c<m_classes; c++) mean += mu(c);
        mean /= (double)m_classes;
        RealVector step(m_classes);
        for (std::size_t c=0; c<m_classes; c++) step(c) = (c == y) ? (mu(c) + mean) : (mean - mu(c));
        add_scaled(w, step, m_data[index].input);
    }

    /// \brief Solve the sub-problem posed by a single training example.
    virtual double solveSub(double epsilon, RealVector& gradient, double q, double C, unsigned int y, blas::dense_vector_adaptor<double>& alpha, RealVector& mu)
    {
        const double ood = 1.0 / m_classes;
        const double qq = (1.0 - ood) * q;
        double gain = 0.0;

        // SMO loop
        std::size_t iter, maxiter = 10 * m_classes;
        for (iter=0; iter<maxiter; iter++)
        {
            // select working set
            std::size_t idx = 0;
            std::size_t idx_up = 0, idx_down = 0;
            bool size2 = false;
            double kkt = 0.0;
            double grad = 0.0;
            if (alpha(m_classes) == C)
            {
                double kkt_up = -1e100, kkt_down = 1e100;
                for (std::size_t c=0; c<m_classes; c++)
                {
                    const double g = gradient(c);
                    const double a = alpha(c);
                    if (g > kkt_up && a < C) { kkt_up = g; idx_up = c; }
                    if (g < kkt_down && a > 0.0) { kkt_down = g; idx_down = c; }
                }

                if (kkt_up <= 0.0)
                {
                    idx = idx_down;
                    grad = kkt_down;
                    kkt = -kkt_down;
                }
                else
                {
                    grad = kkt_up - kkt_down;
                    kkt = grad;
                    size2 = true;
                }
            }
            else
            {
                for (std::size_t c=0; c<m_classes; c++)
                {
                    const double g = gradient(c);
                    const double a = alpha(c);
                    if (g > kkt) { kkt = g; idx = c; }
                    else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
                }
                grad = gradient(idx);
            }

            // check stopping criterion
            if (kkt < epsilon) return gain;

            if (size2)
            {
                // perform step
                const double a_up = alpha(idx_up);
                const double a_down = alpha(idx_down);
                double m = grad / (2.0 * q);
                double a_up_new = a_up + m;
                double a_down_new = a_down - m;
                if (a_down_new <= 0.0)
                {
                    m = a_down;
                    a_up_new = a_up + m;
                    a_down_new = 0.0;
                }
                alpha(idx_up) = a_up_new;
                alpha(idx_down) = a_down_new;
                mu(idx_up) += m;
                mu(idx_down) -= m;

                // update gradient and total gain
                const double dg = m * q;
                const double dgc = dg / m_classes;
                if (idx_up == y)
                {
                    for (std::size_t c=0; c<m_classes; c++) gradient(c) -= dgc;
                    gradient(idx_up) -= dg - 2.0 * dgc;
                    gradient(idx_down) += dg;
                }
                else if (idx_down == y)
                {
                    gradient(idx_up) -= dg;
                    gradient(idx_down) += dg - 2.0 * dgc;
                }
                else
                {
                    gradient(idx_up) -= dg;
                    gradient(idx_down) += dg;
                }
                gain += m * (grad - (dg - dgc));
            }
            else
            {
                // perform step
                const double a = alpha(idx);
                const double a_sum = alpha(m_classes);
                double m = grad / qq;
                double a_new = a + m;
                double a_sum_new = a_sum + m;
                if (a_new <= 0.0)
                {
                    m = -a;
                    a_new = 0.0;
                    a_sum_new = a_sum + m;
                }
                else if (a_sum_new >= C)
                {
                    m = C - a_sum;
                    a_sum_new = C;
                    a_new = a + m;
                }
                alpha(idx) = a_new;
                alpha(m_classes) = a_sum_new;
                mu(idx) += m;

                // update gradient and total gain
                const double dg = m * q;
                const double dgc = dg / m_classes;
                if (idx == y)
                {
                    for (std::size_t c=0; c<m_classes; c++) gradient(c) -= dgc;
                    gradient(idx) -= dg - 2.0 * dgc;
                }
                else
                {
                    for (std::size_t c=0; c<m_classes; c++) gradient(c) += (c == y) ? -dgc : dgc;
                    gradient(idx) -= dg;
                }
                gain += m * (grad - 0.5 * (dg - dgc));
            }
        }

        return gain;
    }

protected:
    using QpMcLinear<InputT>::add_scaled;
    using QpMcLinear<InputT>::m_data;
    using QpMcLinear<InputT>::m_classes;
};


/// \brief Solver for the "reinforced" multi-class SVM.
template <class InputT>
class QpMcLinearReinforced : public QpMcLinear<InputT>
{
public:
    typedef LabeledData<InputT, unsigned int> DatasetType;

    /// \brief Constructor
    QpMcLinearReinforced(
            const DatasetType& dataset,
            std::size_t dim,
            std::size_t classes)
    : QpMcLinear<InputT>(dataset, dim, classes)
    { }

protected:
    /// \brief Compute the gradient from the inner products of the weight vectors with the current sample.
    virtual double calcGradient(RealVector& gradient, RealVector wx, blas::dense_vector_adaptor<double const> const& alpha, double C, unsigned int y)
    {
        double violation = 0.0;
        for (std::size_t c=0; c<m_classes; c++)
        {
            const double g = (c == y) ? (m_classes - 1.0) - wx(y) : 1.0 + wx(c);
            gradient(c) = g;
            if (g > violation && alpha(c) < C) violation = g;
            else if (-g > violation && alpha(c) > 0.0) violation = -g;
        }
        return violation;
    }

    /// \brief Update the weight vectors (primal variables) after a step on the dual variables.
    virtual void updateWeightVectors(RealMatrix& w, RealVector const& mu, std::size_t index)
    {
        unsigned int y = m_data[index].label;
        double mean = -2.0 * mu(y);
        for (std::size_t c=0; c<m_classes; c++) mean += mu(c);
        mean /= (double)m_classes;
        RealVector step(m_classes);
        for (std::size_t c=0; c<m_classes; c++) step(c) = (c == y) ? (mu(c) + mean) : (mean - mu(c));
        add_scaled(w, step, m_data[index].input);
    }

    /// \brief Solve the sub-problem posed by a single training example.
    virtual double solveSub(double epsilon, RealVector& gradient, double q, double C, unsigned int y, blas::dense_vector_adaptor<double>& alpha, RealVector& mu)
    {
        const double ood = 1.0 / m_classes;
        const double qq = (1.0 - ood) * q;
        double gain = 0.0;

        // SMO loop
        std::size_t iter, maxiter = 10 * m_classes;
        for (iter=0; iter<maxiter; iter++)
        {
            // select working set
            std::size_t idx = 0;
            double kkt = 0.0;
            for (std::size_t c=0; c<m_classes; c++)
            {
                const double g = gradient(c);
                const double a = alpha(c);
                if (g > kkt && a < C) { kkt = g; idx = c; }
                else if (-g > kkt && a > 0.0) { kkt = -g; idx = c; }
            }

            // check stopping criterion
            if (kkt < epsilon) break;

            // perform step
            const double a = alpha(idx);
            const double g = gradient(idx);
            double m = g / qq;
            double a_new = a + m;
            if (a_new <= 0.0)
            {
                m = -a;
                a_new = 0.0;
            }
            else if (a_new >= C)
            {
                m = C - a;
                a_new = C;
            }
            alpha(idx) = a_new;
            mu(idx) += m;

            // update gradient and total gain
            const double dg = m * q;
            const double dgc = dg / m_classes;
            if (idx == y)
            {
                for (std::size_t c=0; c<m_classes; c++) gradient(c) -= dgc;
                gradient(idx) -= dg - 2.0 * dgc;
            }
            else
            {
                for (std::size_t c=0; c<m_classes; c++) gradient(c) += (c == y) ? -dgc : dgc;
                gradient(idx) -= dg;
            }

            gain += m * (g - 0.5 * (dg - dgc));
        }

        return gain;
    }

protected:
    using QpMcLinear<InputT>::add_scaled;
    using QpMcLinear<InputT>::m_data;
    using QpMcLinear<InputT>::m_classes;
};


}
#endif