// NOTE(review): SOURCE is a sampled excerpt — the numeric prefixes are original-file
// line numbers and most lines between them are missing. Do not edit logic here
// without the full file. Fragments below appear to belong to a C-SVM trainer class
// (presumably Shark ML's CSvmTrainer — TODO confirm against the full header).
// Template-argument / base-class fragment: trains a KernelClassifier<InputType>
// and derives from AbstractWeightedTrainer.
75 InputType, unsigned int,
76 KernelClassifier<InputType>,
77 AbstractWeightedTrainer<KernelClassifier<InputType> >
// Constructor fragment: forwards kernel and the two regularization constants
// (negativeC/positiveC), offset and unconstrained flags to the base class;
// derivative computation defaults to off and the multiclass mode to McSvm::WW.
116 :
base_type(
kernel,negativeC, positiveC, offset, unconstrained), m_computeDerivative(false), m_McSvmType(
McSvm::
WW)
// name() fragment: returns the trainer's identifier string.
121 {
return "CSvmTrainer"; }
// setComputeBinaryDerivative-style setter fragment: toggles bias-derivative computation.
124 m_computeDerivative = compute;
// train() fragment (incomplete — interior lines missing from this excerpt).
// Warm-start check: reuses the existing expansion f only if its basis, kernel,
// and alpha dimensions (ell x 1) match the current dataset/trainer.
142 if (f.basis() == dataset.
inputs() && f.kernel() ==
base_type::m_kernel && f.alpha().size1() == ell && f.alpha().size2() == 1) {
// Binary case delegates to trainBinary; the other branch (presumably OVA
// multiclass mode — confirm in full file) delegates to trainOVA.
151 trainBinary(f,dataset);
160 trainOVA(svm,dataset);
// Multiclass training fragment (incomplete excerpt). Selects a parameter setup
// per m_McSvmType, builds the linear term, solves the QP, and folds the raw
// alphas back through nu into per-class coefficients.
165 bool sumToZero =
false;
166 bool simplex =
false;
// Dispatch on the multiclass SVM formulation; several enum cases share a
// setup routine (WW/CS pair, ADM/LLW pair, ATM/ATS/plain pair, MMR).
// The missing case labels are not visible in this excerpt.
170 switch (m_McSvmType){
174 setupMcParametersWWCS(nu,M, classes);
179 setupMcParametersWWCS(nu,M, classes);
184 setupMcParametersADMLLW(nu,M, classes);
189 setupMcParametersATMATS(nu,M, classes);
194 setupMcParametersATMATS(nu,M, classes);
199 setupMcParametersADMLLW(nu,M, classes);
204 setupMcParametersATMATS(nu,M, classes);
209 setupMcParametersMMR(nu,M, classes);
// Linear part of the QP objective: ell x M.width(), default 1.0 ...
216 RealMatrix linear(ell,M.
width(),1.0);
218 auto const& labels = dataset.
labels();
// ... except the entry at each example's own label, set to classes-1.
// NOTE(review): the loop counter i is incremented outside this excerpt.
220 for(
unsigned int y: labels.elements()){
221 linear(i, y) = classes - 1.0;
// Solution containers: raw dual variables and per-class bias.
227 RealMatrix alpha(ell,M.
width(),0.0);
228 RealVector bias(classes,0.0);
// Simplex-constrained vs box-constrained solver, chosen by the (missing)
// condition — presumably the `simplex` flag set in the switch above.
230 solveMcSimplex(sumToZero,nu,M,linear,alpha,bias,dataset);
232 solveMcBox(sumToZero,nu,M,linear,alpha,bias,dataset);
// Recombine: each example's block of raw alphas (rows of nu starting at
// alpha.size2()*y) is accumulated into a per-class sum.
238 for (std::size_t i=0; i<ell; i++)
240 unsigned int y = dataset.
element(i).label;
241 for (std::size_t c=0; c<classes; c++)
244 std::size_t r = alpha.size2() * y;
245 for (std::size_t p=0; p != alpha.size2(); p++, r++)
246 sum += nu(r, c) * alpha(i, p);
// Weighted-data train() fragment (incomplete excerpt). Mirrors the unweighted
// warm-start check, then clears the offset in the else-branch and trains binary.
262 std::size_t n = dataset.numberOfElements();
264 if (f.basis() == dataset.
inputs() && f.kernel() ==
base_type::m_kernel && f.alpha().size1() == n && f.alpha().size2() == 1) {
// No trained offset available in this path: reset to an empty vector.
267 else f.offset() = RealVector();
274 trainBinary(f, dataset);
// solveMcSimplex fragment (incomplete excerpt): solves the simplex-constrained
// multiclass QP, with a precomputed-kernel branch and a cached-kernel branch.
287 RealMatrix& alpha, RealVector& bias,
// Precomputed branch: full kernel matrix materialized up front.
298 PrecomputedMatrixType matrix(&km);
307 QpSolver<QpMcSimplexDecomp< PrecomputedMatrixType> > solver(problem);
310 alpha = problem.solution();
// Cached branch: kernel rows computed on demand and cached.
315 QpMcSimplexDecomp< CachedMatrixType> problem(matrix, M, dataset.
labels(), linear, this->C());
// Bias is solved separately via a dedicated simplex bias solver
// (invocation not visible in this excerpt).
319 BiasSolverSimplex<CachedMatrixType> biasSolver(&problem);
323 QpSolver<QpMcSimplexDecomp< CachedMatrixType> > solver(problem);
326 alpha = problem.solution();
// solveMcBox fragment (incomplete excerpt): solves the box-constrained
// multiclass QP. Structure parallels solveMcSimplex: precomputed vs cached
// kernel matrix, optional bias solver, then copy out the solution.
332 bool sumToZero, QpSparseArray<QpFloatType>
const& nu,QpSparseArray<QpFloatType>
const& M, RealMatrix
const& linear,
333 RealMatrix& alpha, RealVector& bias,
334 LabeledData<InputType, unsigned int>
const& dataset
336 typedef KernelMatrix<InputType, QpFloatType> KernelMatrixType;
337 typedef CachedMatrix< KernelMatrixType > CachedMatrixType;
338 typedef PrecomputedMatrix< KernelMatrixType > PrecomputedMatrixType;
// Precomputed-kernel branch.
344 PrecomputedMatrixType matrix(&km);
345 QpMcBoxDecomp< PrecomputedMatrixType> problem(matrix, M, dataset.labels(), linear, this->C());
349 BiasSolver<PrecomputedMatrixType> biasSolver(&problem);
353 QpSolver<QpMcBoxDecomp< PrecomputedMatrixType> > solver(problem);
356 alpha = problem.solution();
// Cached-kernel branch.
361 QpMcBoxDecomp< CachedMatrixType> problem(matrix, M, dataset.labels(), linear, this->C());
365 BiasSolver<CachedMatrixType> biasSolver(&problem);
369 QpSolver<QpMcBoxDecomp< CachedMatrixType> > solver(problem);
372 alpha = problem.solution();
// trainMc fragment (incomplete excerpt): delegates multiclass training to a
// dedicated Trainer instance, copying over this trainer's sparsify/s2do
// settings before running it. The trainer's construction is not visible here.
377 template<
class Trainer>
378 void trainMc(KernelClassifier<InputType>& svm, LabeledData<InputType, unsigned int>
const& dataset){
382 trainer.sparsify() = this->
sparsify();
384 trainer.s2do() = this->
s2do();
387 trainer.train(svm,dataset);
// Builds the sparse coefficient arrays nu and M for the Weston&Watkins /
// Crammer&Singer style multiclass formulation (incomplete excerpt — many
// interior statements missing). nu has classes*(classes-1) rows over `classes`
// columns; M encodes pairwise interactions over classes-1 columns.
392 void setupMcParametersWWCS(QpSparseArray<QpFloatType>& nu,QpSparseArray<QpFloatType>& M, std::size_t classes)
const{
393 nu.resize(classes * (classes-1), classes, 2*classes*(classes-1));
// r indexes rows of nu; for each label y there are classes-1 entries
// (pp skips y itself — the skip statement is outside this excerpt).
394 for (
unsigned int r=0, y=0; y<classes; y++)
396 for (
unsigned int p=0, pp=0; p<classes-1; p++, pp++, r++)
412 M.resize(classes * (classes-1) * classes, classes-1, 2 * classes * (classes-1) * (classes-1));
413 for (
unsigned int r=0, yv=0; yv<classes; yv++)
415 for (
unsigned int pv=0, ppv=0; pv<classes-1; pv++, ppv++)
// ppv enumerates classes other than yv.
417 if (ppv == yv) ppv++;
418 for (
unsigned int yw=0; yw<classes; yw++, r++)
// Default value plus +/-0.25 corrections at the columns mapped from
// ppv and yv (shifted down by one when they pass yw's own index).
421 M.setDefaultValue(r, baseM);
424 M.add(r, ppv - (ppv >= yw ? 1 : 0), baseM + (
QpFloatType)0.25);
428 M.add(r, yv - (yv >= yw ? 1 : 0), baseM - (
QpFloatType)0.25);
432 unsigned int pw = ppv - (ppv >= yw ? 1 : 0);
433 unsigned int pw2 = yv - (yv >= yw ? 1 : 0);
// Builds nu and M for the ATM/ATS-style ("all targets") multiclass
// formulation (incomplete excerpt). nu is classes*classes rows x classes
// columns, one +/-1 entry per row: +1 where the column equals the row's
// label, -1 otherwise.
450 void setupMcParametersATMATS(QpSparseArray<QpFloatType>& nu,QpSparseArray<QpFloatType>& M, std::size_t classes)
const{
451 nu.resize(classes*classes, classes, classes*classes);
452 for (
unsigned int r=0, y=0; y<classes; y++)
454 for (
unsigned int p=0; p<classes; p++, r++)
456 nu.add(r, p, (
QpFloatType)((p == y) ? 1.0 : -1.0));
460 M.resize(classes * classes * classes, classes, 2 * classes * classes * classes);
463 for (
unsigned int r=0, yv=0; yv<classes; yv++)
465 for (
unsigned int pv=0; pv<classes; pv++)
468 for (
unsigned int yw=0; yw<classes; yw++, r++)
// Default off-diagonal value sign*c_ne with signed corrections at
// columns pv and yw; `sign`, c_eq, c_ne are defined in missing lines.
470 M.setDefaultValue(r, sign * c_ne);
473 M.add(r, pv, -sign * c_eq);
477 M.add(r, pv, sign * c_eq);
478 M.add(r, yw, -sign * c_ne);
// Builds nu and M for the ADM/LLW-style multiclass formulation (incomplete
// excerpt). Same row layout as the WW/CS variant (classes*(classes-1) rows)
// but with single-entry rows in nu and a single correction per row of M.
485 void setupMcParametersADMLLW(QpSparseArray<QpFloatType>& nu,QpSparseArray<QpFloatType>& M, std::size_t classes)
const{
486 nu.resize(classes * (classes-1), classes, classes*(classes-1));
487 for (
unsigned int r=0, y=0; y<classes; y++)
489 for (
unsigned int p=0, pp=0; p<classes-1; p++, pp++, r++)
496 M.resize(classes * (classes-1) * classes, classes-1, classes * (classes-1) * (classes-1));
499 for (
unsigned int r=0, yv=0; yv<classes; yv++)
501 for (
unsigned int pv=0, ppv=0; pv<classes-1; pv++, ppv++)
// ppv enumerates classes other than yv.
503 if (ppv == yv) ppv++;
504 for (
unsigned int yw=0; yw<classes; yw++, r++)
// `mood` (the default matrix value) is defined in missing lines.
506 M.setDefaultValue(r, mood);
// NOTE(review): strict `>` here vs `>=` in setupMcParametersWWCS —
// presumably intentional per-formulation indexing; confirm in full file.
509 unsigned int pw = ppv - (ppv > yw ? 1 : 0);
// Builds nu and M for the MMR (maximum-margin-regression style) formulation
// (incomplete excerpt). Simplest layout: nu is classes x classes, and M has
// a single column with a diagonal correction `val` where yv == yw.
517 void setupMcParametersMMR(QpSparseArray<QpFloatType>& nu,QpSparseArray<QpFloatType>& M, std::size_t classes)
const{
518 nu.resize(classes, classes, classes);
519 for (
unsigned int y=0; y<classes; y++)
522 M.resize(classes * classes, 1, classes);
525 for (
unsigned int r=0, yv=0; yv<classes; yv++)
527 for (
unsigned int yw=0; yw<classes; yw++, r++)
// `mood` and `val` are defined in lines missing from this excerpt.
529 M.setDefaultValue(r, mood);
530 if (yv == yw) M.add(r, 0, val);
// One-versus-all training fragment (incomplete excerpt): trains one binary
// SVM per class on a relabeled dataset and copies each binary machine's
// alpha column (and offset, if offsets are trained) into the joint machine.
535 void trainOVA(KernelClassifier<InputType>& svm,
const LabeledData<InputType, unsigned int>& dataset){
537 svm.decisionFunction().setStructure(this->
m_kernel,dataset.inputs(),this->m_trainOffset,classes);
544 for (
unsigned int c=0; c<classes; c++)
547 KernelClassifier<InputType> binsvm;
// Binary sub-trainer inherits the cache size; sparsification is deferred
// to the joint machine (done once at the end).
553 bintrainer.setCacheSize(this->
cacheSize());
554 bintrainer.sparsify() =
false;
560 bintrainer.train(binsvm, bindata);
564 column(svm.decisionFunction().alpha(), c) = column(binsvm.decisionFunction().alpha(), 0);
566 svm.decisionFunction().offset(c) = binsvm.decisionFunction().offset(0);
// Single sparsification pass over the combined expansion.
571 svm.decisionFunction().sparsify();
// Generic binary entry point fragment (incomplete excerpt): builds a kernel
// matrix `km` (construction not visible here) and forwards to the
// matrix-taking trainBinary overload.
575 template<
class T,
class DatasetTypeT>
576 void trainBinary(KernelExpansion<T>& svm, DatasetTypeT
const& dataset){
578 trainBinary(km,svm,dataset);
// Sparse-input specialization fragment (incomplete excerpt): for
// CompressedRealVector inputs, uses the specialized GaussianKernelMatrix
// when the kernel is (presumably detected as) a Gaussian RBF kernel, and
// falls back to the generic KernelMatrix otherwise.
582 template<
class T,
class DatasetTypeT>
583 void trainBinary(KernelExpansion<CompressedRealVector>& svm, DatasetTypeT
const& dataset){
585 typedef GaussianRbfKernel<CompressedRealVector> Gaussian;
// Fast path: dedicated Gaussian kernel matrix parameterized by gamma.
588 GaussianKernelMatrix<CompressedRealVector,QpFloatType> km(
kernel->gamma(),dataset.inputs());
589 trainBinary(km,svm,dataset);
// Generic fallback for any other kernel.
592 KernelMatrix<CompressedRealVector, QpFloatType> km(*
base_type::m_kernel, dataset.inputs());
593 trainBinary(km,svm,dataset);
// Binary training on unweighted data, fragment (incomplete excerpt): wraps
// the kernel matrix as either precomputed or cached (branch condition not
// visible) and runs the shared optimize() routine.
598 template<
class Matrix,
class T>
599 void trainBinary(Matrix& km, KernelExpansion<T>& svm, LabeledData<T, unsigned int>
const& dataset){
602 PrecomputedMatrix<Matrix> matrix(&km);
604 optimize(svm,svmProblem,dataset);
608 CachedMatrix<Matrix> matrix(&km);
610 optimize(svm,svmProblem,dataset);
// Binary training on weighted data, fragment (incomplete excerpt): same
// precomputed/cached split as the unweighted overload, but builds a
// GeneralQuadraticProblem (constructor arguments in missing lines, so the
// per-example weights are presumably folded in there — confirm in full file)
// and optimizes over dataset.data().
616 template<
class Matrix,
class T>
617 void trainBinary(Matrix& km, KernelExpansion<T>& svm, WeightedLabeledData<T, unsigned int>
const& dataset){
620 PrecomputedMatrix<Matrix> matrix(&km);
621 GeneralQuadraticProblem<PrecomputedMatrix<Matrix> > svmProblem(
624 optimize(svm,svmProblem,dataset.data());
628 CachedMatrix<Matrix> matrix(&km);
629 GeneralQuadraticProblem<CachedMatrix<Matrix> > svmProblem(
632 optimize(svm,svmProblem,dataset.data());
// optimize() fragment (incomplete excerpt): runs the shrinking QP solver on
// the binary problem. Two near-identical branches are visible — one using
// SvmShrinkingProblem (equality-constrained, produces an offset via
// computeBias) and one using BoxConstrainedShrinkingProblem (no offset).
637 template<
class SVMProblemType>
638 void optimize(KernelExpansion<InputType>& svm, SVMProblemType& svmProblem, LabeledData<InputType, unsigned int>
const& dataset){
641 typedef SvmShrinkingProblem<SVMProblemType> ProblemType;
643 QpSolver< ProblemType > solver(problem);
// Regularization bounds: a single C applies to both classes; with two
// entries, reg(0) bounds the negative class and reg(1) the positive.
646 double C_minus = reg(0);
647 double C_plus = (reg.size() == 1) ? reg(0) : reg(1);
// Warm start: clamp the existing alphas into the feasible box
// [-C_minus, 0] for label 0 and [0, C_plus] otherwise.
// NOTE(review): the index i is advanced in lines missing from this excerpt.
649 for (
auto label : dataset.labels().elements()) {
650 double a = svm.alpha()(i, 0);
651 if (label == 0) a = std::max(std::min(a, 0.0), -C_minus);
652 else a = std::min(std::max(a, 0.0), C_plus);
653 svm.alpha()(i, 0) = a;
656 problem.setInitialSolution(blas::column(svm.alpha(), 0));
// Write back the solution in dataset order and derive the bias term.
658 column(svm.alpha(),0)= problem.getUnpermutedAlpha();
659 svm.offset(0) = computeBias(problem,dataset);
// Box-constrained branch (no equality constraint, hence no bias).
663 typedef BoxConstrainedShrinkingProblem<SVMProblemType> ProblemType;
665 QpSolver< ProblemType> solver(problem);
668 double C_minus = reg(0);
669 double C_plus = (reg.size() == 1) ? reg(0) : reg(1);
671 for (
auto label : dataset.labels().elements()) {
672 double a = svm.alpha()(i, 0);
673 if (label == 0) a = std::max(std::min(a, 0.0), -C_minus);
674 else a = std::min(std::max(a, 0.0), C_plus);
675 svm.alpha()(i, 0) = a;
678 problem.setInitialSolution(blas::column(svm.alpha(), 0));
680 column(svm.alpha(),0) = problem.getUnpermutedAlpha();
// Member fragment: derivative of the bias w.r.t. the hyperparameters
// (filled by computeBias when m_computeDerivative is set), and the flag
// controlling whether that derivative is computed.
684 RealVector m_db_dParams;
686 bool m_computeDerivative;
// computeBias fragment (incomplete excerpt): derives the bias term b from
// the solved problem. Visible logic: scan gradients to find bounds from
// examples at their box limits, average gradients over free variables when
// any exist, otherwise return the midpoint of [lowerBound, upperBound];
// optionally also accumulate db/dC and db/dkernel-parameters into
// m_db_dParams via kernel derivatives at the two active bound examples.
689 template<
class Problem>
690 double computeBias(Problem
const& problem, LabeledData<InputType, unsigned int>
const& dataset){
// nkp = number of kernel parameters (defined in a missing line);
// slot nkp of m_db_dParams holds the derivative w.r.t. C.
692 m_db_dParams.resize(nkp+1);
693 m_db_dParams.clear();
695 std::size_t ell = problem.dimensions();
696 if (ell == 0)
return 0.0;
// Bias bracket accumulated from examples stuck at boxMin/boxMax.
699 double lowerBound = -1e100;
700 double upperBound = 1e100;
702 std::size_t freeVars = 0;
703 std::size_t lower_i = 0;
704 std::size_t upper_i = 0;
705 for (std::size_t i=0; i<ell; i++)
707 double value = problem.gradient(i);
// At the lower box bound: gradient tightens the lower bias bound.
708 if (problem.alpha(i) == problem.boxMin(i))
710 if (value > lowerBound) {
// At the upper box bound: gradient tightens the upper bias bound.
715 else if (problem.alpha(i) == problem.boxMax(i))
717 if (value < upperBound) {
// Free support vectors exist: bias is the mean gradient over them
// (`sum` is accumulated in lines missing from this excerpt).
729 return sum / freeVars;
731 if(!m_computeDerivative)
732 return 0.5 * (lowerBound + upperBound);
// Map the bound-defining indices back to dataset order.
734 lower_i = problem.permutation(lower_i);
735 upper_i = problem.permutation(upper_i);
741 double dlower_dC = 0.0;
742 double dupper_dC = 0.0;
745 RealVector dupper_dkernel( nkp,0 );
746 RealVector dlower_dkernel( nkp,0 );
749 RealVector der(nkp );
// Single-element batches for kernel (derivative) evaluations against
// the two examples that define the bias bounds.
754 typename Batch<InputType>::type singleInput = Batch<InputType>::createBatch( dataset.element(0).input, 1 );
755 typename Batch<InputType>::type lowerInput = Batch<InputType>::createBatch( dataset.element(lower_i).input, 1 );
756 typename Batch<InputType>::type upperInput = Batch<InputType>::createBatch( dataset.element(upper_i).input, 1 );
759 RealMatrix one(1,1,1);
760 RealMatrix result(1,1);
// Accumulate derivative contributions from every non-zero alpha; the
// kernel evaluations filling `result`/`der` are in missing lines.
762 for (std::size_t i=0; i<ell; i++) {
763 double cur_alpha = problem.alpha(problem.permutation(i));
764 if ( cur_alpha != 0 ) {
765 int cur_label = ( cur_alpha>0.0 ? 1 : -1 );
769 dlower_dC += cur_label * result(0,0);
771 for ( std::size_t k=0; k<nkp; k++ ) {
772 dlower_dkernel(k) += cur_label * der(k);
776 dupper_dC += cur_label * result(0,0);
778 for ( std::size_t k=0; k<nkp; k++ ) {
779 dupper_dkernel(k) += cur_label * der(k);
// Combine: b = (lower+upper)/2, so db/dparam averages both bounds'
// derivatives (negated); kernel-parameter slots are scaled by C.
784 m_db_dParams( nkp ) = -0.5 * ( dlower_dC + dupper_dC );
785 for ( std::size_t k=0; k<nkp; k++ ) {
786 m_db_dParams(k) = -0.5 * this->
C() * ( dlower_dkernel(k) + dupper_dkernel(k) );
789 m_db_dParams( nkp ) *= this->
C();
792 return 0.5 * (lowerBound + upperBound);