RegularizationNetworkTrainer.h
//===========================================================================
/*!
 *
 *
 * \brief Trainer for a Regularization Network or a Gaussian Process
 *
 *
 *
 *
 * \author T. Glasmachers
 * \date 2007-2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ALGORITHMS_REGULARIZATIONNETWORKTRAINER_H
#define SHARK_ALGORITHMS_REGULARIZATIONNETWORKTRAINER_H

#include <shark/Algorithms/Trainers/AbstractSvmTrainer.h>
#include <shark/Models/Kernels/KernelHelpers.h>

namespace shark {


///
/// \brief Training of a regularization network.
///
/// A regularization network is a kernel-based model for
/// regression problems. Given are data tuples
/// \f$ (x_i, y_i) \f$ with x-component denoting input and
/// y-component denoting a real-valued label (see the tutorial on
/// label conventions; the implementation uses RealVector),
/// a kernel function k(x, x') and a regularization
/// constant \f$ C > 0 \f$. Let H denote the kernel-induced
/// reproducing kernel Hilbert space of k, and let \f$ \phi \f$
/// denote the corresponding feature map.
/// Then the SVM regression function is of the form
/// \f[
///     f(x) = \langle w, \phi(x) \rangle + b
/// \f]
/// with coefficients w and b given by the (primal)
/// optimization problem
/// \f[
///     \min \frac{1}{2} \|w\|^2 + C \sum_i L(y_i, f(x_i)),
/// \f]
/// where the simple quadratic loss is employed:
/// \f[
///     L(y, f(x)) = (y - f(x))^2
/// \f]
/// Regularization networks can be interpreted as a special
/// type of support vector machine (for regression, with
/// squared loss, and thus with non-sparse weights).
///
/// Training a regularization network is identical to training a
/// Gaussian process for regression. The parameter \f$ C \f$ then
/// corresponds to the precision of the noise (denoted by \f$ \beta \f$ in
/// Bishop's textbook). The precision is the inverse of the variance
/// of the noise. The variance of the noise is denoted by
/// \f$ \sigma_n^2 \f$ in the textbook by Rasmussen and
/// Williams. Accordingly, \f$ C = 1/\sigma_n^2 \f$.
/// \ingroup supervised_trainer
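///
/// \par Usage sketch
/// The snippet below only illustrates how this trainer is typically wired
/// together; the kernel choice, bandwidth, noise variance, and dataset are
/// assumptions made for illustration, not part of this header:
/// \code
/// RegressionDataset data;                                           // assumed: filled with (x, y) pairs elsewhere
/// GaussianRbfKernel<> kernel(0.1);                                  // assumed kernel and bandwidth
/// RegularizationNetworkTrainer<RealVector> trainer(&kernel, 0.01);  // 0.01: assumed noise variance (betaInv)
/// KernelExpansion<RealVector> model;
/// trainer.train(model, data);                                       // model now represents f(x) = <w, phi(x)> + b
/// \endcode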
template <class InputType>
class RegularizationNetworkTrainer : public AbstractSvmTrainer<InputType, RealVector, KernelExpansion<InputType> >
{
public:
    typedef AbstractSvmTrainer<InputType, RealVector, KernelExpansion<InputType> > base_type;
    typedef AbstractKernelFunction<InputType> KernelType;

    /// \param kernel Kernel
    /// \param betaInv Inverse precision, i.e., the assumed noise variance; equal to the inverse of the regularization parameter C
    /// \param unconstrained Indicates exponential encoding of the regularization parameter
    RegularizationNetworkTrainer(KernelType* kernel, double betaInv, bool unconstrained = false)
    : base_type(kernel, 1.0 / betaInv, false, unconstrained)
    { }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "RegularizationNetworkTrainer"; }

    /// \brief Returns the assumed noise variance (i.e., 1/C)
    double noiseVariance() const
    { return 1.0 / this->C(); }
    /// \brief Sets the assumed noise variance (i.e., 1/C)
    void setNoiseVariance(double betaInv)
    { this->C() = 1.0 / betaInv; }

    /// \brief Returns the precision (i.e., C), the inverse of the assumed noise variance
    double precision() const
    { return this->C(); }
    /// \brief Sets the precision (i.e., C), the inverse of the assumed noise variance
    void setPrecision(double beta)
    { this->C() = beta; }
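
    // Worked example of the parameterization (values are illustrative only):
    // constructing the trainer with betaInv = 0.01 sets C = 1/0.01 = 100, so
    // noiseVariance() returns 0.01 and precision() returns 100; calling
    // setNoiseVariance(0.25) has the same effect as setPrecision(4.0).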

    void train(KernelExpansion<InputType>& svm, LabeledData<InputType, RealVector> const& dataset){
        svm.setStructure(base_type::m_kernel, dataset.inputs(), true, labelDimension(dataset)); // true: include the offset term b

        // Setup the kernel matrix
        RealMatrix M = calculateRegularizedKernelMatrix(*(this->m_kernel), dataset.inputs(), noiseVariance());
        RealMatrix V = createBatch<RealVector>(dataset.labels().elements());
        RealVector mean = sum(as_columns(V)) / V.size1();
        noalias(V) -= blas::repeat(mean, V.size1());

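        // Description of the solve below: M is the kernel Gram matrix of k(x_i, x_j)
        // with noiseVariance() = 1/C added to the diagonal, V holds the mean-centered
        // labels, and the coefficients alpha are obtained from the linear system
        // M * alpha = V; the offset b is set to the label mean.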
        // check whether the regularizer is large enough to keep the eigenvalues numerically stable
        if(noiseVariance()/max(diag(M)) < 1.e-5)
            noalias(svm.alpha()) = inv(M, blas::symm_semi_pos_def()) % V;
        else // numerically stable, so the faster plain Cholesky decomposition can be used
            noalias(svm.alpha()) = inv(M, blas::symm_pos_def()) % V;
        noalias(svm.offset()) = mean;
    }
};


// A regularization network can be interpreted as a Gaussian
// process, with the same trainer:
#define GaussianProcessTrainer RegularizationNetworkTrainer


}
#endif