GaussianRbfKernel.h
//===========================================================================
/*!
 *
 *
 * \brief Radial Gaussian kernel
 *
 *
 *
 * \author T. Glasmachers, O. Krause, M. Tuma
 * \date 2010-2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_KERNELS_GAUSSIAN_RBF_KERNEL_H
#define SHARK_MODELS_KERNELS_GAUSSIAN_RBF_KERNEL_H


#include <shark/Models/Kernels/AbstractKernelFunction.h>
namespace shark{

/// \brief Gaussian radial basis function kernel.
///
/// Gaussian radial basis function kernel
/// \f$ k(x_1, x_2) = \exp(-\gamma \cdot \| x_1 - x_2 \|^2) \f$
/// with a single bandwidth parameter \f$ \gamma \f$.
/// Optionally, the parameter can be encoded as \f$ \gamma = \exp(\eta) \f$,
/// which allows for unconstrained optimization of \f$ \eta \f$.
/// \ingroup kernels
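///
/// A minimal usage sketch (illustrative only; assumes RealVector inputs and the
/// gamma-first constructor declared below):
/// \code
/// GaussianRbfKernel<RealVector> kernel(0.5);   // bandwidth gamma = 0.5
/// RealVector x1(2), x2(2);
/// x1(0) = 0.0; x1(1) = 0.0;
/// x2(0) = 1.0; x2(1) = 0.0;                    // squared distance is 1
/// double k = kernel.eval(x1, x2);              // exp(-0.5 * 1), about 0.6065
/// \endcode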
template<class InputType=RealVector>
class GaussianRbfKernel : public AbstractKernelFunction<InputType>
{
private:
    typedef AbstractKernelFunction<InputType> base_type;

    struct InternalState: public State{
        RealMatrix norm2;
        RealMatrix expNorm;

        void resize(std::size_t sizeX1, std::size_t sizeX2){
            norm2.resize(sizeX1, sizeX2);
            expNorm.resize(sizeX1, sizeX2);
        }
    };
public:
    typedef typename base_type::BatchInputType BatchInputType;
    typedef typename base_type::ConstInputReference ConstInputReference;
    typedef typename base_type::ConstBatchInputReference ConstBatchInputReference;

    GaussianRbfKernel(double gamma = 1.0, bool unconstrained = false){
        m_gamma = gamma;
        m_unconstrained = unconstrained;
        this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
        this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
    }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "GaussianRbfKernel"; }

    RealVector parameterVector() const{
        RealVector ret(1);
        if (m_unconstrained){
            ret(0) = std::log(m_gamma);
        }
        else{
            ret(0) = m_gamma;
        }
        return ret;
    }
    void setParameterVector(RealVector const& newParameters){
        SHARK_RUNTIME_CHECK(newParameters.size() == 1, "[GaussianRbfKernel::setParameterVector] invalid size of parameter vector");
        if (m_unconstrained){
            m_gamma = std::exp(newParameters(0));
        }
        else{
            SHARK_RUNTIME_CHECK(newParameters(0) > 0.0, "[GaussianRbfKernel::setParameterVector] gamma must be positive");
            m_gamma = newParameters(0);
        }
    }
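    // Example of the two encodings: with m_gamma = 1 and m_unconstrained = true,
    // parameterVector() returns the single entry log(1) = 0, and setting that parameter
    // to -1 yields m_gamma = exp(-1), about 0.368. In the constrained encoding the
    // parameter is gamma itself and must be strictly positive.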

    size_t numberOfParameters() const {
        return 1;
    }

    /// Get the bandwidth parameter value.
    double gamma() const {
        return m_gamma;
    }

    /// Return the "standard deviation" of the Gaussian.
    double sigma() const{
        return 1. / std::sqrt(2 * m_gamma);
    }

    /// Set the bandwidth parameter value.
    /// \throws shark::Exception if gamma <= 0.
    void setGamma(double gamma){
        SHARK_RUNTIME_CHECK(gamma > 0.0, "[GaussianRbfKernel::setGamma] gamma must be positive");
        m_gamma = gamma;
    }

    /// Set the "standard deviation" of the Gaussian.
    void setSigma(double sigma){
        m_gamma = 1. / (2 * sigma * sigma);
    }
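    // For example, sigma = 1 corresponds to gamma = 1/(2*1*1) = 0.5, and conversely
    // gamma = 0.5 gives sigma = 1/sqrt(2*0.5) = 1.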

    /// From ISerializable.
    void read(InArchive& ar){
        ar >> m_gamma;
        ar >> m_unconstrained;
    }

    /// From ISerializable.
    void write(OutArchive& ar) const{
        ar << m_gamma;
        ar << m_unconstrained;
    }

    ///\brief creates the internal state of the kernel
    boost::shared_ptr<State> createState()const{
        return boost::shared_ptr<State>(new InternalState());
    }

    /// \brief evaluates \f$ k(x_1,x_2)\f$
    ///
    /// Gaussian radial basis function kernel
    /// \f[ k(x_1, x_2) = \exp(-\gamma \cdot \| x_1 - x_2 \|^2) \f]
    double eval(ConstInputReference x1, ConstInputReference x2) const{
        SIZE_CHECK(x1.size() == x2.size());
        double norm2 = distanceSqr(x2, x1);
        double exponential = std::exp(-m_gamma * norm2);
        return exponential;
    }

    /// \brief evaluates \f$ k(x_1,x_2)\f$ and computes the intermediate value
    ///
    /// Gaussian radial basis function kernel
    /// \f[ k(x_1, x_2) = \exp(-\gamma \cdot \| x_1 - x_2 \|^2) \f]
    void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const{
        SIZE_CHECK(batchX1.size2() == batchX2.size2());
        std::size_t sizeX1 = batchX1.size1();
        std::size_t sizeX2 = batchX2.size1();

        //configure state memory
        InternalState& s = state.toState<InternalState>();
        s.resize(sizeX1, sizeX2);

        //calculate kernel response
        noalias(s.norm2) = distanceSqr(batchX1, batchX2);
        noalias(s.expNorm) = exp(-m_gamma * s.norm2);
        result = s.expNorm;
    }
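    // The squared distances (norm2) and kernel values (expNorm) cached in the state above
    // are reused by the weighted derivative functions further below.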

    void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result) const{
        SIZE_CHECK(batchX1.size2() == batchX2.size2());
        result = distanceSqr(batchX1, batchX2);
        noalias(result) = exp(-m_gamma * result);
    }

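    /// \brief Computes the derivative of the kernel with respect to its parameter, weighted over a batch.
    ///
    /// Uses \f$ \frac{\partial k(x_1,x_2)}{\partial \gamma} = -\| x_1 - x_2 \|^2 \cdot k(x_1,x_2) \f$,
    /// accumulated with the given coefficients; in the unconstrained encoding \f$ \gamma = \exp(\eta) \f$
    /// the chain rule contributes an additional factor \f$ \gamma \f$.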
    void weightedParameterDerivative(
        ConstBatchInputReference batchX1,
        ConstBatchInputReference batchX2,
        RealMatrix const& coefficients,
        State const& state,
        RealVector& gradient
    ) const{
        std::size_t sizeX1 = batchX1.size1();
        std::size_t sizeX2 = batchX2.size1();
        InternalState const& s = state.toState<InternalState>();

        //internal checks
        SIZE_CHECK(batchX1.size2() == batchX2.size2());
        SIZE_CHECK(s.norm2.size1() == sizeX1);
        SIZE_CHECK(s.norm2.size2() == sizeX2);
        SIZE_CHECK(s.expNorm.size1() == sizeX1);
        SIZE_CHECK(s.expNorm.size2() == sizeX2);

        gradient.resize(1);
        gradient(0) = -sum(coefficients * s.expNorm * s.norm2);
        if(m_unconstrained){
            gradient *= m_gamma;
        }
    }
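
    /// \brief Computes the derivative of the kernel with respect to its first input, weighted over a batch.
    ///
    /// Uses \f$ \frac{\partial k(x_1,x_2)}{\partial x_1} = -2 \gamma (x_1 - x_2) \cdot k(x_1,x_2) \f$,
    /// accumulated with the given coefficients for every element of the first batch.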
    void weightedInputDerivative(
        ConstBatchInputReference batchX1,
        ConstBatchInputReference batchX2,
        RealMatrix const& coefficientsX2,
        State const& state,
        BatchInputType& gradient
    ) const{
        std::size_t sizeX1 = batchX1.size1();
        std::size_t sizeX2 = batchX2.size1();
        InternalState const& s = state.toState<InternalState>();

        //internal checks
        SIZE_CHECK(batchX1.size2() == batchX2.size2());
        SIZE_CHECK(s.norm2.size1() == sizeX1);
        SIZE_CHECK(s.norm2.size2() == sizeX2);
        SIZE_CHECK(s.expNorm.size1() == sizeX1);
        SIZE_CHECK(s.expNorm.size2() == sizeX2);

        gradient.resize(sizeX1, batchX1.size2());
        RealMatrix W = coefficientsX2 * s.expNorm;
        noalias(gradient) = prod(W, batchX2);
        RealVector columnSum = sum(as_rows(W));

        for(std::size_t i = 0; i != sizeX1; ++i){
            noalias(row(gradient,i)) -= columnSum(i) * row(batchX1,i);
        }
        gradient *= 2.0 * m_gamma;
    }


    //~ /// \brief Evaluates \f$ \frac {\partial k(x_1,x_2)}{\partial \gamma}\f$ and \f$ \frac {\partial^2 k(x_1,x_2)}{\partial \gamma^2}\f$
    //~ ///
    //~ /// Gaussian radial basis function kernel
    //~ /// \f[ \frac {\partial k(x_1,x_2)}{\partial \gamma} = - \| x_1 - x_2 \|^2 \cdot k(x_1,x_2) \f]
    //~ /// \f[ \frac {\partial^2 k(x_1,x_2)}{\partial \gamma^2} = \| x_1 - x_2 \|^4 \cdot k(x_1,x_2) \f]
    //~ void parameterDerivative(ConstInputReference x1, ConstInputReference x2, Intermediate const& intermediate, RealVector& gradient, RealMatrix& hessian) const{
    //~     SIZE_CHECK(x1.size() == x2.size());
    //~     SIZE_CHECK(intermediate.size() == numberOfIntermediateValues(x1,x2));
    //~     double norm2 = intermediate[0];
    //~     double exponential = intermediate[1];

    //~     gradient.resize(1);
    //~     hessian.resize(1, 1);
    //~     if (!m_unconstrained){
    //~         gradient(0) = -exponential * norm2;
    //~         hessian(0, 0) = -gradient(0) * norm2;
    //~     }
    //~     else{
    //~         gradient(0) = -exponential * norm2 * m_gamma;
    //~         hessian(0, 0) = -gradient(0) * norm2 * m_gamma;
    //~     }
    //~ }
    //~ /// \brief Evaluates \f$ \frac {\partial k(x_1,x_2)}{\partial x_1}\f$ and \f$ \frac {\partial^2 k(x_1,x_2)}{\partial x_1^2}\f$
    //~ ///
    //~ /// Gaussian radial basis function kernel
    //~ /// \f[ \frac {\partial k(x_1,x_2)}{\partial x_1} = -2 \gamma \left( x_1 - x_2 \right)\cdot k(x_1,x_2) \f]
    //~ /// \f[ \frac {\partial^2 k(x_1,x_2)}{\partial x_1^2} = 2 \gamma \left[ -k(x_1,x_2) \cdot \mathbb{I} - \frac {\partial k(x_1,x_2)}{\partial x_1} ( x_1 - x_2 )^T\right] \f]
    //~ void inputDerivative(const InputType& x1, const InputType& x2, Intermediate const& intermediate, InputType& gradient, InputMatrixType& hessian) const{
    //~     SIZE_CHECK(x1.size() == x2.size());
    //~     SIZE_CHECK(intermediate.size() == numberOfIntermediateValues(x1,x2));
    //~     double exponential = intermediate[1];
    //~     gradient.resize(x1.size());
    //~     noalias(gradient) = (2.0 * m_gamma * exponential) * (x2 - x1);
    //~     hessian.resize(x1.size(), x1.size());
    //~     noalias(hessian) = 2*m_gamma*outer_prod(gradient,x2 - x1)
    //~                        - RealIdentityMatrix(x1.size())*2*m_gamma*exponential;
    //~ }

protected:
    double m_gamma;        ///< kernel bandwidth parameter
    bool m_unconstrained;  ///< use log storage
};

typedef GaussianRbfKernel<> DenseRbfKernel;
typedef GaussianRbfKernel<CompressedRealVector> CompressedRbfKernel;

}
#endif