AbstractKernelFunction.h
Go to the documentation of this file.
//===========================================================================
/*!
 *
 *
 * \brief abstract super class of all kernel functions
 * \file
 *
 *
 * \author T. Glasmachers, O. Krause, M. Tuma
 * \date 2010-2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
34
35#ifndef SHARK_MODELS_KERNELS_ABSTRACTKERNELFUNCTION_H
36#define SHARK_MODELS_KERNELS_ABSTRACTKERNELFUNCTION_H
37
39#include <shark/LinAlg/Base.h>
40#include <shark/Core/Flags.h>
41#include <shark/Core/State.h>
42namespace shark {
43
// Optional instrumentation: when SHARK_COUNT_KERNEL_LOOKUPS is defined at
// build time, INCREMENT_KERNEL_COUNTER(c) bumps the given counter on every
// kernel evaluation; otherwise it expands to an empty block (zero overhead).
#ifdef SHARK_COUNT_KERNEL_LOOKUPS
	#define INCREMENT_KERNEL_COUNTER( counter ) { counter++; }
#else
	#define INCREMENT_KERNEL_COUNTER( counter ) { }
#endif
49
///\defgroup kernels Kernels
///\ingroup models
///
/// A kernel is a positive definite function k(x,y), which can be understood as a generalized scalar product. Kernel methods
/// like support vector machines or gaussian processes rely on kernels.
55
56/// \brief Base class of all Kernel functions.
57///
58/// \par
59/// A (Mercer) kernel is a symmetric positive definite
60/// function of two parameters. It is (currently) used
61/// in two contexts in Shark, namely for kernel methods
62/// such as support vector machines (SVMs), and for
63/// radial basis function networks.
64///
65/// \par
66/// In Shark a kernel function class represents a parametric
67/// family of such kernel functions: The AbstractKernelFunction
68/// interface inherits the IParameterizable interface.
69/// \ingroup kernels
70template<class InputTypeT>
71class AbstractKernelFunction : public AbstractMetric<InputTypeT>
72{
73private:
76public:
77 /// \brief Input type of the Kernel.
79 /// \brief batch input type of the kernel
81 /// \brief Const references to InputType
83 /// \brief Const references to BatchInputType
85
87
88 /// enumerations of kerneland metric features (flags)
89 enum Feature {
90 HAS_FIRST_PARAMETER_DERIVATIVE = 1, ///< is the kernel differentiable w.r.t. its parameters?
91 HAS_FIRST_INPUT_DERIVATIVE = 2, ///< is the kernel differentiable w.r.t. its inputs?
92 IS_NORMALIZED = 4 , ///< does k(x, x) = 1 hold for all inputs x?
93 SUPPORTS_VARIABLE_INPUT_SIZE = 8 ///< Input arguments must have same size, but not the same size in different calls to eval
94 };
95
96 /// This statement declares the member m_features. See Core/Flags.h for details.
98
105 bool isNormalized() const{
106 return m_features & IS_NORMALIZED;
107 }
111
112 ///\brief Creates an internal state of the kernel.
113 ///
114 ///The state is needed when the derivatives are to be
115 ///calculated. Eval can store a state which is then reused to speed up
116 ///the calculations of the derivatives. This also allows eval to be
117 ///evaluated in parallel!
118 virtual boost::shared_ptr<State> createState()const
119 {
120 SHARK_RUNTIME_CHECK(!hasFirstParameterDerivative() && !hasFirstInputDerivative(), "createState must be overridden by kernels with derivatives");
121 return boost::shared_ptr<State>(new EmptyState());
122 }
123
124 ///////////////////////////////////////////SINGLE ELEMENT INTERFACE///////////////////////////////////////////
125 // By default, this is mapped to the batch case.
126
127 /// \brief Evaluates the kernel function.
128 virtual double eval(ConstInputReference x1, ConstInputReference x2) const{
129 RealMatrix res;
130 BatchInputType b1 = Traits::createBatch(x1,1);
131 BatchInputType b2 = Traits::createBatch(x2,1);
132 getBatchElement(b1,0) = x1;
133 getBatchElement(b2,0) = x2;
134 eval(b1, b2, res);
135 return res(0, 0);
136 }
137
138 /// \brief Convenience operator which evaluates the kernel function.
140 return eval(x1, x2);
141 }
142
143 //////////////////////////////////////BATCH INTERFACE///////////////////////////////////////////
144
145 /// \brief Evaluates the subset of the KernelGram matrix which is defined by X1(rows) and X2 (columns).
146 ///
147 /// The result matrix is filled in with the values result(i,j) = kernel(x1[i], x2[j]);
148 /// The State object is filled in with data used in subsequent derivative computations.
149 virtual void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const = 0;
150
151 /// \brief Evaluates the subset of the KernelGram matrix which is defined by X1(rows) and X2 (columns).
152 ///
153 /// The result matrix is filled in with the values result(i,j) = kernel(x1[i], x2[j]);
154 virtual void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result) const {
155 boost::shared_ptr<State> state = createState();
156 eval(batchX1, batchX2, result, *state);
157 }
158
159 /// \brief Evaluates the subset of the KernelGram matrix which is defined by X1(rows) and X2 (columns).
160 ///
161 /// Convenience operator.
162 /// The result matrix is filled in with the values result(i,j) = kernel(x1[i], x2[j]);
163 inline RealMatrix operator () (ConstBatchInputReference batchX1, ConstBatchInputReference batchX2) const{
164 RealMatrix result;
165 eval(batchX1, batchX2, result);
166 return result;
167 }
168
169 /// \brief Computes the gradient of the parameters as a weighted sum over the gradient of all elements of the batch.
170 ///
171 /// The default implementation throws a "not implemented" exception.
175 RealMatrix const& coefficients,
176 State const& state,
177 RealVector& gradient
178 ) const {
180 }
181
182 /// \brief Calculates the derivative of the inputs X1 (only x1!).
183 ///
184 /// The i-th row of the resulting matrix is a weighted sum of the form:
185 /// c[i,0] * k'(x1[i], x2[0]) + c[i,1] * k'(x1[i], x2[1]) + ... + c[i,n] * k'(x1[i], x2[n]).
186 ///
187 /// The default implementation throws a "not implemented" exception.
191 RealMatrix const& coefficientsX2,
192 State const& state,
193 BatchInputType& gradient
194 ) const {
196 }
197
198
199 //////////////////////////////////NORMS AND DISTANCES/////////////////////////////////
200
201 /// Computes the squared distance in the kernel induced feature space.
203 if (isNormalized()){
204 double k12 = eval(x1, x2);
205 return (2.0 - 2.0 * k12);
206 } else {
207 double k11 = eval(x1, x1);
208 double k12 = eval(x1, x2);
209 double k22 = eval(x2, x2);
210 return (k11 - 2.0 * k12 + k22);
211 }
212 }
213
215 std::size_t sizeX1 = batchSize(batchX1);
216 std::size_t sizeX2 = batchSize(batchX2);
217 RealMatrix result=(*this)(batchX1,batchX2);
218 result *= -2.0;
219 if (isNormalized()){
220 noalias(result) += 2.0;
221 } else {
222 //compute self-product
223 RealVector kx2(sizeX2);
224 for(std::size_t i = 0; i != sizeX2;++i){
225 kx2(i)=eval(getBatchElement(batchX2,i),getBatchElement(batchX2,i));
226 }
227 for(std::size_t j = 0; j != sizeX1;++j){
228 double kx1=eval(getBatchElement(batchX1,j),getBatchElement(batchX1,j));
229 noalias(row(result,j)) += kx1 + kx2;
230 }
231 }
232 return result;
233 }
234};
235
236
237}
238#endif