WeightedSumKernel.h
//===========================================================================
/*!
 *
 *
 * \brief       Weighted sum of base kernels.
 *
 *
 *
 * \author      T. Glasmachers, O. Krause, M. Tuma
 * \date        2010, 2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_KERNELS_WEIGHTED_SUM_KERNEL_H
#define SHARK_MODELS_KERNELS_WEIGHTED_SUM_KERNEL_H


#include <shark/Models/Kernels/AbstractKernelFunction.h>

#include <boost/utility/enable_if.hpp>
namespace shark {


/// \brief Weighted sum of kernel functions
///
/// For a set of positive definite kernels \f$ k_1, \dots, k_n \f$
/// with positive coefficients \f$ w_1, \dots, w_n \f$ the sum
/// \f[ \tilde k(x_1, x_2) := \sum_{i=1}^{n} w_i \cdot k_i(x_1, x_2) \f]
/// is again a positive definite kernel function.
/// Internally, the weights are represented as
/// \f$ w_i = \exp(\xi_i) \f$
/// to allow for unconstrained optimization.
///
/// This variant of the weighted sum kernel eliminates one redundant
/// degree of freedom by fixing the first kernel weight to 1.0.
///
/// The result of the kernel evaluation is divided by the sum of the
/// kernel weights, so that in total, this amounts to fixing the sum
/// of the weights to one.
///
/// \ingroup kernels
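///
/// \par Example
/// A minimal usage sketch (sub-kernels and parameter values are chosen purely
/// for illustration; GaussianRbfKernel and LinearKernel come from Shark's
/// kernel collection and must be included separately):
/// \code
/// GaussianRbfKernel<> rbf(0.1);
/// LinearKernel<> linear;
/// std::vector<AbstractKernelFunction<RealVector>* > kernels;
/// kernels.push_back(&rbf);
/// kernels.push_back(&linear);
/// WeightedSumKernel<> kernel(kernels);  // both weights start at 1, i.e. the mean of the two kernels
/// kernel.setAdaptiveAll(true);          // also expose the sub-kernels' own parameters to the optimizer
/// \endcode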
template<class InputType=RealVector>
class WeightedSumKernel : public AbstractKernelFunction<InputType>
{
private:
	typedef AbstractKernelFunction<InputType> base_type;

	struct InternalState: public State{
		RealMatrix result;
		std::vector<RealMatrix> kernelResults;
		std::vector<boost::shared_ptr<State> > kernelStates;

		InternalState(std::size_t numSubKernels)
		:kernelResults(numSubKernels),kernelStates(numSubKernels){}

		void resize(std::size_t sizeX1, std::size_t sizeX2){
			result.resize(sizeX1, sizeX2);
			for(std::size_t i = 0; i != kernelResults.size(); ++i){
				kernelResults[i].resize(sizeX1, sizeX2);
			}
		}
	};
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::ConstInputReference ConstInputReference;
	typedef typename base_type::ConstBatchInputReference ConstBatchInputReference;

	WeightedSumKernel(std::vector<AbstractKernelFunction<InputType>* > const& base){
		SHARK_RUNTIME_CHECK( base.size() > 0, "[WeightedSumKernel::WeightedSumKernel] There should be at least one sub-kernel.");

		m_base.resize( base.size() );
		m_numParameters = m_base.size()-1;
		m_adaptWeights = true;
		for (std::size_t i=0; i != m_base.size() ; i++) {
			SHARK_ASSERT( base[i] != NULL );
			m_base[i].kernel = base[i];
			m_base[i].weight = 1.0;
			m_base[i].adaptive = false;
		}
		m_weightsum = (double)m_base.size();

		// set base class features
		bool hasFirstParameterDerivative = true;
		for ( unsigned int i=0; i<m_base.size(); i++ ){
			if ( !m_base[i].kernel->hasFirstParameterDerivative() ) {
				hasFirstParameterDerivative = false;
				break;
			}
		}
		bool hasFirstInputDerivative = true;
		for ( unsigned int i=0; i<m_base.size(); i++ ){
			if ( !m_base[i].kernel->hasFirstInputDerivative() ) {
				hasFirstInputDerivative = false;
				break;
			}
		}

		if ( hasFirstParameterDerivative )
			this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;

		if ( hasFirstInputDerivative )
			this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "WeightedSumKernel"; }

	/// Check whether the base kernel with the given index is adaptive.
	bool isAdaptive( std::size_t index ) const {
		return m_base[index].adaptive;
	}
	/// Set adaptivity of the base kernel with the given index.
	void setAdaptive( std::size_t index, bool b = true ) {
		m_base[index].adaptive = b;
		updateNumberOfParameters();
	}
	/// Set adaptivity of all base kernels.
	void setAdaptiveAll( bool b = true ) {
		for (std::size_t i=0; i!= m_base.size(); i++)
			m_base[i].adaptive = b;
		updateNumberOfParameters();
	}

	/// Get the weight of a kernel.
	double weight(std::size_t index){
		RANGE_CHECK(index < m_base.size());
		return m_base[index].weight;
	}

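	/// Set whether the kernel weights themselves (the first N-1 parameters) are
	/// adapted, i.e. exposed through the parameter vector.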
	void setAdaptiveWeights(bool b){
		m_adaptWeights = b;
	}

	/// Return the parameter vector. The first N-1 entries are the (log-encoded) kernel
	/// weights; the adaptive sub-kernels' parameters are stacked behind each other after that.
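	/// For instance, with three sub-kernels of which only the third is adaptive
	/// (having parameters \f$ \theta_1, \dots, \theta_p \f$), the layout is
	/// \f$ (\log w_2, \log w_3, \theta_1, \dots, \theta_p) \f$; the first weight
	/// \f$ w_1 \f$ is fixed to 1 and is therefore not a parameter.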
	RealVector parameterVector() const {
		std::size_t num = numberOfParameters();
		RealVector ret(num);

		std::size_t index = 0;
		for (; index != m_base.size()-1; index++){
			ret(index) = std::log(m_base[index+1].weight);
		}
		for (std::size_t i=0; i != m_base.size(); i++){
			if (m_base[i].adaptive){
				std::size_t n = m_base[i].kernel->numberOfParameters();
				subrange(ret,index,index+n) = m_base[i].kernel->parameterVector();
				index += n;
			}
		}
		return ret;
	}

	///\brief Creates the internal state of the kernel.
	boost::shared_ptr<State> createState()const{
		InternalState* state = new InternalState(m_base.size());
		for(std::size_t i = 0; i != m_base.size(); ++i){
			state->kernelStates[i]=m_base[i].kernel->createState();
		}
		return boost::shared_ptr<State>(state);
	}

	/// Set the parameter vector. The first N-1 entries are the (log-encoded) kernel
	/// weights; the adaptive sub-kernels' parameters are stacked behind each other after that.
	void setParameterVector(RealVector const& newParameters) {
		SIZE_CHECK(newParameters.size() == numberOfParameters());

		m_weightsum = 1.0;
		std::size_t index = 0;
		for (; index != m_base.size()-1; index++){
			double w = newParameters(index);
			m_base[index+1].weight = std::exp(w);
			m_weightsum += m_base[index+1].weight;
		}
		for (std::size_t i=0; i != m_base.size(); i++){
			if (m_base[i].adaptive){
				std::size_t n = m_base[i].kernel->numberOfParameters();
				m_base[i].kernel->setParameterVector(subrange(newParameters,index,index+n));
				index += n;
			}
		}
	}

	std::size_t numberOfParameters() const {
		return m_numParameters;
	}

	/// Evaluate the weighted sum kernel according to the equation
	/// \f$ k(x, y) = \frac{\sum_i \exp(w_i) k_i(x, y)}{\sum_i \exp(w_i)} \f$.
	double eval(ConstInputReference x1, ConstInputReference x2) const{
		double numerator = 0.0;
		for (std::size_t i=0; i != m_base.size(); i++){
			double result = m_base[i].kernel->eval(x1, x2);
			numerator += m_base[i].weight*result;
		}
		return numerator / m_weightsum;
	}

	/// Evaluate the kernel according to the equation
	/// \f$ k(x, y) = \frac{\sum_i \exp(w_i) k_i(x, y)}{\sum_i \exp(w_i)} \f$
	/// for two batches of inputs.
	void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result) const{
		std::size_t sizeX1 = batchSize(batchX1);
		std::size_t sizeX2 = batchSize(batchX2);
		ensure_size(result,sizeX1,sizeX2);
		result.clear();

		RealMatrix kernelResult(sizeX1,sizeX2);
		for (std::size_t i = 0; i != m_base.size(); i++){
			m_base[i].kernel->eval(batchX1, batchX2,kernelResult);
			result += m_base[i].weight*kernelResult;
		}
		result /= m_weightsum;
	}

	/// Evaluate the kernel according to the equation
	/// \f$ k(x, y) = \frac{\sum_i \exp(w_i) k_i(x, y)}{\sum_i \exp(w_i)} \f$
	/// for two batches of inputs. The intermediate sub-kernel results are
	/// stored in the state object for use by the derivative functions.
	void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const{
		std::size_t sizeX1 = batchSize(batchX1);
		std::size_t sizeX2 = batchSize(batchX2);
		ensure_size(result,sizeX1,sizeX2);
		result.clear();

		InternalState& s = state.toState<InternalState>();
		s.resize(sizeX1,sizeX2);

		for (std::size_t i=0; i != m_base.size(); i++){
			m_base[i].kernel->eval(batchX1,batchX2,s.kernelResults[i],*s.kernelStates[i]);
			result += m_base[i].weight*s.kernelResults[i];
		}
		//store the un-normalized weighted sum for the derivative computations
		s.result=result;
		result /= m_weightsum;
	}

	void weightedParameterDerivative(
		ConstBatchInputReference batchX1,
		ConstBatchInputReference batchX2,
		RealMatrix const& coefficients,
		State const& state,
		RealVector& gradient
	) const{
		ensure_size(gradient,numberOfParameters());

		std::size_t numKernels = m_base.size(); //how far the loop goes

		InternalState const& s = state.toState<InternalState>();

		double sumSquared = sqr(m_weightsum); //denominator

		//First the derivative with respect to the (log-encoded) weight parameters.
		//The first weight is fixed to 1 and is not a parameter, so it needs no gradient entry
		//(strictly, its un-normalized result would not even need to be stored).
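		//Derivation of the gradient entries computed below: with W = sum_j w_j and
		//w_i = exp(xi_i), the normalized kernel is k = (sum_j w_j k_j) / W, hence
		//  dk/dxi_i = w_i * (k_i * W - sum_j w_j k_j) / W^2 .
		//Below, summedK is the coefficient-weighted batch sum of k_i and numeratorSum
		//the coefficient-weighted batch sum of sum_j w_j k_j (stored in s.result).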
		//calculate the weighted sum over all results
		double numeratorSum = sum(coefficients * s.result);
		for (std::size_t i = 1; i != numKernels && m_adaptWeights; i++) {
			//calculate the weighted sum over all results of this kernel
			double summedK=sum(coefficients * s.kernelResults[i]);
			gradient(i-1) = m_base[i].weight * (summedK * m_weightsum - numeratorSum) / sumSquared;
		}

		std::size_t gradPos = m_adaptWeights ? numKernels-1: 0; //starting position of the sub-kernel gradient
		RealVector kernelGrad;
		for (std::size_t i=0; i != numKernels; i++) {
			if (isAdaptive(i)){
				double coeff = m_base[i].weight / m_weightsum;
				m_base[i].kernel->weightedParameterDerivative(batchX1,batchX2,coefficients,*s.kernelStates[i],kernelGrad);
				std::size_t n = kernelGrad.size();
				noalias(subrange(gradient,gradPos,gradPos+n)) = coeff * kernelGrad;
				gradPos += n;
			}
		}
	}

	/// Input derivative, calculated according to the equation:
	/// <br/>
	/// \f$ \frac{\partial k(x, y)}{\partial x} =
	/// \frac{\sum_i \exp(w_i) \frac{\partial k_i(x, y)}{\partial x}}
	/// {\sum_i \exp(w_i)} \f$
	/// and summed up over all elements of the second batch
	void weightedInputDerivative(
		ConstBatchInputReference batchX1,
		ConstBatchInputReference batchX2,
		RealMatrix const& coefficientsX2,
		State const& state,
		BatchInputType& gradient
	) const{
		SIZE_CHECK(coefficientsX2.size1() == batchSize(batchX1));
		SIZE_CHECK(coefficientsX2.size2() == batchSize(batchX2));
		weightedInputDerivativeImpl<BatchInputType>(batchX1,batchX2,coefficientsX2,state,gradient);
	}

	void read(InArchive& ar){
		for(std::size_t i = 0;i != m_base.size(); ++i ){
			ar >> m_base[i].weight;
			ar >> m_base[i].adaptive;
			ar >> *(m_base[i].kernel);
		}
		ar >> m_weightsum;
		ar >> m_numParameters;
	}
	void write(OutArchive& ar) const{
		for(std::size_t i=0;i!= m_base.size();++i){
			ar << m_base[i].weight;
			ar << m_base[i].adaptive;
			ar << const_cast<AbstractKernelFunction<InputType> const&>(*(m_base[i].kernel));
		}
		ar << m_weightsum;
		ar << m_numParameters;
	}

protected:
	/// structure describing a single base kernel
	struct tBase
	{
		AbstractKernelFunction<InputType>* kernel; ///< pointer to the base kernel object
		double weight; ///< weight in the linear combination
		bool adaptive; ///< whether the kernel's parameters are part of the WeightedSumKernel's parameter vector
	};

	void updateNumberOfParameters(){
		m_numParameters = m_adaptWeights? m_base.size()-1 : 0;
		for (std::size_t i=0; i != m_base.size(); i++)
			if (m_base[i].adaptive)
				m_numParameters += m_base[i].kernel->numberOfParameters();
	}

	//We need to choose the correct implementation at compile time: if InputType does not
	//implement the needed operations, the implementation is replaced by a safe default
	//which throws an exception. For this we use enable_if/disable_if; the method is
	//called with BatchInputType as template argument. Real implementation first.
	template <class T>
	void weightedInputDerivativeImpl(
		ConstBatchInputReference batchX1,
		ConstBatchInputReference batchX2,
		RealMatrix const& coefficientsX2,
		State const& state,
		BatchInputType& gradient,
		typename boost::enable_if<boost::is_same<T,RealMatrix > >::type* dummy = 0
	)const{
		std::size_t numKernels = m_base.size(); //how far the loop goes
		InternalState const& s = state.toState<InternalState>();

		//initialize gradient with the first kernel
		m_base[0].kernel->weightedInputDerivative(batchX1, batchX2, coefficientsX2, *s.kernelStates[0], gradient);
		gradient *= m_base[0].weight / m_weightsum;
		BatchInputType kernelGrad;
		for (std::size_t i=1; i != numKernels; i++){
			m_base[i].kernel->weightedInputDerivative(batchX1, batchX2, coefficientsX2, *s.kernelStates[i], kernelGrad);
			double coeff = m_base[i].weight / m_weightsum;
			gradient += coeff * kernelGrad;
		}
	}
	template <class T>
	void weightedInputDerivativeImpl(
		ConstBatchInputReference batchX1,
		ConstBatchInputReference batchX2,
		RealMatrix const& coefficientsX2,
		State const& state,
		BatchInputType& gradient,
		typename boost::disable_if<boost::is_same<T,RealMatrix > >::type* dummy = 0
	)const{
		throw SHARKEXCEPTION("[WeightedSumKernel::weightedInputDerivative] The used BatchInputType is not a vector type");
	}

	std::vector<tBase> m_base; ///< collection of base kernels
	double m_weightsum; ///< sum of all weights
	std::size_t m_numParameters; ///< total number of parameters
	bool m_adaptWeights; ///< whether the weights should be adapted
};

typedef WeightedSumKernel<> DenseWeightedSumKernel;
typedef WeightedSumKernel<CompressedRealVector> CompressedWeightedSumKernel;

}
#endif