// \file NormalizedKernel.h
//===========================================================================
/*!
 *
 *
 * \brief Normalization of a kernel function.
 *
 *
 *
 * \author T. Glasmachers, O. Krause, M. Tuma
 * \date 2010, 2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
34
35#ifndef SHARK_MODELS_KERNELS_NORMALIZED_KERNEL_H
36#define SHARK_MODELS_KERNELS_NORMALIZED_KERNEL_H
37
38
40
41namespace shark {
42
43
/// \brief Normalized version of a kernel function
///
/// For a positive definite kernel k, the normalized kernel
/// \f[ \tilde k(x, y) := \frac{k(x, y)}{\sqrt{k(x, x) \cdot k(y, y)}} \f]
/// is again a positive definite kernel function.
/// \ingroup kernels
50template<class InputType=RealVector>
52{
53private:
55
56 struct InternalState: public State{
57 RealMatrix kxy;
58 RealVector kxx;
59 RealVector kyy;
60
61 boost::shared_ptr<State> stateKxy;
62 std::vector<boost::shared_ptr<State> > stateKxx;
63 std::vector<boost::shared_ptr<State> > stateKyy;
64
65 void resize(std::size_t sizeX1,std::size_t sizeX2, AbstractKernelFunction<InputType> const* base){
66 kxy.resize(sizeX1,sizeX2);
67 kxx.resize(sizeX1);
68 kyy.resize(sizeX2);
69 stateKxx.resize(sizeX1);
70 stateKyy.resize(sizeX2);
71 stateKxy = base->createState();
72 for(std::size_t i = 0; i != sizeX1;++i){
73 stateKxx[i] = base->createState();
74 }
75 for(std::size_t i = 0; i != sizeX2;++i){
76 stateKyy[i] = base->createState();
77 }
78 }
79 };
80public:
84
93
94 /// \brief From INameable: return the class name.
95 std::string name() const
96 { return "NormalizedKernel<" + m_base->name() + ">"; }
97
98 RealVector parameterVector() const{
99 return m_base->parameterVector();
100 }
101
102 void setParameterVector(RealVector const& newParameters){
103 m_base->setParameterVector(newParameters);
104 }
105
106 std::size_t numberOfParameters() const{
107 return m_base->numberOfParameters();
108 }
109
110 ///\brief creates the internal state of the kernel
111 boost::shared_ptr<State> createState()const{
112 InternalState* state = new InternalState();
113 return boost::shared_ptr<State>(state);
114 }
115
116 ///evaluates \f$ k(x,y) \f$
117 ///
118 /// calculates
119 /// \f[ \tilde k(x, y) := \frac{k(x, y)}{\sqrt{k(x, x) \cdot k(y, y)}} \f]
121 double val = m_base->eval(x1, x2);
122 val /= std::sqrt(m_base->eval(x1, x1));
123 val /= std::sqrt(m_base->eval(x2, x2));
124 return val;
125 }
126
127
128 ///evaluates \f$ k(x_i,y_j) \f$ for a batch of inputs x=(x...x_n) and x=(y_1...y_m)
129 ///
130 /// calculates
131 /// \f[ \tilde k(x_i,y_j) := \frac{k(x_i,y_j)}{\sqrt{k(x_i,x_i) \cdot k(y_j, y_j)}} \f]
132 void eval(ConstBatchInputReference const& batchX1, ConstBatchInputReference const& batchX2, RealMatrix& result, State& state) const{
133 InternalState& s = state.toState<InternalState>();
134
135 std::size_t sizeX1 = batchSize(batchX1);
136 std::size_t sizeX2 = batchSize(batchX2);
137 s.resize(sizeX1,sizeX2,m_base);
138 result.resize(sizeX1,sizeX2);
139
140 m_base->eval(batchX1, batchX2,s.kxy, *s.stateKxy);
141
142
143 //possible very slow way to evaluate
144 //we need to calculate k(x_i,x_i) and k(y_j,y_j) for every element.
145 //we do it by copying the single element in a batch of size 1 and evaluating this.
146 //the following could be made easier with an interface like
147 //m_base->eval(batchX1,s.kxx,s.statekxx);
149 RealMatrix singleResult(1,1);
150 for(std::size_t i = 0; i != sizeX1;++i){
151 getBatchElement(singleBatch,0) = getBatchElement(batchX1,i);
152 m_base->eval(singleBatch,singleBatch,singleResult,*s.stateKxx[i]);
153 s.kxx[i] = singleResult(0,0);
154 }
155
156 for(std::size_t j = 0; j != sizeX2;++j){
157 getBatchElement(singleBatch,0) = getBatchElement(batchX2,j);
158 m_base->eval(singleBatch,singleBatch,singleResult,*s.stateKyy[j]);
159 s.kyy[j] = singleResult(0,0);
160 }
161 RealVector sqrtKyy=sqrt(s.kyy);
162
163 //finally calculate the result
164 //r(i,j) = k(x_i,x_j)/sqrt(k(x_i,x_i))*sqrt(k(x_j,kx_j))
165 noalias(result) = s.kxy / outer_prod(sqrt(s.kxx),sqrtKyy);
166 }
167
168 ///evaluates \f$ k(x,y) \f$ for a batch of inputs
169 ///
170 /// calculates
171 /// \f[ \tilde k(x, y) := \frac{k(x, y)}{\sqrt{k(x, x) \cdot k(y, y)}} \f]
172 void eval(ConstBatchInputReference const& batchX1, ConstBatchInputReference const& batchX2, RealMatrix& result) const{
173 std::size_t sizeX1 = batchSize(batchX1);
174 std::size_t sizeX2 = batchSize(batchX2);
175
176 m_base->eval(batchX1, batchX2,result);
177
178 RealVector sqrtKyy(sizeX2);
179 for(std::size_t j = 0; j != sizeX2;++j){
180 sqrtKyy(j) = std::sqrt(m_base->eval(getBatchElement(batchX2,j),getBatchElement(batchX2,j)));
181 }
182
183 for(std::size_t i = 0; i != sizeX1;++i){
184 double sqrtKxx = std::sqrt(m_base->eval(getBatchElement(batchX2,i),getBatchElement(batchX2,i)));
185 noalias(row(result,i)) = sqrtKxx* row(result,i) / sqrtKyy;
186 }
187 }
188
189 /// calculates the weighted derivate w.r.t. the parameters \f$ w \f$
190 ///
191 /// The derivative for a single element is calculated as follows:
192 ///\f[ \frac{\partial \tilde k_w(x, y)}{\partial w} = \frac{k_w'(x,y)}{\sqrt{k_w(x,x) k_w(y,y)}} - \frac{k_w(x,y) \left(k_w(y,y) k_w'(x,x)+k_w(x,x) k_w'(y,y)\right)}{2 (k_w(x,x) k_w(y,y))^{3/2}} \f]
193 /// where \f$ k_w'(x, y) = \partial k_w(x, y) / \partial w \f$.
195 ConstBatchInputReference const& batchX1,
196 ConstBatchInputReference const& batchX2,
197 RealMatrix const& coefficients,
198 State const& state,
199 RealVector& gradient
200 ) const{
201 ensure_size(gradient,numberOfParameters());
202 InternalState const& s = state.toState<InternalState>();
203 std::size_t sizeX1 = batchSize(batchX1);
204 std::size_t sizeX2 = batchSize(batchX2);
205
206 RealMatrix weights = coefficients / sqrt(outer_prod(s.kxx,s.kyy));
207
208 m_base->weightedParameterDerivative(batchX1,batchX2,weights,*s.stateKxy,gradient);
209
210 noalias(weights) *= s.kxy;
211 RealVector wx = sum(as_rows(weights)) / (2.0 * s.kxx);
212 RealVector wy = sum(as_columns(weights)) / (2.0 * s.kyy);
213
214 //the following mess could be made easier with an interface like
215 //m_base->weightedParameterDerivative(batchX1,wx,s.statekxx,subGradient);
216 //m_base->weightedParameterDerivative(batchX2,wy,s.statekyy,subGradient);
217 //(calculating the weighted parameter derivative of k(x_i,x_i) or (k(y_i,y_i)
218 RealVector subGradient(gradient.size());
220 RealMatrix coeff(1,1);
221 for(std::size_t i = 0; i != sizeX1; ++i){
222 getBatchElement(singleBatch,0) = getBatchElement(batchX1,i);
223 coeff(0,0) = wx(i);
224 m_base->weightedParameterDerivative(singleBatch,singleBatch,coeff,*s.stateKxx[i],subGradient);
225 gradient -= subGradient;
226 }
227 for(std::size_t j = 0; j != sizeX2; ++j){
228 getBatchElement(singleBatch,0) = getBatchElement(batchX2,j);
229 coeff(0,0) = wy(j);
230 m_base->weightedParameterDerivative(singleBatch,singleBatch,coeff,*s.stateKyy[j],subGradient);
231 gradient -= subGradient;
232 }
233 }
234
235 /// Input derivative, calculated according to the equation:
236 /// <br/>
237 /// \f$ \frac{\partial k(x, y)}{\partial x}
238 /// \frac{\sum_i \exp(w_i) \frac{\partial k_i(x, y)}{\partial x}}
239 /// {\sum_i exp(w_i)} \f$
240 /// and summed up over all elements of the second batch
242 ConstBatchInputReference const& batchX1,
243 ConstBatchInputReference const& batchX2,
244 RealMatrix const& coefficientsX2,
245 State const& state,
246 BatchInputType& gradient
247 ) const{
248 InternalState const& s = state.toState<InternalState>();
249 std::size_t sizeX1 = batchSize(batchX1);
250
251 RealMatrix weights = coefficientsX2 / sqrt(outer_prod(s.kxx,s.kyy));
252
253 m_base->weightedInputDerivative(batchX1,batchX2,weights,*s.stateKxy,gradient);
254
255 noalias(weights) *= s.kxy;
256 RealVector wx = sum(as_rows(weights))/s.kxx;
257
258 //the following mess could be made easier with an interface like
259 //m_base->weightedInputDerivative(batchX1,wx,s.statekxx,subGradient);
260 //(calculating the weighted input derivative of k(x_i,x_i)
261 RealMatrix subGradient;
263 RealMatrix coeff(1,1);
264 for(std::size_t i = 0; i != sizeX1; ++i){
265 getBatchElement(singleBatch,0) = getBatchElement(batchX1,i);
266 coeff(0,0) = wx(i);
267 m_base->weightedInputDerivative(singleBatch,singleBatch,coeff,*s.stateKxx[i],subGradient);
268 noalias(row(gradient,i)) -= row(subGradient,0);
269 }
270 }
271
272protected:
273 /// kernel to normalize
275};
276
279
280
281}
282#endif