PolynomialKernel.h
//===========================================================================
/*!
 *
 *
 * \brief Polynomial kernel.
 *
 *
 *
 * \author T.Glasmachers
 * \date 2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_KERNELS_POLYNOMIAL_KERNEL_H
#define SHARK_MODELS_KERNELS_POLYNOMIAL_KERNEL_H

#include <shark/Models/Kernels/AbstractKernelFunction.h>

namespace shark {


/// \brief Polynomial kernel.
///
/// \par
/// The polynomial kernel is defined as
/// \f$ \left( \left\langle x_1, x_2 \right\rangle + b \right)^n \f$
/// with degree \f$ n \in \mathbb{N} \f$ and non-negative offset
/// \f$ b \geq 0 \f$. For n = 1 and b = 0 it matches the linear kernel
/// (standard inner product).
/// \ingroup kernels
template<class InputType = RealVector>
class PolynomialKernel : public AbstractKernelFunction<InputType>
{
private:
	typedef AbstractKernelFunction<InputType> base_type;

	struct InternalState: public State{
		RealMatrix base;//stores the offset inner products <x1_i, x2_j> + b
		RealMatrix exponentedProd;//pow(base, m_degree)

		void resize(std::size_t sizeX1, std::size_t sizeX2){
			base.resize(sizeX1, sizeX2);
			exponentedProd.resize(sizeX1, sizeX2);
		}
	};
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::ConstInputReference ConstInputReference;
	typedef typename base_type::ConstBatchInputReference ConstBatchInputReference;

	/// Constructor.
	///
	/// \param degree exponent of the polynomial
	/// \param offset constant added to the standard inner product
	/// \param degree_is_parameter should the degree be a regular model parameter? If yes, the kernel is not differentiable w.r.t. its parameters.
	/// \param unconstrained should the offset internally be represented as the exponential of the externally visible parameter?
	PolynomialKernel( unsigned int degree = 2, double offset = 0.0, bool degree_is_parameter = true, bool unconstrained = false )
	: m_degree( degree ),
	  m_offset( offset ),
	  m_degreeIsParam( degree_is_parameter ),
	  m_unconstrained( unconstrained ) {
		SHARK_RUNTIME_CHECK(degree > 0, "[PolynomialKernel::PolynomialKernel] degree must be positive");
		this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
		this->m_features |= base_type::SUPPORTS_VARIABLE_INPUT_SIZE;
		if ( !m_degreeIsParam )
			this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "PolynomialKernel"; }

	void setDegree( unsigned int deg ) {
		RANGE_CHECK( deg > 0 );
		SHARK_RUNTIME_CHECK( !m_degreeIsParam, "[PolynomialKernel::setDegree] Please use setParameterVector when the degree is a parameter.");
		m_degree = deg;
	}

	unsigned int degree() const {
		return m_degree;
	}

	RealVector parameterVector() const {
		if ( m_degreeIsParam ) {
			RealVector ret(2);
			ret(0) = m_degree;
			if ( m_unconstrained )
				ret(1) = std::log( m_offset );
			else
				ret(1) = m_offset;
			return ret;
		} else {
			RealVector ret(1);
			if ( m_unconstrained )
				ret(0) = std::log( m_offset );
			else
				ret(0) = m_offset;
			return ret;
		}
	}

	void setParameterVector(RealVector const& newParameters) {
		if ( m_degreeIsParam ) {
			SIZE_CHECK(newParameters.size() == 2);
			SHARK_ASSERT(std::abs((unsigned int)newParameters(0) - newParameters(0)) <= 2*std::numeric_limits<double>::epsilon());
			RANGE_CHECK(newParameters(0) >= 1.0);
			m_degree = (unsigned int)newParameters(0);
			if ( m_unconstrained ) {
				m_offset = std::exp(newParameters(1));
			} else {
				RANGE_CHECK(newParameters(1) >= 0.0);
				m_offset = newParameters(1);
			}
		} else {
			SIZE_CHECK(newParameters.size() == 1);
			if ( m_unconstrained ) {
				m_offset = std::exp(newParameters(0));
			} else {
				RANGE_CHECK(newParameters(0) >= 0.0);
				m_offset = newParameters(0);
			}
		}
	}

	std::size_t numberOfParameters() const {
		if ( m_degreeIsParam )
			return 2;
		else
			return 1;
	}
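	// Illustrative note: the external parameter vector is (degree, offset) when
	// the degree is a model parameter, and just (offset) otherwise; with
	// unconstrained = true the offset entry holds log(b) instead of b, e.g.
	//   PolynomialKernel<> k1(3, 0.5);               // parameterVector() == (3, 0.5)
	//   PolynomialKernel<> k2(3, 0.5, false, true);  // parameterVector() == (log 0.5)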

	///\brief creates the internal state of the kernel
	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new InternalState());
	}

	/////////////////////////EVALUATION//////////////////////////////

	/// \f$ k(x_1, x_2) = \left( \langle x_1, x_2 \rangle + b \right)^n \f$
	double eval(ConstInputReference x1, ConstInputReference x2) const{
		SIZE_CHECK(x1.size() == x2.size());
		double base = inner_prod(x1, x2) + m_offset;
		return std::pow(base, m_degree);
	}

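	/// \brief Batch evaluation: \f$ result_{ij} = \left( \langle x^1_i, x^2_j \rangle + b \right)^n \f$,
	/// computed for all row pairs of the two batches with a single matrix-matrix product.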
	void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result) const {
		SIZE_CHECK(batchX1.size2() == batchX2.size2());
		std::size_t sizeX1 = batchX1.size1();
		std::size_t sizeX2 = batchX2.size1();
		result.resize(sizeX1, sizeX2);

		//calculate the inner product
		noalias(result) = prod(batchX1, trans(batchX2));
		result += m_offset;
		//now do exponentiation
		if(m_degree != 1)
			noalias(result) = pow(result, m_degree);
	}

	void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const{
		SIZE_CHECK(batchX1.size2() == batchX2.size2());

		std::size_t sizeX1 = batchX1.size1();
		std::size_t sizeX2 = batchX2.size1();

		InternalState& s = state.toState<InternalState>();
		s.resize(sizeX1, sizeX2);
		result.resize(sizeX1, sizeX2);

		//calculate the inner product
		noalias(s.base) = prod(batchX1, trans(batchX2));
		s.base += m_offset;

		//now do exponentiation
		if(m_degree != 1)
			noalias(result) = pow(s.base, m_degree);
		else
			noalias(result) = s.base;
		noalias(s.exponentedProd) = result;
	}
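
	// Note: base and exponentedProd are cached in the state so that the derivative
	// functions below can reuse them instead of recomputing the expensive pow.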

	/////////////////////DERIVATIVES////////////////////////////////////

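	/// Weighted derivative of the kernel w.r.t. its offset parameter,
	/// \f$ \frac{\partial k(x_1, x_2)}{\partial b} = n \cdot \left( \langle x_1, x_2 \rangle + b \right)^{n-1} \f$,
	/// accumulated over all pairs of batch elements with the given coefficients.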
	void weightedParameterDerivative(
		ConstBatchInputReference batchX1,
		ConstBatchInputReference batchX2,
		RealMatrix const& coefficients,
		State const& state,
		RealVector& gradient
	) const{

		std::size_t sizeX1 = batchX1.size1();
		std::size_t sizeX2 = batchX2.size1();
		gradient.resize(1);
		InternalState const& s = state.toState<InternalState>();

		//internal checks
		SIZE_CHECK(batchX1.size2() == batchX2.size2());
		SIZE_CHECK(s.base.size1() == sizeX1);
		SIZE_CHECK(s.base.size2() == sizeX2);
		SIZE_CHECK(s.exponentedProd.size1() == sizeX1);
		SIZE_CHECK(s.exponentedProd.size2() == sizeX2);


		//m_degree == 1 is easy
		if(m_degree == 1){//result_ij/base_ij = 1
			gradient(0) = sum(coefficients);
			if ( m_unconstrained )
				gradient(0) *= m_offset;
			return;
		}

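		//general case: dk/db = n * (<x1_i, x2_j> + b)^(n-1). The state caches
		//exponentedProd = base^n, so base^(n-1) is the elementwise quotient
		//exponentedProd / base; safe_div returns 0 where base is 0. With the
		//unconstrained encoding b = exp(p), the chain rule adds a factor db/dp = b.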
		gradient(0) = m_degree * sum(safe_div(s.exponentedProd, s.base, 0.0) * coefficients);
		if ( m_unconstrained )
			gradient(0) *= m_offset;
	}

	/// \f$ k(x_1, x_2) = \left( \langle x_1, x_2 \rangle + b \right)^n \f$
	/// <br/>
	/// \f$ \frac{\partial k(x_1, x_2)}{\partial x_1} = \left[ n \cdot \left( \langle x_1, x_2 \rangle + b \right)^{n-1} \right] \cdot x_2 \f$
	void weightedInputDerivative(
		ConstBatchInputReference batchX1,
		ConstBatchInputReference batchX2,
		RealMatrix const& coefficientsX2,
		State const& state,
		BatchInputType& gradient
	) const{

		std::size_t sizeX1 = batchX1.size1();
		std::size_t sizeX2 = batchX2.size1();
		gradient.resize(sizeX1, batchX1.size2());

		InternalState const& s = state.toState<InternalState>();

		//internal checks
		SIZE_CHECK(batchX1.size2() == batchX2.size2());
		SIZE_CHECK(s.base.size1() == sizeX1);
		SIZE_CHECK(s.base.size2() == sizeX2);
		SIZE_CHECK(s.exponentedProd.size1() == sizeX1);
		SIZE_CHECK(s.exponentedProd.size2() == sizeX2);
		SIZE_CHECK(coefficientsX2.size1() == sizeX1);
		SIZE_CHECK(coefficientsX2.size2() == sizeX2);


		//again m_degree == 1 is easy: the i-th row of the gradient
		//is just sum_j c_ij * x2_j
		if(m_degree == 1){
			noalias(gradient) = prod(coefficientsX2, batchX2);
			return;
		}

		//first calculate weights(i,j) = coefficients(i,j) * exponentedProd(i,j) / base(i,j),
		//taking the usual division by 0 into account via safe_div
		RealMatrix weights = coefficientsX2 * safe_div(s.exponentedProd, s.base, 0.0);
		//the derivative for input i of batch x1 is
		//g_i = sum_j n * weights(i,j) * x2_j,
		//so summing over j is a matrix-matrix product
		noalias(gradient) = m_degree * prod(weights, batchX2);
	}

	void read(InArchive& ar){
		ar >> m_degree;
		ar >> m_offset;
		ar >> m_degreeIsParam;
		ar >> m_unconstrained;
	}

	void write(OutArchive& ar) const{
		ar << m_degree;
		ar << m_offset;
		ar << m_degreeIsParam;
		ar << m_unconstrained;
	}

protected:
	unsigned int m_degree; ///< exponent n
	double m_offset; ///< offset b
	bool m_degreeIsParam; ///< is the degree a model parameter?
	bool m_unconstrained; ///< is the offset internally represented as the exponential of the parameter?
};

typedef PolynomialKernel<> DensePolynomialKernel;
typedef PolynomialKernel<CompressedRealVector> CompressedPolynomialKernel;


}
#endif