// MultiTaskKernel.h
//===========================================================================
/*!
 *
 *
 * \brief Special kernel classes for multi-task and transfer learning.
 *
 *
 *
 * \author T. Glasmachers, O.Krause
 * \date 2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
34
#ifndef SHARK_MODELS_KERNELS_MULTITASKKERNEL_H
#define SHARK_MODELS_KERNELS_MULTITASKKERNEL_H

#include <shark/Models/Kernels/DiscreteKernel.h>
#include <shark/Models/Kernels/ProductKernel.h>
#include <shark/Data/Dataset.h>
#include "Impl/MklKernelBase.h"
42
43namespace shark {
44
45///
46/// \brief Aggregation of input data and task index.
47///
48/// \par
49/// Generic data structure for augmenting arbitrary data
50/// with an integer. This integer is typically used as a
51/// task identifier in multi-task and transfer learning.
52/// \ingroup kernels
53template <class InputTypeT>
55{
56 typedef InputTypeT InputType;
57 /// \brief Default constructor.
60
61 /// \brief Construction from an input and a task index
62 MultiTaskSample(InputType const& i, std::size_t t)
63 : input(i), task(t)
64 { }
65
66 void read(InArchive& ar){
67 ar >> input;
68 ar >> task;
69 }
70
71 void write(OutArchive& ar) const{
72 ar << input;
73 ar << task;
74 }
75
76 InputType input; ///< input data
77 std::size_t task; ///< task index
78
79};
80}
81
82#ifndef DOXYGEN_SHOULD_SKIP_THIS
83
84 BOOST_FUSION_ADAPT_TPL_STRUCT(
85 (InputType),
87 (InputType, input)(std::size_t, task)
88 )
89
90namespace shark {
91template<class InputType>
92struct Batch< MultiTaskSample<InputType> >{
94 MultiTaskSample<InputType>,
95 (InputType, input)(std::size_t, task)
96 )
97};
98}
99
100#endif /* DOXYGEN_SHOULD_SKIP_THIS */
101namespace shark {
102
103///
104/// \brief Special "Gaussian-like" kernel function on tasks.
105///
106/// \par
107/// See<br/>
108/// Learning Marginal Predictors: Transfer to an Unlabeled Task.
109/// G. Blanchard, G. Lee, C. Scott.
110///
111/// \par
112/// This class computes a Gaussian kernel based on the distance
113/// of empirical distributions in feature space induced by yet
114/// another kernel. This is useful for multi-task and transfer
115/// learning. It reduces the definition of a kernel on tasks to
116/// that of a kernel on inputs, plus a single bandwidth parameter
117/// for the Gaussian kernel of distributions.
118///
119/// \par
120/// Given unlabaled data \f$ x_i, t_i \f$ where the x-component
121/// is an input and the t-component is a task index, the kernel
122/// on tasks t and t' is defined as
123/// \f[
124/// k(t, t') = \exp \left( -\gamma \cdot \left\| \frac{1}{\ell_{t}\ell{t'}} \sum_{i | t_i = t}\sum_{j | t_j = t'} k'(x_i, x_j) \right\|^2 \right)
125/// \f]
126/// where k' is an arbitrary kernel on inputs.
127/// \ingroup kernels
128template <class InputTypeT >
130{
131private:
133public:
134 typedef InputTypeT InputType;
137
138 /// \brief Construction of a Gaussian kernel on tasks.
139 ///
140 /// \param data unlabeled data from multiple tasks
141 /// \param tasks number of tasks in the problem
142 /// \param inputkernel kernel on inputs based on which task similarity is defined
143 /// \param gamma Gaussian bandwidth parameter (also refer to the member functions setGamma and setSigma).
145 Data<MultiTaskSampleType> const& data,
146 std::size_t tasks,
147 KernelType& inputkernel,
148 double gamma)
149 : DiscreteKernel(RealMatrix(tasks, tasks,0.0))
150 , m_data(data)
151 , mpe_inputKernel(&inputkernel)
152 , m_gamma(gamma){
154 }
155
156 /// \brief From INameable: return the class name.
157 std::string name() const
158 { return "GaussianTaskKernel"; }
159
160 RealVector parameterVector() const{
162 }
163
164 void setParameterVector(RealVector const& newParameters){
165 std::size_t kParams = mpe_inputKernel->numberOfParameters();
166 mpe_inputKernel->setParameterVector(subrange(newParameters,0,kParams));
167 m_gamma = newParameters.back();
169 }
170
171 std::size_t numberOfParameters() const{
173 }
174
175 std::size_t numberOfTasks() const
176 { return size(); }
177
178 /// \brief Kernel bandwidth parameter.
179 double gamma() const
180 { return m_gamma; }
181
182 /// \brief Kernel width parameter, equivalent to the bandwidth parameter.
183 ///
184 /// The bandwidth gamma and the width sigma are connected: \f$ gamma = 1 / (2 \cdot sigma^2) \f$.
185 double sigma() const
186 { return (1.0 / std::sqrt(2 * m_gamma)); }
187
188 // \brief Set the kernel bandwidth parameter.
189 void setGamma(double gamma)
190 {
191 SHARK_ASSERT(gamma > 0.0);
192 m_gamma = gamma;
193 }
194
195 /// \brief Set the kernel width (equivalent to setting the bandwidth).
196 ///
197 /// The bandwidth gamma and the width sigma are connected: \f$ gamma = 1 / (2 \cdot sigma^2) \f$.
198 void setWidth(double sigma)
199 {
200 SHARK_ASSERT(sigma > 0.0);
201 m_gamma = 1.0 / (2.0 * sigma * sigma);
202 }
203
204 /// From ISerializable.
205 void read(InArchive& ar)
206 {
207 base_type::read(ar);
208 ar >> m_gamma;
209 }
210
211 /// From ISerializable.
212 void write(OutArchive& ar) const
213 {
215 ar << m_gamma;
216 }
217
218protected:
219
220 /// \brief Compute the Gram matrix of the task kernel.
221 ///
222 /// \par
223 /// Here is the real meat. This function implements the
224 /// kernel function defined in<br/>
225 /// Learning Marginal Predictors: Transfer to an Unlabeled Task.
226 /// G. Blanchard, G. Lee, C. Scott.
227 ///
228 /// \par
229 /// In a first step the function computes the inner products
230 /// of the task-wise empirical distributions, represented by
231 /// their mean elements in the kernel-induced feature space.
232 /// In a second step this information is used for the computation
233 /// of squared distances between empirical distribution, which
234 /// allows for the straightforward computation of a Gaussian
235 /// kernel.
237 {
238 // count number of examples for each task
239 const std::size_t tasks = numberOfTasks();
240 std::size_t elements = m_data.numberOfElements();
241 std::vector<std::size_t> ell(tasks, 0);
242 for (std::size_t i=0; i<elements; i++)
243 ell[m_data.element(i).task]++;
244
245 // compute inner products between mean elements of empirical distributions
246 for (std::size_t i=0; i<elements; i++){
247 const std::size_t task_i = m_data.element(i).task;
248 for (std::size_t j=0; j<i; j++){
249 const std::size_t task_j = m_data.element(j).task;
250 const double k = mpe_inputKernel->eval(m_data.element(i).input, m_data.element(j).input);
251 base_type::m_matrix(task_i, task_j) += k;
252 base_type::m_matrix(task_j, task_i) += k;
253 }
254 const double k = mpe_inputKernel->eval(m_data.element(i).input, m_data.element(i).input);
255 base_type::m_matrix(task_i, task_i) += k;
256 }
257 for (std::size_t i=0; i<tasks; i++){
258 if (ell[i] == 0) continue;
259 for (std::size_t j=0; j<tasks; j++){
260 if (ell[j] == 0) continue;
261 base_type::m_matrix(i, j) /= (double)(ell[i] * ell[j]);
262 }
263 }
264
265 // compute Gaussian kernel
266 for (std::size_t i=0; i<tasks; i++)
267 {
268 const double norm2_i = base_type::m_matrix(i, i);
269 for (std::size_t j=0; j<i; j++)
270 {
271 const double norm2_j = base_type::m_matrix(j, j);
272 const double dist2 = norm2_i + norm2_j - 2.0 * base_type::m_matrix(i, j);
273 const double k = std::exp(-m_gamma * dist2);
275 }
276 }
277 for (std::size_t i=0; i<tasks; i++) base_type::m_matrix(i, i) = 1.0;
278 }
279
280
281 Data<MultiTaskSampleType > const& m_data; ///< multi-task data
282 KernelType* mpe_inputKernel; ///< kernel on inputs
283 double m_gamma; ///< bandwidth of the Gaussian task kernel
284};
285
286
287///
288/// \brief Special kernel function for multi-task and transfer learning.
289///
290/// \par
291/// This class is a convenience wrapper for the product of an
292/// input kernel and a kernel on tasks. It also encapsulates
293/// the projection from multi-task learning data (see class
294/// MultiTaskSample) to inputs and task indices.
295///
296template <class InputTypeT>
298: private detail::MklKernelBase<MultiTaskSample<InputTypeT> >
299, public ProductKernel< MultiTaskSample<InputTypeT> >
300{
301private:
302 typedef detail::MklKernelBase<MultiTaskSample<InputTypeT> > base_type1;
304public:
306 /// \brief Constructor.
307 ///
308 /// \param inputkernel kernel on inputs
309 /// \param taskkernel kernel on task indices
311 InputKernelType* inputkernel,
312 DiscreteKernel* taskkernel)
313 :base_type1(boost::fusion::make_vector(inputkernel,taskkernel))
314 ,base_type2(base_type1::makeKernelVector())
315 {}
316
317 /// \brief From INameable: return the class name.
318 std::string name() const
319 { return "MultiTaskKernel"; }
320};
321
322} // namespace shark {
323
324#endif