// MergeBudgetMaintenanceStrategy.h
1//===========================================================================
2/*!
3 *
4 *
 * \brief Merge budget maintenance strategy
 *
 * \par
 * This is a budget maintenance strategy that adds a new vector by merging
 * a pair of budget vectors. The pair to merge is found by first
 * searching for the budget vector with the smallest alpha-coefficients
 * (measured in the 2-norm), and then finding the second one by
 * computing a certain degradation measure. This is therefore linear
 * in the size of the budget.
 *
 * \par
 * The method is an implementation of the merge strategy
 * given in Wang, Crammer, Vucetic: "Breaking the Curse of Kernelization:
 * Budgeted Stochastic Gradient Descent for Large-Scale SVM Training"
 * and owes very much to the implementation in BudgetedSVM.
20 *
21 *
22 *
23 * \author Aydin Demircioglu
24 * \date 2014
25 *
26 *
27 * \par Copyright 1995-2017 Shark Development Team
28 *
29 * <BR><HR>
30 * This file is part of Shark.
31 * <https://shark-ml.github.io/Shark/>
32 *
33 * Shark is free software: you can redistribute it and/or modify
34 * it under the terms of the GNU Lesser General Public License as published
35 * by the Free Software Foundation, either version 3 of the License, or
36 * (at your option) any later version.
37 *
38 * Shark is distributed in the hope that it will be useful,
39 * but WITHOUT ANY WARRANTY; without even the implied warranty of
40 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41 * GNU Lesser General Public License for more details.
42 *
43 * You should have received a copy of the GNU Lesser General Public License
44 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
45 *
46 */
47//===========================================================================
48
49
50#ifndef SHARK_MODELS_MERGEBUDGETMAINTENANCESTRATEGY_H
51#define SHARK_MODELS_MERGEBUDGETMAINTENANCESTRATEGY_H
52
54#include <shark/Data/Dataset.h>
55#include <shark/Data/DataView.h>
57
59
60
61namespace shark
62{
63
///
/// \brief Budget maintenance strategy that merges two vectors
///
/// \par This is a budget strategy that simply merges two budget vectors
/// in order to make space for a new one. This is done by first searching
/// for the budget vector that has the smallest \f[ ||\alpha||_2\f] coefficient -- only
/// then a second one is searched for, by inspecting the expected
/// weight degradation after merging. The vector with the smallest weight
/// degradation is the vector one should merge with the first one.
/// By this heuristic, the merging strategy has complexity \f[ \mathcal{O}(B) \f].
/// Compared with the projection strategy, merging should be faster, and still
/// obtains similar accuracy. Unfortunately, any kind of timing numbers are missing
/// in the reference paper of Wang, Crammer and Vucetic.
///
/// \par Note that in general it is unclear how two data objects should be merged,
/// e.g. strings must be merged differently than vectors. Therefore it is necessary
/// to create a specialization of this strategy for a given input type.
///
82template<class InputType>
84
85
86
///
/// \brief Budget maintenance strategy merging vectors.
///
/// \par This is a specialization of the merge budget maintenance strategy
/// that handles simple real-valued vectors. It is a nearly 1:1 adaptation of
/// the strategy presented in Wang, Crammer and Vucetic.
///
94template<>
96{
99 typedef RealVector InputType;
100
101public:
102
    /// This is the objective function we need to optimize during merging.
    /// Basically the merging strategy needs a line search to find the parameter
    /// which maximizes \f[ a \cdot k_h (x_m, x_n) + b \cdot k_{1-h}(x_m, x_n) \f]
    /// (all in the notation of Wang, Crammer and Vucetic).
    /// The coefficients a and b are given by the alpha coefficients of the
    /// corresponding support vectors \f[ x_m \f] and \f[ x_n\f]; more precisely
    /// we have \f[ a = \sum \alpha_m^{(i)}/d_i\f] and \f[b = 1 - a = \sum \alpha_n^{(i)}/d_i\f]
    /// with \f[d_i = \alpha_m^{(i)} + \alpha_n^{(i)} \f].
    ///
    struct MergingProblemFunction : public SingleObjectiveFunction
    {
        // NOTE(review): one line is elided here in this extraction (doxygen
        // line 114) -- verify against the upstream header.

        /// class name
        std::string name() const
        { return "MergingProblemFunction"; }


        /// parameters of the objective (a and b weights of the two pow-terms)
        double m_a, m_b;
        double m_k; ///< contains (in our problem) k(x_m, x_n); assumed > 0
                    ///< (e.g. a Gaussian kernel), since pow/log with a real
                    ///< exponent are used below -- TODO confirm


        /// Constructor.
        /// \param[in] a a coefficient of the formula
        /// \param[in] b b coefficient of the formula
        /// \param[in] k k coefficient of the formula, the kernel value k(x_m, x_n)
        ///
        MergingProblemFunction(double a, double b, double k)
        {
            m_a = a;
            m_b = b;
            m_k = k;
        }


        /// Number of variables; we have a one-dimensional problem here.
        std::size_t numberOfVariables()const
        {
            return 1;
        }


        /// Evaluation.
        /// \param[in] pattern vector to evaluate the function at; as we have a
        ///            1d problem, we ignore everything beyond the first component.
        /// \return function value at the point
        ///
        virtual double eval(RealVector const& pattern)const
        {
            double h = pattern(0);
            // we want to maximize, thus we minimize the negated function
            return (- (m_a * pow(m_k, (1.0 - h) * (1.0 - h)) + m_b * pow(m_k, h * h)));
        }


        /// Derivative of the function.
        /// The line-search routine makes use of the derivative, as the
        /// derivative-feature was not removed from it.
        /// \param[in] input Point to evaluate the function at
        /// \param[out] derivative Derivative at the given point
        /// \return Function value at the point
        ///
        virtual double evalDerivative(const SearchPointType & input, FirstOrderDerivative & derivative)const
        {
            double h = input(0);
            // derivative of the negated objective with respect to h
            derivative(0) = 2 * log(m_k) * (-m_a * (h - 1.0) * pow(m_k, (h - 1.0) * (h - 1.0))
                - m_b * h * pow(m_k, (h * h)));
            return eval(input);
        }
    };
176
177
178
    /// Reduce the budget by merging a pair of budget vectors.
    /// This is a helper routine. After addToModel has put the new support
    /// vector into the last slot of the budget (which was allocated one larger
    /// than the user-visible capacity), this routine does the real merging:
    /// given an index, it searches for a second index so that merging the pair
    /// is 'optimal' (smallest expected weight degradation). It then performs
    /// the merge. Afterwards the last budget slot is freed again (by setting
    /// its alpha-coefficients to zero).
    /// \param[in] model Model to work on
    /// \param[in] firstIndex The index of the first element of the pair to merge.
    ///
    virtual void reduceBudget(ModelType& model, size_t firstIndex)
    {
        size_t maxIndex = model.basis().numberOfElements();

        // compute the kernel row between the given first element and all the
        // others; should take O(B) time, as it is a row of size B
        blas::vector<float> kernelRow(maxIndex, 0.0);
        for(size_t j = 0; j < maxIndex; j++)
            kernelRow(j) = static_cast<float>( model.kernel()->eval(model.basis().element(firstIndex), model.basis().element(j)));

        // initialize the line search over the merge parameter h
        double fret(0.);
        RealVector h(1); // initial search starting point
        RealVector xi(1); // direction of search
        RealVector d(1); // derivative after line-search (not needed)

        // track the best candidate (smallest degradation) found so far
        double minDegradation = std::numeric_limits<double>::infinity();
        double minH = 0.0;
        double minAlphaMergedFirst = 0.0;
        double minAlphaMergedSecond = 0.0;
        size_t secondIndex = 0;


        // every other budget vector is a candidate for the second merge partner
        RealMatrix &alpha = model.alpha();
        for(size_t currentIndex = 0; currentIndex < maxIndex; currentIndex++)
        {
            // we do not want the vector already chosen
            if(firstIndex == currentIndex)
                continue;

            // compute the a and b coefficients of the line-search objective;
            // this is the formula between (6.7) and (6.8) in Wang, Crammer, Vucetic
            double a = 0.0;
            double b = 0.0;
            for(size_t c = 0; c < alpha.size2(); c++)
            {
                // NOTE(review): std::min clamps the divisor d to at most 1e-5,
                // so whenever the alpha sum exceeds 1e-5 the quotients a and b
                // get blown up. A division-by-zero guard would normally use
                // std::max or a conditional instead -- verify against the
                // BudgetedSVM reference implementation.
                double d = std::min(0.00001, alpha(currentIndex, c) + alpha(firstIndex, c));
                a += alpha(firstIndex, c) / d;
                b += alpha(currentIndex, c) / d;
            }

            // initialize search starting point and direction
            h(0) = 0.0;
            xi(0) = 0.5;
            double k = kernelRow(currentIndex);
            MergingProblemFunction mergingProblemFunction(a, b, k);
            fret = mergingProblemFunction.evalDerivative(h,d);
            // perform a line-search for the optimal merge parameter h
            LineSearch<RealVector> lineSearch;
            // NOTE(review): one line is elided here in this extraction (doxygen
            // line 239), presumably configuring the line-search variant --
            // verify against the upstream header.
            lineSearch.init(mergingProblemFunction);
            lineSearch(h,fret,xi,d,1.0);

            // the optimal point is now given by h; the vector that corresponds
            // to it is $z = h x_m + (1-h) x_n$ by formula (6.7).
            // NOTE(review): mergedVector is computed here but never used inside
            // this loop -- dead work carried over from the reference code.
            RealVector firstVector = model.basis().element(firstIndex);
            RealVector currentVector = model.basis().element(currentIndex);
            RealVector mergedVector = h(0) * firstVector + (1.0 - h(0)) * currentVector;

            // approximating both vectors by the merged one is another
            // minimization problem, whose optimal solution is
            // $\alpha_z^{(i)} = \alpha_m^{(i)} k(x_m, z) + \alpha_n^{(i)} k(x_n, z)$.
            // maybe KernelBasisDistance could be used here, but it is unclear
            // if at all and, if so, whether it would be faster.

            long double alphaMergedFirst = pow(k, (1.0 - h(0)) * (1.0 - h(0)));
            long double alphaMergedCurrent = pow(k, h(0) * h(0));

            // degradation is computed for each class by applying formula (6.8)
            // to each class and summing up;
            // a kernel with $k(x,x) = 1$ is assumed here
            double currentDegradation = 0.0f;
            for(size_t c = 0; c < alpha.size2(); c++)
            {
                double zAlpha = alphaMergedFirst * alpha(firstIndex, c) + alphaMergedCurrent * alpha(currentIndex, c);
                // TODO: unclear to me why this is the thing we want to compute
                currentDegradation += pow(alpha(firstIndex, c), 2) + pow(alpha(currentIndex, c), 2) +
                    2.0 * k * alpha(firstIndex, c) * alpha(currentIndex, c) - zAlpha * zAlpha;
            }

            // keep the candidate pair with the smallest degradation
            if(currentDegradation < minDegradation)
            {
                minDegradation = currentDegradation;
                minH = h(0);
                minAlphaMergedFirst = alphaMergedFirst;
                minAlphaMergedSecond = alphaMergedCurrent;
                secondIndex = currentIndex;
            }
        }

        // compute the merged vector $z = h x_m + (1-h) x_n$ for the best pair
        RealVector firstVector = model.basis().element(firstIndex);
        RealVector secondVector = model.basis().element(secondIndex);
        RealVector mergedVector = minH * firstVector + (1.0 - minH) * secondVector;

        // replace the second vector of the pair by the merged one
        model.basis().element(secondIndex) = mergedVector;

        // and update its alphas per class: the coefficients of both merge
        // partners are projected onto the merged vector
        for(size_t c = 0; c < alpha.size2(); c++)
        {
            alpha(secondIndex, c) = minAlphaMergedFirst * alpha(firstIndex, c) + minAlphaMergedSecond * alpha(secondIndex, c);
        }

        // the first index is now obsolete, so we copy the
        // last vector, which serves as a buffer, to this position
        row(alpha, firstIndex) = row(alpha, maxIndex - 1);
        model.basis().element(firstIndex) = model.basis().element(maxIndex - 1);

        // clear the buffer slot by zeroing its alphas; this finally
        // frees the vectors we merged
        row(model.alpha(), maxIndex - 1).clear();
    }
306
307
308
309 /// add a vector to the model.
310 /// this will add the given vector to the model and merge the budget so that afterwards
311 /// the budget size is kept the same. If the budget has a free entry anyway, no merging
312 /// will be performed, but instead the given vector is simply added to the budget.
313 ///
314 /// @param[in,out] model the model the strategy will work with
315 /// @param[in] alpha alphas for the new budget vector
316 /// @param[in] supportVector the vector to add to the model by applying the maintenance strategy
317 ///
318 virtual void addToModel(ModelType& model, InputType const& alpha, ElementType const& supportVector)
319 {
320
321 // find the two indicies we want to merge
322
323 // note that we have to crick ourselves, as the model has
324 // a fixed size, but actually we want to work with the model
325 // together with the new supportvector. so our budget size
326 // is one greater than the user specified and we use this
327 // last entry of the model for buffer. it will be freed again,
328 // when merging is finished.
329
330 // put the new vector into place
331 size_t maxIndex = model.basis().numberOfElements();
332 model.basis().element(maxIndex - 1) = supportVector.input;
333 row(model.alpha(), maxIndex - 1) = alpha;
334
335
336 // the first vector to merge is the one with the smallest alpha coefficient
337 // (it cannot be our new vector, because in each iteration the
338 // old weights get downscaled and the new ones get the biggest)
339 size_t firstIndex = 0;
340 double firstAlpha = 0;
341 findSmallestVector(model, firstIndex, firstAlpha);
342
343 // if the smallest vector has zero alpha,
344 // the budget is not yet filled so we can skip merging it.
345 if(firstAlpha == 0.0f)
346 {
347 // as we need the last vector to be zero, we put the new
348 // vector to that place and undo our putting-the-vector-to-back-position
349 model.basis().element(firstIndex) = supportVector.input;
350 row(model.alpha(), firstIndex) = alpha;
351
352 // enough to zero out the alpha
353 row(model.alpha(), maxIndex - 1).clear();
354
355 // ready.
356 return;
357 }
358
359 // the second one is given by searching for the best match now,
360 // taking O(B) time. we also have to provide to the findVectorToMerge
361 // function the supportVector we want to add, as we cannot, as
362 // said, just extend the model with this vector.
363 reduceBudget(model, firstIndex);
364 }
365
366
367 /// class name
368 std::string name() const
369 { return "MergeBudgetMaintenanceStrategy"; }
370
371
372protected:
373};
374
375}
376#endif