GridSearch.h
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief GridSearch.h
6 *
7 *
8 *
9 * \author O. Krause
10 * \date 2010
11 *
12 *
13 * \par Copyright 1995-2017 Shark Development Team
14 *
15 * <BR><HR>
16 * This file is part of Shark.
17 * <https://shark-ml.github.io/Shark/>
18 *
19 * Shark is free software: you can redistribute it and/or modify
20 * it under the terms of the GNU Lesser General Public License as published
21 * by the Free Software Foundation, either version 3 of the License, or
22 * (at your option) any later version.
23 *
24 * Shark is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU Lesser General Public License for more details.
28 *
29 * You should have received a copy of the GNU Lesser General Public License
30 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31 *
32 */
33//===========================================================================
34
35#ifndef SHARK_ALGORITHMS_GRIDSEARCH_H
36#define SHARK_ALGORITHMS_GRIDSEARCH_H
37
38
40#include <shark/Core/Random.h>
41
42#include <boost/serialization/vector.hpp>
43
44
45namespace shark {
46
47///
48/// \brief Optimize by trying out a grid of configurations
49///
50/// \par
51/// The GridSearch class allows for the definition of a grid in
52/// parameter space. It does a simple one-step optimization over
53/// the grid by trying out every possible parameter combination.
54/// Please note that the computation effort grows exponentially
55/// with the number of parameters.
56///
57/// \par
58/// If you only want to try a subset of the grid, consider using
59/// the PointSearch class instead.
60/// A more sophisticated (less exhaustive) grid search variant is
61/// available with the NestedGridSearch class.
62/// \ingroup singledirect
64{
65public:
67 m_configured=false;
68 }
69
70 /// \brief From INameable: return the class name.
71 std::string name() const
72 {
73 return "GridSearch";
74 }
75
76 /// uniform initialization for all parameters
77 /// \param params number of model parameters to optimize
78 /// \param min smallest parameter value
79 /// \param max largest parameter value
80 /// \param numSections total number of values in the interval
81 void configure(size_t params, double min, double max, size_t numSections)
82 {
83 SIZE_CHECK(params>=1);
84 RANGE_CHECK(min<=max);
85 SIZE_CHECK(numSections>=1);
86 m_nodeValues.resize(params);
87 for (size_t i = 0; i < numSections; i++)
88 {
89 double section = min + i * (max - min) / (numSections - 1.0);
90 for(auto& node: m_nodeValues)
91 node.push_back(section);
92 }
93 m_configured=true;
94 }
95
96 /// individual definition for every parameter
97 /// \param min smallest value for every parameter
98 /// \param max largest value for every parameter
99 /// \param sections total number of values for every parameter
100 void configure(const std::vector<double>& min, const std::vector<double>& max, const std::vector<size_t>& sections)
101 {
102 size_t params = min.size();
103 SIZE_CHECK(sections.size() == params);
104 SIZE_CHECK(max.size() == params);
105 RANGE_CHECK(min <= max);
106
107 m_nodeValues.resize(params);
108 for (size_t dimension = 0; dimension < params; dimension++)
109 {
110 size_t numSections = sections[dimension];
111 std::vector<double>& node = m_nodeValues[dimension];
112 node.resize(numSections);
113
114 if ( numSections == 1 )
115 {
116 node[0] = (( min[dimension] + max[dimension] ) / 2.0);
117 }
118 else for (size_t section = 0; section < numSections; section++)
119 {
120 node[section] = (min[dimension] + section * (max[dimension] - min[dimension]) / (numSections - 1.0));
121 }
122 }
123 m_configured=true;
124 }
125
126
127 /// special case for 2D grid, individual definition for every parameter
128 /// \param min1 smallest value for first parameter
129 /// \param max1 largest value for first parameter
130 /// \param sections1 total number of values for first parameter
131 /// \param min2 smallest value for second parameter
132 /// \param max2 largest value for second parameter
133 /// \param sections2 total number of values for second parameter
134 void configure(double min1, double max1, size_t sections1, double min2, double max2, size_t sections2)
135 {
136 RANGE_CHECK(min1<=max1);
137 RANGE_CHECK(min2<=max2);
138 RANGE_CHECK(sections1 > 0);
139 RANGE_CHECK(sections2 > 0);
140
141 m_nodeValues.resize(2u);
142
143 if ( sections1 == 1 ) {
144 m_nodeValues[0].push_back(( min1 + max1 ) / 2.0);
145 } else {
146 for (size_t section = 0; section < sections1; section++)
147 m_nodeValues[0].push_back(min1 + section * (max1 - min1) / (sections1 - 1.0));
148 }
149
150 if ( sections2 == 1 ) {
151 m_nodeValues[1].push_back(( min2 + max2 ) / 2.0);
152 } else {
153 for (size_t section = 0; section < sections2; section++)
154 m_nodeValues[1].push_back(min2 + section * (max2 - min2) / (sections2 - 1.0));
155 }
156 }
157
158 /// special case for line search
159 /// \param min1 smallest value for first parameter
160 /// \param max1 largest value for first parameter
161 /// \param sections1 total number of values for first parameter
162 void configure(double min1, double max1, size_t sections1)
163 {
164 RANGE_CHECK(min1<=max1);
165 RANGE_CHECK(sections1 > 0);
166
167 m_nodeValues.resize(1u);
168
169 if ( sections1 == 1 ) {
170 m_nodeValues[0].push_back(( min1 + max1 ) / 2.0);
171 } else {
172 for (size_t section = 0; section < sections1; section++)
173 m_nodeValues[0].push_back(min1 + section * (max1 - min1) / (sections1 - 1.0));
174 }
175 }
176
177
178 /// uniform definition of the values to test for all parameters
179 /// \param params number of model parameters to optimize
180 /// \param values values used for every coordinate
181 void configure(size_t params, const std::vector<double>& values)
182 {
183 SIZE_CHECK(params > 0);
184 SIZE_CHECK(values.size() > 0);
185 m_nodeValues.resize(params);
186 for(auto& node: m_nodeValues)
187 node=values;
188 m_configured=true;
189 }
190
191 /// individual definition for every parameter
192 /// \param values values used. The first dimension is the parameter, the second dimension is the node.
193 void configure(const std::vector<std::vector<double> >& values)
194 {
195 SIZE_CHECK(values.size() > 0);
196 m_nodeValues = values;
197 m_configured=true;
198 }
199
200 //from ISerializable
201 virtual void read( InArchive & archive )
202 {
203 archive>>m_nodeValues;
204 archive>>m_configured;
205 archive>>m_best.point;
206 archive>>m_best.value;
207 }
208
209 virtual void write( OutArchive & archive ) const
210 {
211 archive<<m_nodeValues;
212 archive<<m_configured;
213 archive<<m_best.point;
214 archive<<m_best.value;
215 }
216
217 /*! If Gridsearch wasn't configured before calling this method, it is default constructed
218 * as a net spanning the range [-1,1] in all dimensions with 5 searchpoints (-1,-0.5,0,0.5,1).
219 * so don't forget to scale the parameter-ranges of the objective function!
220 * The startingPoint can actually be anything, only its dimension has to be correct.
221 */
222 virtual void init(ObjectiveFunctionType const& objectiveFunction, SearchPointType const& startingPoint) {
223 checkFeatures(objectiveFunction);
224
225 if(!m_configured)
226 configure(startingPoint.size(),-1,1,5);
227 SIZE_CHECK(startingPoint.size() == m_nodeValues.size());
228 m_best.point=startingPoint;
229 }
231 /*! Assign linearly progressing grid values to one certain parameter only.
232 * This is especially useful if one parameter needs special treatment
233 * \param index the index of the parameter to which grid values are assigned
234 * \param noOfSections how many grid points should be assigned to that parameter
235 * \param min smallest value for that parameter
236 * \param max largest value for that parameter */
237 void assignLinearRange(size_t index, size_t noOfSections, double min, double max)
238 {
239 SIZE_CHECK( noOfSections >= 1);
240 RANGE_CHECK( min <= max );
241 SIZE_CHECK( index < m_nodeValues.size() );
242
243 m_nodeValues[index].clear();
244 if ( noOfSections == 1 ) {
245 m_nodeValues[index].push_back(( min+max) / 2.0);
246 }
247 else {
248 m_nodeValues[index].reserve(noOfSections);
249 for (size_t section = 0; section < noOfSections; section++)
250 m_nodeValues[index].push_back(min + section*( max-min ) / ( noOfSections-1.0 ));
251 }
252 }
253
254 /*! Set exponentially progressing grid values for one certain parameter only.
255 * This is especially useful if one parameter needs special treatment. The
256 * grid points will be filled with values \f$ factor \cdot expbase ^i \f$,
257 * where i does integer steps between min and max.
258 * \param index the index of the parameter that gets new grid values
259 * \param factor the value that the exponential base grid should be multiplied by
260 * \param exp_base the exponential grid will progress on this base (e.g. 2, 10)
261 * \param min the smallest exponent for exp_base
262 * \param max the largest exponent for exp_base */
263 void assignExponentialRange(size_t index, double factor, double exp_base, int min, int max)
264 {
265 SIZE_CHECK( min <= max );
266 RANGE_CHECK( index < m_nodeValues.size() );
267
268 m_nodeValues[index].clear();
269 m_nodeValues[index].reserve(max-min);
270 for (int section = 0; section <= (max-min); section++)
271 m_nodeValues[index].push_back( factor * std::pow( exp_base, section+min ));
272 }
273
274 /// Please note that for the grid search optimizer it does
275 /// not make sense to call step more than once, as the
276 /// solution does not improve iteratively.
277 void step(ObjectiveFunctionType const& objectiveFunction) {
278 size_t dimensions = m_nodeValues.size();
279 std::vector<size_t> index(dimensions, 0);
280 m_best.value = 1e100;
281 RealVector point(dimensions);
282
283 // loop through all grid points
284 while (true)
285 {
286 // define the parameters
287 for (size_t dimension = 0; dimension < dimensions; dimension++)
288 point(dimension) = m_nodeValues[dimension][index[dimension]];
289
290 // evaluate the model
291 if (objectiveFunction.isFeasible(point))
292 {
293 double error = objectiveFunction.eval(point);
294
295#ifdef SHARK_CV_VERBOSE_1
296 std::cout << "." << std::flush;
297#endif
298#ifdef SHARK_CV_VERBOSE
299 std::cout << point << "\t" << error << std::endl;
300#endif
301 if (error < m_best.value)
302 {
303 m_best.value = error;
304 m_best.point = point; // [TG] swap() solution is out, caused ugly memory bug, I changed this back
305 }
306 }
307
308 // next index
309 size_t dimension = 0;
310 for (; dimension < dimensions; dimension++)
311 {
312 index[dimension]++;
313 if (index[dimension] < m_nodeValues[dimension].size()) break;
314 index[dimension] = 0;
315 }
316 if (dimension == dimensions) break;
317 }
318#ifdef SHARK_CV_VERBOSE_1
319 std::cout << std::endl;
320#endif
321 }
322
323protected:
324
325 /// The array columns contain the grid values for the corresponding parameter axis.
326 std::vector<std::vector<double> > m_nodeValues;
327
329};
330
331
332///
333/// \brief Nested grid search
334///
335/// \par
336/// The NestedGridSearch class is an iterative optimizer,
337/// doing one grid search in every iteration. In every
338/// iteration, it halves the grid extent doubling the
339/// resolution in every coordinate.
340///
341/// \par
342/// Although nested grid search is much less exhaustive
343/// than standard grid search, it still suffers from
344/// exponential time and memory complexity in the number
345/// of variables optimized. Therefore, if the number of
346/// variables is larger than 2 or 3, consider using the
347/// CMA instead.
348///
349/// \par
350/// Nested grid search works as follows: The optimizer
351/// defined a 5x5x...x5 equi-distant grid (depending on
352/// the search space dimension) on an initially defined
353/// search cube. During every grid search iteration,
354/// the error is computed for all grid points.
355/// Then the grid is moved
356/// to the best grid point found so far and contracted
357/// by a factor of two in each dimension. Each call to
358/// the optimize() function performs one such step.
359///
360/// \par
361/// Let N denote the number of parameters to optimize.
362/// To compute the error landscape at the current
363/// zoom level, the algorithm has to do \f$ 5^N \f$
364/// error function evaluations in every iteration.
365///
366/// \par
367/// The grid is always centered around the best
368/// solution currently known. If this solution is
369/// located at the boundary, the landscape may exceed
370/// the parameter range defined m_minimum and m_maximum.
371/// These invalid landscape values are not used.
372///
374{
375public:
376 /// Constructor
378 {
379 m_configured=false;
380 }
381
382 /// \brief From INameable: return the class name.
383 std::string name() const
384 {
385 return "NestedGridSearch";
386 }
387
388 ///
389 /// \brief Initialization of the nested grid search.
390 ///
391 /// \par
392 /// The min and max arrays define ranges for every parameter to optimize.
393 /// These ranges are strict, that is, the algorithm will not try values
394 /// beyond the range, even if is finds a boundary minimum.
395 ///
396 /// \param min lower end of the parameter range
397 /// \param max upper end of the parameter range
398 void configure(const std::vector<double>& min, const std::vector<double>& max)
399 {
400 size_t dimensions = min.size();
401 SIZE_CHECK(max.size() == dimensions);
402
403 m_minimum = min;
404 m_maximum = max;
405
406 m_stepsize.resize(dimensions);
407 m_best.point.resize(dimensions);
408 for (size_t dimension = 0; dimension < dimensions; dimension++)
409 {
410 m_best.point(dimension)=0.5 *(min[dimension] + max[dimension]);
411 m_stepsize[dimension] = 0.25 * (max[dimension] - min[dimension]);
412 }
413 m_configured=true;
414 }
415
416 ///
417 /// \brief Initialization of the nested grid search.
418 ///
419 /// \par
420 /// The min and max values define ranges for every parameter to optimize.
421 /// These ranges are strict, that is, the algorithm will not try values
422 /// beyond the range, even if is finds a boundary minimum.
423 ///
424 /// \param parameters number of parameters to optimize
425 /// \param min lower end of the parameter range
426 /// \param max upper end of the parameter range
427 void configure(size_t parameters, double min, double max)
428 {
429 SIZE_CHECK(parameters > 0);
430
431 m_minimum=std::vector<double>(parameters,min);
432 m_maximum=std::vector<double>(parameters,max);
433 m_stepsize=std::vector<double>(parameters,0.25 * (max - min));
434
435 m_best.point.resize(parameters);
436
437 double start=0.5 *(min + max);
438 for(double& value: m_best.point)
439 value=start;
440 m_configured=true;
441 }
442
443 //from ISerializable
444 virtual void read( InArchive & archive )
445 {
446 archive>>m_minimum;
447 archive>>m_maximum;
448 archive>>m_stepsize;
449 archive>>m_configured;
450 archive>>m_best.point;
451 archive>>m_best.value;
452 }
453
454 virtual void write( OutArchive & archive ) const
455 {
456 archive<<m_minimum;
457 archive<<m_maximum;
458 archive<<m_stepsize;
459 archive<<m_configured;
460 archive<<m_best.point;
461 archive<<m_best.value;
462 }
463
464 /// if NestedGridSearch was not configured before this call, it is default initialized ti the range[-1,1] for every parameter
465 virtual void init(ObjectiveFunctionType const& objectiveFunction, SearchPointType const& startingPoint) {
466 checkFeatures(objectiveFunction);
467
468 if(!m_configured)
469 configure(startingPoint.size(),-1,1);
470 SIZE_CHECK(m_stepsize.size()==startingPoint.size());
471
472 }
474
475
476 /// Every call of the optimization member computes the
477 /// error landscape on the current grid. It picks the
478 /// best error value and zooms into the error landscape
479 /// by a factor of 2.
480 void step(ObjectiveFunctionType const& objectiveFunction) {
481 size_t dimensions = m_stepsize.size();
482 SIZE_CHECK(m_minimum.size() == dimensions);
483 SIZE_CHECK(m_maximum.size() == dimensions);
484
485 m_best.value = 1e99;
486 std::vector<size_t> index(dimensions,0);
487
488 RealVector point=m_best.point;
489
490 // loop through the grid
491 while (true)
492 {
493 // compute the grid point,
494
495 // set the parameters
496 bool compute=true;
497 for (size_t d = 0; d < dimensions; d++)
498 {
499 point(d) += (index[d] - 2.0) * m_stepsize[d];
500 if (point(d) < m_minimum[d] || point(d) > m_maximum[d])
501 {
502 compute = false;
503 break;
504 }
505 }
506
507 // evaluate the grid point
508 if (compute && objectiveFunction.isFeasible(point))
509 {
510 double error = objectiveFunction.eval(point);
511
512 // remember the best solution
513 if (error < m_best.value)
514 {
515 m_best.value = error;
516 m_best.point=point;
517 }
518 }
519 // move to the next grid point
520 size_t d = 0;
521 for (; d < dimensions; d++)
522 {
523 index[d]++;
524 if (index[d] <= 4) break;
525 index[d] = 0;
526 }
527 if (d == dimensions) break;
528 }
529 // decrease the step sizes
530 for(double& step: m_stepsize)
531 step *= 0.5;
532 }
533
534protected:
535 /// minimum parameter value to check
536 std::vector<double> m_minimum;
537
538 /// maximum parameter value to check
539 std::vector<double> m_maximum;
540
541 /// current step size for every parameter
542 std::vector<double> m_stepsize;
543
545};
546
547
548///
549/// \brief Optimize by trying out predefined configurations
550///
551/// \par
552/// The PointSearch class is similair to the GridSearch class
553/// by the property that it optimizes a model in a single pass
554/// just trying out a predefined number of parameter configurations.
555/// The main difference is that every parameter configuration has
556/// to be explicitly defined. It is not possible to define a set
557/// of values for every axis; see GridSearch for this purpose.
558/// Thus, the PointSearch class allows for more flexibility.
559///
560/// If no configure method is called, this class just samples random points.
561/// They are uniformly distributed in [-1,1].
562/// parameters^2 points but minimum 20 are sampled in this case.
563///
565{
566public:
567 /// Constructor
569 m_configured=false;
570 }
571
572 /// \brief From INameable: return the class name.
573 std::string name() const
574 {
575 return "PointSearch";
576 }
577
578 /// Initialization of the search points.
579 /// \param points array of points to evaluate
580 void configure(const std::vector<RealVector>& points) {
581 m_points=points;
582 m_configured=true;
583 }
584
585 /// samples random points in the range [min,max]^parameters
586 void configure(size_t parameters,size_t samples, double min,double max) {
587 RANGE_CHECK(min<=max);
588 m_points.resize(samples);
589 for(size_t sample=0; sample!=samples; ++sample)
590 {
591 m_points[sample].resize(parameters);
592 for(size_t param=0; param!=parameters; ++param)
593 {
594 m_points[sample](param)=random::uni(random::globalRng, min,max);
595 }
596 }
597 m_configured=true;
598 }
599
600 virtual void read( InArchive & archive )
601 {
602 archive>>m_points;
603 archive>>m_configured;
604 archive>>m_best.point;
605 archive>>m_best.value;
606 }
607
608 virtual void write( OutArchive & archive ) const
609 {
610 archive<<m_points;
611 archive<<m_configured;
612 archive<<m_best.point;
613 archive<<m_best.value;
614 }
615
616 /// If the class wasn't configured before, this method samples random uniform distributed points in [-1,1]^n.
617 void init(ObjectiveFunctionType const& objectiveFunction, SearchPointType const& startingPoint) {
618 checkFeatures(objectiveFunction);
619
620 if(!m_configured)
621 {
622 size_t parameters=startingPoint.size();
623 size_t samples=std::min(sqr(parameters),(size_t)20);
624 configure(parameters,samples,-1,1);
625 }
626 }
628 /// Please note that for the point search optimizer it does
629 /// not make sense to call step more than once, as the
630 /// solution does not improve iteratively.
631 void step(ObjectiveFunctionType const& objectiveFunction) {
632 size_t numPoints = m_points.size();
633 m_best.value = 1e100;
634 size_t bestIndex=0;
635
636 // loop through all points
637 for (size_t point = 0; point < numPoints; point++)
638 {
639 // evaluate the model
640 if (objectiveFunction.isFeasible(m_points[point]))
641 {
642 double error = objectiveFunction.eval(m_points[point]);
643 if (error < m_best.value)
644 {
645 m_best.value = error;
646 bestIndex=point;
647 }
648 }
649 }
650 m_best.point=m_points[bestIndex];
651 }
652
653protected:
654 /// The array holds one parameter configuration in every column.
655 std::vector<RealVector> m_points;
656
657 /// verbosity level
659};
660
661
662}
663#endif