DistantModes.h
Go to the documentation of this file.
1/*!
2 * \brief Implements the DistantModes/ArtificialModes benchmark problem
3 *
4 * \author O. Krause, A.Fischer, K.Bruegge
5 * \date 2012
6 *
7 *
8 * \par Copyright 1995-2017 Shark Development Team
9 *
10 * <BR><HR>
11 * This file is part of Shark.
12 * <https://shark-ml.github.io/Shark/>
13 *
14 * Shark is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License as published
16 * by the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * Shark is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Lesser General Public License for more details.
23 *
24 * You should have received a copy of the GNU Lesser General Public License
25 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
26 *
27 */
28#ifndef UNSUPERVISED_RBM_PROBLEMS_DISTANTMODES_H
29#define UNSUPERVISED_RBM_PROBLEMS_DISTANTMODES_H
30
31#include <shark/Data/Dataset.h>
32#include <shark/LinAlg/Base.h>
33#include <shark/Core/Random.h>
35
36namespace shark{
37
38
39///\brief Creates a set of patterns (each later representing a mode)
40/// which are then randomly perturbed to create the data set.
41/// The dataset was introduced in Desjardins et al. (2010) (Parallel Tempering for training restricted Boltzmann machines, AISTATS 2010)
42///
43///The higher the perturbation, the harder the data is to classify,
44///but the closer the modes are and thus the easier the data distribution is to learn.
46private:
48
49 double m_p; ///< probability handed to perturbate() for every generated sample
50 unsigned m_dim; ///< length of every generated pattern vector
51 unsigned m_modes; ///< number of distinct modes; sample i is built from mode i % m_modes
52 unsigned m_copies; ///< number of samples generated per mode (m_modes * m_copies samples in total)
53 std::size_t m_batchSize; ///< batch size forwarded to createDataFromRange() in init()
54
55 //Generates a basic pattern representing the "center" of a mode.
56 void modePrototype(RealVector& pattern, unsigned mode) const {
57 for (std::size_t i = 0; i != pattern.size(); ++i){
58 pattern(i) = (mode % 2) ^ (i * (mode / 2 + 1) / pattern.size()) % 2;
59 }
60 }
61
62
63 ///Perturbates the pattern by randomly flipping pixels
64 ///@param pattern the pattern
65 ///@param p the flipping probability
66 void perturbate(RealVector& pattern, double p)const{
67 for (std::size_t i = 0; i < pattern.size(); ++i){
68 if (random::uni(random::globalRng, 0,1) > p){
69 pattern(i) = !pattern(i);
70 }
71 }
72 }
73
74 void init() {
75 std::vector<RealVector> data(m_modes * m_copies,RealVector(m_dim));
76 for (std::size_t i = 0; i != data.size(); ++i) {
77 RealVector& element=data[i];
78 unsigned mode = i % m_modes;
79 modePrototype(element, mode);
80 perturbate(element, m_p);
81 }
82 m_data = createDataFromRange(data, m_batchSize);
83 }
84
85public:
86 ///generates the DistantModes distribution.
87 ///
88 ///\param p the probability of changing a input neuron
89 ///\param dim the dimensionality of the data.
90 ///\param modes the number of modes, should be a multiple of 2
91 ///\param copies the number of disturbed copies for each mode
92 ///\param batchSize the size of the batches in which the generated data set is organized
93 DistantModes(double p = 0, unsigned dim = 16, unsigned modes=4, unsigned copies =2500, size_t batchSize=0)
94 :m_p(p), m_dim(dim), m_modes(modes), m_copies(copies), m_batchSize(batchSize) {
95 init();
96 }
97
98 ///returns the generated dataset
100 return m_data;
101 };
102
103 ///returns the dimensionality of the data
104 std::size_t inputDimension() const {
105 return m_dim;
106 }
107};
108
109}
110#endif