MultiChainApproximator.h
/*!
 *
 *
 * \brief -
 *
 * \author -
 * \date -
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_UNSUPERVISED_RBM_GRADIENTAPPROXIMATIONS_MULTICHAINAPPROXIMATOR_H
#define SHARK_UNSUPERVISED_RBM_GRADIENTAPPROXIMATIONS_MULTICHAINAPPROXIMATOR_H

#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>
#include "Impl/DataEvaluator.h"
#include <vector>

namespace shark{
///\brief Approximates the gradient by taking samples from an ensemble of Markov chains running in parallel.
///
///The advantage is that every chain can produce samples from a different mode of the distribution.
///The disadvantage, however, is that mixing is slower, so a larger number of sampling steps between
///subsequent samples needs to be chosen.
template<class MarkovChainType>
class MultiChainApproximator: public SingleObjectiveFunction{
public:
    typedef typename MarkovChainType::RBM RBM;

    MultiChainApproximator(RBM* rbm)
    : mpe_rbm(rbm),m_chainOperator(rbm),m_k(1),m_samples(0),m_numBatches(0),m_regularizer(0){
        SHARK_ASSERT(rbm != NULL);
        setBatchSize(500);

        m_features.reset(HAS_VALUE);
        m_features |= HAS_FIRST_DERIVATIVE;
        m_features |= CAN_PROPOSE_STARTING_POINT;
    }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "MultiChainApproximator"; }

    /// \brief Sets the number of sampling steps each chain advances between two gradient evaluations.
    void setK(unsigned int k){
        m_k = k;
    }
    /// \brief Sets the number of samples (parallel chains) used to estimate the model average.
    void setNumberOfSamples(std::size_t samples){
        m_samples = samples;
    }
    /// \brief Sets how many chains are processed together in one batch.
    ///
    /// Chain types that cannot sample whole batches always use a batch size of 1.
    void setBatchSize(std::size_t batchSize){
        m_batchSize = batchSize;
        if(!MarkovChainType::computesBatch)
            m_batchSize = 1;
    }

    /// \brief Returns a reference to the Markov chain operator used for sampling.
    MarkovChainType& chain(){
        return m_chainOperator;
    }
    MarkovChainType const& chain() const{
        return m_chainOperator;
    }

    /// \brief Returns the number of batches of the dataset that are used in every iteration.
    ///
    /// If it is less than the total number of batches, the batches are chosen at random. If it is 0, all batches are used.
    std::size_t numBatches()const{
        return m_numBatches;
    }

    /// \brief Returns a reference to the number of batches of the dataset that are used in every iteration.
    ///
    /// If it is less than the total number of batches, the batches are chosen at random. If it is 0, all batches are used.
    std::size_t& numBatches(){
        return m_numBatches;
    }

    /// \brief Sets the training data and initializes the ensemble of Markov chains, split into batches of at most the configured batch size.
    void setData(UnlabeledData<RealVector> const& data){
        m_data = data;

        //construct a gradient object to get the information about which values of the samples are needed
        typename RBM::GradientType grad(mpe_rbm);

        //if the number of samples is 0 = unset, set it to the number of points in the data set
        if(!m_samples){
            setNumberOfSamples(m_data.numberOfElements());
        }

        //calculate the number of batches
        std::size_t batches = m_samples / m_batchSize;
        if(m_samples - batches*m_batchSize != 0){
            ++batches;
        }
        m_chains.resize(batches);

        //swap every sample batch from the vector into the operator, initialize it and shift it back out.
        for(std::size_t i = 0; i != batches;++i){
            swap(m_chains[i],m_chainOperator.samples());
            std::size_t currentBatchSize = std::min(m_samples-i*m_batchSize, m_batchSize);
            m_chainOperator.setBatchSize(currentBatchSize);
            m_chainOperator.initializeChain(m_data);
            swap(m_chains[i],m_chainOperator.samples());
        }
    }

    SearchPointType proposeStartingPoint() const{
        return mpe_rbm->parameterVector();
    }

    std::size_t numberOfVariables()const{
        return mpe_rbm->numberOfParameters();
    }

    /// \brief Sets a regularizer whose weighted derivative is added to the approximated gradient.
    void setRegularizer(double factor, SingleObjectiveFunction* regularizer){
        m_regularizer = regularizer;
        m_regularizationStrength = factor;
    }

    double evalDerivative( SearchPointType const & parameter, FirstOrderDerivative & derivative ) const {
        mpe_rbm->setParameterVector(parameter);

        typename RBM::GradientType modelAverage(mpe_rbm);
        RealVector empiricalAverage = detail::evaluateData(m_data,*mpe_rbm,m_numBatches);

        //approximate the expectation of the energy gradient with respect to the model distribution
        //using samples from the Markov chains
        for(std::size_t i = 0; i != m_chains.size();++i){
            swap(m_chains[i],m_chainOperator.samples());//set the current Gibbs chain
            m_chainOperator.step(m_k);//advance the chain by k sampling steps
            modelAverage.addVH(m_chainOperator.samples().hidden, m_chainOperator.samples().visible);//update gradient
            swap(m_chains[i],m_chainOperator.samples());//save the Gibbs chain
        }

        derivative.resize(mpe_rbm->numberOfParameters());
        noalias(derivative) = modelAverage.result() - empiricalAverage;

        if(m_regularizer){
            FirstOrderDerivative regularizerDerivative;
            m_regularizer->evalDerivative(parameter,regularizerDerivative);
            noalias(derivative) += m_regularizationStrength*regularizerDerivative;
        }

        //only the derivative is approximated; the function value is not computed
        return std::numeric_limits<double>::quiet_NaN();
    }
private:
    RBM* mpe_rbm;
    mutable MarkovChainType m_chainOperator;
    mutable std::vector<typename MarkovChainType::SampleBatch> m_chains;
    UnlabeledData<RealVector> m_data;

    unsigned int m_k;
    std::size_t m_samples;
    std::size_t m_batchSize;
    std::size_t m_numBatches;

    SingleObjectiveFunction* m_regularizer;
    double m_regularizationStrength;
};
}

#endif
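A short usage sketch may help put the class in context: the code below wires a MultiChainApproximator into an ordinary RBM training loop, in the spirit of persistent contrastive divergence. This is a minimal sketch rather than part of the header; it assumes the BinaryRBM and BinaryGibbsChain typedefs from shark/Unsupervised/RBM/BinaryRBM.h and the SteepestDescent optimizer of recent Shark versions, and the layer sizes, chain count, step count k, learning rate and iteration count are purely illustrative.

#include <shark/Unsupervised/RBM/BinaryRBM.h>
#include <shark/Algorithms/GradientDescent/SteepestDescent.h>

using namespace shark;

//Illustrative training loop: estimate the RBM gradient with an ensemble of persistent Gibbs chains.
void trainWithParallelChains(UnlabeledData<RealVector> const& data, std::size_t numVisible, std::size_t numHidden){
    BinaryRBM rbm(random::globalRng);
    rbm.setStructure(numVisible, numHidden);

    //small random initial weights (illustrative initialization)
    RealVector initialParameters(rbm.numberOfParameters());
    for(std::size_t i = 0; i != initialParameters.size(); ++i){
        initialParameters(i) = random::uni(random::globalRng, -0.1, 0.1);
    }
    rbm.setParameterVector(initialParameters);

    //32 parallel chains, each advanced by k=3 Gibbs steps per gradient evaluation
    MultiChainApproximator<BinaryGibbsChain> approximator(&rbm);
    approximator.setNumberOfSamples(32);
    approximator.setK(3);
    approximator.setData(data);

    SteepestDescent<> optimizer;
    optimizer.setLearningRate(0.1);
    optimizer.init(approximator);
    for(std::size_t iteration = 0; iteration != 1000; ++iteration){
        optimizer.step(approximator);//each step advances every chain by k sampling steps
    }
    rbm.setParameterVector(optimizer.solution().point);
}

Each call to evalDerivative fills the derivative with the difference between the energy-gradient average over the chain samples and the average over the (possibly subsampled, see numBatches()) training data; the returned objective value is NaN, so only derivative-based optimizers such as the one above can be used.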