SingleChainApproximator.h
Go to the documentation of this file.
1/*!
2 *
3 *
4 * \brief -
5 *
6 * \author -
7 * \date -
8 *
9 *
10 * \par Copyright 1995-2017 Shark Development Team
11 *
12 * <BR><HR>
13 * This file is part of Shark.
14 * <https://shark-ml.github.io/Shark/>
15 *
16 * Shark is free software: you can redistribute it and/or modify
17 * it under the terms of the GNU Lesser General Public License as published
18 * by the Free Software Foundation, either version 3 of the License, or
19 * (at your option) any later version.
20 *
21 * Shark is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU Lesser General Public License for more details.
25 *
26 * You should have received a copy of the GNU Lesser General Public License
27 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28 *
29 */
30#ifndef SHARK_UNSUPERVISED_RBM_SINGLECHAINAPPROXIMATOR_H
31#define SHARK_UNSUPERVISED_RBM_SINGLECHAINAPPROXIMATOR_H
32
34#include "Impl/DataEvaluator.h"
35
36namespace shark{
37
38///\brief Approximates the gradient by taking samples from a single Markov chain.
39///
40///Taking samples only from a single chain leads to a high mixing rate but the correlation of the samples is higher than using
41///several chains. This approximator should be used with a sampling scheme which also achieves a faster decorrelation of samples like
42///tempering.
43template<class MarkovChainType>
// NOTE(review): the class declaration line itself (original line 44, presumably
// "class SingleChainApproximator : public SingleObjectiveFunction{") is missing
// from this listing dump -- verify against the original header.
45public:
46 typedef typename MarkovChainType::RBM RBM;
47
	// Constructor fragment: the signature line (original line 48, presumably
	// "SingleChainApproximator(RBM* rbm)") was lost in extraction; only the
	// initializer list and body survive. Defaults: k=1 sampling step, 0 samples
	// (= one per data element), batch size 500, all data batches, no regularizer.
49 : mpe_rbm(rbm),m_chain(rbm),m_k(1)
50 ,m_samples(0),m_batchSize(500)
51 ,m_numBatches(0),m_regularizer(0){
52 SHARK_ASSERT(rbm != NULL);
53
	// NOTE(review): original lines 54-56 are missing here (in Shark these
	// typically set feature flags, e.g. m_features |= ... -- confirm upstream).
57
58 m_chain.setBatchSize(1);
59 };
60
61 /// \brief From INameable: return the class name.
62 std::string name() const
63 { return "SingleChainApproximator"; }
64
65 void setK(unsigned int k){
66 m_k = k;
67 }
68 void setNumberOfSamples(std::size_t samples){
69 m_samples = samples;
70 }
71
72 /// \brief Returns the number of batches of the dataset that are used in every iteration.
73 ///
74 /// If it is less than all batches, the batches are chosen at random. if it is 0, all batches are used
75 std::size_t numBatches()const{
76 return m_numBatches;
77 }
78
79 /// \brief Returns a reference to the number of batches of the dataset that are used in every iteration.
80 ///
81 /// If it is less than all batches, the batches are chosen at random.if it is 0, all batches are used.
82 std::size_t& numBatches(){
83 return m_numBatches;
84 }
85
86 MarkovChainType& chain(){
87 return m_chain;
88 }
89 MarkovChainType const& chain() const{
90 return m_chain;
91 }
92
	// Fragment: the signature line of setData (original line 93, presumably
	// "void setData(UnlabeledData<RealVector> const& data){") was lost in
	// extraction. Stores the training data and (re)initializes the chain on it.
94 m_data = data;
95 m_chain.initializeChain(m_data);
96 }
97
	// Fragment: the signature line of proposeStartingPoint (original line 98,
	// presumably "SearchPointType proposeStartingPoint() const{") was lost in
	// extraction. Proposes the RBM's current parameters as the starting point.
99 return mpe_rbm->parameterVector();
100 }
101
102 std::size_t numberOfVariables()const{
103 return mpe_rbm->numberOfParameters();
104 }
105
106 void setRegularizer(double factor, SingleObjectiveFunction* regularizer){
107 m_regularizer = regularizer;
108 m_regularizationStrength = factor;
109 }
110
111 double evalDerivative( SearchPointType const & parameter, FirstOrderDerivative & derivative ) const {
112 mpe_rbm->setParameterVector(parameter);
113
114 typename RBM::GradientType modelAverage(mpe_rbm);
115 RealVector empiricalAverage = detail::evaluateData(m_data,*mpe_rbm,m_numBatches);
116
117 //approximate the expectation of the energy gradient with respect to the model distribution
118 //using samples from the Markov chain
119
120 //calculate number of samples to draw and size of batches used in the gradient update
121 std::size_t samplesToDraw = m_samples > 0 ? m_samples: m_data.numberOfElements();
122
123 std::size_t batches = samplesToDraw / m_batchSize;
124 if(samplesToDraw - batches*m_batchSize != 0){
125 ++batches;
126 }
127
128 //calculate the gradient. we do this by normal k-step sampling for exactly as many
129 //samples as calculated in samplesToDraw but saving the result in an intermediate
130 //batch variable gradientbatch. When this batch is full, we do an update step of the gradient.
131 //this is an a bit more efficient grouping and preserves us from using batches of size1 as the argument
132 //of addVH which might be inefficient.
133 for(std::size_t batch = 0; batch != batches; ++batch){
134 //calculate the size of the next batch which is batchSize as long as there are enough samples left to draw
135 std::size_t currentBatchSize = std::min(samplesToDraw-batch*m_batchSize, m_batchSize);
136 typename MarkovChainType::SampleBatch gradientBatch(currentBatchSize, mpe_rbm->numberOfVN(),mpe_rbm->numberOfHN());
137 //fill the batch with fresh samples
138 for(std::size_t i = 0; i != currentBatchSize; ++i){
139 m_chain.step(m_k);
140 getBatchElement(gradientBatch,i) = m_chain.sample();
141 }
142 //do the gradient update
143 modelAverage.addVH(gradientBatch.hidden, gradientBatch.visible);
144 }
145
146 derivative.resize(mpe_rbm->numberOfParameters());
147 noalias(derivative) = modelAverage.result() - empiricalAverage;
148
149 if(m_regularizer){
150 FirstOrderDerivative regularizerDerivative;
151 m_regularizer->evalDerivative(parameter,regularizerDerivative);
152 noalias(derivative) += m_regularizationStrength*regularizerDerivative;
153 }
154
155 return std::numeric_limits<double>::quiet_NaN();
156 }
157
158private:
	// RBM whose log-likelihood gradient is approximated; not owned by this object
159 RBM* mpe_rbm;
	// the single Markov chain samples are drawn from; mutable because evalDerivative is const
160 mutable MarkovChainType m_chain;
	// NOTE(review): the training-data member (original line 161, presumably
	// "UnlabeledData<RealVector> m_data;") is missing from this listing dump;
	// it is assigned in setData and read in evalDerivative.
162
	// number of chain steps between two consecutive drawn samples
163 unsigned int m_k;
	// samples drawn per gradient evaluation; 0 = one per data element.
	// NOTE(review): setNumberOfSamples takes std::size_t -- assignment into
	// unsigned int may narrow on 64-bit platforms; confirm intended.
164 unsigned int m_samples;
	// samples are grouped into batches of this size before addVH (set to 500 in the ctor)
165 std::size_t m_batchSize;
	// number of data batches used per iteration; 0 = all batches
166 std::size_t m_numBatches;
167
	// optional regularizer added to the gradient, scaled by m_regularizationStrength; not owned
168 SingleObjectiveFunction* m_regularizer;
169 double m_regularizationStrength;
170};
171
172}
173
174#endif