// GibbsOperator.h — Block Gibbs sampling transition operator for RBMs.
/*!
 *
 *
 * \brief Implements Block Gibbs Sampling
 *
 * \author O.Krause
 * \date 2014
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
30#ifndef SHARK_UNSUPERVISED_RBM_SAMPLING_GIBBSOPERATOR_H
31#define SHARK_UNSUPERVISED_RBM_SAMPLING_GIBBSOPERATOR_H
32
33#include <shark/LinAlg/Base.h>
34#include "Impl/SampleTypes.h"
35namespace shark{
36
37///\brief Implements Block Gibbs Sampling related transition operators for various temperatures.
38///
39/// The operator generates transitions from the current state of the neurons of an RBM
40/// to a new one and thus can be used to produce a Markov chain.
41/// The Gibbs Operator works by computing the conditional distribution of the hidden given the visible p(h|v) (or
42/// vice versa) and than samples the new hidden (or visible) state from it.
43///
44/// As an interesting twist, this operator can also be used to implement Flip-The-State sampling using two values alpha_visible
45/// and alpha_hidden both being between 0 and 1 (inclusively). for alpha_visible=alpha_hidden=0, pure gibbs sampling is performed.
46/// if for one of the layers, the value is not 0 a mixture of gibbs and flip-the-state sampling is performed. 1 equals to pure flip-the state
47/// sampling.
48/// The trick of this sampler is that it takes the previous state into account while sampling. If the current state has a low probability,
49/// the sampler jumps deterministically in another state with higher probability. This is counterbalanced by having a higher chance to jump away from
50/// this state.
51template< class RBMType >
53public:
54 typedef RBMType RBM;
55
56 ///The operator holds a 'sample' of the visible and hidden neurons.
57 ///Such a sample does not only contain the states of the neurons but all other information
58 ///needed to approximate the gradient
59
60 ///\brief the type of a concrete sample.
61 typedef detail::GibbsSample<
62 typename RBMType::HiddenType::SufficientStatistics
64 ///\brief the type of a concrete sample.
65 typedef detail::GibbsSample<
66 typename RBMType::VisibleType::SufficientStatistics
68
69 ///\brief Represents the state of a batch of hidden samples and additional information required by the gradient.
70 ///
71 ///Aside from the hidden state, this structure can also hold the actual values
72 ///of the input, the phi-function and the sufficient statistics
74
75 ///\brief Represents the state of the visible units and additional information required by the gradient.
76 ///
77 ///Aside from the visible state, this structure can also hold the actual values
78 ///of the hidden input, the phi-function and the sufficient statistics
80
81 ///\brief Constructs the Operator using an allready defined Distribution to sample from.
82 GibbsOperator(RBM* rbm, double alphaVisible = 0,double alphaHidden = 0)
83 :mpe_rbm(rbm){
84 setAlpha(alphaVisible,alphaHidden);
85 }
86
87 ///\brief Returns the internal RBM.
88 RBM* rbm()const{
89 return mpe_rbm;
90 }
91
92
93 ///\brief Calculates internal data needed for sampling the hidden units as well as requested information for the gradient.
94 ///
95 ///This function calculates the conditional probability distribution p(h|v) with inverse temperature beta for the whole batch of samples
96 ///Be aware that a change of temperature may occur between sampleVisible and precomputeHidden.
97 /// @param hiddenBatch the batch of hidden samples to be created
98 /// @param visibleBatch the batch of visible samples to be created
99 /// @param beta the vector of inverse temperatures
100 template<class BetaVector>
101 void precomputeHidden(HiddenSampleBatch& hiddenBatch, VisibleSampleBatch& visibleBatch, BetaVector const& beta)const{
102 SIZE_CHECK(visibleBatch.size()==hiddenBatch.size());
103 mpe_rbm->energy().inputHidden(hiddenBatch.input, visibleBatch.state);
104 //calculate the sufficient statistics of the hidden units
105 mpe_rbm->hiddenNeurons().sufficientStatistics(hiddenBatch.input,hiddenBatch.statistics, beta);
106 }
107
108
109 ///\brief calculates internal data needed for sampling the visible units as well as requested information for the gradient
110 ///
111 ///This function calculates the conditional probability distribution p(v|h) with inverse temperature beta for a whole batch of inputs.
112 ///Be aware that a change of temperature may occur between sampleHidden and precomputeVisible.
113 template<class BetaVector>
114 void precomputeVisible(HiddenSampleBatch& hiddenBatch, VisibleSampleBatch& visibleBatch, BetaVector const& beta)const{
115 SIZE_CHECK(visibleBatch.size()==hiddenBatch.size());
116 mpe_rbm->energy().inputVisible(visibleBatch.input, hiddenBatch.state);
117 //calculate the sufficient statistics of the visible units for every element of the batch
118 mpe_rbm->visibleNeurons().sufficientStatistics(visibleBatch.input,visibleBatch.statistics, beta);
119
120 }
121
122 ///\brief Samples a new batch of states of the hidden units using their precomputed statistics.
123 void sampleHidden(HiddenSampleBatch& sampleBatch)const{
124 //sample state of the hidden neurons, input and statistics was allready computed by precompute
125 mpe_rbm->hiddenNeurons().sample(sampleBatch.statistics, sampleBatch.state, m_alphaHidden, mpe_rbm->rng());
126 }
127
128
129 ///\brief Samples a new batch of states of the visible units using their precomputed statistics.
130 void sampleVisible(VisibleSampleBatch& sampleBatch)const{
131 //sample state of the visible neurons, input and statistics was allready computed by precompute
132 mpe_rbm->visibleNeurons().sample(sampleBatch.statistics, sampleBatch.state, m_alphaVisible, mpe_rbm->rng());
133 }
134
135 /// \brief Applies the Gibbs operator a number of times to a given sample.
136 ///
137 /// Performs one complete step for a sample by sampling first the hidden, than the visible and computing the probability of a hidden given the visible unit
138 /// That is, Given a State (v,h), computes p(v|h),draws v and then computes p(h|v) and draws h . this is repeated several times
139 template<class BetaVector>
140 void stepVH(HiddenSampleBatch& hiddenBatch, VisibleSampleBatch& visibleBatch, std::size_t numberOfSteps, BetaVector const& beta){
141 for(unsigned int i=0; i != numberOfSteps; i++){
142 precomputeVisible(hiddenBatch,visibleBatch,beta);
143 sampleVisible(visibleBatch);
144 precomputeHidden(hiddenBatch, visibleBatch,beta);
145 sampleHidden(hiddenBatch);
146 }
147 }
148
149 ///\brief Creates hidden/visible sample pairs from the states of the visible neurons, i.e. sets the visible units to the given states and samples hidden states based on the states of the visible units.
150 /// This can directly be used to calculate the gradient.
151 ///
152 /// @param hiddenBatch the batch of hidden samples to be created
153 /// @param visibleBatch the batch of visible samples to be created
154 /// @param states the states of the visible neurons in the sample
155 /// @param beta the vector of inverse temperatures
156 template<class States, class BetaVector>
157 void createSample(HiddenSampleBatch& hiddenBatch,VisibleSampleBatch& visibleBatch, States const& states, BetaVector const& beta)const{
158 SIZE_CHECK(batchSize(states)==visibleBatch.size());
159 SIZE_CHECK(hiddenBatch.size()==visibleBatch.size());
160 visibleBatch.state = states;
161
162 precomputeHidden(hiddenBatch,visibleBatch, beta);
163 sampleHidden(hiddenBatch);
164 }
165
166 ///\brief Creates hidden/visible sample pairs from the states of the visible neurons, i.e. sets the visible units to the given states and samples hidden states based on the states of the visible units.
167 /// This can directly be used to calculate the gradient.
168 ///
169 /// @param hiddenBatch the batch of hidden samples to be created
170 /// @param visibleBatch the batch of visible samples to be created
171 /// @param states the states of the visible neurons in the sample
172 template<class States>
173 void createSample(HiddenSampleBatch& hiddenBatch,VisibleSampleBatch& visibleBatch, States const& states)const{
174 createSample(hiddenBatch,visibleBatch,states, blas::repeat(1.0,states.size1()));
175 }
176
177 ///\brief Calculates the Energy of a sample of the visible and hidden neurons created by this chain.
178 ///
179 ///@param hiddenBatch the batch of samples of the hidden neurons
180 ///@param visibleBatch the batch of samples of the visible neurons (holding also the precomputed input of the visibles)
181 ///@return the value of the energy function
182 RealVector calculateEnergy(HiddenSampleBatch const& hiddenBatch, VisibleSampleBatch const& visibleBatch)const{
183 return mpe_rbm->energy().energyFromHiddenInput(
184 hiddenBatch.input,
185 hiddenBatch.state,
186 visibleBatch.state
187 );
188 }
189
190 void setAlpha(double newAlphaVisible, double newAlphaHidden){
191 SHARK_RUNTIME_CHECK(newAlphaVisible >= 0.0, "alpha >= 0 not fulfilled for the visible layer");
192 SHARK_RUNTIME_CHECK(newAlphaVisible <= 1., "alpha <=1 not fulfilled for the visible layer");
193 SHARK_RUNTIME_CHECK(newAlphaHidden >= 0.0, "alpha >= 0 not fulfilled for the hidden layer");
194 SHARK_RUNTIME_CHECK(newAlphaHidden <= 1., "alpha <=1 not fulfilled for the hidden layer");
195 m_alphaVisible = newAlphaVisible;
196 m_alphaHidden = newAlphaHidden;
197 }
198private:
199 RBM* mpe_rbm;
200 double m_alphaVisible;
201 double m_alphaHidden;
202};
203
204
205}
206#endif