// BinaryLayer.h — part of the Shark machine learning library (license header below).
1/*!
2 *
3 *
4 * \brief -
5 *
6 * \author -
7 * \date -
8 *
9 *
10 * \par Copyright 1995-2017 Shark Development Team
11 *
12 * <BR><HR>
13 * This file is part of Shark.
14 * <https://shark-ml.github.io/Shark/>
15 *
16 * Shark is free software: you can redistribute it and/or modify
17 * it under the terms of the GNU Lesser General Public License as published
18 * by the Free Software Foundation, either version 3 of the License, or
19 * (at your option) any later version.
20 *
21 * Shark is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU Lesser General Public License for more details.
25 *
26 * You should have received a copy of the GNU Lesser General Public License
27 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28 *
29 */
30#ifndef SHARK_UNSUPERVISED_RBM_NEURONLAYERS_BINARYLAYER_H
31#define SHARK_UNSUPERVISED_RBM_NEURONLAYERS_BINARYLAYER_H
32
35#include <shark/LinAlg/Base.h>
37#include <shark/Core/Random.h>
39#include <shark/Core/OpenMP.h>
40namespace shark{
41
42///\brief Layer of binary units taking values in {0,1}.
43
44///A neuron in a Binary Layer takes values in {0,1} and the conditional probability to be 1
45///given the states of the neurons in the connected layer is determined by the sigmoid function
46///and the input it gets from the connected layer.
48private:
49 ///\brief The bias terms associated with the neurons.
50 RealVector m_bias;
51 RealVector m_baseRate;
52public:
53 ///\brief The state space of this neuron is binary.
55
56 ///\brief The sufficient statistics for the Binary Layer store the probability for a neuron to be on
57 typedef RealVector SufficientStatistics;
58 ///\brief Sufficient statistics of a batch of data.
60
	/// \brief Returns the bias values of the units (read-only).
	const RealVector& bias()const{
		return m_bias;
	}
65
	/// \brief Returns the bias values of the units (mutable access).
	RealVector& bias(){
		return m_bias;
	}
70
71
	/// \brief Returns the base rate of the units (read-only).
	///
	///The base-rate is the tempered distribution for beta=0;
	///beta then fades between the RBM and the base-rate.
	RealVector const& baseRate()const{
		return m_baseRate;
	}
79
	/// \brief Returns the base rate of the units (mutable access).
	///
	///The base-rate is the tempered distribution for beta=0;
	///beta then fades between the RBM and the base-rate.
	RealVector& baseRate(){
		return m_baseRate;
	}
87
88 ///\brief Resizes this neuron layer.
89 ///
90 ///@param newSize number of neurons in the layer
91 void resize(std::size_t newSize){
92 m_bias.resize(newSize);
93 m_baseRate.resize(newSize);
94 m_baseRate.clear();
95 }
96
	///\brief Returns the number of neurons of this layer (the size of the bias vector).
	std::size_t size()const{
		return m_bias.size();
	}
101
	/// \brief Takes the input of the neuron and estimates the expectation of the response of the neuron.
	/// For binary neurons the expectation is identical with the conditional probability for the neuron to be on given the state of the connected layer.
	///
	/// @param input the batch of inputs of the neuron
	/// @param statistics sufficient statistics containing the probabilities of the neurons to be one
	/// @param beta the inverse Temperature of the RBM (typically 1) for the whole batch
	template<class Input, class BetaVector>
	void sufficientStatistics(Input const& input, StatisticsBatch& statistics,BetaVector const& beta)const{ // \todo: reconsider the naming here as well
		SIZE_CHECK(input.size2() == size());
		SIZE_CHECK(statistics.size2() == size());
		SIZE_CHECK(input.size1() == statistics.size1());

		for(std::size_t i = 0; i != input.size1(); ++i){
			//p(on) = sigma( beta*(input + bias) + (1-beta)*baseRate ): tempered sigmoid activation
			noalias(row(statistics,i)) = sigmoid((row(input,i)+m_bias)*beta(i)+(1.0-beta(i))*m_baseRate);
		}
	}
118
119 /// \brief Samples from the distribution using either Gibbs- or flip-the-state sampling.
120 ///
121 /// For alpha= 0 gibbs sampling is performed. That is the next state for neuron i is directly taken from the conditional distribution of the i-th neuron.
122 /// In the case of alpha=1, flip-the-state sampling is performed, which takes the last state into account and tries to do deterministically jump
123 /// into states with higher probability. This is counterbalanced by a higher chance to jump back into a lower probability state in later steps.
124 /// For alpha between 0 and 1 a mixture of both is performed.
125 ///
126 /// @param statistics sufficient statistics containing the probabilities of the neurons to be one
127 /// @param state the state vector that shell hold the sampled states
128 /// @param alpha factor changing from gibbs to flip-the state sampling. 0<=alpha<=1
129 /// @param rng the random number generator used for sampling
130 template<class Matrix, class Rng>
131 void sample(StatisticsBatch const& statistics, Matrix& state, double alpha, Rng& rng) const{
132 SIZE_CHECK(statistics.size2() == size());
133 SIZE_CHECK(statistics.size1() == state.size1());
134 SIZE_CHECK(statistics.size2() == state.size2());
135
137 if(alpha == 0.0){//special case: normal gibbs sampling
138 for(std::size_t s = 0; s != state.size1();++s){
139 for(std::size_t i = 0; i != state.size2();++i){
140 state(s,i) = random::coinToss(rng, statistics(s,i));
141 }
142 }
143 }
144 else{//flip-the state sampling
145 for(size_t s = 0; s != state.size1(); ++s){
146 for (size_t i = 0; i != state.size2(); i++) {
147 double prob = statistics(s,i);
148 if (state(s,i) == 0) {
149 if (prob <= 0.5) {
150 prob = (1. - alpha) * prob + alpha * prob / (1. - prob);
151 } else {
152 prob = (1. - alpha) * prob + alpha;
153 }
154 } else {
155 if (prob >= 0.5) {
156 prob = (1. - alpha) * prob + alpha * (1. - (1. - prob) / prob);
157 } else {
158 prob = (1. - alpha) * prob;
159 }
160 }
161 state(s,i) = random::coinToss(rng, prob);
162 }
163 }
164 }
165 }
166 }
167
168 /// \brief Computes the log of the probability of the given states in the conditional distribution
169 ///
170 /// Currently it is only possible to compute the case with alpha=0
171 ///
172 /// @param statistics the statistics of the conditional distribution
173 /// @param state the state to check
174 template<class Matrix>
175 RealVector logProbability(StatisticsBatch const& statistics, Matrix const& state) const{
176 SIZE_CHECK(statistics.size2() == size());
177 SIZE_CHECK(statistics.size1() == state.size1());
178 SIZE_CHECK(statistics.size2() == state.size2());
179
180 RealVector logProbabilities(state.size1(),1.0);
181 for(std::size_t s = 0; s != state.size1();++s){
182 for(std::size_t i = 0; i != state.size2();++i){
183 logProbabilities(s) += (state(s,i) > 0.0)? std::log(statistics(s,i)) : std::log(1-statistics(s,i));
184 }
185 }
186 return logProbabilities;
187 }
188
	/// \brief Transforms the current state of the neurons for the multiplication with the weight matrix of the RBM,
	/// i.e. calculates the value of the phi-function used in the interaction term.
	/// In the case of binary neurons the phi-function is just the identity, so the state is returned unchanged.
	///
	/// @param state the state matrix of the neuron layer
	/// @return the value of the phi-function (the state itself)
	template<class Matrix>
	Matrix const& phi(Matrix const& state)const{
		SIZE_CHECK(state.size2() == size());
		return state;
	}
200
201
202 /// \brief Returns the conditional expectation of the phi-function given the state of the connected layer,
203 /// i.e. in this case the probabilities of the neurons having state one.
204 ///
205 /// @param statistics the sufficient statistics of the layer
206 RealMatrix const& expectedPhiValue(StatisticsBatch const& statistics)const{
207 return statistics;
208 }
209
	/// \brief Returns the mean given the state of the connected layer, i.e. in this case the probabilities of the neurons having state one.
	///
	/// @param statistics the sufficient statistics of the layer for a whole batch
	RealMatrix const& mean(StatisticsBatch const& statistics)const{
		SIZE_CHECK(statistics.size2() == size());
		return statistics;
	}
217
	/// \brief Returns the energy term this neuron adds to the energy function.
	///
	/// @param state the state of the neuron layer
	/// @param beta the inverse temperature of the i-th state
	/// @return the energy term of the neuron layer
	template<class Matrix, class BetaVector>
	RealVector energyTerm(Matrix const& state, BetaVector const& beta)const{
		SIZE_CHECK(state.size2() == size());
		SIZE_CHECK(state.size1() == beta.size());
		//the following code does for batches the equivalent thing to:
		//return inner_prod(m_bias,state)
		RealVector energies = prod(state,m_bias);
		RealVector baseRateEnergies = prod(state,m_baseRate);
		//elementwise tempering: beta fades between the RBM energy and the base-rate energy
		noalias(energies) = beta*energies +(1-beta)*baseRateEnergies;

		return energies;
	}
235
236
237 ///\brief Sums over all possible values of the terms of the energy function which depend on the this layer and returns the logarithmic result.
238 ///
239 ///This function is called by Energy when the unnormalized marginal probability of the connected layer is to be computed.
240 ///This function calculates the part which depends on the neurons which are to be marginalized out.
241 ///(In the case of the binary hidden neuron, this is the term \f$ \sum_h e^{\vec h^T W \vec v+ \vec h^T \vec c} \f$).
242 ///The rest is calculated by the energy function.
243 ///In the general case of a hidden layer, this function calculates \f$ \int_h e^(\phi_h(\vec h)^T W \phi_v(\vec v)+f_h(\vec h) ) \f$
244 ///where f_h is the energy term of this layer.
245 ///
246 /// @param inputs the inputs of the neurons they get from the other layer
247 /// @param beta the inverse temperature of the RBM
248 /// @return the marginal distribution of the connected layer
249 template<class Input>
250 double logMarginalize(Input const& inputs, double beta) const{
251 SIZE_CHECK(inputs.size() == size());
252 long double logFactorization = 0;
253 for(std::size_t i = 0; i != inputs.size(); ++i){
254 double arg = (inputs(i)+m_bias(i))*beta+(1-beta)*m_baseRate(i);
255 //~ double arg = (inputs(i)+m_bias(i))*beta;
256 logFactorization += softPlus(arg);
257 }
258 return logFactorization;
259 }
260
	///\brief Calculates the expectation of the derivatives of the energy term of this neuron layer with respect to its parameters - the bias weights.
	/// The expectation is taken with respect to the conditional probability distribution of the layer given the state of the connected layer.
	///
	///This function takes a batch of samples and weights the results
	///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
	///@param samples the samples from which the information can be extracted
	///@param weights the weights for all samples
	template<class Vector, class SampleBatch, class WeightVector>
	void expectedParameterDerivative(Vector& derivative, SampleBatch const& samples, WeightVector const& weights )const{
		SIZE_CHECK(derivative.size() == size());
		//weighted sum of the per-sample statistics (expected states)
		noalias(derivative) += prod(weights,samples.statistics);
	}
273
	///\brief Calculates the derivatives of the energy term of this neuron layer with respect to its parameters - the bias weights.
	///
	///This function takes a batch of samples and calculates a weighted derivative
	///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
	///@param samples the sample from which the information can be extracted
	///@param weights the weights for the single sample derivatives
	template<class Vector, class SampleBatch, class WeightVector>
	void parameterDerivative(Vector& derivative, SampleBatch const& samples, WeightVector const& weights)const{
		SIZE_CHECK(derivative.size() == size());
		//weighted sum of the actual sampled states (not their expectations)
		noalias(derivative) += prod(weights,samples.state);
	}
285
	/// \brief Returns the vector with the parameters associated with the neurons in the layer, i.e. a copy of the bias vector.
	RealVector parameterVector()const{
		return m_bias;
	}
290
291 /// \brief Sets the parameters associated with the neurons in the layer, i.e. the bias vector.
292 void setParameterVector(RealVector const& newParameters){
293 m_bias = newParameters;
294 }
295
	/// \brief Returns the number of the parameters associated with the neurons in the layer (one bias per neuron).
	std::size_t numberOfParameters()const{
		return size();
	}
300
	/// \brief Reads the bias vector from an archive.
	///
	/// @param archive the archive
	void read( InArchive & archive ){
		archive >> m_bias;
		//the base rate is not serialized (see write()); reset it to zero
		m_baseRate = RealVector(m_bias.size(),0);
	}
308
	/// \brief Writes the bias vector to an archive.
	///
	/// Only the bias is stored; the base rate is reconstructed as zero on read().
	///
	/// @param archive the archive
	void write( OutArchive & archive ) const{
		archive << m_bias;
	}
};
} // namespace shark
#endif // SHARK_UNSUPERVISED_RBM_NEURONLAYERS_BINARYLAYER_H