BipolarLayer.h
/*!
 *
 * \brief Implements the bipolar (-1,1) state neuron layer
 *
 * \author Asja Fischer
 * \date 2013
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_UNSUPERVISED_RBM_NEURONLAYERS_BIPOLARLAYER_H
#define SHARK_UNSUPERVISED_RBM_NEURONLAYERS_BIPOLARLAYER_H

#include <shark/Unsupervised/RBM/StateSpaces/TwoStateSpace.h>
#include <shark/LinAlg/Base.h>
#include <shark/Data/BatchInterface.h>
#include <shark/Core/Random.h>
#include <shark/Core/ISerializable.h>
#include <shark/Core/IParameterizable.h>
#include <shark/Core/OpenMP.h>
namespace shark{

///\brief Layer of bipolar units taking values in {-1,1}.

///A neuron in a BipolarLayer takes values in {-1,1}. The conditional probability of a neuron being 1,
///given the states of the neurons in the connected layer, is determined by the sigmoid function
///of the input the neuron receives from the connected layer.
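///
///A minimal usage sketch (illustrative only; the batch sizes and the use of random::globalRng
///are assumptions, not part of this file):
///\code
///BipolarLayer layer;
///layer.resize(10);                                // layer of 10 neurons
///RealMatrix inputs(5,10);                         // batch of 5 inputs, filled by the connected layer
///BipolarLayer::StatisticsBatch statistics(5,10);  // receives P(h_i = 1 | input)
///RealVector beta(5,1.0);                          // inverse temperature 1 for each sample
///layer.sufficientStatistics(inputs,statistics,beta);
///RealMatrix states(5,10);
///layer.sample(statistics,states,0.0,random::globalRng); // alpha = 0: a plain Gibbs step
///\endcode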
class BipolarLayer : public ISerializable, public IParameterizable<>{
private:
	///\brief The bias terms associated with the neurons.
	RealVector m_bias;
public:
	///the state space of this neuron is the symmetric binary space {-1,1}
	typedef SymmetricBinarySpace StateSpace;

	///\brief The sufficient statistics for the bipolar layer store the probability for a neuron to be one
	typedef RealVector SufficientStatistics;
	///\brief Sufficient statistics of a batch of data.
	typedef Batch<SufficientStatistics>::type StatisticsBatch;

	/// \brief Returns the bias values of the units.
	const RealVector& bias()const{
		return m_bias;
	}

	/// \brief Returns the bias values of the units.
	RealVector& bias(){
		return m_bias;
	}

	///\brief Resizes this neuron layer.
	///
	///@param newSize number of neurons in the layer
	void resize(std::size_t newSize){
		m_bias.resize(newSize);
	}

	///\brief Returns the number of neurons of this layer.
	std::size_t size()const{
		return m_bias.size();
	}

	/// \brief Takes the input of the neuron and calculates the distribution of the neurons.
	/// For bipolar neurons this is identical to the conditional probability for the neuron to be one given the state of the connected layer.
	///
	/// @param input the batch of inputs of the neuron
	/// @param statistics sufficient statistics containing the probabilities of the neurons to be one
	/// @param beta the inverse temperature of the RBM (typically 1) for the whole batch
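	///
	/// Derivation sketch: a unit with states \f$ h \in \{-1,1\} \f$ and total input
	/// \f$ x = \text{input} + \text{bias} \f$ has
	/// \f$ P(h=1) = \frac{e^{\beta x}}{e^{\beta x}+e^{-\beta x}} = \mathrm{sig}(2\beta x), \f$
	/// which explains the factor 2 in the loop below; for binary 0/1 units the factor is absent.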
	template<class Input, class BetaVector>
	void sufficientStatistics(Input const& input, StatisticsBatch& statistics, BetaVector const& beta)const{ // \todo: reconsider the names here as well
		SIZE_CHECK(input.size2() == size());
		SIZE_CHECK(statistics.size2() == size());
		SIZE_CHECK(input.size1() == statistics.size1());

		for(std::size_t i = 0; i != input.size1(); ++i){
			noalias(row(statistics,i)) = sigmoid(2*(row(input,i)+m_bias)*beta(i));
		}
	}

	/// \brief Samples from the distribution using either Gibbs or flip-the-state sampling.
	///
	/// For alpha = 0, Gibbs sampling is performed. That is, the next state of neuron i is drawn directly from the conditional distribution of the i-th neuron.
	/// For alpha = 1, flip-the-state sampling is performed, which takes the last state into account and deterministically tries to jump
	/// into states with higher probability. This is counterbalanced by a higher chance to jump back into a lower probability state in later steps.
	/// For alpha between 0 and 1 a mixture of both is performed.
	///
	/// @param statistics sufficient statistics containing the probabilities of the neurons to be one
	/// @param state the state vector that shall hold the sampled states
	/// @param alpha factor interpolating between Gibbs (0) and flip-the-state (1) sampling, 0 <= alpha <= 1
	/// @param rng the random number generator used for sampling
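	///
	/// The case distinctions in the flip-the-state branch below set, with p = P(s_i = 1 | input),
	/// the probability of the next state being 1 to a mixture (1-alpha)*p + alpha*q, where q is a
	/// flip-biased transition probability (e.g. q = p/(1-p) for current state -1 and p <= 0.5),
	/// chosen so that the conditional distribution remains invariant under the sampling step.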
	template<class Matrix, class Rng>
	void sample(StatisticsBatch const& statistics, Matrix& state, double alpha, Rng& rng) const{
		SIZE_CHECK(statistics.size2() == size());
		SIZE_CHECK(statistics.size1() == state.size1());
		SIZE_CHECK(statistics.size2() == state.size2());

		SHARK_CRITICAL_REGION{//protects the shared rng when called from OpenMP parallel regions
			if(alpha == 0.0){//special case: plain Gibbs sampling
				for(std::size_t s = 0; s != state.size1();++s){
					for(std::size_t i = 0; i != state.size2();++i){
						state(s,i) = random::coinToss(rng,statistics(s,i));
						if(state(s,i)==0) state(s,i)=-1.;//map the outcome 0 of the coin toss to the bipolar state -1
					}
				}
			}
			else{//flip-the-state sampling
				for(std::size_t s = 0; s != state.size1(); ++s){
					for (std::size_t i = 0; i != state.size2(); i++) {
						double prob = statistics(s,i);//probability of the i-th neuron to be 1
						if (state(s,i) == -1) {
							if (prob <= 0.5) {
								prob = (1. - alpha) * prob + alpha * prob / (1. - prob);
							} else {
								prob = (1. - alpha) * prob + alpha;
							}
						} else {
							if (prob >= 0.5) {
								prob = (1. - alpha) * prob + alpha * (1. - (1. - prob) / prob);
							} else {
								prob = (1. - alpha) * prob;
							}
						}
						state(s,i) = random::coinToss(rng, prob);
						if(state(s,i)==0) state(s,i)=-1.;
					}
				}
			}
		}
	}

	/// \brief Computes the log of the probability of the given states in the conditional distribution
	///
	/// Currently only the case alpha = 0 can be computed.
	///
	/// @param statistics the statistics of the conditional distribution
	/// @param state the state to check
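	///
	/// Since the neurons are conditionally independent, the result for a state vector \f$ \vec s \f$ is
	/// \f$ \log P(\vec s) = \sum_i \left( [s_i = 1] \log p_i + [s_i = -1] \log(1-p_i) \right), \f$
	/// with \f$ p_i \f$ the stored probability of neuron i to be one.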
	template<class Matrix>
	RealVector logProbability(StatisticsBatch const& statistics, Matrix const& state) const{
		SIZE_CHECK(statistics.size2() == size());
		SIZE_CHECK(statistics.size1() == state.size1());
		SIZE_CHECK(statistics.size2() == state.size2());

		RealVector logProbabilities(state.size1(),0.0);//start at log(1) = 0 and accumulate
		for(std::size_t s = 0; s != state.size1();++s){
			for(std::size_t i = 0; i != state.size2();++i){
				logProbabilities(s) += (state(s,i) > 0.0)? std::log(statistics(s,i)) : std::log(1-statistics(s,i));
			}
		}
		return logProbabilities;
	}

	/// \brief Transforms the current state of the neurons for the multiplication with the weight matrix of the RBM,
	/// i.e. calculates the value of the phi-function used in the interaction term.
	/// In the case of bipolar neurons the phi-function is just the identity.
	///
	/// @param state the state matrix of the neuron layer
	/// @return the value of the phi-function
	template<class Matrix>
	Matrix const& phi(Matrix const& state)const{
		SIZE_CHECK(state.size2() == size());
		return state;
	}


	/// \brief Returns the conditional expectation of the phi-function given the state of the connected layer.
	///
	/// @param statistics the sufficient statistics of the layer
	RealMatrix expectedPhiValue(StatisticsBatch const& statistics)const{
		//calculation of the expectation: 1*P(h_i=1|v) - 1*(1-P(h_i=1|v)) = 2*P(h_i=1|v)-1
		return 2*statistics - 1;
	}

	/// \brief Returns the mean of the distribution
	///
	/// @param statistics the sufficient statistics of the layer for a whole batch
	RealMatrix mean(StatisticsBatch const& statistics)const{
		SIZE_CHECK(statistics.size2() == size());
		//calculation of the expectation: 1*P(h_i=1|v) - 1*(1-P(h_i=1|v)) = 2*P(h_i=1|v)-1
		return 2*statistics - 1;
	}

	/// \brief Returns the energy term this neuron adds to the energy function.
	///
	/// @param state the state of the neuron layer
	/// @param beta the vector of inverse temperatures, one for each state of the batch
	/// @return the energy term of the neuron layer
	template<class Matrix, class BetaVector>
	RealVector energyTerm(Matrix const& state, BetaVector const& beta)const{
		SIZE_CHECK(state.size2() == size());

		RealVector energies = beta*prod(state,m_bias);
		return energies;
	}


	///\brief Sums over all possible values of the terms of the energy function which depend on this layer and returns the logarithm of the result.
	///
	///This function is called by Energy when the unnormalized marginal probability of the connected layer is to be computed.
	///It calculates the part which depends on the neurons which are to be marginalized out.
	///(In the case of the bipolar hidden neuron, this is the term \f$ \sum_h e^{\vec h^T W \vec v+ \vec h^T \vec c} \f$.)
	///The rest is calculated by the energy function.
	///
	/// @param inputs the inputs the neurons receive from the other layer
	/// @param beta the inverse temperature of the RBM
	/// @return the logarithm of the marginalized terms
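	///
	/// Derivation of the numerically stable form used below: each unit contributes a factor
	/// \f$ e^{\beta x_i} + e^{-\beta x_i} \f$ with \f$ x_i = \text{input}_i + c_i \f$, and with
	/// \f$ a = |\beta x_i| \f$ its logarithm equals \f$ a + \log(1 + e^{-2a}) = a + \mathrm{softPlus}(-2a), \f$
	/// which avoids overflow of the exponentials for large inputs.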
	template<class Input>
	double logMarginalize(Input const& inputs, double beta) const{
		SIZE_CHECK(inputs.size() == size());
		long double logFactorization = 0;
		for(std::size_t i = 0; i != inputs.size(); ++i){
			long double arg = std::abs((inputs(i)+m_bias(i))*beta);
			logFactorization += softPlus(-2*arg)+arg;
		}
		return logFactorization;
	}

	///\brief Calculates the expectation of the derivatives of the energy term of this neuron layer with respect to its parameters - the bias weights.
	/// The expectation is taken with respect to the conditional probability distribution of the layer given the state of the connected layer.
	///
	///This function takes a batch of samples and weights the results.
	///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
	///@param samples the samples from which the information is extracted
	///@param weights the weights for all samples
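	///
	/// The closed form used below follows from \f$ \langle h_i \rangle = 2 p_i - 1 \f$ for bipolar units:
	/// the weighted batch expectation is \f$ \sum_s w_s (2 p_{s,i} - 1) = 2 (\vec w^T P)_i - \sum_s w_s, \f$
	/// i.e. 2*prod(weights,statistics) - sum(weights).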
	template<class Vector, class SampleBatch, class WeightVector>
	void expectedParameterDerivative(Vector& derivative, SampleBatch const& samples, WeightVector const& weights )const{
		SIZE_CHECK(derivative.size() == size());
		noalias(derivative) += 2*prod(weights,samples.statistics) - sum(weights);
	}

	///\brief Calculates the derivatives of the energy term of this neuron layer with respect to its parameters - the bias weights.
	///
	///This function takes a batch of samples and calculates a weighted derivative.
	///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
	///@param samples the samples from which the information is extracted
	///@param weights the weights for the single sample derivatives
	template<class Vector, class SampleBatch, class WeightVector>
	void parameterDerivative(Vector& derivative, SampleBatch const& samples, WeightVector const& weights)const{
		SIZE_CHECK(derivative.size() == size());
		noalias(derivative) += prod(weights,samples.state);
	}

	/// \brief Returns the vector with the parameters associated with the neurons in the layer, i.e. the bias vector.
	RealVector parameterVector()const{
		return m_bias;
	}

	/// \brief Sets the parameters associated with the neurons in the layer, i.e. the bias vector.
	void setParameterVector(RealVector const& newParameters){
		m_bias = newParameters;
	}

	/// \brief Returns the number of the parameters associated with the neurons in the layer.
	std::size_t numberOfParameters()const{
		return size();
	}

	/// \brief Reads the bias vector from an archive.
	///
	/// @param archive the archive
	void read( InArchive & archive ){
		archive >> m_bias;
	}

	/// \brief Writes the bias vector to an archive.
	///
	/// @param archive the archive
	void write( OutArchive & archive ) const{
		archive << m_bias;
	}
};

}
#endif