GaussianLayer.h
Go to the documentation of this file.
1/*!
2 *
3 *
4 * \brief -
5 *
6 * \author -
7 * \date -
8 *
9 *
10 * \par Copyright 1995-2017 Shark Development Team
11 *
12 * <BR><HR>
13 * This file is part of Shark.
14 * <https://shark-ml.github.io/Shark/>
15 *
16 * Shark is free software: you can redistribute it and/or modify
17 * it under the terms of the GNU Lesser General Public License as published
18 * by the Free Software Foundation, either version 3 of the License, or
19 * (at your option) any later version.
20 *
21 * Shark is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU Lesser General Public License for more details.
25 *
26 * You should have received a copy of the GNU Lesser General Public License
27 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28 *
29 */
30#ifndef SHARK_UNSUPERVISED_RBM_NEURONLAYERS_GAUSSIANLAYER_H
31#define SHARK_UNSUPERVISED_RBM_NEURONLAYERS_GAUSSIANLAYER_H
32
33#include <shark/LinAlg/Base.h>
35#include <shark/Core/Random.h>
38#include <shark/Core/Math.h>
40#include <shark/Core/OpenMP.h>
41namespace shark{
42
///\brief A layer of Gaussian neurons.
///
/// For a Gaussian neuron/variable the conditional probability distribution of the
/// state of the variable given the state of the other layer is given by a Gaussian
/// distribution with the input of the neuron as mean and unit variance.
// NOTE(review): the class declaration line and the StateSpace/StatisticsBatch
// typedef lines appear to be missing from this extracted listing — confirm
// against the original header before relying on this view.
private:
	RealVector m_bias; ///< the bias terms associated with the neurons
public:
	///the state space of this neuron is real-valued (Gaussian units are continuous)
	
	///\brief The sufficient statistics for the Gaussian Layer stores the mean of the neuron and the inverse temperature
	typedef RealVector SufficientStatistics;
	///\brief Sufficient statistics of a batch of data.
	
60 /// \brief Returns the bias values of the units.
61 const RealVector& bias()const{
62 return m_bias;
63 }
64 /// \brief Returns the bias values of the units.
65 RealVector& bias(){
66 return m_bias;
67 }
68
69 ///\brief Resizes this neuron layer.
70 ///
71 ///@param newSize number of neurons in the layer
72 void resize(std::size_t newSize){
73 m_bias.resize(newSize);
74 }
75
76 ///\brief Returns the number of neurons of this layer.
77 std::size_t size()const{
78 return m_bias.size();
79 }
80
81 /// \brief Takes the input of the neuron and estimates the expectation of the response of the neuron.
82 ///
83 /// @param input the batch of inputs of the neuron
84 /// @param statistics sufficient statistics containing the mean of the resulting Gaussian distribution
85 /// @param beta the inverse Temperature of the RBM (typically 1) for the whole batch
86 template<class Input, class BetaVector>
87 void sufficientStatistics(Input const& input, StatisticsBatch& statistics,BetaVector const& beta)const{ // \todo: auch hier noch mal namen ueberdenken
88 SIZE_CHECK(input.size2() == size());
89 SIZE_CHECK(statistics.size2() == size());
90 SIZE_CHECK(input.size1() == statistics.size1());
91
92 for(std::size_t i = 0; i != input.size1(); ++i){
93 noalias(row(statistics,i)) = row(input,i)*beta(i)+m_bias;
94 }
95 }
96
97
98 /// \brief Given a the precomputed statistics (the mean of the Gaussian), the elements of the vector are sampled.
99 /// This happens either with Gibbs-Sampling or Flip-the-State sampling.
100 /// For alpha= 0 gibbs sampling is performed. That is the next state for neuron i is directly taken from the conditional distribution of the i-th neuron.
101 /// In the case of alpha=1, flip-the-state sampling is performed, which takes the last state into account and tries to do deterministically jump
102 /// into states with higher probability. THIS IS NOT IMPLEMENTED YET and alpha is ignored!
103 ///
104 ///
105 /// @param statistics sufficient statistics containing the mean of the conditional Gaussian distribution of the neurons
106 /// @param state the state matrix that will hold the sampled states
107 /// @param alpha factor changing from gibbs to flip-the state sampling. 0<=alpha<=1
108 /// @param rng the random number generator used for sampling
109 template<class Matrix, class Rng>
110 void sample(StatisticsBatch const& statistics, Matrix& state, double alpha, Rng& rng) const{
111 SIZE_CHECK(statistics.size2() == size());
112 SIZE_CHECK(statistics.size1() == state.size1());
113 SIZE_CHECK(statistics.size2() == state.size2());
114
116 for(std::size_t i = 0; i != state.size1();++i){
117 for(std::size_t j = 0; j != state.size2();++j){
118 state(i,j) = random::gauss(rng,statistics(i,j), 1.0);
119 }
120 }
121 }
122 (void) alpha;
123 }
124
125 /// \brief Computes the log of the probability of the given states in the conditional distribution
126 ///
127 /// Currently it is only possible to compute the case with alpha=0
128 ///
129 /// @param statistics the statistics of the conditional distribution
130 /// @param state the state to check
131 template<class Matrix>
132 RealVector logProbability(StatisticsBatch const& statistics, Matrix const& state) const{
133 SIZE_CHECK(statistics.size2() == size());
134 SIZE_CHECK(statistics.size1() == state.size1());
135 SIZE_CHECK(statistics.size2() == state.size2());
136
137 RealVector logProbabilities(state.size1(),1.0);
138 for(std::size_t s = 0; s != state.size1();++s){
139 for(std::size_t i = 0; i != state.size2();++i){
140 logProbabilities(s) -= 0.5*sqr(statistics(s,i)-state(s,i));
141 }
142 }
143 return logProbabilities;
144 }
145
146 /// \brief Transforms the current state of the neurons for the multiplication with the weight matrix of the RBM,
147 /// i.e. calculates the value of the phi-function used in the interaction term.
148 /// In the case of Gaussian neurons the phi-function is just the identity.
149 ///
150 /// @param state the state matrix of the neuron layer
151 /// @return the value of the phi-function
152 template<class Matrix>
153 Matrix const& phi(Matrix const& state)const{
154 SIZE_CHECK(state.size2() == size());
155 return state;
156 }
157
158
159 /// \brief Returns the expectation of the phi-function.
160 /// @param statistics the sufficient statistics (the mean of the distribution).
161 RealMatrix const& expectedPhiValue(StatisticsBatch const& statistics)const{
162 SIZE_CHECK(statistics.size2() == size());
163 return statistics;
164 }
165 /// \brief Returns the mean given the state of the connected layer, i.e. in this case the mean of the Gaussian
166 ///
167 /// @param statistics the sufficient statistics of the layer for a whole batch
168 RealMatrix const& mean(StatisticsBatch const& statistics)const{
169 SIZE_CHECK(statistics.size2() == size());
170 return statistics;
171 }
172
173 /// \brief The energy term this neuron adds to the energy function for a batch of inputs.
174 ///
175 /// @param state the state of the neuron layer
176 /// @param beta the inverse temperature of the i-th state
177 /// @return the energy term of the neuron layer
178 template<class Matrix, class BetaVector>
179 RealVector energyTerm(Matrix const& state, BetaVector const& beta)const{
180 SIZE_CHECK(state.size2() == size());
181 SIZE_CHECK(state.size1() == beta.size());
182 //the following code does for batches the equivalent thing to:
183 //return beta * inner_prod(m_bias,state) - norm_sqr(state)/2.0;
184
185 std::size_t batchSize = state.size1();
186 RealVector energies = prod(state,m_bias);
187 noalias(energies) *= beta;
188 for(std::size_t i = 0; i != batchSize; ++i){
189 energies(i) -= norm_sqr(row(state,i))/2.0;
190 }
191 return energies;
192
193 }
194
195
196 ///\brief Sums over all possible values of the terms of the energy function which depend on the this layer and returns the logarithmic result.
197 ///
198 ///This function is called by Energy when the unnormalized marginal probability of the connected layer is to be computed.
199 ///This function calculates the part which depends on the neurons which are to be marginalized out.
200 ///(In the case of the binary hidden neuron, this is the term \f$ \log \sum_h e^{\vec h^T W \vec v+ \vec h^T \vec c} \f$).
201 ///The rest is calculated by the energy function.
202 ///In the general case of a hidden layer, this function calculates \f$ \log \int_h e^(\phi_h(\vec h)^T W \phi_v(\vec v)+f_h(\vec h) ) \f$
203 ///where f_h is the energy term of this.
204 ///
205 /// @param inputs the inputs of the neurons they get from the other layer
206 /// @param beta the inverse temperature of the RBM
207 /// @return the marginal distribution of the connected layer
208 template<class Input>
209 double logMarginalize(const Input& inputs, double beta) const{
210 SIZE_CHECK(inputs.size() == size());
211 double lnResult = 0;
212 double logNormalizationTerm = std::log(SQRT_2_PI) - 0.5 * std::log(beta);
213
214 for(std::size_t i = 0; i != size(); ++i){
215 lnResult += 0.5 * sqr(inputs(i)+m_bias(i))*beta;
216 lnResult += logNormalizationTerm;
217 }
218 return lnResult;
219 }
220
	///\brief Calculates the expected derivative of the energy term of this neuron layer with respect to its parameters — the bias weights.
	///
	///In contrast to parameterDerivative, the weighted derivative is accumulated from the
	///sufficient statistics (the conditional means) of the samples rather than their states.
	///@param derivative the derivative with respect to the parameters; the result is added on top of it to accumulate derivatives
	///@param samples the batch of samples; only samples.statistics is read
	///@param weights the weights for the single sample derivatives
	template<class Vector, class SampleBatch, class Vector2 >
	void expectedParameterDerivative(Vector& derivative, SampleBatch const& samples, Vector2 const& weights )const{
		SIZE_CHECK(derivative.size() == size());
		noalias(derivative) += prod(weights,samples.statistics);
	}
226
227 ///\brief Calculates the derivatives of the energy term of this neuron layer with respect to it's parameters - the bias weights.
228 ///
229 ///This function takes a batch of samples and calculates a weighted derivative
230 ///@param derivative the derivative with respect to the parameters, the result is added on top of it to accumulate derivatives
231 ///@param samples the sample from which the informations can be extracted
232 ///@param weights the weights for the single sample derivatives
233 template<class Vector, class SampleBatch, class WeightVector>
234 void parameterDerivative(Vector& derivative, SampleBatch const& samples, WeightVector const& weights)const{
235 SIZE_CHECK(derivative.size() == size());
236 noalias(derivative) += prod(weights,samples.state);
237 }
238
239 ///\brief Returns the vector with the parameters associated with the neurons in the layer.
240 RealVector parameterVector()const{
241 return m_bias;
242 }
243
	///\brief Sets the parameters (the bias values) associated with the neurons in the layer.
	///
	///@param newParameters the new bias vector; must have one entry per neuron
	void setParameterVector(RealVector const& newParameters){
		m_bias = newParameters;
	}
248
249 ///\brief Returns the number of the parameters associated with the neurons in the layer.
250 std::size_t numberOfParameters()const{
251 return size();
252 }
253
	/// \brief Reads the bias parameters from an archive (ISerializable interface).
	void read( InArchive & archive ){
		archive >> m_bias;
	}
	/// \brief Writes the bias parameters to an archive (ISerializable interface).
	void write( OutArchive & archive ) const{
		archive << m_bias;
	}
262};
263
264}
265#endif