// Energy.h
/*!
 *
 *
 * \brief Energy function of an RBM, defining its Gibbs distribution.
 *
 * \author -
 * \date -
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_UNSUPERVISED_RBM_ENERGY_H
#define SHARK_UNSUPERVISED_RBM_ENERGY_H

#include <shark/LinAlg/Base.h>

namespace shark{

/// \brief The Energy function determining the Gibbs distribution of an RBM.
///
/// General Energy function which uses the information given by the neurons to automate
/// the calculation of the value of the energy for certain states, the derivative of the energy
/// and the factorization of the probability.
///
/// Following (but slightly simplifying from the formulas given by)
/// Welling et al. a general form of an RBM's Energy function is given by
/// \f$ E(\vec v,\vec h)= f_h(\vec h) + f_v(\vec v) + \sum_{k,l} \phi_{hk}(\vec h) W_{k,l} \phi_{vl}(\vec v) \f$
/// We call \f$ f_h(\vec h) \f$ and \f$ f_v(\vec v) \f$ the term of the Energy (energy term)
/// associated to the hidden or the visible neurons respectively.
/// \f$ \sum_{k,l} \phi_{hk}(\vec h) W_{k,l} \phi_{vl}(\vec v) \f$ is called the interaction term.
/// In the standard case of a binary RBM we have \f$ f_h(\vec h) = \vec h \vec c \f$
/// and \f$ f_v(\vec v) = \vec v \vec b \f$, where \f$ \vec c \f$ and \f$ \vec b \f$
/// are the vectors of the bias parameters for the hidden and the visible neurons respectively.
/// Furthermore, the interaction term simplifies to \f$ \vec h W \vec v \f$, so we have just
/// one single 'phi-function' for each layer that is the identity function.

56template<class RBM>
57struct Energy{
58 typedef typename RBM::HiddenType HiddenType; //< type of the hidden layer
59 typedef typename RBM::VisibleType VisibleType; //< type of the visible layer
60
61 //typedefs for single element
62 typedef typename HiddenType::SufficientStatistics HiddenStatistics;
63 typedef typename VisibleType::SufficientStatistics VisibleStatistics;
64
65 //batch typedefs
66 typedef typename HiddenType::StatisticsBatch HiddenStatisticsBatch;
67 typedef typename VisibleType::StatisticsBatch VisibleStatisticsBatch;
68
69 Energy(RBM const& rbm)
70 : m_rbm(rbm)
71 , m_hiddenNeurons(rbm.hiddenNeurons())
72 , m_visibleNeurons(rbm.visibleNeurons()){}
73
74 ///\brief Calculates the Energy given the states of batches of hidden and visible variables .
75 RealVector energy(RealMatrix const& hidden, RealMatrix const& visible)const{
76 SIZE_CHECK(visible.size1() == hidden.size1());
77
78 std::size_t batchSize = visible.size1();
79 RealMatrix input(batchSize,m_hiddenNeurons.size());
80 inputHidden( input, visible);
81
82 return energyFromHiddenInput( input, hidden, visible);
83 }
84
85 ///\brief Calculates the input of the hidden neurons given the state of the visible in a batch-vise fassion.
86 ///
87 ///@param inputs the batch of vectors the input of the hidden neurons is stored in
88 ///@param visibleStates the batch of states of the visible neurons@
89 ///@todo Remove this and replace fully by the rbm method if possible
90 void inputHidden(RealMatrix& inputs, RealMatrix const& visibleStates)const{
91 m_rbm.inputHidden(inputs,visibleStates);
92 }
93
94
95 ///\brief Calculates the input of the visible neurons given the state of the hidden.
96 ///
97 ///@param inputs the vector the input of the visible neurons is stored in
98 ///@param hiddenStates the state of the hidden neurons
99 ///@todo Remove this and replace fully by the rbm method if possible
100 void inputVisible(RealMatrix& inputs, RealMatrix const& hiddenStates)const{
101 m_rbm.inputVisible(inputs,hiddenStates);
102 }
103
104 ///\brief Computes the logarithm of the unnormalized probability of each state of the
105 /// hidden neurons in a batch by using the precomputed input/activation of the visible neurons.
106 ///
107 ///@param hiddenState the batch of states of the hidden neurons
108 ///@param visibleInput the batch of current inputs for he visible units given hiddenState
109 ///@param beta the inverse temperature
110 ///@return the unnormalized probability
111 template<class BetaVector>
113 RealMatrix const& hiddenState,
114 RealMatrix const& visibleInput,
115 BetaVector const& beta
116 )const{
117 SIZE_CHECK(hiddenState.size1()==visibleInput.size1());
118 SIZE_CHECK(hiddenState.size1()==beta.size());
119 std::size_t batchSize = hiddenState.size1();
120
121 //calculate the energy terms of the hidden neurons for the whole batch
122 RealVector energyTerms = m_hiddenNeurons.energyTerm(hiddenState,beta);
123
124 //calculate resulting probabilities in sequence
125 RealVector p(batchSize);
126 for(std::size_t i = 0; i != batchSize; ++i){
127 p(i) = m_visibleNeurons.logMarginalize(row(visibleInput,i),beta(i))+energyTerms(i);
128 }
129 return p;
130 }
131
132
133 ///\brief Computes the logarithm of the unnormalized probability of each state of the
134 /// visible neurons in a batch by using the precomputed input/activation of the hidden neurons.
135 ///
136 ///@param visibleState the batch of states of the hidden neurons
137 ///@param hiddenInput the batch of current inputs for he visible units given visibleState
138 ///@param beta the inverse temperature
139 ///@return the unnormalized probability
140 template<class BetaVector>
142 RealMatrix const& visibleState,
143 RealMatrix const& hiddenInput,
144 BetaVector const& beta
145 )const{
146 SIZE_CHECK(visibleState.size1()==hiddenInput.size1());
147 SIZE_CHECK(visibleState.size1()==beta.size());
148 std::size_t batchSize = visibleState.size1();
149
150 //calculate the energy terms of the visible neurons for the whole batch
151 RealVector energyTerms = m_visibleNeurons.energyTerm(visibleState,beta);
152
153 RealVector p(batchSize);
154 for(std::size_t i = 0; i != batchSize; ++i){
155 p(i) = m_hiddenNeurons.logMarginalize(row(hiddenInput,i),beta(i))+energyTerms(i);
156 }
157 return p;
158 }
159
160
161 ///\brief Computes the logarithm of the unnormalized probability for each state of the visible neurons from a batch.
162 ///
163 ///@param visibleStates the batch of states of the hidden neurons
164 ///@param beta the inverse temperature
165 template<class BetaVector>
166 RealVector logUnnormalizedProbabilityVisible(RealMatrix const& visibleStates, BetaVector const& beta)const{
167 SIZE_CHECK(visibleStates.size1() == beta.size());
168
169 RealMatrix hiddenInputs(beta.size(),m_hiddenNeurons.size());
170 inputHidden(hiddenInputs,visibleStates);
171 return logUnnormalizedProbabilityVisible(visibleStates, hiddenInputs, beta);
172 }
173
174 ///\brief Computes the logarithm of the unnormalized probability of each state of the hidden neurons from a batch.
175 ///
176 ///@param hiddenStates a batch of states of the hidden neurons
177 ///@param beta the inverse temperature
178 template<class BetaVector>
179 RealVector logUnnormalizedProbabilityHidden(RealMatrix const& hiddenStates, BetaVector const& beta)const{
180 SIZE_CHECK(hiddenStates.size1() == beta.size());
181
182 RealMatrix visibleInputs(beta.size(),m_visibleNeurons.size());
183 inputVisible(visibleInputs,hiddenStates);
184 return logUnnormalizedProbabilityHidden(hiddenStates, visibleInputs, beta);
185 }
186
187 ///\brief Optimization of the calculation of the energy, when the input of the hidden units is already available.
188 ///@param hiddenInput the vector of inputs of the hidden neurons
189 ///@param hidden the states of the hidden neurons
190 ///@param visible the states of the visible neurons
191 ///@return the value of the energy function
193 RealMatrix const& hiddenInput,
194 RealMatrix const& hidden,
195 RealMatrix const& visible
196 )const{
197 RealMatrix const& phiOfH = m_hiddenNeurons.phi(hidden);
198 std::size_t batchSize = hiddenInput.size1();
199 RealVector energies(batchSize);
200 for(std::size_t i = 0; i != batchSize; ++i){
201 energies(i) = -inner_prod(row(hiddenInput,i),row(phiOfH,i));
202 }
203 energies -= m_hiddenNeurons.energyTerm(hidden,blas::repeat(1.0,batchSize));
204 energies -= m_visibleNeurons.energyTerm(visible,blas::repeat(1.0,batchSize));
205 return energies;
206 }
207
208
209 ///\brief Optimization of the calculation of the energy, when the input of the visible units is already available.
210 ///@param visibleInput the vector of inputs of the visible neurons
211 ///@param hidden the states of the hidden neurons
212 ///@param visible the states of the visible neurons
213 ///@return the value of the energy function
215 RealMatrix const& visibleInput,
216 RealMatrix const& hidden,
217 RealMatrix const& visible
218 )const{
219 RealMatrix const& phiOfV = m_visibleNeurons.phi(visible);
220 std::size_t batchSize = visibleInput.size1();
221 RealVector energies(batchSize);
222 for(std::size_t i = 0; i != batchSize; ++i){
223 energies(i) = -inner_prod(row(phiOfV,i),row(visibleInput,i));
224 }
225 energies -= m_hiddenNeurons.energyTerm(hidden,blas::repeat(1.0,batchSize));
226 energies -= m_visibleNeurons.energyTerm(visible,blas::repeat(1.0,batchSize));
227 return energies;
228 }
229private:
230 RBM const& m_rbm;
231 HiddenType const& m_hiddenNeurons;
232 VisibleType const& m_visibleNeurons;
233};

}
#endif