ExactGradient.h
/*!
 *
 *
 * \brief -
 *
 * \author -
 * \date -
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_UNSUPERVISED_RBM_GRADIENTAPPROXIMATIONS_EXACTGRADIENT_H
#define SHARK_UNSUPERVISED_RBM_GRADIENTAPPROXIMATIONS_EXACTGRADIENT_H

#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>
#include <shark/Unsupervised/RBM/analytics.h>
#include <shark/Unsupervised/RBM/Sampling/GibbsOperator.h>

namespace shark{

template<class RBMType>
class ExactGradient: public SingleObjectiveFunction{
private:
	typedef GibbsOperator<RBMType> Gibbs;
public:
	typedef RBMType RBM;

	ExactGradient(RBM* rbm): mpe_rbm(rbm),m_regularizer(0){
		SHARK_ASSERT(rbm != NULL);

		m_features |= HAS_FIRST_DERIVATIVE;
		m_features |= CAN_PROPOSE_STARTING_POINT;
	};

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "ExactGradient"; }

	void setData(UnlabeledData<RealVector> const& data){
		m_data = data;
	}

	SearchPointType proposeStartingPoint() const{
		return mpe_rbm->parameterVector();
	}

	std::size_t numberOfVariables()const{
		return mpe_rbm->numberOfParameters();
	}

	void setRegularizer(double factor, SingleObjectiveFunction* regularizer){
		m_regularizer = regularizer;
		m_regularizationStrength = factor;
	}

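	/// \brief Returns the mean negative log-likelihood of the data under the current model,
	/// \f[ -\frac{1}{N}\sum_{i=1}^N \log p(\mathbf{v}_i), \f]
	/// plus the regularizer weighted by its strength, if one is set.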
	double eval( SearchPointType const & parameter) const {
		mpe_rbm->setParameterVector(parameter);

		double negLogLikelihood = negativeLogLikelihood(*mpe_rbm,m_data)/m_data.numberOfElements();
		if(m_regularizer){
			negLogLikelihood += m_regularizationStrength * m_regularizer->eval(parameter);
		}
		return negLogLikelihood;
	}

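	/// \brief Returns the mean negative log-likelihood and computes its exact gradient.
	///
	/// The gradient is the difference between two expectations of the gradient of the
	/// log unnormalized probability \f$ \log p^* \f$, one under the model distribution
	/// and one under the data:
	/// \f[ \frac{\partial}{\partial \theta}\Big(-\frac{1}{N}\sum_{i=1}^N \log p(\mathbf{v}_i)\Big)
	///     = \Big\langle \frac{\partial \log p^*(\mathbf{v})}{\partial \theta} \Big\rangle_{p(\mathbf{v})}
	///     - \frac{1}{N}\sum_{i=1}^N \frac{\partial \log p^*(\mathbf{v}_i)}{\partial \theta}. \f]
	/// The model expectation is computed exactly by enumerating all states of the
	/// smaller of the two layers, which is feasible only for small RBMs.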
	double evalDerivative( SearchPointType const & parameter, FirstOrderDerivative & derivative ) const {
		mpe_rbm->setParameterVector(parameter);

		//the gradient approximation for the energy terms of the RBM
		typename RBM::GradientType empiricalExpectation(mpe_rbm);
		typename RBM::GradientType modelExpectation(mpe_rbm);

		Gibbs gibbsSampler(mpe_rbm);

		//calculate the expectation of the energy gradient with respect to the data
		double negLogLikelihood = 0;
		for(RealMatrix const& batch: m_data.batches()) {
			std::size_t currentBatchSize = batch.size1();
			typename Gibbs::HiddenSampleBatch hiddenSamples(currentBatchSize,mpe_rbm->numberOfHN());
			typename Gibbs::VisibleSampleBatch visibleSamples(currentBatchSize,mpe_rbm->numberOfVN());

			gibbsSampler.createSample(hiddenSamples,visibleSamples,batch);
			empiricalExpectation.addVH(hiddenSamples, visibleSamples);
			negLogLikelihood -= sum(mpe_rbm->energy().logUnnormalizedProbabilityVisible(
				batch,hiddenSamples.input,blas::repeat(1,currentBatchSize)
			));
		}

		//calculate the expectation of the energy gradient with respect to the model distribution
		if(mpe_rbm->numberOfVN() < mpe_rbm->numberOfHN()){
			integrateOverVisible(modelExpectation);
		}
		else{
			integrateOverHidden(modelExpectation);
		}

		derivative.resize(mpe_rbm->numberOfParameters());
		noalias(derivative) = modelExpectation.result() - empiricalExpectation.result();

		m_logPartition = modelExpectation.logWeightSum();
		negLogLikelihood/=m_data.numberOfElements();
		negLogLikelihood += m_logPartition;

		if(m_regularizer){
			FirstOrderDerivative regularizerDerivative;
			negLogLikelihood += m_regularizationStrength * m_regularizer->evalDerivative(parameter,regularizerDerivative);
			noalias(derivative) += m_regularizationStrength * regularizerDerivative;
		}

		return negLogLikelihood;
	}

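	/// \brief Returns the logarithm of the partition function computed as a by-product
	/// of the last call to evalDerivative.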
	long double getLogPartition(){
		return m_logPartition;
	}

private:
	RBM* mpe_rbm;

	SingleObjectiveFunction* m_regularizer;
	double m_regularizationStrength;

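	/// \brief Computes the model expectation of the gradient by summing over all
	/// states of the visible layer.
	///
	/// For every enumerated visible state the hidden layer is integrated out
	/// analytically; weighting each state by its unnormalized probability yields
	/// the exact expectation together with the partition function.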
	//batchwise loops over all states of the visible units to calculate the gradient as well as the partition function
	template<class GradientApproximator>//mostly dummy right now
	void integrateOverVisible(GradientApproximator & modelExpectation) const{

		Gibbs sampler(mpe_rbm);

		typedef typename RBM::VisibleType::StateSpace VisibleStateSpace;
		std::size_t values = VisibleStateSpace::numberOfStates(mpe_rbm->numberOfVN());
		std::size_t batchSize = std::min(values, std::size_t(256));

		for (std::size_t x = 0; x < values; x+=batchSize) {
			//create batch of states
			std::size_t currentBatchSize=std::min(batchSize,values-x);
			typename Batch<RealVector>::type stateBatch(currentBatchSize,mpe_rbm->numberOfVN());
			for(std::size_t elem = 0; elem != currentBatchSize;++elem){
				//generation of the x+elem-th state vector
				VisibleStateSpace::state(row(stateBatch,elem),x+elem);
			}

			//create sample from state batch
			typename Gibbs::HiddenSampleBatch hiddenBatch(currentBatchSize,mpe_rbm->numberOfHN());
			typename Gibbs::VisibleSampleBatch visibleBatch(currentBatchSize,mpe_rbm->numberOfVN());
			sampler.createSample(hiddenBatch,visibleBatch,stateBatch);

			//calculate probabilities and update
			RealVector logP = mpe_rbm->energy().logUnnormalizedProbabilityVisible(
				stateBatch,hiddenBatch.input,blas::repeat(1,currentBatchSize)
			);
			modelExpectation.addVH(hiddenBatch, visibleBatch, logP);
		}
	}

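	/// \brief Computes the model expectation of the gradient by summing over all
	/// states of the hidden layer.
	///
	/// Mirror image of integrateOverVisible: the visible layer is integrated out
	/// analytically for every enumerated hidden state.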
	//batchwise loops over all states of the hidden units to calculate the gradient as well as the partition function
	template<class GradientApproximator>//mostly dummy right now
	void integrateOverHidden(GradientApproximator & modelExpectation) const{

		Gibbs sampler(mpe_rbm);

		typedef typename RBM::HiddenType::StateSpace HiddenStateSpace;
		std::size_t values = HiddenStateSpace::numberOfStates(mpe_rbm->numberOfHN());
		std::size_t batchSize = std::min(values, std::size_t(256) );

		for (std::size_t x = 0; x < values; x+=batchSize) {
			//create batch of states
			std::size_t currentBatchSize=std::min(batchSize,values-x);
			typename Batch<RealVector>::type stateBatch(currentBatchSize,mpe_rbm->numberOfHN());
			for(std::size_t elem = 0; elem != currentBatchSize;++elem){
				//generation of the x+elem-th state vector
				HiddenStateSpace::state(row(stateBatch,elem),x+elem);
			}

			//create sample from state batch
			typename Gibbs::HiddenSampleBatch hiddenBatch(currentBatchSize,mpe_rbm->numberOfHN());
			typename Gibbs::VisibleSampleBatch visibleBatch(currentBatchSize,mpe_rbm->numberOfVN());
			hiddenBatch.state=stateBatch;
			sampler.precomputeVisible(hiddenBatch,visibleBatch, blas::repeat(1,currentBatchSize));

			//calculate probabilities and update
			RealVector logP = mpe_rbm->energy().logUnnormalizedProbabilityHidden(
				stateBatch,visibleBatch.input,blas::repeat(1,currentBatchSize)
			);
			modelExpectation.addHV(hiddenBatch, visibleBatch, logP);
		}
	}

	UnlabeledData<RealVector> m_data;

	mutable double m_logPartition; //the log partition function of the model distribution
};

}

#endif
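
Example: a minimal usage sketch in the style of the Shark RBM tutorials, showing how this objective function is typically combined with a small binary RBM and a gradient-based optimizer. It is not part of the header; the toy problem, layer sizes, learning rate, and iteration count are illustrative assumptions, and some identifiers differ between Shark versions (e.g. random::globalRng vs. Rng::globalRng, SteepestDescent<> vs. SteepestDescent).

#include <shark/Unsupervised/RBM/BinaryRBM.h>
#include <shark/Unsupervised/RBM/Problems/BarsAndStripes.h>
#include <shark/Unsupervised/RBM/GradientApproximations/ExactGradient.h>
#include <shark/Algorithms/GradientDescent/SteepestDescent.h>

using namespace shark;

int main(){
	//4x4 bars-and-stripes patterns: 16 visible units, 8 hidden units,
	//so the exact sum runs over the 2^8 hidden states
	BarsAndStripes problem;
	UnlabeledData<RealVector> data = problem.data();

	BinaryRBM rbm(random::globalRng);
	rbm.setStructure(16,8);
	initRandomUniform(rbm,-0.1,0.1);

	ExactGradient<BinaryRBM> gradient(&rbm);
	gradient.setData(data);

	SteepestDescent<> optimizer;
	optimizer.setLearningRate(0.1);
	optimizer.init(gradient); //uses the proposed starting point of the objective
	for(std::size_t i = 0; i != 1000; ++i){
		optimizer.step(gradient);
	}

	//the exact log partition function is available after the first gradient evaluation
	double logZ = gradient.getLogPartition();
	(void) logZ;
}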