ConcatenatedModel.h
//===========================================================================
/*!
 *
 *
 * \brief concatenation of two models, with type erasure
 *
 *
 *
 * \author O. Krause
 * \date 2010-2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODEL_CONCATENATEDMODEL_H
#define SHARK_MODEL_CONCATENATEDMODEL_H

#include <shark/Models/AbstractModel.h>
#include <boost/scoped_ptr.hpp>
#include <boost/serialization/scoped_ptr.hpp>

namespace shark {

///\brief ConcatenatedModel concatenates several models such that the output of one model becomes the input of the next.
///
///Sometimes a series of models is needed to generate the desired output, for example when input data needs to be
///normalized before it can be fed into the trained model. In this case, ConcatenatedModel can be used to
///represent this series as a single model.
///The easiest way to build the sequence is the operator >> of AbstractModel:
///    ConcatenatedModel<VectorType> model = model1 >> model2;
///The output of model1 must match the input of model2. Alternatively, submodels can be appended
///one by one via add():
///    model.add(&model3, true);
///warning: the submodels must outlive the ConcatenatedModel. If they are destroyed first, behavior is undefined.
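///
///A rough usage sketch (Normalizer and LinearModel stand in for any two compatible Shark models;
///the dimensions and the omitted training steps are purely illustrative):
///\code
/// Normalizer<RealVector> normalizer;          //assumed to be set up by a suitable trainer
/// LinearModel<RealVector> network(10, 1);     //maps the 10-dimensional normalized input to one output
/// ConcatenatedModel<RealVector> model = normalizer >> network;
/// RealMatrix inputs(100, 10);                 //a batch of 100 points
/// RealMatrix predictions = model(inputs);     //runs the normalizer first, then the network
///\endcode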
///
/// \ingroup models
template<class VectorType>
class ConcatenatedModel: public AbstractModel<VectorType, VectorType, VectorType> {
private:
	typedef AbstractModel<VectorType, VectorType, VectorType> base_type;
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::BatchOutputType BatchOutputType;
	typedef typename base_type::ParameterVectorType ParameterVectorType;

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "ConcatenatedModel"; }


	///\brief Returns the expected shape of the input
	Shape inputShape() const{
		return m_layers.front().model->inputShape();
	}
	///\brief Returns the shape of the output
	Shape outputShape() const{
		return m_layers.back().model->outputShape();
	}

	///\brief Appends a model to the end of the sequence; optimize controls whether its parameters are exposed for training.
	void add(AbstractModel<VectorType, VectorType, VectorType>* layer, bool optimize){
		m_layers.push_back({layer,optimize});
		enableModelOptimization(m_layers.size()-1, optimize);//recompute capabilities
	}

	///\brief Sets whether the parameters of the index-th model should be optimized.
	///
	/// If the model has non-differentiable submodels, disabling those will make
	/// the whole model differentiable.
	/// Note that the models are ordered as model0 >> model1 >> model2 >> ...
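	///
	/// Example (illustrative; assumes the concatenation was built as normalizer >> network and that
	/// the normalizer at position 0 should stay fixed during training):
	/// \code
	/// model.enableModelOptimization(0, false); //its parameters no longer appear in parameterVector()
	/// \endcode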
	void enableModelOptimization(std::size_t index, bool opt){
		SIZE_CHECK(index < m_layers.size());
		m_layers[index].optimize = opt;
		this->m_features.reset();
		bool inputDerivative = true;
		bool parameterDerivative = true;
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			auto const& layer = m_layers[m_layers.size() - k - 1];//we iterate backwards through the layers
			if( layer.optimize && (!layer.model->hasFirstParameterDerivative() || !inputDerivative)){
				parameterDerivative = false;
			}
			if( !layer.model->hasFirstInputDerivative()){
				inputDerivative = false;
			}
		}

		if (parameterDerivative){
			this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		}

		if (inputDerivative){
			this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
		}

	}
	ParameterVectorType parameterVector() const {
		ParameterVectorType params(numberOfParameters());
		std::size_t pos = 0;
		for(auto layer: m_layers){
			if(!layer.optimize) continue;
			ParameterVectorType layerParams = layer.model->parameterVector();
			noalias(subrange(params,pos,pos+layerParams.size())) = layerParams;
			pos += layerParams.size();
		}
		return params;
	}

	void setParameterVector(ParameterVectorType const& newParameters) {
		std::size_t pos = 0;
		for(auto layer: m_layers){
			if(!layer.optimize) continue;
			ParameterVectorType layerParams = subrange(newParameters,pos,pos+layer.model->numberOfParameters());
			layer.model->setParameterVector(layerParams);
			pos += layerParams.size();
		}
	}

	std::size_t numberOfParameters() const{
		std::size_t numParams = 0;
		for(auto layer: m_layers){
			if(!layer.optimize) continue;
			numParams += layer.model->numberOfParameters();
		}
		return numParams;
	}

	boost::shared_ptr<State> createState()const{
		InternalState* state = new InternalState;
		for(std::size_t i = 0; i != m_layers.size(); ++i){
			state->state.push_back(m_layers[i].model->createState());
			state->intermediates.push_back(BatchOutputType());
		}
		return boost::shared_ptr<State>(state);
	}

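	///\brief Returns the outputs of the index-th submodel as recorded during the last eval call that took a state object.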
	BatchOutputType const& hiddenResponses(State const& state, std::size_t index)const{
		InternalState const& s = state.toState<InternalState>();
		return s.intermediates[index];
	}

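	///\brief Returns the internal state of the index-th submodel as recorded during the last eval call that took a state object.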
	State const& hiddenState(State const& state, std::size_t index)const{
		InternalState const& s = state.toState<InternalState>();
		return *s.state[index];
	}

	using base_type::eval;
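	///\brief Evaluates the sequence by feeding the patterns through all submodels in order.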
	void eval(BatchInputType const& patterns, BatchOutputType& outputs)const {
		BatchOutputType intermediates;
		outputs = patterns;
		for(auto layer: m_layers){
			swap(intermediates,outputs);
			layer.model->eval(intermediates,outputs);
		}
	}
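	///\brief Evaluates the sequence and additionally records every submodel's intermediate outputs and state for later derivative computations.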
	void eval(BatchInputType const& patterns, BatchOutputType& outputs, State& state)const{
		InternalState& s = state.toState<InternalState>();
		outputs = patterns;
		for(std::size_t i = 0; i != m_layers.size(); ++i){
			if(i == 0)
				m_layers[i].model->eval(patterns,s.intermediates[i], *s.state[i]);
			else
				m_layers[i].model->eval(s.intermediates[i-1],s.intermediates[i], *s.state[i]);
		}
		outputs = s.intermediates.back();
	}

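	///\brief Back-propagates the coefficients through the sequence and collects the weighted parameter derivative of every submodel that is enabled for optimization.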
	void weightedParameterDerivative(
		BatchInputType const& patterns,
		BatchOutputType const & outputs,
		BatchOutputType const& coefficients,
		State const& state,
		ParameterVectorType& gradient
	)const{
		InternalState const& s = state.toState<InternalState>();
		BatchOutputType inputDerivativeLast;
		BatchOutputType inputDerivative = coefficients;
		gradient.resize(numberOfParameters());
		std::size_t paramEnd = gradient.size();
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			std::size_t i = m_layers.size() - k - 1;//we iterate backwards through the layers
			BatchInputType const* pInput = &patterns;
			if(i != 0)
				pInput = &s.intermediates[i-1];

			swap(inputDerivativeLast,inputDerivative);
			//if the current layer does not need to be optimized, we just check whether we have to compute the chain rule
			if(!m_layers[i].optimize || m_layers[i].model->numberOfParameters() == 0){
				if(i != 0) //check, if we are done, the input layer does not need to compute anything
					m_layers[i].model->weightedInputDerivative(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], inputDerivative);
			}else{
				ParameterVectorType paramDerivative;
				if(i != 0){//if we are in an intermediate layer, compute the chain rule
					m_layers[i].model->weightedDerivatives(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative,inputDerivative);
				}
				else{//lowest layer only needs to compute the parameter derivative
					m_layers[i].model->weightedParameterDerivative(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative);
				}
				noalias(subrange(gradient,paramEnd - paramDerivative.size(),paramEnd)) = paramDerivative;
				paramEnd -= paramDerivative.size();
			}
		}
	}

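	///\brief Back-propagates the coefficients through the whole sequence and returns the weighted derivative with respect to the inputs.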
	void weightedInputDerivative(
		BatchInputType const& patterns,
		BatchOutputType const & outputs,
		BatchOutputType const& coefficients,
		State const& state,
		BatchOutputType& derivatives
	)const{
		InternalState const& s = state.toState<InternalState>();
		BatchOutputType derivativeLast;
		derivatives = coefficients;
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			std::size_t i = m_layers.size() - k - 1;//we iterate backwards through the layers

			BatchInputType const* pInput = &patterns;
			if(i != 0)
				pInput = &s.intermediates[i-1];

			swap(derivativeLast,derivatives);
			m_layers[i].model->weightedInputDerivative(*pInput,s.intermediates[i], derivativeLast, *s.state[i], derivatives);
		}
	}

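	///\brief Computes weighted parameter and input derivatives in one backward pass through the sequence.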
	void weightedDerivatives(
		BatchInputType const & patterns,
		BatchOutputType const & outputs,
		BatchOutputType const & coefficients,
		State const& state,
		ParameterVectorType& gradient,
		BatchInputType& inputDerivative
	)const{
		InternalState const& s = state.toState<InternalState>();
		BatchOutputType inputDerivativeLast;
		inputDerivative = coefficients;
		gradient.resize(numberOfParameters());
		std::size_t paramEnd = gradient.size();
		for(std::size_t k = 0; k != m_layers.size(); ++k){
			std::size_t i = m_layers.size() - k - 1;//we iterate backwards through the layers
			BatchInputType const* pInput = &patterns;
			if(i != 0)
				pInput = &s.intermediates[i-1];

			swap(inputDerivativeLast,inputDerivative);
			//if the current layer does not need to be optimized, we only propagate the chain rule
			if(!m_layers[i].optimize || m_layers[i].model->numberOfParameters() == 0){
				m_layers[i].model->weightedInputDerivative(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], inputDerivative);
			}else{
				ParameterVectorType paramDerivative;
				m_layers[i].model->weightedDerivatives(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative,inputDerivative);
				noalias(subrange(gradient,paramEnd - paramDerivative.size(),paramEnd)) = paramDerivative;
				paramEnd -= paramDerivative.size();
			}
		}
	}

	/// From ISerializable
	void read( InArchive & archive ){
		for(auto& layer: m_layers){//reference needed so that the stored optimize flag is actually restored
			archive >> *layer.model;
			archive >> layer.optimize;
		}
	}

	/// From ISerializable
	void write( OutArchive & archive ) const{
		for(auto layer: m_layers){
			archive << *layer.model;
			archive << layer.optimize;
		}
	}
private:
	struct Layer{
		AbstractModel<VectorType, VectorType, VectorType>* model;
		bool optimize;
	};
	std::vector<Layer> m_layers;

	struct InternalState: State{
		std::vector<boost::shared_ptr<State> > state;
		std::vector<BatchOutputType> intermediates;
	};
};


///\brief Connects two AbstractModels so that the output of the first model is the input of the second.
template<class VectorType>
ConcatenatedModel<VectorType> operator>>(
	AbstractModel<VectorType, VectorType, VectorType>& firstModel,
	AbstractModel<VectorType, VectorType, VectorType>& secondModel
){
	ConcatenatedModel<VectorType> sequence;
	sequence.add(&firstModel, true);
	sequence.add(&secondModel, true);
	return sequence;
}

///\brief Appends a further model to an existing ConcatenatedModel, again connecting output to input.
template<class VectorType>
ConcatenatedModel<VectorType> operator>>(
	ConcatenatedModel<VectorType> const& firstModel,
	AbstractModel<VectorType, VectorType, VectorType>& secondModel
){
	ConcatenatedModel<VectorType> sequence = firstModel;
	sequence.add(&secondModel, true);
	return sequence;
}
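
//A chaining sketch: because the overload above accepts a ConcatenatedModel as the left operand,
//longer pipelines compose from left to right (model1, model2 and model3 are placeholders for any
//compatible AbstractModels that outlive the result):
//  ConcatenatedModel<RealVector> pipeline = model1 >> model2 >> model3;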


}
#endif