ConvolutionalModel.h
Go to the documentation of this file.
1/*!
2 *
3 *
4 * \brief Implements a model applying a convolution to an image
5 *
6 *
7 *
8 * \author
9 * \date 2017
10 *
11 *
12 * \par Copyright 1995-2017 Shark Development Team
13 *
14 * <BR><HR>
15 * This file is part of Shark.
16 * <https://shark-ml.github.io/Shark/>
17 *
18 * Shark is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU Lesser General Public License as published
20 * by the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
22 *
23 * Shark is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU Lesser General Public License for more details.
27 *
28 * You should have received a copy of the GNU Lesser General Public License
29 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
30 *
31 */
32#ifndef SHARK_MODELS_CONV2DModel_H
33#define SHARK_MODELS_CONV2DModel_H
34
40namespace shark {
41
42///
43/// \brief Convolutional Model for 2D image data.
44///
45/// \par
46/// This model computes the result of
47/// \f$ y = f(x) = g(\text{convolution}(w, x) + b) \f$, where g is an arbitrary activation function \ref activations and
48/// convolution is the convolution of the input image x with the filters w. b is a vector with one entry for each filter which is then applied to each response above
49///
50/// The image is allowed to have several channels and is linearized to a single vector of size width * height * numChannels.
51/// This is done by interleaving channels, i.e. for a pixel all channels are stored contiguously. Then the pixels are stored in
52/// a row-major scheme.
53///
54/// For handling edge condition, the Conv2D model handles two different convolution modes:
55///
56/// Padding::Valid:
57/// The output is only computed on patches which are fully inside the unpadded image as a linearized vector in the same format
58/// of size (width - filter_width+1) * (height - filter_height+1) * numFilters.
59///
60/// Padding::ZeroPad
61/// The input is padded with zeros and the output has the same size as the input
62/// of size width * height * numFilters.
63///
64/// \ingroup models
65template <class VectorType = RealVector, class ActivationFunction = LinearNeuron>
66class Conv2DModel : public AbstractModel<VectorType,VectorType,VectorType>{
67private:
70 typedef typename VectorType::value_type value_type;
71 typedef typename VectorType::device_type device_type;
72
// FIX: the guard must reject *sparse* storage, as the message states.
// Comparing against blas::dense_tag inverted the intent: with the leading
// '!', dense vectors (including the default RealVector) would be the ones
// rejected, while sparse vectors would pass.
73 static_assert(!std::is_same<typename VectorType::storage_type::storage_tag, blas::sparse_tag>::value, "Conv2D not implemented for sparse inputs");
74public:
78
79 /// Default Constructor; use setStructure later.
84
85 ///\brief Sets the structure by setting the dimensionalities of image and filters.
86 ///
87 /// \arg imageShape Shape of the image imHeight x imWidth x channel
88 /// \arg filterShape Shape of the filter matrix numFilters x fiHeight x fiWidth x channel
89 /// \arg type Type of convolution padding to perform
// NOTE(review): the constructor's opening signature line (original line 90,
// presumably "Conv2DModel(") was lost in extraction -- confirm against the
// upstream header. The body simply delegates all setup to setStructure().
91 Shape const& imageShape, Shape const& filterShape, Padding type = Padding::ZeroPad
92 ){
95 setStructure(imageShape, filterShape, type);
96 }
97
98 std::string name() const
99 { return "Conv2DModel"; }
100
101 ///\brief Returns the expected shape of the input
// NOTE(review): the opening signature line (original line 102, presumably
// "Shape inputShape() const{") is missing from this extraction -- confirm.
// The input is one linearized image: height x width x interleaved channels.
103 return {m_imageHeight, m_imageWidth, m_numChannels};
104 }
105 ///\brief Returns the shape of the output
// NOTE(review): the opening signature line (original line 106) is missing
// here as well. ZeroPad preserves the spatial size; Valid shrinks each
// spatial dimension by (filter size - 1), since only patches fully inside
// the image are evaluated.
107 if(m_type != Padding::Valid){
108 return {m_imageHeight, m_imageWidth, m_numFilters};
109 }else{
110 return {m_imageHeight - m_filterHeight + 1, m_imageWidth - m_filterWidth + 1, m_numFilters};
111 }
112 }
113
114 /// \brief Returns the activation function.
115 ActivationFunction const& activationFunction()const{
116 return m_activation;
117 }
118
// Mutable overload: allows callers to configure the activation in place.
119 /// \brief Returns the activation function.
120 ActivationFunction& activationFunction(){
121 return m_activation;
122 }
123
124 /// \brief Obtain the parameter vector.
// NOTE(review): the opening signature line (original line 125, presumably
// "ParameterVectorType parameterVector() const{") is missing -- confirm.
// Layout is all filter weights first, then one bias entry per filter;
// setParameterVector() relies on exactly this ordering.
126 return m_filters | m_offset;
127 }
128
129 /// \brief Set the new parameters of the model.
130 void setParameterVector(ParameterVectorType const& newParameters){
131 SIZE_CHECK(newParameters.size() == numberOfParameters());
132 noalias(m_filters) = subrange(newParameters,0,m_filters.size());
133 noalias(m_offset) = subrange(newParameters,m_filters.size(),newParameters.size());
134 updateBackpropFilters();
135 }
136
137 /// \brief Return the number of parameters.
138 size_t numberOfParameters() const{
139 return m_filters.size() + m_offset.size();
140 }
141
142 ///\brief Sets the structure by setting the shape of image and filters.
143 ///
144 /// \arg imageShape Shape of the image imHeight x imWidth x channel
145 /// \arg filterShape Shape of the filter matrix numFilters x fiHeight x fiWidth
146 /// \arg type Type of convolution padding to perform
// NOTE(review): the opening signature line (original line 147, presumably
// "void setStructure(") is missing from this extraction -- confirm.
148 Shape const& imageShape, Shape const& filterShape, Padding type = Padding::ZeroPad
149 ){
150 m_type = type;
// The image shape is interpreted as height x width x channels.
151 m_imageHeight = imageShape[0];
152 m_imageWidth = imageShape[1];
153 m_numChannels = imageShape[2];
// The filter shape is numFilters x height x width; the channel count comes
// from the image, so every filter spans all input channels.
154 m_numFilters = filterShape[0];
155 m_filterHeight = filterShape[1];
156 m_filterWidth = filterShape[2];
157 m_filters.resize(m_filterHeight * m_filterWidth * m_numFilters * m_numChannels);
158 m_offset.resize(m_numFilters);
// Keep the flipped/transposed backprop filters in sync with the new geometry.
159 updateBackpropFilters();
160 }
161
// Allocates the state object in which the activation function caches
// intermediate values between eval() and the derivative computations.
162 boost::shared_ptr<State> createState()const{
163 return boost::shared_ptr<State>(new typename ActivationFunction::State());
164 }
165
166 using base_type::eval;
167
168 /// Evaluate the model
// Computes outputs = g(conv2d(filters, inputs) + offset) for a whole batch;
// each row of `inputs` is one linearized image (see the class description).
169 void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
170 SIZE_CHECK(inputs.size2() == inputShape().numElements());
171 outputs.resize(inputs.size1(),outputShape().numElements());
172 outputs.clear();
173 //geometry for "zero pad"
// Number of output pixels produced per filter (spatial size of the output).
174 std::size_t outputsForFilter = outputShape().numElements()/m_numFilters;
// Valid convolution uses no padding; ZeroPad pads by (filter size - 1) in
// total so the output keeps the input's spatial dimensions.
175 std::size_t paddingHeight = (m_type != Padding::Valid) ? m_filterHeight - 1: 0;
176 std::size_t paddingWidth = (m_type != Padding::Valid) ? m_filterWidth - 1: 0;
177
178 blas::kernels::conv2d(inputs, m_filters, outputs,
179 m_numChannels, m_numFilters,
180 m_imageHeight, m_imageWidth,
181 m_filterHeight, m_filterWidth,
182 paddingHeight, paddingWidth
183 );
184 //reshape matrix for offset
// View the batch response as a (pixels * batchSize) x numFilters matrix so
// each filter's bias can be added to all its responses in one expression.
185 auto reshapedOutputs = to_matrix(to_vector(outputs), outputsForFilter * inputs.size1(), m_numFilters);
186 noalias(reshapedOutputs ) += blas::repeat(m_offset,outputsForFilter * inputs.size1());
// Apply g in place; `state` caches values needed by the derivative calls.
187 m_activation.evalInPlace(outputs, state.toState<typename ActivationFunction::State>());
188 }
189
190 ///\brief Calculates the first derivative w.r.t the parameters, summed over all inputs of the last computed batch
// NOTE(review): the opening signature line (original line 191, presumably
// "void weightedParameterDerivative(") is missing from this extraction.
192 BatchInputType const& inputs,
193 BatchOutputType const& outputs,
194 BatchOutputType const& coefficients,
195 State const& state,
196 ParameterVectorType& gradient
197 )const{
198 SIZE_CHECK(coefficients.size2()==outputShape().numElements());
199 SIZE_CHECK(coefficients.size1()==inputs.size1());
200 std::size_t n = inputs.size1();
201 auto outputHeight = outputShape()[0];
202 auto outputWidth = outputShape()[1];
// Chain rule through the activation: delta = coefficients * g'(outputs).
203 BatchOutputType delta = coefficients;
204 m_activation.multiplyDerivative(outputs,delta, state.toState<typename ActivationFunction::State>());
205
// Gradient layout mirrors parameterVector(): weights first, then biases.
206 gradient.resize(numberOfParameters());
207 auto weightGradient = subrange(gradient,0,m_filters.size());
208 auto offsetGradient = subrange(gradient, m_filters.size(),gradient.size());
209
210 std::size_t paddingHeight = (m_type != Padding::Valid) ? m_filterHeight - 1: 0;
211 std::size_t paddingWidth = (m_type != Padding::Valid) ? m_filterWidth - 1: 0;
212
213 //derivatives of offset parameters
214 //reshape coefficient matrix into a matrix where the rows are the single output pixels
215 auto delta_pixels = to_matrix(to_vector(delta), coefficients.size1() * coefficients.size2()/m_numFilters, m_numFilters);
// Bias gradient: sum of delta over all pixels and all batch elements.
216 noalias(offsetGradient) = sum(as_columns(delta_pixels));
217
218 //derivative of filters:
219 //the idea is to phrase this derivative in terms of another convolution.
220 //for this we swap for coefficients and inputs the batch-size with the number of channels
221 // i.e. we transform NHWC to CHWN.
222 // afterwards the derivative is just convolving the coefficients with the inputs (padding the inputs as normal).
223 // after convolving, the output has the filters as channels, therefore the derivative has to be reordered back
224 // from CHWN to NHWC format.
225 BatchOutputType delta_CHWN(m_numFilters, outputHeight * outputWidth * n);
226 BatchOutputType inputs_CHWN(m_numChannels, inputShape().numElements() / m_numChannels * n);
// NOTE(review): each image::reorder call below is missing its final
// argument line (original lines 230, 235 and 247 -- presumably the
// source/target ordering tags, e.g. NHWC -> CHWN); restore from upstream.
227 image::reorder<value_type, device_type>(
228 to_vector(delta), to_vector(delta_CHWN),
229 {n, outputHeight, outputWidth, m_numFilters},
231 );
232 image::reorder<value_type, device_type>(
233 to_vector(inputs), to_vector(inputs_CHWN),
234 {n, m_imageHeight, m_imageWidth, m_numChannels},
236 );
237 BatchInputType responses_CHWN(m_numChannels, m_filters.size() / m_numChannels);
// Convolve inputs (as CHWN) with delta (as the "filters"): the batch size n
// plays the role of the channel count, the deltas the role of the filters.
238 blas::kernels::conv2d(inputs_CHWN, to_vector(delta_CHWN), responses_CHWN,
239 n, m_numFilters,
240 m_imageHeight, m_imageWidth,
241 outputHeight, outputWidth,
242 paddingHeight, paddingWidth
243 );
// Reorder the accumulated responses back into the filter storage layout.
244 image::reorder<value_type, device_type>(
245 to_vector(responses_CHWN), weightGradient,
246 {m_numChannels, m_filterHeight, m_filterWidth, m_numFilters},
248 );
249 }
250 ///\brief Calculates the first derivative w.r.t the inputs and sums them up over all inputs of the last computed batch
// NOTE(review): the opening signature line (original line 251, presumably
// "void weightedInputDerivative(") is missing from this extraction.
252 BatchInputType const & inputs,
253 BatchOutputType const& outputs,
254 BatchOutputType const & coefficients,
255 State const& state,
256 BatchInputType& derivatives
257 )const{
258 SIZE_CHECK(coefficients.size2() == outputShape().numElements());
259 SIZE_CHECK(coefficients.size1() == inputs.size1());
260
// Chain rule through the activation: delta = coefficients * g'(outputs).
261 BatchOutputType delta = coefficients;
262 m_activation.multiplyDerivative(outputs,delta, state.toState<typename ActivationFunction::State>());
263 Shape shape = outputShape();
// The input derivative is a "full" convolution of delta with the flipped
// filters (m_backpropFilters); for Valid mode the padding is doubled so the
// result grows back to the input's spatial size.
264 std::size_t paddingHeight = m_filterHeight - 1;
265 std::size_t paddingWidth = m_filterWidth - 1;
266 if(m_type == Padding::Valid){
267 paddingHeight *=2;
268 paddingWidth *=2;
269 }
270 derivatives.resize(inputs.size1(),inputShape().numElements());
271 derivatives.clear();
272 blas::kernels::conv2d(delta, m_backpropFilters, derivatives,
273 m_numFilters, m_numChannels,
274 shape[0], shape[1],
275 m_filterHeight, m_filterWidth,
276 paddingHeight, paddingWidth
277 );
278 }
279
280 /// From ISerializable
// Deserializes the model; the field order must match write() exactly.
281 void read(InArchive& archive){
282 archive >> m_filters;
283 archive >> m_offset;
284 archive >> m_imageHeight;
285 archive >> m_imageWidth;
286 archive >> m_filterHeight;
287 archive >> m_filterWidth;
288 archive >> m_numChannels;
289 archive >> m_numFilters;
// The padding mode is stored as a plain int in the archive.
290 archive >> (int&) m_type;
// m_backpropFilters is derived state, not serialized: rebuild it.
291 updateBackpropFilters();
292 }
293 /// From ISerializable
// Serializes the model; read() consumes the fields in the same order.
294 void write(OutArchive& archive) const{
295 archive << m_filters;
296 archive << m_offset;
297 archive << m_imageHeight;
298 archive << m_imageWidth;
299 archive << m_filterHeight;
300 archive << m_filterWidth;
301 archive << m_numChannels;
302 archive << m_numFilters;
// NOTE(review): C-style (int&) cast strips const from m_type here; it
// matches read()'s format, but a value cast (int) would suffice -- confirm.
303 archive << (int&) m_type;
304 }
305
306private:
307
308 ///\brief Converts the filters into the backprop filters
309 ///
310 /// for computing the derivative wrt the inputs in the chain rule, we
311 /// have to convolve the outer derivative with the "transposed" filters;
312 /// the transposition is done by switching the order of channels and filters in the storage,
/// and by reversing each filter's spatial entries (a 180 degree flip),
/// as done element-wise in the copy loop below.
313 void updateBackpropFilters(){
314 m_backpropFilters.resize(m_filters.size());
315
// filterImSize: spatial pixels per filter channel; filterSize: one full
// filter (all channels); bpFilterSize: one backprop "filter" (all filters).
316 std::size_t filterImSize = m_filterWidth * m_filterHeight;
317 std::size_t filterSize = m_numChannels * m_filterWidth * m_filterHeight;
318 std::size_t bpFilterSize = m_numFilters * m_filterWidth * m_filterHeight;
319
320 //Note: this looks a bit funny, but this way on a gpu only m_numChannel kernels need to be run
321 for(std::size_t c = 0; c != m_numChannels; ++c){
322 auto channel_mat = subrange(
323 to_matrix(m_filters, m_numFilters, filterSize), //create matrix where each row is one filter
324 0, m_numFilters, c * filterImSize, (c+1) * filterImSize //cut out all columns belonging to the current channel
325 );
326 //Todo: commented out, because we also need to flip, which is not implemented in remora
327 //~ auto channel_vec = to_vector(flip(channel_mat));//flip and linearize to vector (flip not implemented)
328 //~ //cut out target area and set values
329 //~ noalias(subrange(m_backpropFilters, c * bpFilterSize, (c+1) * bpFilterSize)) = channel_vec;
330 //instead use this cpu-only version
// Destination: the c-th backprop filter, viewed as numFilters rows of
// spatial entries. Each row is copied in reversed order (the flip).
331 auto target_vec = subrange(m_backpropFilters, c * bpFilterSize, (c+1) * bpFilterSize);
332 auto target_mat = to_matrix(target_vec,m_numFilters, m_filterWidth * m_filterHeight);
333 for(std::size_t f = 0; f != m_numFilters; ++f){
334 for(std::size_t i = 0; i != m_filterWidth * m_filterHeight; ++i){
335 target_mat(f,i) = channel_mat(f, m_filterWidth * m_filterHeight-i-1);
336 }
337 }
338 }
339 }
340 VectorType m_filters; ///< Filters used for performing the convolution
// m_backpropFilters also has each filter spatially reversed -- see the copy
// loop in updateBackpropFilters().
341 VectorType m_backpropFilters;///< Same as filter just with the storage order of filters and channels reversed.
342 VectorType m_offset;///< offset applied to each filters response
343 ActivationFunction m_activation;///< The activation function to use
344
345 std::size_t m_imageHeight; ///< Height of the input image
346 std::size_t m_imageWidth; ///< Width of the input image
347 std::size_t m_filterHeight; ///< Height of each filter
348 std::size_t m_filterWidth; ///< Width of each filter
349 std::size_t m_numChannels; ///< Number of channels of the input image
350 std::size_t m_numFilters; ///< Number of filters, i.e. output channels
351
352 Padding m_type; ///< Edge handling mode (Valid or ZeroPad)
353};
354
355
356}
357#endif