conv2d.hpp
Go to the documentation of this file.
1/*!
2 *
3 *
4 * \brief Implements the 2D convolution kernel for cpus
5 *
6 * \author O. Krause
7 * \date 2016
8 *
9 *
10 * \par Copyright 1995-2015 Shark Development Team
11 *
12 * <BR><HR>
13 * This file is part of Shark.
14 * <http://image.diku.dk/shark/>
15 *
16 * Shark is free software: you can redistribute it and/or modify
17 * it under the terms of the GNU Lesser General Public License as published
18 * by the Free Software Foundation, either version 3 of the License, or
19 * (at your option) any later version.
20 *
21 * Shark is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU Lesser General Public License for more details.
25 *
26 * You should have received a copy of the GNU Lesser General Public License
27 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28 *
29 */
30
31#ifndef REMORA_KERNELS_DEFAULT_Conv2D_HPP
32#define REMORA_KERNELS_DEFAULT_Conv2D_HPP
33
34#include "simd.hpp"
35#include "../gemm.hpp"
36#include <type_traits> //for std::common_type and aligned storage
37namespace remora{namespace bindings {
38
39
40/// \brief Transforms the given image into a row-major format for convolution
41///
42/// The resulting matrix has one row for each output of the convolution.
43/// each row contains the patch used for computing the result.
44template<class E1, class E2>
45void im2mat(
46 matrix_expression<E1, cpu_tag> const& images,
47 matrix_expression<E2, cpu_tag>& output,
48 std::size_t num_channels,
49 std::size_t image_height,
50 std::size_t image_width,
51 std::size_t filter_height,
52 std::size_t filter_width
53){
54 static_assert(std::is_same<typename E1::orientation, row_major>::value, "Column major not implemented");
55 static_assert(std::is_same<typename E2::orientation, row_major>::value, "Column major not implemented");
56 std::size_t rows_per_image = (image_height - filter_height +1) * (image_width - filter_width +1);
57 //the order of loops is chosen such, that only very little changes of rows are performed
58 for(std::size_t im = 0; im != images().size1(); ++im){
59 for(std::size_t i = 0; i != image_height - filter_height +1; ++i){// iterate over row-positions in the image
60 for(std::size_t i1 = 0; i1 != filter_height; ++i1){//iterate over the the rows of the current filter
61 for(std::size_t j = 0; j != image_width - filter_width +1; ++j){//iterate over the column-position in the image
62 std::size_t row_start = im * rows_per_image + i * (image_width - filter_width +1) + j;
63 for(std::size_t j1 = 0; j1 != filter_width; ++j1){
64 std::size_t col_start = (i1 * filter_width + j1) * num_channels;
65 std::size_t image_start = ((i+i1) * image_width + j+j1) * num_channels;
66 for(std::size_t c = 0; c != num_channels; ++c){//iterate over the channels
67 output()(row_start, col_start + c) = images()(im,image_start + c);
68 }
69 }
70 }
71 }
72 }
73 }
74}
75
76template<class E1, class E2>
77void im2mat_pad(
78 matrix_expression<E1, cpu_tag> const& images,
79 matrix_expression<E2, cpu_tag>& output,
80 std::size_t num_channels,
81 std::size_t image_height,
82 std::size_t image_width,
83 std::size_t filter_height,
84 std::size_t filter_width,
85 std::size_t padding_height,
86 std::size_t padding_width
87){
88 static_assert(std::is_same<typename E1::orientation, row_major>::value, "Column major not implemented");
89 static_assert(std::is_same<typename E2::orientation, row_major>::value, "Column major not implemented");
90 std::size_t image_start1 = padding_height/2;
91 std::size_t image_end1 = image_height + image_start1;
92 std::size_t image_start2 = padding_width/2;
93 std::size_t image_end2 = image_width + image_start2;
94 std::size_t output_width = image_width - filter_width + 1 + padding_width;
95 std::size_t output_height = image_height - filter_height + 1 + padding_height;
96 std::size_t rows_per_image =output_width * output_height;
97 //the order of loops is chosen such, that only very little changes of rows are performed
98 for(std::size_t im = 0; im != images().size1(); ++im){
99 for(std::size_t i = 0; i != output_height; ++i){// iterate over row-positions in the output
100 for(std::size_t i1 = 0; i1 != filter_height; ++i1){//iterate over the the rows of the current filter
101 if(i1+i < image_start1 || i1+i >= image_end1){//special case: we are on the border above or below
102 for(std::size_t j = 0; j != output_width; ++j){//iterate over the column-position in the output
103 std::size_t row_start = im * rows_per_image + i * output_width +j;
104 std::size_t col_start = i1 * filter_width * num_channels;
105 for(std::size_t c = 0; c != num_channels * filter_width; ++c){//iterate over the channels
106 output()(row_start, col_start + c) = 0;
107 }
108 }
109 continue;//no need to got on, we are done
110 }
111 for(std::size_t j = 0; j != output_width; ++j){//iterate over the column-position in the output
112 std::size_t row_start = im * rows_per_image + i * output_width + j;
113 for(std::size_t j1 = 0; j1 != filter_width; ++j1){
114 std::size_t col_start = (i1 * filter_width + j1) * num_channels;
115
116 if(j+j1 < image_start2 || j+j1 >= image_end2){
117 for(std::size_t c = 0; c != num_channels; ++c){//iterate over the channels
118 output()(row_start, col_start + c) = 0;
119 }
120 }else{
121 std::size_t image_start = ((i+i1-image_start1) * image_width + j+j1-image_start2) * num_channels;
122 for(std::size_t c = 0; c != num_channels; ++c){//iterate over the channels
123 output()(row_start, col_start + c) = images()(im,image_start + c);
124 }
125 }
126 }
127 }
128 }
129 }
130 }
131}
132
133
134template<class E1, class E2, class M>
135void conv2d(
136 matrix_expression<E1, cpu_tag> const& images,
137 vector_expression<E2, cpu_tag> const& filter,
138 matrix_expression<M, cpu_tag>& outputs,
139 std::size_t num_channels,
140 std::size_t num_filters,
141 std::size_t image_height,
142 std::size_t image_width,
143 std::size_t filter_height,
144 std::size_t filter_width,
145 std::size_t padding_height,
146 std::size_t padding_width
147){
148 static_assert(std::is_same<typename E1::orientation, row_major>::value, "Column major not implemented");
149 static_assert(std::is_same<typename E1::storage_type::storage_tag, continuous_dense_tag>::value, "Subranges not implemented");
150 static_assert(std::is_same<typename M::orientation, row_major>::value, "Column major not implemented");
151 typedef typename std::common_type<
152 typename E1::value_type, typename E2::value_type, typename M::value_type
153 >::type value_type;
154
155 std::size_t output_rows_per_filter = (image_height - filter_height +1 + padding_height) * (image_width - filter_width +1 + padding_width);
156 std::size_t filter_size = filter_width * filter_height * num_channels;
157 std::size_t num_images = images().size1();
158
159 REMORA_SIZE_CHECK(outputs().size1() == images().size1());
160 REMORA_SIZE_CHECK(outputs().size2() == num_filters * output_rows_per_filter);
161 REMORA_SIZE_CHECK(images().size2() == num_channels * image_width * image_height);
162 REMORA_SIZE_CHECK(filter().size() == num_filters * filter_size);
163
164 //allocate storage and create temporary matrices
165 boost::alignment::aligned_allocator<value_type,64> allocator;
166 value_type* image_storage = allocator.allocate( num_images * output_rows_per_filter * filter_size);
167 value_type* filter_storage = allocator.allocate(num_filters * filter_size);
168 dense_matrix_adaptor<value_type, row_major, cpu_tag> image_transformed(image_storage,num_images * output_rows_per_filter, filter_size);
169 dense_matrix_adaptor<value_type, row_major, cpu_tag> filter_transformed(filter_storage, num_filters, filter_size);
170 dense_matrix_adaptor<value_type, row_major, cpu_tag> output_transformed(outputs().raw_storage().values, num_images * output_rows_per_filter, num_filters);
171 //copy image to temporary storage
172 if(padding_height == 0 && padding_width == 0){
173 im2mat(images,image_transformed, num_channels, image_height, image_width, filter_height, filter_width);
174 }else{
175 im2mat_pad(images,image_transformed, num_channels, image_height, image_width, filter_height, filter_width, padding_height, padding_width);
176 }
177 //copy filters to temporary storage
178 for(std::size_t f = 0; f != num_filters; ++f){
179 for(std::size_t i = 0; i != filter_size; ++i){
180 filter_transformed(f,i) = filter()(f * filter_size + i);
181 }
182 }
183
184 //do the computation
185 kernels::gemm(image_transformed, trans(filter_transformed), output_transformed, value_type(1.0));
186
187 //deallocate storage
188 allocator.deallocate(image_storage,num_images * output_rows_per_filter * filter_size);
189 allocator.deallocate(filter_storage, num_filters * filter_size);
190}
191
192}}
193
194#endif