copy.hpp
Go to the documentation of this file.
/*!
 * \brief Implements operations to copy data from cpu to gpu and back
 *
 * \author O. Krause
 * \date 2016
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
28#ifndef REMORA_GPU_COPY_HPP
29#define REMORA_GPU_COPY_HPP
30
31#include "../detail/traits.hpp"
32#include "../dense.hpp" //required for vector proxy on cpu
33#include "../assignment.hpp"
34
35namespace remora{
36
37///////////////////////////////////////
38//////// Vector Transport
39///////////////////////////////////////
40
41template<class E>
42class vector_transport_to_cpu: public vector_expression<vector_transport_to_cpu<E>, cpu_tag>{
43public:
44 typedef typename E::const_closure_type expression_closure_type;
45
46 typedef typename E::value_type value_type;
47 typedef typename E::size_type size_type;
48 typedef value_type const& const_reference;
49 typedef const_reference reference;
50
51 typedef vector_transport_to_cpu const_closure_type;
52 typedef vector_transport_to_cpu closure_type;
53 typedef unknown_storage storage_type;
54 typedef unknown_storage const_storage_type;
55 typedef blockwise<typename E::evaluation_category::tag> evaluation_category;
56
57
58 //FIXME: This is required even though iterators for block expressions are meaningless
59 typedef typename E::const_iterator const_iterator;
60 typedef const_iterator iterator;
61
62 // Construction and destruction
63 explicit vector_transport_to_cpu(
64 expression_closure_type const& expression
65 ):m_expression(expression){}
66
67 size_type size() const {
68 return m_expression.size();
69 }
70
71 expression_closure_type const& expression() const {
72 return m_expression;
73 }
74 boost::compute::command_queue& queue() const{
75 return m_expression.queue();
76 }
77
78 //dispatcher to computation kernels
79 template<class VecX>
80 void assign_to(vector_expression<VecX, cpu_tag>& x)const{
81 //in case the expression can not be mapped to memory, evaluate it
82 //this does nothing for proxies
83 auto e_eval = eval_expression(m_expression);
84
85 auto storageE = e_eval.raw_storage();
86 auto& buffer = storageE.buffer;
87
88 //map buffer to host memory
89 auto p = (typename E::value_type*) m_expression.queue().enqueue_map_buffer(
90 buffer, CL_MAP_READ, 0, buffer.size()
91 );
92 //adapt host memory buffer to vector and assign
93 auto adaptE = adapt_vector(size(), p + storageE.offset, storageE.stride);
94 assign(x, adaptE);
95
96 //unmap memory
97 m_expression.queue().enqueue_unmap_buffer(buffer,p);
98 }
99 template<class VecX>
100 void plus_assign_to(vector_expression<VecX, cpu_tag>& x)const{
101 //in case the expression can not be mapped to memory, evaluate it
102 //this does nothing for proxies
103 auto e_eval = eval_expression(m_expression);
104
105 auto storageE = e_eval.raw_storage();
106 auto& buffer = storageE.buffer;
107 //map buffer to host memory
108 auto p = (value_type*) m_expression.queue().enqueue_map_buffer(
109 buffer, CL_MAP_READ, 0, buffer.size()
110 );
111 //adapt host memory buffer to vector and assign
112 auto adaptE = adapt_vector(size(), p + storageE.offset, storageE.stride);
113 plus_assign(x,adaptE);
114
115 //unmap memory
116 m_expression.queue().enqueue_unmap_buffer(buffer,p);
117 }
118
119private:
120 expression_closure_type m_expression;
121};
122
123template<class E>
124class vector_transport_to_gpu: public vector_expression<vector_transport_to_gpu<E>, gpu_tag>{
125public:
126 typedef typename E::const_closure_type expression_closure_type;
127
128 typedef typename E::value_type value_type;
129 typedef typename E::size_type size_type;
130 typedef value_type const& const_reference;
131 typedef const_reference reference;
132
133 typedef vector_transport_to_gpu const_closure_type;
134 typedef vector_transport_to_gpu closure_type;
135 typedef unknown_storage storage_type;
136 typedef unknown_storage const_storage_type;
137 typedef blockwise<typename E::evaluation_category::tag> evaluation_category;
138
139
140 //FIXME: This is required even though iterators for block expressions are meaningless
141 typedef typename E::const_iterator const_iterator;
142 typedef const_iterator iterator;
143
144 // Construction and destruction
145 explicit vector_transport_to_gpu(
146 expression_closure_type const& expression,
147 boost::compute::command_queue& queue
148 ):m_expression(expression), m_queue(&queue){}
149
150 size_type size() const {
151 return m_expression.size();
152 }
153 expression_closure_type const& expression() const {
154 return m_expression;
155 }
156 boost::compute::command_queue& queue() const{
157 return *m_queue;
158 }
159
160 //dispatcher to computation kernels
161 template<class VecX>
162 void assign_to(vector_expression<VecX, gpu_tag>& x)const{
163 auto storagex = x().raw_storage();
164 auto& buffer = storagex.buffer;
165 //map buffer to host memory
166 auto p = (typename VecX::value_type*) x().queue().enqueue_map_buffer(
167 buffer, CL_MAP_WRITE, 0, buffer.size()
168 );
169 //adapt host memory buffer to vector and assign
170 auto adaptX = adapt_vector(size(), p + storagex.offset, storagex.stride);
171 assign(adaptX,m_expression);
172
173 //unmap memory
174 x().queue().enqueue_unmap_buffer(buffer,p);
175 }
176 template<class VecX>
177 void plus_assign_to(vector_expression<VecX, gpu_tag>& x)const{
178 auto storagex = x().raw_storage();
179 auto& buffer = storagex.buffer;
180 //map buffer to host memory
181 auto p = (typename VecX::value_type*) x().queue().enqueue_map_buffer(
182 buffer, CL_MAP_WRITE, 0, buffer.size()
183 );
184 //adapt host memory buffer to vector and assign
185 auto adaptX = adapt_vector(size(), p + storagex.offset, storagex.stride);
186 plus_assign(adaptX,m_expression);
187
188 //unmap memory
189 x().queue().enqueue_unmap_buffer(buffer,p);
190 }
191
192private:
193
194 expression_closure_type m_expression;
195 boost::compute::command_queue* m_queue;
196};
197
198
199///////////////////////////////////////
200//////// Matrix Transport
201///////////////////////////////////////
202
203template<class E>
204class matrix_transport_to_cpu: public matrix_expression<matrix_transport_to_cpu<E>, cpu_tag>{
205public:
206 typedef typename E::const_closure_type expression_closure_type;
207
208 typedef typename E::value_type value_type;
209 typedef typename E::size_type size_type;
210 typedef value_type const& const_reference;
211 typedef const_reference reference;
212
213 typedef matrix_transport_to_cpu const_closure_type;
214 typedef matrix_transport_to_cpu closure_type;
215 typedef unknown_storage storage_type;
216 typedef unknown_storage const_storage_type;
217 typedef blockwise<typename E::evaluation_category::tag> evaluation_category;
218 typedef typename E::orientation orientation;
219
220 typedef no_iterator const_major_iterator;
221 typedef no_iterator major_iterator;
222
223 // Construction and destruction
224 explicit matrix_transport_to_cpu(
225 expression_closure_type const& expression
226 ):m_expression(expression){}
227
228 size_type size1() const {
229 return m_expression.size1();
230 }
231 size_type size2() const {
232 return m_expression.size2();
233 }
234 expression_closure_type const& expression() const {
235 return m_expression;
236 }
237 boost::compute::command_queue& queue() const{
238 return m_expression.queue();
239 }
240
241 //dispatcher to computation kernels
242 template<class MatX>
243 void assign_to(matrix_expression<MatX, cpu_tag>& X) const{
244 //in case the expression can not be mapped to memory, evaluate it
245 //this does nothing for proxies
246 auto e_eval = eval_expression(m_expression);
247
248 auto storageE = e_eval().raw_storage();
249 auto& buffer = storageE.buffer;
250 //map buffer to host memory
251 auto p = (typename E::value_type*) m_expression.queue().enqueue_map_buffer(
252 buffer, CL_MAP_READ, 0, buffer.size()
253 );
254 //adapt host memory buffer to matrix and assign
255 typedef typename decltype(e_eval)::orientation EOrientation;
256 typedef dense_matrix_adaptor<typename E::value_type, EOrientation> AdaptE;
257 AdaptE adaptE(p + storageE.offset,size1(), size2(), storageE.leading_dimension);
258
259 assign(X, adaptE);
260
261 //unmap memory
262 m_expression.queue().enqueue_unmap_buffer(buffer,p);
263 }
264 template<class MatX>
265 void plus_assign_to(matrix_expression<MatX, cpu_tag>& X)const{
266 //in case the expression can not be mapped to memory, evaluate it
267 //this does nothing for proxies
268 auto e_eval = eval_expression(m_expression);
269
270 auto storageE = e_eval().raw_storage();
271 auto& buffer = storageE.buffer;
272 //map buffer to host memory
273 auto p = (typename E::value_type*) m_expression.queue().enqueue_map_buffer(
274 buffer, CL_MAP_READ, 0, buffer.size()
275 );
276 //adapt host memory buffer to matrix and assign
277 typedef typename decltype(e_eval)::orientation EOrientation;
278 typedef dense_matrix_adaptor<typename E::value_type, EOrientation> AdaptE;
279 AdaptE adaptE(p + storageE.offset, size1(), size2(), storageE.leading_dimension);
280
281 plus_assign(X, adaptE);
282
283 //unmap memory
284 m_expression.queue().enqueue_unmap_buffer(buffer,p);
285 }
286private:
287 expression_closure_type m_expression;
288};
289
290template<class E>
291class matrix_transport_to_gpu: public matrix_expression<matrix_transport_to_gpu<E>, gpu_tag>{
292public:
293 typedef typename E::const_closure_type expression_closure_type;
294
295 typedef typename E::value_type value_type;
296 typedef typename E::size_type size_type;
297 typedef value_type const& const_reference;
298 typedef const_reference reference;
299
300 typedef matrix_transport_to_gpu const_closure_type;
301 typedef matrix_transport_to_gpu closure_type;
302 typedef unknown_storage storage_type;
303 typedef unknown_storage const_storage_type;
304 typedef blockwise<typename E::evaluation_category::tag> evaluation_category;
305 typedef typename E::orientation orientation;
306
307 typedef no_iterator const_major_iterator;
308 typedef no_iterator major_iterator;
309
310 // Construction and destruction
311 explicit matrix_transport_to_gpu(
312 expression_closure_type const& expression,
313 boost::compute::command_queue& queue
314 ):m_expression(expression), m_queue(&queue){}
315
316 size_type size1() const {
317 return m_expression.size1();
318 }
319 size_type size2() const {
320 return m_expression.size2();
321 }
322 expression_closure_type const& expression() const {
323 return m_expression;
324 }
325 boost::compute::command_queue& queue() const{
326 return *m_queue;
327 }
328
329 //dispatcher to computation kernels
330 template<class MatX>
331 void assign_to(matrix_expression<MatX, gpu_tag>& X)const{
332 auto storageX = X().raw_storage();
333 auto& buffer = storageX.buffer;
334 //map buffer to host memory
335 typename MatX::value_type* p = (typename MatX::value_type*) X().queue().enqueue_map_buffer(
336 buffer, CL_MAP_WRITE, 0, buffer.size()
337 );
338 //adapt host memory buffer to vector and assign
339 typedef typename MatX::orientation XOrientation;
340 dense_matrix_adaptor<typename MatX::value_type, XOrientation> adaptX(p, size1(), size2(), storageX.leading_dimension);
341 assign(adaptX, m_expression);
342
343 //unmap memory
344 X().queue().enqueue_unmap_buffer(buffer,p);
345 }
346 template<class MatX>
347 void plus_assign_to(matrix_expression<MatX, gpu_tag>& X) const{
348 auto storageX = X().raw_storage();
349 auto& buffer = storageX.buffer;
350 //map buffer to host memory
351 typename MatX::value_type* p = (typename MatX::value_type*) X().queue().enqueue_map_buffer(
352 buffer, CL_MAP_WRITE, 0, buffer.size()
353 );
354 //adapt host memory buffer to matrix and assign
355 typedef typename MatX::orientation XOrientation;
356 typedef dense_matrix_adaptor<typename MatX::value_type, XOrientation> AdaptX;
357 AdaptX adaptX(p + storageX.offset, size1(), size2(), storageX.leading_dimension);
358
359 plus_assign(adaptX, m_expression);
360
361 //unmap memory
362 X().queue().enqueue_unmap_buffer(buffer,p);
363 }
364
365private:
366 expression_closure_type m_expression;
367 boost::compute::command_queue* m_queue;
368};
369
370///////////////////////////////////////////////
371//////// Expression Optimizers
372///////////////////////////////////////////////
373
374namespace detail{
375template<class E>
376struct matrix_scalar_multiply_optimizer<vector_transport_to_gpu<E> >{
377 typedef vector_scalar_multiply_optimizer<E> opt;
378 typedef vector_transport_to_gpu<typename opt::type> type;
379 static type create(vector_transport_to_gpu<E> const& v, typename type::value_type alpha){
380 return type(opt::create(v.expression(), alpha), v.queue());
381 }
382};
383template<class E>
384struct matrix_scalar_multiply_optimizer<vector_transport_to_cpu<E> >{
385 typedef vector_scalar_multiply_optimizer<E> opt;
386 typedef vector_transport_to_cpu<typename opt::type> type;
387 static type create(vector_transport_to_cpu<E> const& v, typename type::value_type alpha){
388 return type(opt::create(v.expression(), alpha));
389 }
390};
391
392template<class E>
393struct matrix_scalar_multiply_optimizer<matrix_transport_to_gpu<E> >{
394 typedef matrix_scalar_multiply_optimizer<E> opt;
395 typedef matrix_transport_to_gpu<typename opt::type> type;
396 static type create(matrix_transport_to_gpu<E> const& m, typename type::value_type alpha){
397 return type(opt::create(m.expression(), alpha), m.queue());
398 }
399};
400
401template<class E>
402struct matrix_scalar_multiply_optimizer<matrix_transport_to_cpu<E> >{
403 typedef matrix_scalar_multiply_optimizer<E> opt;
404 typedef matrix_transport_to_cpu<typename opt::type> type;
405 static type create(matrix_transport_to_cpu<E> const& m, typename type::value_type alpha){
406 return type(opt::create(m.expression(), alpha));
407 }
408};
409}
410
411//TODO: proxy(copy_to_gpu) should be possible...
412
413///////////////////////////////////////////////
414//////// Expressions
415///////////////////////////////////////////////
416
417template<class E>
418vector_transport_to_cpu<E> copy_to_cpu(vector_expression<E, gpu_tag> const& e){
419 return vector_transport_to_cpu<E>(e());
420}
421
422template<class E>
423matrix_transport_to_cpu<E> copy_to_cpu(matrix_expression<E, gpu_tag> const& e){
424 return matrix_transport_to_cpu<E>(e());
425}
426template<class E>
427vector_transport_to_gpu<E> copy_to_gpu(
428 vector_expression<E, cpu_tag> const& e,
429 boost::compute::command_queue& queue = boost::compute::system::default_queue()
430){
431 return vector_transport_to_gpu<E>(e(), queue);
432}
433
434template<class E>
435matrix_transport_to_gpu<E> copy_to_gpu(
436 matrix_expression<E, cpu_tag> const& e,
437 boost::compute::command_queue& queue = boost::compute::system::default_queue()
438){
439 return matrix_transport_to_gpu<E>(e(),queue);
440}
441
442//moving gpu->gpu is for free
443template<class E>
444E const& copy_to_gpu(
445 vector_expression<E, gpu_tag> const& e,
446 boost::compute::command_queue& queue = boost::compute::system::default_queue()
447){
448 return e();
449}
450
451template<class E>
452E const& copy_to_gpu(
453 matrix_expression<E, gpu_tag> const& e,
454 boost::compute::command_queue& queue = boost::compute::system::default_queue()
455){
456 return e();
457}
458
459template<class E, class Device>
460auto copy_to_device(vector_expression<E, Device> const& e, gpu_tag)->decltype(copy_to_gpu(e)){
461 return copy_to_gpu(e);
462}
463
464
465template<class E, class Device>
466auto copy_to_device(matrix_expression<E, Device> const& e, gpu_tag)->decltype(copy_to_gpu(e)){
467 return copy_to_gpu(e);
468}
469
470
471}
472
473#endif