28#ifndef REMORA_GPU_COPY_HPP
29#define REMORA_GPU_COPY_HPP
31#include "../detail/traits.hpp"
32#include "../dense.hpp"
33#include "../assignment.hpp"
// Vector expression tagged cpu_tag that wraps an expression E providing queue().
// assign_to/plus_assign_to evaluate the wrapped expression, map the resulting
// buffer into host memory for reading and hand the mapped memory to adapt_vector.
42class vector_transport_to_cpu:
public vector_expression<vector_transport_to_cpu<E>, cpu_tag>{
	// Type under which the wrapped expression is stored (E's const closure).
44 typedef typename E::const_closure_type expression_closure_type;
	// Value and size types are taken from E; both reference typedefs are const references.
46 typedef typename E::value_type value_type;
47 typedef typename E::size_type size_type;
48 typedef value_type
const& const_reference;
49 typedef const_reference reference;
	// This class is its own closure type.
51 typedef vector_transport_to_cpu const_closure_type;
52 typedef vector_transport_to_cpu closure_type;
	// Storage is declared as unknown_storage for both const and non-const access.
53 typedef unknown_storage storage_type;
54 typedef unknown_storage const_storage_type;
	// Evaluation is blockwise, keeping the tag of E's evaluation category.
55 typedef blockwise<typename E::evaluation_category::tag> evaluation_category;
	// Iterator typedefs are forwarded from E; iterator is the const iterator.
59 typedef typename E::const_iterator const_iterator;
60 typedef const_iterator iterator;
	// Stores the given expression closure.
63 explicit vector_transport_to_cpu(
64 expression_closure_type
const& expression
65 ):m_expression(expression){}
	// Number of elements, forwarded from the wrapped expression.
67 size_type size()
const {
68 return m_expression.size();
	// Accessor returning an expression_closure_type by const reference.
71 expression_closure_type
const& expression()
const {
	// Command queue of the wrapped expression.
74 boost::compute::command_queue& queue()
const{
75 return m_expression.queue();
	// Target x is a cpu_tag vector expression.
80 void assign_to(vector_expression<VecX, cpu_tag>& x)
const{
	// raw_storage() is taken from the result of eval_expression, not from m_expression itself.
83 auto e_eval = eval_expression(m_expression);
85 auto storageE = e_eval.raw_storage();
86 auto& buffer = storageE.buffer;
	// Map the whole buffer (offset 0, buffer.size()) for reading on the expression's queue;
	// the returned pointer is cast to the element type.
89 auto p = (
typename E::value_type*) m_expression.queue().enqueue_map_buffer(
90 buffer, CL_MAP_READ, 0, buffer.size()
	// Adapt size() elements starting storageE.offset elements past p, using the storage's stride.
93 auto adaptE = adapt_vector(size(), p + storageE.offset, storageE.stride);
	// Release the mapping obtained above.
97 m_expression.queue().enqueue_unmap_buffer(buffer,p);
	// Same mapping sequence as assign_to, but the adapted memory is passed to plus_assign.
100 void plus_assign_to(vector_expression<VecX, cpu_tag>& x)
const{
103 auto e_eval = eval_expression(m_expression);
105 auto storageE = e_eval.raw_storage();
106 auto& buffer = storageE.buffer;
	// Map the whole buffer for reading and cast the pointer to value_type.
108 auto p = (value_type*) m_expression.queue().enqueue_map_buffer(
109 buffer, CL_MAP_READ, 0, buffer.size()
	// Adapt the mapped memory with the storage's offset and stride, then pass it with x to plus_assign.
112 auto adaptE = adapt_vector(size(), p + storageE.offset, storageE.stride);
113 plus_assign(x,adaptE);
	// Release the mapping obtained above.
116 m_expression.queue().enqueue_unmap_buffer(buffer,p);
	// Stored closure of the wrapped expression.
120 expression_closure_type m_expression;
// Vector expression tagged gpu_tag that wraps an expression E together with a
// pointer to a boost::compute::command_queue. assign_to/plus_assign_to map the
// target's buffer into host memory for writing and run assign/plus_assign from
// the wrapped expression into the mapped memory.
124class vector_transport_to_gpu:
public vector_expression<vector_transport_to_gpu<E>, gpu_tag>{
	// Type under which the wrapped expression is stored (E's const closure).
126 typedef typename E::const_closure_type expression_closure_type;
	// Value and size types are taken from E; both reference typedefs are const references.
128 typedef typename E::value_type value_type;
129 typedef typename E::size_type size_type;
130 typedef value_type
const& const_reference;
131 typedef const_reference reference;
	// This class is its own closure type.
133 typedef vector_transport_to_gpu const_closure_type;
134 typedef vector_transport_to_gpu closure_type;
	// Storage is declared as unknown_storage for both const and non-const access.
135 typedef unknown_storage storage_type;
136 typedef unknown_storage const_storage_type;
	// Evaluation is blockwise, keeping the tag of E's evaluation category.
137 typedef blockwise<typename E::evaluation_category::tag> evaluation_category;
	// Iterator typedefs are forwarded from E; iterator is the const iterator.
141 typedef typename E::const_iterator const_iterator;
142 typedef const_iterator iterator;
	// Stores the expression closure and the address of queue. Only a pointer to the
	// queue is kept, so the queue object has to outlive this expression.
145 explicit vector_transport_to_gpu(
146 expression_closure_type
const& expression,
147 boost::compute::command_queue& queue
148 ):m_expression(expression), m_queue(&queue){}
	// Number of elements, forwarded from the wrapped expression.
150 size_type size()
const {
151 return m_expression.size();
	// Accessor returning an expression_closure_type by const reference.
153 expression_closure_type
const& expression()
const {
	// Accessor returning a boost::compute::command_queue reference.
156 boost::compute::command_queue& queue()
const{
	// Target x is a gpu_tag vector expression; its raw storage is written through a host mapping.
162 void assign_to(vector_expression<VecX, gpu_tag>& x)
const{
163 auto storagex = x().raw_storage();
164 auto& buffer = storagex.buffer;
	// Map the whole target buffer (offset 0, buffer.size()) for writing on the target's queue;
	// the returned pointer is cast to the target's element type.
166 auto p = (
typename VecX::value_type*) x().queue().enqueue_map_buffer(
167 buffer, CL_MAP_WRITE, 0, buffer.size()
	// Adapt size() elements starting storagex.offset elements past p with the target's stride,
	// then pass the adapted memory and the wrapped expression to assign.
170 auto adaptX = adapt_vector(size(), p + storagex.offset, storagex.stride);
171 assign(adaptX,m_expression);
	// Release the mapping obtained above.
174 x().queue().enqueue_unmap_buffer(buffer,p);
	// Same mapping sequence as assign_to, but the adapted memory is passed to plus_assign.
177 void plus_assign_to(vector_expression<VecX, gpu_tag>& x)
const{
178 auto storagex = x().raw_storage();
179 auto& buffer = storagex.buffer;
	// Map the whole target buffer for writing on the target's queue.
181 auto p = (
typename VecX::value_type*) x().queue().enqueue_map_buffer(
182 buffer, CL_MAP_WRITE, 0, buffer.size()
	// Adapt the mapped memory with the target's offset and stride.
185 auto adaptX = adapt_vector(size(), p + storagex.offset, storagex.stride);
186 plus_assign(adaptX,m_expression);
	// Release the mapping obtained above.
189 x().queue().enqueue_unmap_buffer(buffer,p);
	// Stored closure of the wrapped expression.
194 expression_closure_type m_expression;
	// Address of the queue passed to the constructor (not owned).
195 boost::compute::command_queue* m_queue;
// Matrix expression tagged cpu_tag that wraps an expression E providing queue().
// assign_to/plus_assign_to evaluate the wrapped expression, map the resulting
// buffer into host memory for reading and wrap the mapped memory in a
// dense_matrix_adaptor.
204class matrix_transport_to_cpu:
public matrix_expression<matrix_transport_to_cpu<E>, cpu_tag>{
	// Type under which the wrapped expression is stored (E's const closure).
206 typedef typename E::const_closure_type expression_closure_type;
	// Value and size types are taken from E; both reference typedefs are const references.
208 typedef typename E::value_type value_type;
209 typedef typename E::size_type size_type;
210 typedef value_type
const& const_reference;
211 typedef const_reference reference;
	// This class is its own closure type.
213 typedef matrix_transport_to_cpu const_closure_type;
214 typedef matrix_transport_to_cpu closure_type;
	// Storage is declared as unknown_storage for both const and non-const access.
215 typedef unknown_storage storage_type;
216 typedef unknown_storage const_storage_type;
	// Evaluation is blockwise, keeping the tag of E's evaluation category; orientation is E's.
217 typedef blockwise<typename E::evaluation_category::tag> evaluation_category;
218 typedef typename E::orientation orientation;
	// Both major iterator types are declared as no_iterator.
220 typedef no_iterator const_major_iterator;
221 typedef no_iterator major_iterator;
	// Stores the given expression closure.
224 explicit matrix_transport_to_cpu(
225 expression_closure_type
const& expression
226 ):m_expression(expression){}
	// Dimensions are forwarded from the wrapped expression.
228 size_type size1()
const {
229 return m_expression.size1();
231 size_type size2()
const {
232 return m_expression.size2();
	// Accessor returning an expression_closure_type by const reference.
234 expression_closure_type
const& expression()
const {
	// Command queue of the wrapped expression.
237 boost::compute::command_queue& queue()
const{
238 return m_expression.queue();
	// Target X is a cpu_tag matrix expression.
243 void assign_to(matrix_expression<MatX, cpu_tag>& X)
const{
	// raw_storage() is taken from the result of eval_expression, not from m_expression itself.
246 auto e_eval = eval_expression(m_expression);
248 auto storageE = e_eval().raw_storage();
249 auto& buffer = storageE.buffer;
	// Map the whole buffer (offset 0, buffer.size()) for reading on the expression's queue;
	// the returned pointer is cast to the element type.
251 auto p = (
typename E::value_type*) m_expression.queue().enqueue_map_buffer(
252 buffer, CL_MAP_READ, 0, buffer.size()
	// The adaptor uses the orientation of the evaluated expression's type (decltype(e_eval)),
	// and starts storageE.offset elements past p with the storage's leading dimension.
255 typedef typename decltype(e_eval)::orientation EOrientation;
256 typedef dense_matrix_adaptor<typename E::value_type, EOrientation> AdaptE;
257 AdaptE adaptE(p + storageE.offset,size1(), size2(), storageE.leading_dimension);
	// Release the mapping obtained above.
262 m_expression.queue().enqueue_unmap_buffer(buffer,p);
	// Same mapping sequence as assign_to, but the adaptor is passed with X to plus_assign.
265 void plus_assign_to(matrix_expression<MatX, cpu_tag>& X)
const{
268 auto e_eval = eval_expression(m_expression);
270 auto storageE = e_eval().raw_storage();
271 auto& buffer = storageE.buffer;
	// Map the whole buffer for reading on the expression's queue.
273 auto p = (
typename E::value_type*) m_expression.queue().enqueue_map_buffer(
274 buffer, CL_MAP_READ, 0, buffer.size()
	// Adaptor over the mapped memory, built as in assign_to.
277 typedef typename decltype(e_eval)::orientation EOrientation;
278 typedef dense_matrix_adaptor<typename E::value_type, EOrientation> AdaptE;
279 AdaptE adaptE(p + storageE.offset, size1(), size2(), storageE.leading_dimension);
281 plus_assign(X, adaptE);
	// Release the mapping obtained above.
284 m_expression.queue().enqueue_unmap_buffer(buffer,p);
	// Stored closure of the wrapped expression.
287 expression_closure_type m_expression;
// Matrix expression tagged gpu_tag that wraps an expression E together with a
// pointer to a boost::compute::command_queue. assign_to/plus_assign_to map the
// target's buffer into host memory for writing and run assign/plus_assign from
// the wrapped expression into a dense_matrix_adaptor over the mapped memory.
291class matrix_transport_to_gpu:
public matrix_expression<matrix_transport_to_gpu<E>, gpu_tag>{
	// Type under which the wrapped expression is stored (E's const closure).
293 typedef typename E::const_closure_type expression_closure_type;
	// Value and size types are taken from E; both reference typedefs are const references.
295 typedef typename E::value_type value_type;
296 typedef typename E::size_type size_type;
297 typedef value_type
const& const_reference;
298 typedef const_reference reference;
	// This class is its own closure type.
300 typedef matrix_transport_to_gpu const_closure_type;
301 typedef matrix_transport_to_gpu closure_type;
	// Storage is declared as unknown_storage for both const and non-const access.
302 typedef unknown_storage storage_type;
303 typedef unknown_storage const_storage_type;
	// Evaluation is blockwise, keeping the tag of E's evaluation category; orientation is E's.
304 typedef blockwise<typename E::evaluation_category::tag> evaluation_category;
305 typedef typename E::orientation orientation;
	// Both major iterator types are declared as no_iterator.
307 typedef no_iterator const_major_iterator;
308 typedef no_iterator major_iterator;
	// Stores the expression closure and the address of queue. Only a pointer to the
	// queue is kept, so the queue object has to outlive this expression.
311 explicit matrix_transport_to_gpu(
312 expression_closure_type
const& expression,
313 boost::compute::command_queue& queue
314 ):m_expression(expression), m_queue(&queue){}
	// Dimensions are forwarded from the wrapped expression.
316 size_type size1()
const {
317 return m_expression.size1();
319 size_type size2()
const {
320 return m_expression.size2();
	// Accessor returning an expression_closure_type by const reference.
322 expression_closure_type
const& expression()
const {
	// Accessor returning a boost::compute::command_queue reference.
325 boost::compute::command_queue& queue()
const{
	// Target X is a gpu_tag matrix expression; its raw storage is written through a host mapping.
331 void assign_to(matrix_expression<MatX, gpu_tag>& X)
const{
332 auto storageX = X().raw_storage();
333 auto& buffer = storageX.buffer;
	// Map the whole target buffer (offset 0, buffer.size()) for writing on the target's queue;
	// the returned pointer is cast to the target's element type.
335 typename MatX::value_type* p = (
typename MatX::value_type*) X().queue().enqueue_map_buffer(
336 buffer, CL_MAP_WRITE, 0, buffer.size()
	// The mapping starts at the beginning of the buffer, so the adaptor has to begin
	// storageX.offset elements past p, exactly as plus_assign_to does; using p alone
	// would write to the wrong elements whenever the target's storage offset is non-zero.
339 typedef typename MatX::orientation XOrientation;
340 dense_matrix_adaptor<typename MatX::value_type, XOrientation> adaptX(p + storageX.offset, size1(), size2(), storageX.leading_dimension);
341 assign(adaptX, m_expression);
	// Release the mapping with the unshifted pointer returned by enqueue_map_buffer.
344 X().queue().enqueue_unmap_buffer(buffer,p);
	// Same mapping sequence as assign_to, but the adaptor is passed to plus_assign.
347 void plus_assign_to(matrix_expression<MatX, gpu_tag>& X)
const{
348 auto storageX = X().raw_storage();
349 auto& buffer = storageX.buffer;
	// Map the whole target buffer for writing on the target's queue.
351 typename MatX::value_type* p = (
typename MatX::value_type*) X().queue().enqueue_map_buffer(
352 buffer, CL_MAP_WRITE, 0, buffer.size()
	// Adaptor over the mapped memory, honoring the target's offset and leading dimension.
355 typedef typename MatX::orientation XOrientation;
356 typedef dense_matrix_adaptor<typename MatX::value_type, XOrientation> AdaptX;
357 AdaptX adaptX(p + storageX.offset, size1(), size2(), storageX.leading_dimension);
359 plus_assign(adaptX, m_expression);
	// Release the mapping with the unshifted pointer returned by enqueue_map_buffer.
362 X().queue().enqueue_unmap_buffer(buffer,p);
	// Stored closure of the wrapped expression.
366 expression_closure_type m_expression;
	// Address of the queue passed to the constructor (not owned).
367 boost::compute::command_queue* m_queue;
// Scalar-multiply optimizer for vector_transport_to_gpu<E>: the scaling is
// delegated to the optimizer of the wrapped expression E and the result is
// wrapped in a transport that uses the same queue as v.
// vector_transport_to_gpu is a vector expression and the delegate is the vector
// optimizer, so this must specialize vector_scalar_multiply_optimizer; a
// specialization of the matrix optimizer would not be the one consulted for vectors.
376struct vector_scalar_multiply_optimizer<vector_transport_to_gpu<E> >{
	// Optimizer of the wrapped expression and the resulting transport type.
377 typedef vector_scalar_multiply_optimizer<E> opt;
378 typedef vector_transport_to_gpu<typename opt::type> type;
	// v: transport to scale; alpha: scalar factor of the result's value type.
379 static type create(vector_transport_to_gpu<E>
const& v,
typename type::value_type alpha){
380 return type(opt::create(v.expression(), alpha), v.queue());
// Scalar-multiply optimizer for vector_transport_to_cpu<E>: the scaling is
// delegated to the optimizer of the wrapped expression E and the result is
// wrapped in a new transport to the cpu.
// vector_transport_to_cpu is a vector expression and the delegate is the vector
// optimizer, so this must specialize vector_scalar_multiply_optimizer; a
// specialization of the matrix optimizer would not be the one consulted for vectors.
384struct vector_scalar_multiply_optimizer<vector_transport_to_cpu<E> >{
	// Optimizer of the wrapped expression and the resulting transport type.
385 typedef vector_scalar_multiply_optimizer<E> opt;
386 typedef vector_transport_to_cpu<typename opt::type> type;
	// v: transport to scale; alpha: scalar factor of the result's value type.
387 static type create(vector_transport_to_cpu<E>
const& v,
typename type::value_type alpha){
388 return type(opt::create(v.expression(), alpha));
// Scalar-multiply optimizer for matrix_transport_to_gpu<E>: the scaling is
// delegated to the optimizer of the wrapped expression E and the result is
// wrapped in a transport that uses the same queue as m.
393struct matrix_scalar_multiply_optimizer<matrix_transport_to_gpu<E> >{
	// Optimizer of the wrapped expression and the resulting transport type.
394 typedef matrix_scalar_multiply_optimizer<E> opt;
395 typedef matrix_transport_to_gpu<typename opt::type> type;
	// m: transport to scale; alpha: scalar factor of the result's value type.
396 static type create(matrix_transport_to_gpu<E>
const& m,
typename type::value_type alpha){
397 return type(opt::create(m.expression(), alpha), m.queue());
// Scalar-multiply optimizer for matrix_transport_to_cpu<E>: the scaling is
// delegated to the optimizer of the wrapped expression E and the result is
// wrapped in a new transport to the cpu.
402struct matrix_scalar_multiply_optimizer<matrix_transport_to_cpu<E> >{
	// Optimizer of the wrapped expression and the resulting transport type.
403 typedef matrix_scalar_multiply_optimizer<E> opt;
404 typedef matrix_transport_to_cpu<typename opt::type> type;
	// m: transport to scale; alpha: scalar factor of the result's value type.
405 static type create(matrix_transport_to_cpu<E>
const& m,
typename type::value_type alpha){
406 return type(opt::create(m.expression(), alpha));
// Wraps a gpu_tag vector expression in a vector_transport_to_cpu.
// e: the expression to wrap; the returned object is constructed from e().
418vector_transport_to_cpu<E> copy_to_cpu(vector_expression<E, gpu_tag>
const& e){
419 return vector_transport_to_cpu<E>(e());
// Wraps a gpu_tag matrix expression in a matrix_transport_to_cpu.
// e: the expression to wrap; the returned object is constructed from e().
423matrix_transport_to_cpu<E> copy_to_cpu(matrix_expression<E, gpu_tag>
const& e){
424 return matrix_transport_to_cpu<E>(e());
// Wraps a cpu_tag vector expression in a vector_transport_to_gpu bound to queue.
// e: the expression to wrap.
// queue: command queue handed to the transport; defaults to
//        boost::compute::system::default_queue().
427vector_transport_to_gpu<E> copy_to_gpu(
428 vector_expression<E, cpu_tag>
const& e,
429 boost::compute::command_queue& queue = boost::compute::system::default_queue()
431 return vector_transport_to_gpu<E>(e(), queue);
// Wraps a cpu_tag matrix expression in a matrix_transport_to_gpu bound to queue.
// e: the expression to wrap.
// queue: command queue handed to the transport; defaults to
//        boost::compute::system::default_queue().
435matrix_transport_to_gpu<E> copy_to_gpu(
436 matrix_expression<E, cpu_tag>
const& e,
437 boost::compute::command_queue& queue = boost::compute::system::default_queue()
439 return matrix_transport_to_gpu<E>(e(),queue);
445 vector_expression<E, gpu_tag>
const& e,
446 boost::compute::command_queue& queue = boost::compute::system::default_queue()
453 matrix_expression<E, gpu_tag>
const& e,
454 boost::compute::command_queue& queue = boost::compute::system::default_queue()
// Overload selected by a gpu_tag argument: forwards the vector expression to
// copy_to_gpu(e) without passing a queue. The return type is whatever
// copy_to_gpu(e) yields for the given expression and Device.
459template<
class E,
class Device>
460auto copy_to_device(vector_expression<E, Device>
const& e, gpu_tag)->decltype(copy_to_gpu(e)){
461 return copy_to_gpu(e);
// Overload selected by a gpu_tag argument: forwards the matrix expression to
// copy_to_gpu(e) without passing a queue. The return type is whatever
// copy_to_gpu(e) yields for the given expression and Device.
465template<
class E,
class Device>
466auto copy_to_device(matrix_expression<E, Device>
const& e, gpu_tag)->decltype(copy_to_gpu(e)){
467 return copy_to_gpu(e);