dense.hpp
Go to the documentation of this file.
1/*!
2 * \brief Implements the dense matrix class for the gpu
3 *
4 * \author O. Krause
5 * \date 2016
6 *
7 *
8 * \par Copyright 1995-2015 Shark Development Team
9 *
10 * <BR><HR>
11 * This file is part of Shark.
12 * <http://image.diku.dk/shark/>
13 *
14 * Shark is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License as published
16 * by the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * Shark is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Lesser General Public License for more details.
23 *
24 * You should have received a copy of the GNU Lesser General Public License
25 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
26 *
27 */
28#ifndef REMORA_GPU_DENSE_HPP
29#define REMORA_GPU_DENSE_HPP
30
31#include "../detail/traits.hpp"
32#include "../assignment.hpp"
33#include <boost/compute/container/vector.hpp>
34#include <boost/compute/iterator/strided_iterator.hpp>
35#include <boost/compute/algorithm/fill.hpp>
36namespace remora{
37
38
39template<class T, class Tag>
40class dense_vector_adaptor<T, Tag, gpu_tag>: public vector_expression<dense_vector_adaptor<T, Tag, gpu_tag>, gpu_tag > {
41public:
42 typedef std::size_t size_type;
43 typedef typename std::remove_const<T>::type value_type;
44 typedef value_type const& const_reference;
45 typedef T& reference;
46
47 typedef dense_vector_adaptor<T const, Tag, gpu_tag> const_closure_type;
48 typedef dense_vector_adaptor closure_type;
49 typedef gpu::dense_vector_storage<T, Tag> storage_type;
50 typedef gpu::dense_vector_storage<value_type const, Tag> const_storage_type;
51 typedef elementwise<dense_tag> evaluation_category;
52
53 // Construction and destruction
54
55 /// \brief Copy-constructor
56 /// \param v is the proxy to be copied
57 template<class U, class Tag2>
58 dense_vector_adaptor(dense_vector_adaptor<U, Tag2, gpu_tag> const& v)
59 : m_storage(v.raw_storage())
60 , m_queue(&v.queue())
61 , m_size(v.size()){static_assert(std::is_convertible<Tag2,Tag>::value, "Can not convert storage type of argument to the given Tag");}
62
63 /// \brief Constructor of a vector proxy from a block of memory
64 /// \param storage the block of memory used
65 /// \param size number of elements
66 dense_vector_adaptor(
67 storage_type storage,
68 boost::compute::command_queue& queue,
69 size_type size
70 ):m_storage(storage)
71 , m_queue(&queue)
72 , m_size(size){}
73
74 dense_vector_adaptor(vector<value_type, gpu_tag> const& v)
75 : m_storage(v().raw_storage())
76 , m_queue(&v().queue())
77 , m_size(v().size()){}
78
79 dense_vector_adaptor(vector<value_type, gpu_tag>& v)
80 : m_storage(v().raw_storage())
81 , m_queue(&v().queue())
82 , m_size(v().size()){}
83
84 // -------------------
85 // Assignment operators
86 // -------------------
87
88 dense_vector_adaptor& operator = (dense_vector_adaptor const& e) {
89 REMORA_SIZE_CHECK(size() == e().size());
90 return assign(*this, typename vector_temporary<dense_vector_adaptor>::type(e));
91 }
92 template<class E>
93 dense_vector_adaptor& operator = (vector_expression<E, gpu_tag> const& e) {
94 REMORA_SIZE_CHECK(size() == e().size());
95 return assign(*this, typename vector_temporary<dense_vector_adaptor>::type(e));
96 }
97
98 /// \brief Return the size of the vector.
99 size_type size() const {
100 return m_size;
101 }
102
103 ///\brief Returns the underlying storage_type structure for low level access
104 storage_type raw_storage() const{
105 return m_storage;
106 }
107
108 boost::compute::command_queue& queue() const{
109 return *m_queue;
110 }
111
112 void clear(){
113 gpu::detail::meta_kernel k("vector_proxy_clear");
114 auto v = k.register_args(to_functor(*this));
115
116 //create source
117 k<<v(k.get_global_id(0))<<" = 0;";
118 boost::compute::kernel kernel = k.compile(queue().get_context());
119 //enqueue kernel
120 std::size_t global_work_size[1] = {size()};
121 queue().enqueue_nd_range_kernel(kernel, 1,nullptr, global_work_size, nullptr);
122 }
123
124 typedef no_iterator iterator;
125 typedef no_iterator const_iterator;
126
127private:
128 template<class,class,class> friend class dense_vector_adaptor;
129 dense_vector_adaptor(vector<value_type, gpu_tag> && v);//no construction from temporary vector
130
131 storage_type m_storage;
132 boost::compute::command_queue* m_queue;
133 size_type m_size;
134};
135
136template<class T,class Orientation, class Tag>
137class dense_matrix_adaptor<T, Orientation, Tag, gpu_tag>: public matrix_expression<dense_matrix_adaptor<T,Orientation, Tag, gpu_tag>, gpu_tag > {
138public:
139 typedef std::size_t size_type;
140 typedef typename std::remove_const<T>::type value_type;
141 typedef value_type const& const_reference;
142 typedef T& reference;
143
144 typedef dense_matrix_adaptor closure_type;
145 typedef dense_matrix_adaptor<value_type const, Orientation, Tag, gpu_tag> const_closure_type;
146 typedef gpu::dense_matrix_storage<T, Tag> storage_type;
147 typedef gpu::dense_matrix_storage<value_type const, Tag> const_storage_type;
148 typedef Orientation orientation;
149 typedef elementwise<dense_tag> evaluation_category;
150
151 // Construction and destruction
152 template<class U, class Tag2>
153 dense_matrix_adaptor(dense_matrix_adaptor<U, Orientation, Tag2, gpu_tag> const& expression)
154 : m_storage(expression.raw_storage())
155 , m_queue(&expression.queue())
156 , m_size1(expression.size1())
157 , m_size2(expression.size2())
158 {static_assert(std::is_convertible<Tag2,Tag>::value, "Can not convert storage type of argument to the given Tag");}
159
160 /// \brief Constructor of a matrix proxy from a block of memory
161 /// \param storage the block of memory used
162 /// \param size1 number of rows
163 /// \param size2 number of columns
164 dense_matrix_adaptor(
165 storage_type storage,
166 boost::compute::command_queue& queue,
167 size_type size1, size_type size2
168 ):m_storage(storage)
169 , m_queue(&queue)
170 , m_size1(size1)
171 , m_size2(size2){}
172
173 dense_matrix_adaptor(matrix<value_type, Orientation, gpu_tag> const& m )
174 : m_storage(m().raw_storage())
175 , m_queue(&m().queue())
176 , m_size1(m().size1())
177 , m_size2(m().size2()){}
178
179 dense_matrix_adaptor(matrix<value_type, Orientation, gpu_tag>& m )
180 : m_storage(m().raw_storage())
181 , m_queue(&m().queue())
182 , m_size1(m().size1())
183 , m_size2(m().size2()){}
184
185 // -------------------
186 // Assignment operators
187 // -------------------
188
189 dense_matrix_adaptor& operator = (dense_matrix_adaptor const& e) {
190 REMORA_SIZE_CHECK(size1() == e().size1());
191 REMORA_SIZE_CHECK(size2() == e().size2());
192 return assign(*this, typename matrix_temporary<dense_matrix_adaptor>::type(e));
193 }
194 template<class E>
195 dense_matrix_adaptor& operator = (matrix_expression<E, gpu_tag> const& e) {
196 REMORA_SIZE_CHECK(size1() == e().size1());
197 REMORA_SIZE_CHECK(size2() == e().size2());
198 return assign(*this, typename matrix_temporary<dense_matrix_adaptor>::type(e));
199 }
200 template<class E>
201 dense_matrix_adaptor& operator = (vector_set_expression<E, gpu_tag> const& e) {
202 REMORA_SIZE_CHECK(size1() == typename E::point_orientation::index_M(e().size(), e().point_size()));
203 REMORA_SIZE_CHECK(size2() == typename E::point_orientation::index_M(e().size(), e().point_size()));
204 return assign(*this, typename matrix_temporary<dense_matrix_adaptor>::type(e));
205 }
206
207 // ---------
208 // Storage interface
209 // ---------
210
211 ///\brief Returns the number of rows of the matrix.
212 size_type size1() const {
213 return m_size1;
214 }
215 ///\brief Returns the number of columns of the matrix.
216 size_type size2() const {
217 return m_size2;
218 }
219
220 boost::compute::command_queue& queue() const{
221 return *m_queue;
222 }
223
224 ///\brief Returns the underlying storage structure for low level access
225 storage_type raw_storage() const{
226 return {m_storage.buffer, m_storage.offset, m_storage.leading_dimension};
227 }
228
229 void clear(){
230 gpu::detail::meta_kernel k("matrix_proxy_clear");
231 auto m = k.register_args(to_functor(*this));
232
233 //create source
234 k<<m(k.get_global_id(0),k.get_global_id(1))<<" = 0;";
235 boost::compute::kernel kernel = k.compile(queue().get_context());
236 //enqueue kernel
237 std::size_t global_work_size[2] = {size1(), size2()};
238 queue().enqueue_nd_range_kernel(kernel, 2,nullptr, global_work_size, nullptr);
239 }
240
241 // Iterator types
242 typedef no_iterator major_iterator;
243 typedef no_iterator const_major_iterator;
244
245
246private:
247 storage_type m_storage;
248 boost::compute::command_queue* m_queue;
249 size_type m_size1;
250 size_type m_size2;
251};
252
253template<class T>
254class vector<T, gpu_tag>: public vector_container<vector<T, gpu_tag>, gpu_tag > {
255public:
256 typedef T value_type;
257 typedef value_type const_reference;
258 typedef value_type reference;
259 typedef std::size_t size_type;
260
261 typedef dense_vector_adaptor<T const, continuous_dense_tag, gpu_tag> const_closure_type;
262 typedef dense_vector_adaptor<T, continuous_dense_tag, gpu_tag> closure_type;
263 typedef gpu::dense_vector_storage<T,continuous_dense_tag> storage_type;
264 typedef gpu::dense_vector_storage<T,continuous_dense_tag> const_storage_type;
265 typedef elementwise<dense_tag> evaluation_category;
266
267 // Construction and destruction
268
269 /// \brief Constructor of a vector with a default queue
270 ///
271 ///note that for all operations for which vector is on the left hand side,
272 ///the kernels are enqueued on the supplied queue in case of a multi-queue setup.
273 vector(boost::compute::command_queue& queue = boost::compute::system::default_queue())
274 :m_storage(queue.get_context()), m_queue(&queue){}
275
276 /// \brief Constructor of a vector with a predefined size
277 /// By default, its elements are uninitialized.
278 /// \param size initial size of the vector
279 /// \param queue the opencl queue to use by this vector
280 explicit vector(size_type size, boost::compute::command_queue& queue = boost::compute::system::default_queue())
281 : m_storage(size,queue.get_context()), m_queue(&queue){}
282
283 /// \brief Constructor of a vector with a predefined size and a unique initial value
284 /// \param size of the vector
285 /// \param init value to assign to each element of the vector
286 /// \param queue the opencl queue to use by this vector
287 vector(size_type size, value_type const& init, boost::compute::command_queue& queue = boost::compute::system::default_queue())
288 : m_storage(size, init, queue), m_queue(&queue){}
289
290 /// \brief Move-constructor of a vector
291 /// \param v is the vector to be moved
292 vector(vector && v)
293 : m_storage(std::move(v.m_storage))
294 , m_queue(&v.queue()){}
295
296 /// \brief Copy-constructor of a vector
297 /// \param v is the vector to be duplicated
298 vector(vector const& v) = default;
299
300 /// \brief Copy-constructor of a vector from a vector_expression
301 /// \param e the vector_expression whose values will be duplicated into the vector
302 template<class E>
303 vector(vector_expression<E, gpu_tag> const& e)
304 : m_storage(e().size(), e().queue().get_context())
305 , m_queue(&e().queue()){
306 assign(*this, e);
307 }
308
309 /// \brief Copy-constructor of a vector from a vector_expression on a given queue
310 /// \param e the vector_expression whose values will be duplicated into the vector
311 /// \param queue the queue which should perform the task
312 template<class E>
313 vector(vector_expression<E, gpu_tag> const& e, boost::compute::command_queue& queue)
314 : m_storage(e().size(), queue.get_context())
315 , m_queue(&queue){
316 assign(*this, e);
317 }
318
319 // Element access
320 gpu::detail::dense_vector_element<value_type> to_functor() const{
321 return {m_storage.get_buffer()};
322 }
323
324 // -------------------
325 // Assignment operators
326 // -------------------
327
328 /// \brief Assign a full vector (\e RHS-vector) to the current vector (\e LHS-vector)
329 /// Assign a full vector (\e RHS-vector) to the current vector (\e LHS-vector). This method does not create any temporary.
330 /// \param v is the source vector container
331 /// \return a reference to a vector (i.e. the destination vector)
332 vector& operator = (vector const& v){
333 resize(v.size());
334 return assign(*this, v);
335 }
336
337 /// \brief Move-Assign a full vector (\e RHS-vector) to the current vector (\e LHS-vector)
338 /// \param v is the source vector container
339 /// \return a reference to a vector (i.e. the destination vector)
340 vector& operator = (vector && v){
341 m_storage = std::move(v.m_storage);
342 m_queue = v.m_queue;
343 return *this;
344 }
345
346 /// \brief Assign a full vector (\e RHS-vector) to the current vector (\e LHS-vector)
347 /// Assign a full vector (\e RHS-vector) to the current vector (\e LHS-vector). This method does not create any temporary.
348 /// \param v is the source vector container
349 /// \return a reference to a vector (i.e. the destination vector)
350 template<class C> // Container assignment without temporary
351 vector& operator = (vector_container<C, gpu_tag> const& v) {
352 resize(v().size());
353 return assign(*this, v);
354 }
355
356 /// \brief Assign the result of a vector_expression to the vector
357 ///
358 /// \param e is a const reference to the vector_expression
359 /// \return a reference to the resulting vector
360 template<class E>
361 vector& operator = (vector_expression<E, gpu_tag> const& e) {
362 vector temporary(e,queue());
363 swap(*this,temporary);
364 return *this;
365 }
366
367 // ---------
368 // Storage interface
369 // ---------
370
371 /// \brief Return the size of the vector.
372 size_type size() const {
373 return m_storage.size();
374 }
375
376 boost::compute::command_queue& queue() const{
377 return *m_queue;
378 }
379 ///\brief Returns the underlying storage structure for low level access
380 const_storage_type raw_storage() const{
381 return {m_storage.get_buffer(),0,1};
382 }
383
384 ///\brief Returns the underlying storage structure for low level access
385 storage_type raw_storage(){
386 return {m_storage.get_buffer(),0,1};
387 }
388
389 /// \brief Resize the vector
390 ///
391 /// This might erase all data stored in the vector
392 ///
393 /// \param size new size of the vector
394 void resize(size_type size) {
395 if(size < m_storage.size())
396 m_storage.resize(size);
397 else
398 m_storage = boost::compute::vector<T>(size, queue().get_context());
399 }
400
401 /// \brief Resize the vector
402 ///
403 /// This will erase all data stored in the vector and reinitialize it with the supplied value of init
404 ///
405 /// \param size new size of the vector
406 /// \param init the value of all elements
407 void resize(size_type size, value_type init) {
408 resize(size);
409 boost::compute::fill(m_storage.begin(),m_storage.end(), init);
410 }
411
412 void clear(){
413 boost::compute::fill(m_storage.begin(),m_storage.end(), value_type/*zero*/());
414 }
415
416 bool empty()const{
417 return m_storage.empty();
418 }
419
420 /// \brief Swap the content of two vectors
421 /// \param v1 is the first vector. It takes values from v2
422 /// \param v2 is the second vector It takes values from v1
423 friend void swap(vector& v1, vector& v2) {
424 using std::swap;
425 swap(v1.m_storage,v2.m_storage);
426 std::swap(v2.m_queue,v2.m_queue);
427 }
428
429 template<class Archive>
430 void serialize(Archive &ar, const unsigned int file_version) {
431 }
432
433 // Iterator types
434 typedef no_iterator iterator;
435 typedef no_iterator const_iterator;
436private:
437 boost::compute::vector<T> m_storage;
438 boost::compute::command_queue* m_queue;
439};
440
441/// \brief A dense matrix of values of type \c T stored on the gpu
442///
443/// For a \f$(m \times n)\f$-dimensional matrix and \f$ 0 \leq i < m, 0 \leq j < n\f$, every element \f$ m_{i,j} \f$ is mapped to
444/// the \f$(i.n + j)\f$-th element of the container for row major orientation or the \f$ (i + j.m) \f$-th element of
445/// the container for column major orientation. In a dense matrix all elements are represented in memory in a
446/// contiguous chunk of memory by definition.
447///
448/// Orientation can also be specified, otherwise a \c row_major is used.
449///
450/// \tparam T the type of object stored in the matrix (like double, float, complex, etc...)
451/// \tparam L the storage organization. It can be either \c row_major or \c column_major. Default is \c row_major
452template<class T, class L>
453class matrix<T,L, gpu_tag> : public matrix_container<matrix<T,L, gpu_tag>, gpu_tag > {
454public:
455 typedef T value_type;
456 typedef value_type const_reference;
457 typedef value_type reference;
458 typedef std::size_t size_type;
459
460 typedef dense_matrix_adaptor<T const,L, continuous_dense_tag, gpu_tag> const_closure_type;
461 typedef dense_matrix_adaptor<T,L, continuous_dense_tag, gpu_tag> closure_type;
462 typedef gpu::dense_matrix_storage<T, continuous_dense_tag> storage_type;
463 typedef gpu::dense_matrix_storage<T const, continuous_dense_tag> const_storage_type;
464 typedef elementwise<dense_tag> evaluation_category;
465 typedef L orientation;
466
467 // Construction and destruction
468
469 /// \brief Constructor of a matrix with a default queue
470 ///
471 ///note that for all operations for which matrix is on the left hand side,
472 ///the kernels are enqueued on the supplied queue in case of a multi-queue setup.
473 matrix(boost::compute::command_queue& queue = boost::compute::system::default_queue())
474 : m_storage(queue.get_context())
475 , m_queue(&queue),m_size1(0), m_size2(0){}
476
477 /// \brief Constructor of a matrix with a predefined size
478 /// By default, its elements are uninitialized
479 /// \param size1 number of rows
480 /// \param size2 number of columns
481 /// \param queue the opencl queue to use by this matrix
482 explicit matrix(size_type size1, size_type size2, boost::compute::command_queue& queue = boost::compute::system::default_queue())
483 : m_storage(size1 * size2, queue.get_context())
484 , m_queue(&queue)
485 , m_size1(size1)
486 , m_size2(size2){}
487
488 /// \brief Constructor of a matrix with a predefined size initialized to a value
489 /// \param size1 number of rows
490 /// \param size2 number of columns
491 /// \param init value to assign to each element of the matrix
492 /// \param queue the opencl queue to use by this matrix
493 matrix(size_type size1, size_type size2, value_type const& init, boost::compute::command_queue& queue = boost::compute::system::default_queue())
494 : m_storage(size1 * size2, init, queue)
495 , m_queue(&queue)
496 , m_size1(size1)
497 , m_size2(size2){}
498
499 /// \brief Move-constructor of a matrix
500 /// \param m is the matrix to be moved
501 matrix(matrix && m)
502 : m_storage(std::move(m.m_storage))
503 , m_queue(&m.queue())
504 , m_size1(m.size1())
505 , m_size2(m.size2()){}
506
507 /// \brief Copy-constructor of a matrix
508 /// \param m is the matrix to be duplicated
509 matrix(matrix const& m)
510 : m_storage(m.m_storage)
511 , m_queue(&m.queue())
512 , m_size1(m.size1())
513 , m_size2(m.size2()){}
514
515 /// \brief Copy-constructor of a matrix from a matrix_expression
516 /// \param e the matrix_expression whose values will be duplicated into the matrix
517 template<class E>
518 matrix(matrix_expression<E, gpu_tag> const& e)
519 : m_storage(e().size1() * e().size2(), e().queue().get_context())
520 , m_queue(&e().queue())
521 , m_size1(e().size1())
522 , m_size2(e().size2()){
523 assign(*this, e);
524 }
525
526 template<class E>
527 matrix(vector_set_expression<E, gpu_tag> const& e)
528 : m_storage(e().size() * e().point_size(), e().queue().get_context())
529 , m_queue(&e().queue())
530 , m_size1(E::point_orientation::index_M(e().size(), e().point_size()))
531 , m_size2(E::point_orientation::index_m(e().size(), e().point_size())){
532 assign(*this, e().expression());
533 }
534
535 // -------------------
536 // Assignment operators
537 // -------------------
538
539 /// \brief Assign a full matrix (\e RHS-matrix) to the current matrix (\e LHS-matrix)
540 /// Assign a full matrix (\e RHS-matrix) to the current matrix (\e LHS-matrix). This method does not create any temporary.
541 /// \param m is the source matrix container
542 /// \return a reference to a matrix (i.e. the destination matrix)
543 matrix& operator = (matrix const& m){
544 resize(m.size1(),m.size2());
545 return assign(*this, m);
546 }
547
548 /// \brief Move-Assign a full matrix (\e RHS-matrix) to the current matrix (\e LHS-matrix)
549 /// \param m is the source matrix container
550 /// \return a reference to a matrix (i.e. the destination matrix)
551 matrix& operator = (matrix && m){
552 m_storage = std::move(m.m_storage);
553 m_queue = m.m_queue;
554 m_size1 = m.m_size1;
555 m_size2 = m.m_size2;
556 return *this;
557 }
558
559 /// \brief Assign a full matrix (\e RHS-matrix) to the current matrix (\e LHS-matrix)
560 /// Assign a full matrix (\e RHS-matrix) to the current matrix (\e LHS-matrix). This method does not create any temporary.
561 /// \param m is the source matrix container
562 /// \return a reference to a matrix (i.e. the destination matrix)
563 template<class C> // Container assignment without temporary
564 matrix& operator = (matrix_container<C, gpu_tag> const& m) {
565 resize(m().size1(), m().size2());
566 return assign(*this, m);
567 }
568
569 /// \brief Assign the result of a matrix_expression to the matrix
570 ///
571 /// \param e is a const reference to the matrix_expression
572 /// \return a reference to the resulting matrix
573 template<class E>
574 matrix& operator = (matrix_expression<E, gpu_tag> const& e) {
575 matrix temporary(e);
576 swap(*this,temporary);
577 return *this;
578 }
579
580
581 /// \brief Assign the result of a vector_set to the matrix
582 ///
583 /// \param e is a const reference to the vector_set_expression
584 /// \return a reference to the resulting matrix
585 template<class E>
586 matrix& operator = (vector_set_expression<E, gpu_tag> const& e) {
587 matrix temporary(e);
588 swap(temporary);
589 return *this;
590 }
591
592 // ---------
593 // Storage interface
594 // ---------
595
596 ///\brief Returns the number of rows of the matrix.
597 size_type size1() const {
598 return m_size1;
599 }
600 ///\brief Returns the number of columns of the matrix.
601 size_type size2() const {
602 return m_size2;
603 }
604
605 boost::compute::command_queue& queue() const{
606 return *m_queue;
607 }
608 ///\brief Returns the underlying storage structure for low level access
609 const_storage_type raw_storage() const{
610 return {m_storage.get_buffer(),0,leading_dimension()};
611 }
612
613 ///\brief Returns the underlying storage structure for low level access
614 storage_type raw_storage(){
615 return {m_storage.get_buffer(),0,leading_dimension()};
616 }
617
618
619 /// \brief Resize the matrix
620 ///
621 /// This might erase all data stored in the matrix
622 ///
623 /// \param size1 new number of rows
624 /// \param size2 new number of columns
625 void resize(size_type size1, size_type size2) {
626 if(size1 * size2 < m_storage.size())
627 m_storage.resize(size1 * size2);
628 else
629 m_storage = boost::compute::vector<T>(size1 * size2, queue().get_context());
630 m_size1 = size1;
631 m_size2 = size2;
632 }
633
634
635 /// \brief Resize the matrix
636 ///
637 /// This will erase all data stored in the matrix and reinitialize it with the supplied value of init
638 ///
639 /// \param size1 new number of rows
640 /// \param size2 new number of columns
641 /// \param init the value of all elements
642 void resize(size_type size1, size_type size2, value_type init) {
643 resize(size1,size2);
644 boost::compute::fill(m_storage.begin(),m_storage.end(), init, queue());
645 }
646
647 void clear(){
648 boost::compute::fill(m_storage.begin(),m_storage.end(), value_type/*zero*/(), queue());
649 }
650
651 // Iterator types
652 typedef no_iterator major_iterator;
653 typedef no_iterator const_major_iterator;
654
655 /// \brief Swap the content of two matrixs
656 /// \param m1 is the first matrix. It takes values from m2
657 /// \param m2 is the second matrix It takes values from m1
658 friend void swap(matrix& m1, matrix& m2) {
659 using std::swap;
660 swap(m1.m_storage,m2.m_storage);
661 std::swap(m1.m_queue,m2.m_queue);
662 std::swap(m1.m_size1,m2.m_size1);
663 std::swap(m1.m_size2,m2.m_size2);
664 }
665
666 template<class Archive>
667 void serialize(Archive &ar, const unsigned int file_version) {
668 }
669private:
670 std::size_t leading_dimension() const{
671 return orientation::index_m(m_size1, m_size2);
672 };
673
674 boost::compute::vector<T> m_storage;
675 boost::compute::command_queue* m_queue;
676 size_type m_size1;
677 size_type m_size2;
678};
679
680
681template<class T, class Orientation, bool Upper, bool Unit>
682class dense_triangular_proxy<T, Orientation, triangular_tag<Upper, Unit> , gpu_tag>
683: public matrix_expression<dense_triangular_proxy<T, Orientation, triangular_tag<Upper, Unit>, gpu_tag>, gpu_tag> {
684public:
685 typedef std::size_t size_type;
686 typedef typename std::remove_const<T>::type value_type;
687 typedef value_type result_type;
688 typedef typename std::conditional<Unit, value_type const&, T&>::type reference;
689 typedef value_type const& const_reference;
690 typedef dense_triangular_proxy<value_type const, Orientation, triangular_tag<Upper, Unit> , gpu_tag> const_closure_type;
691 typedef dense_triangular_proxy<T, Orientation, triangular_tag<Upper, Unit> , gpu_tag> closure_type;
692
693 typedef gpu::dense_matrix_storage<T, dense_tag> storage_type;
694 typedef gpu::dense_matrix_storage<value_type const, dense_tag> const_storage_type;
695
696 typedef elementwise<dense_tag> evaluation_category;
697 typedef triangular<Orientation,triangular_tag<Upper, Unit> > orientation;
698
699
700 template<class U>
701 dense_triangular_proxy(dense_triangular_proxy<U, Orientation, triangular_tag<Upper, Unit>, gpu_tag> const& expression)
702 : m_storage(expression.raw_storage())
703 , m_queue(&expression.queue())
704 , m_size1(expression.size1())
705 , m_size2(expression.size2()){}
706
707 dense_triangular_proxy(storage_type const& storage, boost::compute::command_queue& queue, std::size_t size1, std::size_t size2)
708 : m_storage(storage)
709 , m_queue(&queue)
710 , m_size1(size1)
711 , m_size2(size2){}
712
713 dense_matrix_adaptor<T, Orientation, dense_tag, gpu_tag> to_dense() const{
714 return {m_storage, queue(), m_size1, m_size2};
715 }
716
717
718 /// \brief Return the number of rows of the matrix
719 size_type size1() const {
720 return m_size1;
721 }
722 /// \brief Return the number of columns of the matrix
723 size_type size2() const {
724 return m_size2;
725 }
726
727 ///\brief Returns the underlying storage structure for low level access
728 storage_type raw_storage() const{
729 return m_storage;
730 }
731
732 boost::compute::command_queue& queue()const{
733 return *m_queue;
734 }
735
736 typedef no_iterator major_iterator;
737 typedef no_iterator const_major_iterator;
738private:
739 storage_type m_storage;
740 boost::compute::command_queue* m_queue;
741 std::size_t m_size1;
742 std::size_t m_size2;
743};
744
745//////////////////////////////////
746//////Expression Traits
747///////////////////////////////////
748
749template<class T>
750struct ExpressionToFunctor<vector<T, gpu_tag> >{
751 static gpu::detail::dense_vector_element<T> transform(vector<T, gpu_tag> const& e){
752 return {e().raw_storage().buffer, 1, 0};
753 }
754};
755
756template<class T, class Orientation>
757struct ExpressionToFunctor<matrix<T, Orientation, gpu_tag> >{
758 static gpu::detail::dense_matrix_element<T> transform(matrix<T, Orientation, gpu_tag> const& e){
759 std::size_t leading = e().raw_storage().leading_dimension;
760 return {e().raw_storage().buffer, Orientation::stride1(leading), Orientation::stride2(leading),0};
761 }
762};
763
764
765
766template<class T, class Tag>
767struct ExpressionToFunctor<dense_vector_adaptor<T, Tag, gpu_tag> >{
768 static gpu::detail::dense_vector_element<T> transform(dense_vector_adaptor<T, Tag, gpu_tag> const& e){
769 auto const& storage = e().raw_storage();
770 return {storage.buffer, storage.stride, storage.offset};
771 }
772};
773
774template<class T, class Tag, class Orientation>
775struct ExpressionToFunctor<dense_matrix_adaptor<T, Orientation, Tag, gpu_tag> >{
776 static gpu::detail::dense_matrix_element<T> transform(dense_matrix_adaptor<T, Orientation, Tag, gpu_tag> const& e){
777 auto const& storage = e().raw_storage();
778 std::size_t stride1 = Orientation::index_m(std::size_t(1), storage.leading_dimension);
779 std::size_t stride2 = Orientation::index_M(std::size_t(1), storage.leading_dimension);
780 return {storage.buffer, stride1, stride2, storage.offset};
781 }
782};
783
784namespace detail{
785
786template<class T, class Orientation>
787struct vector_to_matrix_optimizer<dense_vector_adaptor<T, continuous_dense_tag, gpu_tag>, Orientation >{
788 typedef dense_matrix_adaptor<T, Orientation, continuous_dense_tag, gpu_tag> type;
789
790 static type create(
791 dense_vector_adaptor<T, continuous_dense_tag, gpu_tag> const& v,
792 std::size_t size1, std::size_t size2
793 ){
794 gpu::dense_matrix_storage<T, continuous_dense_tag> storage = {v.raw_storage().buffer, v.raw_storage().offset, Orientation::index_m(size1,size2)};
795 return type(storage, v.queue(), size1, size2);
796 }
797};
798}
799
800}
801
802#endif