28#ifndef REMORA_GPU_DENSE_HPP
29#define REMORA_GPU_DENSE_HPP
31#include "../detail/traits.hpp"
32#include "../assignment.hpp"
33#include <boost/compute/container/vector.hpp>
34#include <boost/compute/iterator/strided_iterator.hpp>
35#include <boost/compute/algorithm/fill.hpp>
39template<
class T,
class Tag>
40class dense_vector_adaptor<T, Tag, gpu_tag>:
public vector_expression<dense_vector_adaptor<T, Tag, gpu_tag>, gpu_tag > {
42 typedef std::size_t size_type;
43 typedef typename std::remove_const<T>::type value_type;
44 typedef value_type
const& const_reference;
47 typedef dense_vector_adaptor<T const, Tag, gpu_tag> const_closure_type;
48 typedef dense_vector_adaptor closure_type;
49 typedef gpu::dense_vector_storage<T, Tag> storage_type;
50 typedef gpu::dense_vector_storage<value_type const, Tag> const_storage_type;
51 typedef elementwise<dense_tag> evaluation_category;
57 template<
class U,
class Tag2>
58 dense_vector_adaptor(dense_vector_adaptor<U, Tag2, gpu_tag>
const& v)
59 : m_storage(v.raw_storage())
61 , m_size(v.size()){
static_assert(std::is_convertible<Tag2,Tag>::value,
"Can not convert storage type of argument to the given Tag");}
68 boost::compute::command_queue& queue,
74 dense_vector_adaptor(vector<value_type, gpu_tag>
const& v)
75 : m_storage(v().raw_storage())
76 , m_queue(&v().queue())
77 , m_size(v().size()){}
79 dense_vector_adaptor(vector<value_type, gpu_tag>& v)
80 : m_storage(v().raw_storage())
81 , m_queue(&v().queue())
82 , m_size(v().size()){}
88 dense_vector_adaptor& operator = (dense_vector_adaptor
const& e) {
89 REMORA_SIZE_CHECK(size() == e().size());
90 return assign(*
this,
typename vector_temporary<dense_vector_adaptor>::type(e));
93 dense_vector_adaptor& operator = (vector_expression<E, gpu_tag>
const& e) {
94 REMORA_SIZE_CHECK(size() == e().size());
95 return assign(*
this,
typename vector_temporary<dense_vector_adaptor>::type(e));
99 size_type size()
const {
104 storage_type raw_storage()
const{
108 boost::compute::command_queue& queue()
const{
113 gpu::detail::meta_kernel k(
"vector_proxy_clear");
114 auto v = k.register_args(to_functor(*
this));
117 k<<v(k.get_global_id(0))<<
" = 0;";
118 boost::compute::kernel kernel = k.compile(queue().get_context());
120 std::size_t global_work_size[1] = {size()};
121 queue().enqueue_nd_range_kernel(kernel, 1,
nullptr, global_work_size,
nullptr);
124 typedef no_iterator iterator;
125 typedef no_iterator const_iterator;
128 template<
class,
class,
class>
friend class dense_vector_adaptor;
129 dense_vector_adaptor(vector<value_type, gpu_tag> && v);
131 storage_type m_storage;
132 boost::compute::command_queue* m_queue;
136template<
class T,
class Orientation,
class Tag>
137class dense_matrix_adaptor<T, Orientation, Tag, gpu_tag>:
public matrix_expression<dense_matrix_adaptor<T,Orientation, Tag, gpu_tag>, gpu_tag > {
139 typedef std::size_t size_type;
140 typedef typename std::remove_const<T>::type value_type;
141 typedef value_type
const& const_reference;
142 typedef T& reference;
144 typedef dense_matrix_adaptor closure_type;
145 typedef dense_matrix_adaptor<value_type const, Orientation, Tag, gpu_tag> const_closure_type;
146 typedef gpu::dense_matrix_storage<T, Tag> storage_type;
147 typedef gpu::dense_matrix_storage<value_type const, Tag> const_storage_type;
148 typedef Orientation orientation;
149 typedef elementwise<dense_tag> evaluation_category;
152 template<
class U,
class Tag2>
153 dense_matrix_adaptor(dense_matrix_adaptor<U, Orientation, Tag2, gpu_tag>
const& expression)
154 : m_storage(expression.raw_storage())
155 , m_queue(&expression.queue())
156 , m_size1(expression.size1())
157 , m_size2(expression.size2())
158 {
static_assert(std::is_convertible<Tag2,Tag>::value,
"Can not convert storage type of argument to the given Tag");}
164 dense_matrix_adaptor(
165 storage_type storage,
166 boost::compute::command_queue& queue,
167 size_type size1, size_type size2
173 dense_matrix_adaptor(matrix<value_type, Orientation, gpu_tag>
const& m )
174 : m_storage(m().raw_storage())
175 , m_queue(&m().queue())
176 , m_size1(m().size1())
177 , m_size2(m().size2()){}
179 dense_matrix_adaptor(matrix<value_type, Orientation, gpu_tag>& m )
180 : m_storage(m().raw_storage())
181 , m_queue(&m().queue())
182 , m_size1(m().size1())
183 , m_size2(m().size2()){}
189 dense_matrix_adaptor& operator = (dense_matrix_adaptor
const& e) {
190 REMORA_SIZE_CHECK(size1() == e().size1());
191 REMORA_SIZE_CHECK(size2() == e().size2());
192 return assign(*
this,
typename matrix_temporary<dense_matrix_adaptor>::type(e));
195 dense_matrix_adaptor& operator = (matrix_expression<E, gpu_tag>
const& e) {
196 REMORA_SIZE_CHECK(size1() == e().size1());
197 REMORA_SIZE_CHECK(size2() == e().size2());
198 return assign(*
this,
typename matrix_temporary<dense_matrix_adaptor>::type(e));
201 dense_matrix_adaptor& operator = (vector_set_expression<E, gpu_tag>
const& e) {
202 REMORA_SIZE_CHECK(size1() ==
typename E::point_orientation::index_M(e().size(), e().point_size()));
203 REMORA_SIZE_CHECK(size2() ==
typename E::point_orientation::index_M(e().size(), e().point_size()));
204 return assign(*
this,
typename matrix_temporary<dense_matrix_adaptor>::type(e));
212 size_type size1()
const {
216 size_type size2()
const {
220 boost::compute::command_queue& queue()
const{
225 storage_type raw_storage()
const{
226 return {m_storage.buffer, m_storage.offset, m_storage.leading_dimension};
230 gpu::detail::meta_kernel k(
"matrix_proxy_clear");
231 auto m = k.register_args(to_functor(*
this));
234 k<<m(k.get_global_id(0),k.get_global_id(1))<<
" = 0;";
235 boost::compute::kernel kernel = k.compile(queue().get_context());
237 std::size_t global_work_size[2] = {size1(), size2()};
238 queue().enqueue_nd_range_kernel(kernel, 2,
nullptr, global_work_size,
nullptr);
242 typedef no_iterator major_iterator;
243 typedef no_iterator const_major_iterator;
247 storage_type m_storage;
248 boost::compute::command_queue* m_queue;
254class vector<T, gpu_tag>:
public vector_container<vector<T, gpu_tag>, gpu_tag > {
256 typedef T value_type;
257 typedef value_type const_reference;
258 typedef value_type reference;
259 typedef std::size_t size_type;
261 typedef dense_vector_adaptor<T const, continuous_dense_tag, gpu_tag> const_closure_type;
262 typedef dense_vector_adaptor<T, continuous_dense_tag, gpu_tag> closure_type;
263 typedef gpu::dense_vector_storage<T,continuous_dense_tag> storage_type;
264 typedef gpu::dense_vector_storage<T,continuous_dense_tag> const_storage_type;
265 typedef elementwise<dense_tag> evaluation_category;
273 vector(boost::compute::command_queue& queue = boost::compute::system::default_queue())
274 :m_storage(queue.get_context()), m_queue(&queue){}
280 explicit vector(size_type size, boost::compute::command_queue& queue = boost::compute::system::default_queue())
281 : m_storage(size,queue.get_context()), m_queue(&queue){}
287 vector(size_type size, value_type
const& init, boost::compute::command_queue& queue = boost::compute::system::default_queue())
288 : m_storage(size, init, queue), m_queue(&queue){}
293 : m_storage(std::move(v.m_storage))
294 , m_queue(&v.queue()){}
298 vector(vector
const& v) =
default;
303 vector(vector_expression<E, gpu_tag>
const& e)
304 : m_storage(e().size(), e().queue().get_context())
305 , m_queue(&e().queue()){
313 vector(vector_expression<E, gpu_tag>
const& e, boost::compute::command_queue& queue)
314 : m_storage(e().size(), queue.get_context())
320 gpu::detail::dense_vector_element<value_type> to_functor()
const{
321 return {m_storage.get_buffer()};
332 vector& operator = (vector
const& v){
334 return assign(*
this, v);
340 vector& operator = (vector && v){
341 m_storage = std::move(v.m_storage);
351 vector& operator = (vector_container<C, gpu_tag>
const& v) {
353 return assign(*
this, v);
361 vector& operator = (vector_expression<E, gpu_tag>
const& e) {
362 vector temporary(e,queue());
363 swap(*
this,temporary);
372 size_type size()
const {
373 return m_storage.size();
376 boost::compute::command_queue& queue()
const{
380 const_storage_type raw_storage()
const{
381 return {m_storage.get_buffer(),0,1};
385 storage_type raw_storage(){
386 return {m_storage.get_buffer(),0,1};
394 void resize(size_type size) {
395 if(size < m_storage.size())
396 m_storage.resize(size);
398 m_storage = boost::compute::vector<T>(size, queue().get_context());
407 void resize(size_type size, value_type init) {
409 boost::compute::fill(m_storage.begin(),m_storage.end(), init);
413 boost::compute::fill(m_storage.begin(),m_storage.end(), value_type());
417 return m_storage.empty();
423 friend void swap(vector& v1, vector& v2) {
425 swap(v1.m_storage,v2.m_storage);
426 std::swap(v2.m_queue,v2.m_queue);
429 template<
class Archive>
430 void serialize(Archive &ar,
const unsigned int file_version) {
434 typedef no_iterator iterator;
435 typedef no_iterator const_iterator;
437 boost::compute::vector<T> m_storage;
438 boost::compute::command_queue* m_queue;
452template<
class T,
class L>
453class matrix<T,L, gpu_tag> :
public matrix_container<matrix<T,L, gpu_tag>, gpu_tag > {
455 typedef T value_type;
456 typedef value_type const_reference;
457 typedef value_type reference;
458 typedef std::size_t size_type;
460 typedef dense_matrix_adaptor<T const,L, continuous_dense_tag, gpu_tag> const_closure_type;
461 typedef dense_matrix_adaptor<T,L, continuous_dense_tag, gpu_tag> closure_type;
462 typedef gpu::dense_matrix_storage<T, continuous_dense_tag> storage_type;
463 typedef gpu::dense_matrix_storage<T const, continuous_dense_tag> const_storage_type;
464 typedef elementwise<dense_tag> evaluation_category;
465 typedef L orientation;
473 matrix(boost::compute::command_queue& queue = boost::compute::system::default_queue())
474 : m_storage(queue.get_context())
475 , m_queue(&queue),m_size1(0), m_size2(0){}
482 explicit matrix(size_type size1, size_type size2, boost::compute::command_queue& queue = boost::compute::system::default_queue())
483 : m_storage(size1 * size2, queue.get_context())
493 matrix(size_type size1, size_type size2, value_type
const& init, boost::compute::command_queue& queue = boost::compute::system::default_queue())
494 : m_storage(size1 * size2, init, queue)
502 : m_storage(std::move(m.m_storage))
503 , m_queue(&m.queue())
505 , m_size2(m.size2()){}
509 matrix(matrix
const& m)
510 : m_storage(m.m_storage)
511 , m_queue(&m.queue())
513 , m_size2(m.size2()){}
518 matrix(matrix_expression<E, gpu_tag>
const& e)
519 : m_storage(e().size1() * e().size2(), e().queue().get_context())
520 , m_queue(&e().queue())
521 , m_size1(e().size1())
522 , m_size2(e().size2()){
527 matrix(vector_set_expression<E, gpu_tag>
const& e)
528 : m_storage(e().size() * e().point_size(), e().queue().get_context())
529 , m_queue(&e().queue())
530 , m_size1(E::point_orientation::index_M(e().size(), e().point_size()))
531 , m_size2(E::point_orientation::index_m(e().size(), e().point_size())){
532 assign(*
this, e().expression());
543 matrix& operator = (matrix
const& m){
544 resize(m.size1(),m.size2());
545 return assign(*
this, m);
551 matrix& operator = (matrix && m){
552 m_storage = std::move(m.m_storage);
564 matrix& operator = (matrix_container<C, gpu_tag>
const& m) {
565 resize(m().size1(), m().size2());
566 return assign(*
this, m);
574 matrix& operator = (matrix_expression<E, gpu_tag>
const& e) {
576 swap(*
this,temporary);
586 matrix& operator = (vector_set_expression<E, gpu_tag>
const& e) {
597 size_type size1()
const {
601 size_type size2()
const {
605 boost::compute::command_queue& queue()
const{
609 const_storage_type raw_storage()
const{
610 return {m_storage.get_buffer(),0,leading_dimension()};
614 storage_type raw_storage(){
615 return {m_storage.get_buffer(),0,leading_dimension()};
625 void resize(size_type size1, size_type size2) {
626 if(size1 * size2 < m_storage.size())
627 m_storage.resize(size1 * size2);
629 m_storage = boost::compute::vector<T>(size1 * size2, queue().get_context());
642 void resize(size_type size1, size_type size2, value_type init) {
644 boost::compute::fill(m_storage.begin(),m_storage.end(), init, queue());
648 boost::compute::fill(m_storage.begin(),m_storage.end(), value_type(), queue());
652 typedef no_iterator major_iterator;
653 typedef no_iterator const_major_iterator;
658 friend void swap(matrix& m1, matrix& m2) {
660 swap(m1.m_storage,m2.m_storage);
661 std::swap(m1.m_queue,m2.m_queue);
662 std::swap(m1.m_size1,m2.m_size1);
663 std::swap(m1.m_size2,m2.m_size2);
666 template<
class Archive>
667 void serialize(Archive &ar,
const unsigned int file_version) {
670 std::size_t leading_dimension()
const{
671 return orientation::index_m(m_size1, m_size2);
674 boost::compute::vector<T> m_storage;
675 boost::compute::command_queue* m_queue;
681template<
class T,
class Orientation,
bool Upper,
bool Unit>
682class dense_triangular_proxy<T, Orientation, triangular_tag<Upper, Unit> , gpu_tag>
683:
public matrix_expression<dense_triangular_proxy<T, Orientation, triangular_tag<Upper, Unit>, gpu_tag>, gpu_tag> {
685 typedef std::size_t size_type;
686 typedef typename std::remove_const<T>::type value_type;
687 typedef value_type result_type;
688 typedef typename std::conditional<Unit, value_type const&, T&>::type reference;
689 typedef value_type
const& const_reference;
690 typedef dense_triangular_proxy<value_type const, Orientation, triangular_tag<Upper, Unit> , gpu_tag> const_closure_type;
691 typedef dense_triangular_proxy<T, Orientation, triangular_tag<Upper, Unit> , gpu_tag> closure_type;
693 typedef gpu::dense_matrix_storage<T, dense_tag> storage_type;
694 typedef gpu::dense_matrix_storage<value_type const, dense_tag> const_storage_type;
696 typedef elementwise<dense_tag> evaluation_category;
697 typedef triangular<Orientation,triangular_tag<Upper, Unit> > orientation;
701 dense_triangular_proxy(dense_triangular_proxy<U, Orientation, triangular_tag<Upper, Unit>, gpu_tag>
const& expression)
702 : m_storage(expression.raw_storage())
703 , m_queue(&expression.queue())
704 , m_size1(expression.size1())
705 , m_size2(expression.size2()){}
707 dense_triangular_proxy(storage_type
const& storage, boost::compute::command_queue& queue, std::size_t size1, std::size_t size2)
713 dense_matrix_adaptor<T, Orientation, dense_tag, gpu_tag> to_dense()
const{
714 return {m_storage, queue(), m_size1, m_size2};
719 size_type size1()
const {
723 size_type size2()
const {
728 storage_type raw_storage()
const{
732 boost::compute::command_queue& queue()
const{
736 typedef no_iterator major_iterator;
737 typedef no_iterator const_major_iterator;
739 storage_type m_storage;
740 boost::compute::command_queue* m_queue;
750struct ExpressionToFunctor<vector<T, gpu_tag> >{
751 static gpu::detail::dense_vector_element<T>
transform(vector<T, gpu_tag>
const& e){
752 return {e().raw_storage().buffer, 1, 0};
756template<
class T,
class Orientation>
757struct ExpressionToFunctor<matrix<T, Orientation, gpu_tag> >{
758 static gpu::detail::dense_matrix_element<T>
transform(matrix<T, Orientation, gpu_tag>
const& e){
759 std::size_t leading = e().raw_storage().leading_dimension;
760 return {e().raw_storage().buffer, Orientation::stride1(leading), Orientation::stride2(leading),0};
766template<
class T,
class Tag>
767struct ExpressionToFunctor<dense_vector_adaptor<T, Tag, gpu_tag> >{
768 static gpu::detail::dense_vector_element<T>
transform(dense_vector_adaptor<T, Tag, gpu_tag>
const& e){
769 auto const& storage = e().raw_storage();
770 return {storage.buffer, storage.stride, storage.offset};
774template<
class T,
class Tag,
class Orientation>
775struct ExpressionToFunctor<dense_matrix_adaptor<T, Orientation, Tag, gpu_tag> >{
776 static gpu::detail::dense_matrix_element<T>
transform(dense_matrix_adaptor<T, Orientation, Tag, gpu_tag>
const& e){
777 auto const& storage = e().raw_storage();
778 std::size_t stride1 = Orientation::index_m(std::size_t(1), storage.leading_dimension);
779 std::size_t stride2 = Orientation::index_M(std::size_t(1), storage.leading_dimension);
780 return {storage.buffer, stride1, stride2, storage.offset};
786template<
class T,
class Orientation>
787struct vector_to_matrix_optimizer<dense_vector_adaptor<T, continuous_dense_tag, gpu_tag>, Orientation >{
788 typedef dense_matrix_adaptor<T, Orientation, continuous_dense_tag, gpu_tag> type;
791 dense_vector_adaptor<T, continuous_dense_tag, gpu_tag>
const& v,
792 std::size_t size1, std::size_t size2
794 gpu::dense_matrix_storage<T, continuous_dense_tag> storage = {v.raw_storage().buffer, v.raw_storage().offset, Orientation::index_m(size1,size2)};
795 return type(storage, v.queue(), size1, size2);