traits.hpp
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief Traits of gpu expressions
6 *
7 * \author O. Krause
8 * \date 2016
9 *
10 *
11 * \par Copyright 1995-2015 Shark Development Team
12 *
13 * <BR><HR>
14 * This file is part of Shark.
15 * <http://image.diku.dk/shark/>
16 *
17 * Shark is free software: you can redistribute it and/or modify
18 * it under the terms of the GNU Lesser General Public License as published
19 * by the Free Software Foundation, either version 3 of the License, or
20 * (at your option) any later version.
21 *
22 * Shark is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU Lesser General Public License for more details.
26 *
27 * You should have received a copy of the GNU Lesser General Public License
28 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
29 *
30 */
31//===========================================================================
32
33#ifndef REMORA_GPU_TRAITS_HPP
34#define REMORA_GPU_TRAITS_HPP
35
36#include <boost/compute/command_queue.hpp>
37#include <boost/compute/core.hpp>
38#include <boost/compute/container/vector.hpp>
39#include <boost/compute/functional/operator.hpp>
40#include <boost/compute/functional.hpp>
41
42namespace remora{namespace gpu{
43
44template<class T, class Tag>
45struct dense_vector_storage{
46 typedef Tag storage_tag;
47
48 boost::compute::buffer buffer;
49 std::size_t offset;
50 std::size_t stride;
51
52 dense_vector_storage(){}
53 dense_vector_storage(boost::compute::buffer const& buffer, std::size_t offset, std::size_t stride)
54 :buffer(buffer), offset(offset), stride(stride){}
55 template<class U, class Tag2>
56 dense_vector_storage(dense_vector_storage<U, Tag2> const& storage):
57 buffer(storage.buffer), offset(storage.offset), stride(storage.stride){
58 static_assert(std::is_convertible<U&, T&>::value, "incompatible storage");
59 static_assert(!(std::is_same<Tag,continuous_dense_tag>::value && std::is_same<Tag2,dense_tag>::value), "Trying to assign dense to continuous dense storage");
60 }
61
62 dense_vector_storage<T,Tag> sub_region(std::size_t offset) const{
63 return {buffer, this->offset+offset * stride, stride};
64 }
65};
66
67template<class T, class Tag>
68struct dense_matrix_storage{
69 typedef Tag storage_tag;
70 template<class O>
71 struct row_storage: public std::conditional<
72 std::is_same<O,row_major>::value,
73 dense_vector_storage<T, Tag>,
74 dense_vector_storage<T, dense_tag>
75 >{};
76 template<class O>
77 struct rows_storage: public std::conditional<
78 std::is_same<O,row_major>::value,
79 dense_matrix_storage<T, Tag>,
80 dense_matrix_storage<T, dense_tag>
81 >{};
82
83 typedef dense_vector_storage<T,Tag> diag_storage;
84 typedef dense_matrix_storage<T,dense_tag> sub_region_storage;
85
86 boost::compute::buffer buffer;
87 std::size_t offset;
88 std::size_t leading_dimension;
89
90 dense_matrix_storage(){}
91 dense_matrix_storage(boost::compute::buffer const& buffer, std::size_t offset, std::size_t leading_dimension)
92 :buffer(buffer), offset(offset), leading_dimension(leading_dimension){}
93
94 template<class U, class Tag2>
95 dense_matrix_storage(dense_matrix_storage<U, Tag2> const& storage):
96 buffer(storage.buffer), offset(storage.offset), leading_dimension(storage.leading_dimension){
97 static_assert(std::is_convertible<U&, T&>::value, "incompatible storage");
98 static_assert(!(std::is_same<Tag,continuous_dense_tag>::value && std::is_same<Tag2,dense_tag>::value), "Trying to assign dense to continuous dense storage");
99 }
100
101 template<class Orientation>
102 sub_region_storage sub_region(std::size_t offset1, std::size_t offset2, Orientation) const{
103 std::size_t offset_major = Orientation::index_M(offset1,offset2);
104 std::size_t offset_minor = Orientation::index_m(offset1,offset2);
105 return {buffer, offset + offset_major*leading_dimension+offset_minor, leading_dimension};
106 }
107
108 template<class Orientation>
109 typename row_storage<Orientation>::type row(std::size_t i, Orientation) const{
110 return {buffer, offset + i * Orientation::index_M(leading_dimension,std::size_t(1)), Orientation::index_m(leading_dimension,std::size_t(1))};
111 }
112
113 template<class Orientation>
114 typename rows_storage<Orientation>::type sub_rows(std::size_t i, Orientation) const{
115 std::size_t stride = Orientation::index_M(leading_dimension,(std::size_t)1);
116 return {buffer,offset + i * stride, leading_dimension};
117 }
118
119 diag_storage diag(){
120 return {buffer, offset, leading_dimension+1};
121 }
122
123 dense_vector_storage<T, continuous_dense_tag> linear() const{
124 return {buffer, offset, 1};
125 }
126};
127
128
//Expression objects are generated by the functors and are then turned into code by the meta_kernel operator<<.
//Note that the type of the stored scalar can often differ from its actual type; this allows replacing the value
//by a variable representing it (i.e. a kernel argument). This way we avoid hard-coding values in the generated
//source code in case the variable is not actually constant.
133namespace detail{
/// \brief Expression object for a constant.
///
/// T is the nominal value type; Stored is the type of the member that is
/// written into the kernel source (defaults to T).
template<typename T, typename Stored = T>
struct invoked_constant{
    Stored m_value;
};
138
/// \brief Expression object for "arg1 Op scalar", Op being a single character.
///
/// Stored is the type used to hold the scalar; T is the result type.
template<typename Arg1, typename T, char Op, typename Stored>
struct invoked_operator_scalar{
    using result_type = T;

    Arg1 arg1;
    Stored m_scalar;
};
145
/// \brief Expression object holding an argument and a scalar to add.
///
/// Stored is the type used to hold the scalar (defaults to T).
template<typename Arg1, typename T, typename Stored = T>
struct invoked_add_scalar{
    using result_type = T;

    Arg1 arg1;
    Stored m_scalar;
};
152
/// \brief Expression object for "arg1 + scalar * arg2".
///
/// Stored is the type used to hold the scalar (defaults to T).
template<typename Arg1, typename Arg2, typename T, typename Stored = T>
struct invoked_multiply_and_add{
    using result_type = T;

    Arg1 arg1;
    Arg2 arg2;
    Stored m_scalar;
};
160
/// \brief Expression object for the soft-plus function applied to arg1.
template<typename Arg1, typename T>
struct invoked_soft_plus{
    using result_type = T;

    Arg1 arg1;
};
/// \brief Expression object for the sigmoid function applied to arg1.
template<typename Arg1, typename T>
struct invoked_sigmoid{
    using result_type = T;

    Arg1 arg1;
};
171
/// \brief Expression object for the square of arg1.
template<typename Arg1, typename T>
struct invoked_sqr{
    using result_type = T;

    Arg1 arg1;
};
177
/// \brief Expression object for the reciprocal of arg1.
template<typename Arg1, typename T>
struct invoked_inv{
    using result_type = T;

    Arg1 arg1;
};
183
/// \brief Expression object for a division with a fallback value.
///
/// arg1 is the numerator, arg2 the denominator and default_value the value
/// of type S used when the denominator is zero.
template<typename Arg1, typename Arg2, typename T, typename S>
struct invoked_safe_div{
    using result_type = T;

    Arg1 arg1;
    Arg2 arg2;
    S default_value;
};
191
192
193template<class Arg1, class T, char Op, class S>
194boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_operator_scalar<Arg1,T, Op, S> const& e){
195 return k << '('<<e.arg1 << Op << e.m_scalar<<')';
196}
197template<class Arg1, class Arg2, class T, class S>
198boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_multiply_and_add<Arg1,Arg2,T, S> const& e){
199 return k << '('<<e.arg1<<'+'<<e.m_scalar << '*'<< e.arg2<<')';
200}
201template<class Arg1, class T>
202boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_soft_plus<Arg1,T> const& e){
203 return k << "(log(1+exp("<< e.arg1<<")))";
204}
205template<class Arg1, class T>
206boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_sigmoid<Arg1,T> const& e){
207 return k << "(1/(1+exp(-"<< e.arg1<<")))";
208}
209template<class Arg1, class T>
210boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_sqr<Arg1,T> const& e){
211 return k << '('<<e.arg1<<'*'<<e.arg1<<')';
212}
213template<class Arg1, class T>
214boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_inv<Arg1,T> const& e){
215 return k << "1/("<<e.arg1<<')';
216}
217
218template<class T, class S>
219boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_constant<T, S> const& e){
220 return k << e.m_value;
221}
222
223
224template<class Arg1, class Arg2, class T, class S>
225boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_safe_div<Arg1,Arg2,T, S> const& e){
226 return k << "(("<<e.arg2<<"!=0)?"<<e.arg1<<'/'<<e.arg2<<':'<<e.default_value<<')';
227}
228
229}//End namespace detail
230}//End namespace gpu
231
232template<>
233struct device_traits<gpu_tag>{
234 typedef boost::compute::command_queue queue_type;
235
236 static queue_type& default_queue(){
237 return boost::compute::system::default_queue();
238 }
239
240 // iterators
241
242 template <class Iterator, class Functor>
243 struct transform_iterator{
244 typedef no_iterator type;
245 };
246
247 template <class Iterator1, class Iterator2, class Functor>
248 struct binary_transform_iterator{
249 typedef no_iterator type;
250 };
251
252 template<class T>
253 struct constant_iterator{
254 typedef no_iterator type;
255 };
256
257 template<class T>
258 struct one_hot_iterator{
259 typedef no_iterator type;
260 };
261
262 template<class Closure>
263 struct indexed_iterator{
264 typedef no_iterator type;
265 };
266
267 //functional
268
269 //G(F(args))
270 template<class F, class G>
271 struct compose{
272 typedef typename G::result_type result_type;
273 compose(F const& f, G const& g): m_f(f), m_g(g){ }
274
275 template<class Arg1>
276 auto operator()( Arg1 const& x) const -> decltype(std::declval<G const&>()(std::declval<F const&>()(x))){
277 return m_g(m_f(x));
278 }
279 template<class Arg1, class Arg2>
280 auto operator()( Arg1 const& x, Arg2 const& y) const -> decltype(std::declval<G const&>()(std::declval<F const&>()(x,y))){
281 return m_g(m_f(x,y));
282 }
283
284 F m_f;
285 G m_g;
286 };
287
288 //G(F1(args),F2(args))
289 template<class F1, class F2, class G>
290 struct compose_binary{
291 typedef typename G::result_type result_type;
292 compose_binary(F1 const& f1, F2 const& f2, G const& g): m_f1(f1), m_f2(f2), m_g(g){ }
293
294 template<class Arg1>
295 auto operator()( Arg1 const& x) const -> decltype(std::declval<G const&>()(std::declval<F1 const&>()(x),std::declval<F2 const&>()(x))){
296 return m_g(m_f1(x), m_f2(x));
297 }
298 template<class Arg1, class Arg2>
299 auto operator()( Arg1 const& x, Arg2 const& y) const -> decltype(std::declval<G const&>()(std::declval<F1 const&>()(x,y),std::declval<F2 const&>()(x,y))){
300 return m_g(m_f1(x,y), m_f2(x,y));
301 }
302
303 F1 m_f1;
304 F2 m_f2;
305 G m_g;
306 };
307
308
309 //G(F1(arg1),F2(arg2))
310 template<class F1, class F2, class G>
311 struct transform_arguments{
312 typedef typename G::result_type result_type;
313 transform_arguments(F1 const& f1, F2 const& f2, G const& g): m_f1(f1), m_f2(f2), m_g(g){ }
314
315 template<class Arg1, class Arg2>
316 auto operator()( Arg1 const& x, Arg2 const& y) const -> decltype(std::declval<G const&>()(std::declval<F1 const&>()(x),std::declval<F2 const&>()(y))){
317 return m_g(m_f1(x),m_f2(y));
318 }
319
320 F1 m_f1;
321 F2 m_f2;
322 G m_g;
323 };
324
/// \brief Unary functor computing f(arg1, arg2) with a stored second argument.
///
/// Function and bound argument are stored by value; the result type is the
/// one declared by F.
template<typename F, typename Arg2>
struct bind_second{
    using result_type = typename F::result_type;

    bind_second(F const& f, Arg2 const& arg2)
    : m_function(f), m_arg2(arg2){}

    template<typename Arg1>
    auto operator()(Arg1 const& arg1) const
        -> decltype(std::declval<F const&>()(arg1,std::declval<Arg2 const&>()))
    {
        return m_function(arg1, m_arg2);
    }

    F m_function;
    Arg2 m_arg2;
};
339
340
341 //helper functions
342 template<class F, class G>
343 static compose<F,G> make_compose(F const& f, G const&g){
344 return compose<F,G>(f,g);
345 }
346
347 template<class F1, class F2, class G>
348 static compose_binary<F1, F2, G> make_compose_binary(F1 const& f1, F2 const& f2, G const&g){
349 return compose_binary<F1, F2, G>(f1, f2, g);
350 }
351
352 template<class F1, class F2, class G>
353 static transform_arguments<F1, F2, G> make_transform_arguments(F1 const& f1, F2 const& f2, G const& g){
354 return transform_arguments<F1, F2, G>(f1, f2, g);
355 }
356
357 template<class F, class Arg2>
358 static bind_second<F,Arg2> make_bind_second(F const& f, Arg2 const& arg2){
359 return bind_second<F,Arg2>(f,arg2);
360 }
361
362
363 //functors
364
365 //basic arithmetic
366 template<class T>
367 using add = boost::compute::plus<T>;
368 template<class T>
369 using subtract = boost::compute::minus<T>;
370 template<class T>
371 using multiply = boost::compute::multiplies<T>;
372 template<class T>
373 using divide = boost::compute::divides<T>;
374 template<class T>
375 using modulo = boost::compute::modulus<T>;
376 template<class T>
377 using pow = boost::compute::pow<T>;
378 template<class T, class S=T>
379 struct safe_divide{
380 typedef T result_type;
381 safe_divide(S const& default_value) : default_value(default_value) { }
382
383 template<class Arg1, class Arg2>
384 gpu::detail::invoked_safe_div<Arg1,Arg2, T,S> operator()(const Arg1 &x, const Arg2& y) const
385 {
386 return {x,y,default_value};
387 }
388 S default_value;
389 };
390 template<class T, class S= T>
391 struct multiply_and_add{
392 typedef T result_type;
393 multiply_and_add(S const& scalar) :m_scalar(scalar) { }
394
395 template<class Arg1, class Arg2>
396 gpu::detail::invoked_multiply_and_add<Arg1,Arg2,T,S> operator()(const Arg1 &x, const Arg2& y) const
397 {
398 return {x,y, m_scalar};
399 }
400 S m_scalar;
401 };
402
403
404 template<class T, char Op, class S>
405 struct operator_scalar{
406 typedef T result_type;
407 operator_scalar(S const& scalar) : m_scalar(scalar) { }
408
409 template<class Arg1>
410 gpu::detail::invoked_operator_scalar<Arg1,T, Op, S> operator()(Arg1 const& x) const
411 {
412 return {x, m_scalar};
413 }
414 S m_scalar;
415 };
416
417 template<class T>
418 using multiply_scalar = operator_scalar<T, '*', T>;
419 template<class T>
420 using add_scalar = operator_scalar<T, '+', T>;
421 template<class T>
422 using divide_scalar = operator_scalar<T, '/', T>;
423 template<class T>
424 using modulo_scalar = operator_scalar<T, '%', T>;
425
426 template<class T, class S=T>
427 struct multiply_assign{
428 typedef T result_type;
429 multiply_assign(S const& scalar): m_scalar(scalar) { }
430
431 template<class Arg1, class Arg2>
432 gpu::detail::invoked_operator_scalar<Arg2,T,'*',S> operator()(const Arg1&, const Arg2& y) const
433 {
434 return {y, m_scalar};
435 }
436 S m_scalar;
437 };
/// \brief Unary functor returning a reference to its argument.
template<typename T>
struct identity{
    using result_type = T;

    template<typename Arg>
    Arg const& operator()(Arg const& arg) const{
        Arg const& same = arg;
        return same;
    }
};
447
/// \brief Binary functor returning a reference to its first argument.
template<typename T>
struct left_arg{
    using result_type = T;

    template<typename Arg1, typename Arg2>
    Arg1 const& operator()(Arg1 const& arg1, Arg2 const&) const{
        Arg1 const& first = arg1;
        return first;
    }
};
/// \brief Binary functor returning a reference to its second argument.
template<typename T>
struct right_arg{
    using result_type = T;

    template<typename Arg1, typename Arg2>
    Arg2 const& operator()(Arg1 const&, Arg2 const& arg2) const{
        Arg2 const& second = arg2;
        return second;
    }
};
466
467 template<class T, class S=T>
468 struct constant{
469 typedef T result_type;
470 constant(S const& value): m_value(value){}
471
472 template<class Arg>
473 gpu::detail::invoked_constant<T,S> operator()(Arg const&) const
474 {
475 return {m_value};
476 }
477 template<class Arg1, class Arg2>
478 gpu::detail::invoked_constant<T,S> operator()(Arg1 const&, Arg2 const&) const
479 {
480 return {m_value};
481 }
482
483 S m_value;
484 };
485
486
487 //math unary functions
488 template<class T>
489 using log = boost::compute::log<T>;
490 template<class T>
491 using exp = boost::compute::exp<T>;
492 template<class T>
493 using sin = boost::compute::sin<T>;
494 template<class T>
495 using cos = boost::compute::cos<T>;
496 template<class T>
497 using tan = boost::compute::tan<T>;
498 template<class T>
499 using asin = boost::compute::asin<T>;
500 template<class T>
501 using acos = boost::compute::acos<T>;
502 template<class T>
503 using atan = boost::compute::atan<T>;
504 template<class T>
505 using tanh = boost::compute::tanh<T>;
506 template<class T>
507 using sqrt = boost::compute::sqrt<T>;
508 template<class T>
509 using cbrt = boost::compute::cbrt<T>;
510 template<class T>
511 using abs = boost::compute::fabs<T>;
512
513 template<class T>
514 using erf = boost::compute::erf<T>;
515 template<class T>
516 using erfc = boost::compute::erfc<T>;
517
518 template<class T>
519 struct sqr{
520 typedef T result_type;
521
522 template<class Arg1>
523 gpu::detail::invoked_sqr<Arg1,T> operator()(const Arg1 &x) const{
524 return {x};
525 }
526 };
527 template<class T>
528 struct soft_plus{
529 typedef T result_type;
530
531 template<class Arg1>
532 gpu::detail::invoked_soft_plus<Arg1,T> operator()(const Arg1 &x) const{
533 return {x};
534 }
535 };
536 template<class T>
537 struct sigmoid{
538 typedef T result_type;
539
540 template<class Arg1>
541 gpu::detail::invoked_sigmoid<Arg1,T> operator()(const Arg1 &x) const{
542 return {x};
543 }
544 };
545 template<class T>
546 struct inv{
547 typedef T result_type;
548
549 template<class Arg1>
550 gpu::detail::invoked_inv<Arg1,T> operator()(const Arg1 &x) const{
551 return {x};
552 }
553 };
554
555 //min/max
556 template<class T>
557 using min = boost::compute::fmin<T>;
558 template<class T>
559 using max = boost::compute::fmax<T>;
560
561 //comparison
562 template<class T>
563 using less = boost::compute::less<T>;
564 template<class T>
565 using less_equal = boost::compute::less_equal<T>;
566 template<class T>
567 using greater = boost::compute::greater<T>;
568 template<class T>
569 using greater_equal = boost::compute::greater_equal<T>;
570 template<class T>
571 using equal = boost::compute::equal_to<T>;
572 template<class T>
573 using not_equal = boost::compute::not_equal_to<T>;
574};
575
576namespace gpu{namespace detail{
577
578
struct meta_kernel;

/// \brief Primary template: an entity without a specialization has nothing
/// to register and is handed back unchanged.
template<typename Entity>
struct register_with_compute_kernel{
    using type = Entity;

    /// Returns e itself; the kernel is not touched.
    static type const& reg(meta_kernel&, Entity const& e){
        type const& unchanged = e;
        return unchanged;
    }
};
588
589struct meta_kernel: public boost::compute::detail::meta_kernel{
590 meta_kernel(std::string const& name):boost::compute::detail::meta_kernel(name), m_id(0){}
591
592 template<class T>
593 std::string register_kernel_arg(T const& value){
594 ++m_id;
595 std::string name = "rem_var"+std::to_string(m_id);
596 this->add_set_arg<T>(name,value);
597 return name;
598 }
599
600 template<class Entity>
601 typename register_with_compute_kernel<Entity>::type
602 register_args(Entity const& e){
603 return register_with_compute_kernel<Entity>::reg(*this,e);
604 }
605private:
606 std::size_t m_id;
607};
608
609template<class F, class Arg2>
610struct register_with_compute_kernel<device_traits<gpu_tag>::template bind_second<F,Arg2> >{
611 typedef typename register_with_compute_kernel<F>::type f_type;
612 typedef device_traits<gpu_tag>::template bind_second<f_type,std::string> type;
613 static type reg(
614 meta_kernel& k,
615 device_traits<gpu_tag>::template bind_second<F,Arg2> const& f
616 ){
617 std::string arg2_name = k.register_kernel_arg(f.m_arg2);
618 return type(register_with_compute_kernel<F>::reg(k,f.m_function),arg2_name);
619 }
620};
621
622template<class F, class G>
623struct register_with_compute_kernel<device_traits<gpu_tag>::template compose<F, G> >{
624 typedef typename register_with_compute_kernel<F>::type f_type;
625 typedef typename register_with_compute_kernel<G>::type g_type;
626 typedef typename device_traits<gpu_tag>::template compose<f_type, g_type> type;
627 static type reg(
628 meta_kernel& k,
629 device_traits<gpu_tag>::compose<F, G> const& composed
630 ){
631 auto f_reg = register_with_compute_kernel<F>::reg(k,composed.m_f);
632 auto g_reg = register_with_compute_kernel<G>::reg(k,composed.m_g);
633 return type(f_reg, g_reg);
634 }
635};
636
637template<class F1, class F2, class G>
638struct register_with_compute_kernel<device_traits<gpu_tag>::template compose_binary<F1, F2, G> >{
639 typedef typename register_with_compute_kernel<F1>::type f1_type;
640 typedef typename register_with_compute_kernel<F2>::type f2_type;
641 typedef typename register_with_compute_kernel<G>::type g_type;
642 typedef typename device_traits<gpu_tag>::template compose_binary<f1_type, f2_type, g_type> type;
643 static type reg(
644 meta_kernel& k,
645 device_traits<gpu_tag>::compose_binary<F1, F2, G> const& composed
646 ){
647 auto f1_reg = register_with_compute_kernel<F1>::reg(k,composed.m_f1);
648 auto f2_reg = register_with_compute_kernel<F2>::reg(k,composed.m_f2);
649 auto g_reg = register_with_compute_kernel<G>::reg(k,composed.m_g);
650 return type(f1_reg, f2_reg, g_reg);
651 }
652};
653
654
655template<class F1, class F2, class G>
656struct register_with_compute_kernel<device_traits<gpu_tag>::template transform_arguments<F1, F2, G> >{
657 typedef typename register_with_compute_kernel<F1>::type f1_type;
658 typedef typename register_with_compute_kernel<F2>::type f2_type;
659 typedef typename register_with_compute_kernel<G>::type g_type;
660 typedef typename device_traits<gpu_tag>::template transform_arguments<f1_type, f2_type, g_type> type;
661 static type reg(
662 meta_kernel& k,
663 device_traits<gpu_tag>::transform_arguments<F1, F2, G> const& composed
664 ){
665 auto f1_reg = register_with_compute_kernel<F1>::reg(k,composed.m_f1);
666 auto f2_reg = register_with_compute_kernel<F2>::reg(k,composed.m_f2);
667 auto g_reg = register_with_compute_kernel<G>::reg(k,composed.m_g);
668 return type(f1_reg, f2_reg, g_reg);
669 }
670};
671
672template<class T>
673struct register_with_compute_kernel<device_traits<gpu_tag>::template constant<T,T> >{
674 typedef typename device_traits<gpu_tag>::template constant<T,std::string> type;
675 static type reg(
676 meta_kernel& k,
677 device_traits<gpu_tag>::constant<T,T> const& f
678 ){
679 return type(k.register_kernel_arg(f.m_value));
680 }
681};
682
683template<class T>
684struct register_with_compute_kernel<device_traits<gpu_tag>::template safe_divide<T,T> >{
685 typedef typename device_traits<gpu_tag>::template safe_divide<T,std::string> type;
686 static type reg(
687 meta_kernel& k,
688 device_traits<gpu_tag>::safe_divide<T,T> const& f
689 ){
690 return type(k.register_kernel_arg(f.default_value));
691 }
692};
693
694template<class T>
695struct register_with_compute_kernel<device_traits<gpu_tag>::template multiply_and_add<T,T> >{
696 typedef typename device_traits<gpu_tag>::template multiply_and_add<T,std::string> type;
697 static type reg(
698 meta_kernel& k,
699 device_traits<gpu_tag>::multiply_and_add<T,T> const& f
700 ){
701 return type(k.register_kernel_arg(f.m_scalar));
702 }
703};
704
705template<class T, char Op>
706struct register_with_compute_kernel<device_traits<gpu_tag>::template operator_scalar<T, Op, T> >{
707 typedef typename device_traits<gpu_tag>::template operator_scalar<T, Op, std::string> type;
708 static type reg(
709 meta_kernel& k,
710 device_traits<gpu_tag>::operator_scalar<T, Op, T> const& f
711 ){
712 return type(k.register_kernel_arg(f.m_scalar));
713 }
714};
715
716template<class T>
717struct register_with_compute_kernel<device_traits<gpu_tag>::template multiply_assign<T,T> >{
718 typedef typename device_traits<gpu_tag>::template multiply_assign<T,std::string> type;
719 static type reg(
720 meta_kernel& k,
721 device_traits<gpu_tag>::multiply_assign<T,T> const& f
722 ){
723 return type(k.register_kernel_arg(f.m_scalar));
724 }
725};
726
727
728//vector element
729template<class Arg, class T, class S>
730struct invoked_dense_vector_element{
731 typedef T result_type;
732 Arg arg;
733 S stride;
734 S offset;
735 boost::compute::buffer buffer;
736};
737
738template<class Arg,class T, class S>
739boost::compute::detail::meta_kernel& operator<< (
740 boost::compute::detail::meta_kernel& k,
741 invoked_dense_vector_element<Arg, T, S> const& e
742){
743 return k<< k.get_buffer_identifier<T>(e.buffer, boost::compute::memory_object::global_memory)
744 <<" [ "<<e.offset <<"+("<<e.arg <<") *"<<e.stride<<']';
745}
746
747template<class T, class S=std::size_t>
748struct dense_vector_element{
749 typedef T result_type;
750
751 template<class Arg>
752 gpu::detail::invoked_dense_vector_element<Arg,T, S> operator()(Arg const& x) const{
753 return {x, m_stride, m_offset, m_buffer};
754 }
755 boost::compute::buffer m_buffer;
756 S m_stride;
757 S m_offset;
758};
759
760template<class T>
761struct register_with_compute_kernel<dense_vector_element<T,std::size_t> >{
762 typedef dense_vector_element<T,std::string> type;
763 static type reg(
764 meta_kernel& k,
765 dense_vector_element<T,std::size_t> const& e
766 ){
767 return {e.m_buffer, k.register_kernel_arg(e.m_stride),k.register_kernel_arg(e.m_offset)};
768 }
769};
770
771//matrix element
772template<class Arg1, class Arg2, class T, class S>
773struct invoked_matrix_element{
774 typedef T result_type;
775 Arg1 arg1;
776 Arg2 arg2;
777 S stride1;
778 S stride2;
779 S offset;
780 boost::compute::buffer buffer;
781};
782
783
784template<class Arg1, class Arg2, class T, class S>
785boost::compute::detail::meta_kernel& operator<< (
786 boost::compute::detail::meta_kernel& k,
787 invoked_matrix_element<Arg1, Arg2, T, S> const& e
788){
789 return k << k.get_buffer_identifier<T>(e.buffer, boost::compute::memory_object::global_memory)
790 <<'['<<e.offset<<"+ ("<<e.arg1 <<") * "<<e.stride1<<" + ("<<e.arg2 <<") * "<<e.stride2<<']';
791}
792
793template<class T, class S=std::size_t>
794struct dense_matrix_element{
795 typedef T result_type;
796
797 template<class Arg1, class Arg2>
798 gpu::detail::invoked_matrix_element<Arg1, Arg2, T, S> operator()(Arg1 const& x, Arg2 const& y) const{
799 return {x, y, m_stride1, m_stride2, m_offset, m_buffer};
800 }
801
802 boost::compute::buffer m_buffer;
803 S m_stride1;
804 S m_stride2;
805 S m_offset;
806};
807
808template<class T>
809struct register_with_compute_kernel<dense_matrix_element<T,std::size_t> >{
810 typedef dense_matrix_element<T,std::string> type;
811 static type reg(
812 meta_kernel& k,
813 dense_matrix_element<T,std::size_t> const& e
814 ){
815 auto const& stride1 = k.register_kernel_arg(e.m_stride1);
816 auto const& stride2 = k.register_kernel_arg(e.m_stride2);
817 auto const& offset = k.register_kernel_arg(e.m_offset);
818 return {e.m_buffer, stride1, stride2, offset};
819 }
820};
821
822}}
823
824}
825
826#endif