31#ifndef REMORA_KERNELS_DEFAULT_FOLD_ROWS_HPP
32#define REMORA_KERNELS_DEFAULT_FOLD_ROWS_HPP
34#include "../../expression_types.hpp"
35#include "../../detail/traits.hpp"
37namespace remora{
namespace bindings{
39template<
class F,
class G,
class M,
class V>
41 matrix_expression<M, cpu_tag>
const& A,
42 vector_expression<V, cpu_tag>& v,
47 for(std::size_t i = 0; i != v().size(); ++i){
48 auto end = A().major_end(i);
49 auto pos = A().major_begin(i);
50 typename V::value_type s = *pos;
52 for(; pos != end; ++pos){
59template<
class F,
class G,
class M,
class V>
61 matrix_expression<M, cpu_tag>
const& A,
62 vector_expression<V, cpu_tag>& v,
67 std::size_t n = v().size();
68 const std::size_t BLOCK_SIZE = 16;
69 typename V::value_type storage[BLOCK_SIZE];
70 std::size_t numBlocks = (n + BLOCK_SIZE - 1) / BLOCK_SIZE;
72 for(std::size_t b = 0; b != numBlocks; ++b){
73 std::size_t start = b * BLOCK_SIZE;
74 std::size_t cur_size = std::min(BLOCK_SIZE, n - start);
75 for(std::size_t i = 0; i != cur_size; ++i){
76 storage[i] = A()(start + i, 0);
78 for(std::size_t j = 1; j != A().size2(); ++j){
79 for(std::size_t i = 0; i != cur_size; ++i){
80 storage[i] = f(storage[i], A()(start + i, j));
83 for(std::size_t i = 0; i != cur_size; ++i){
84 v()(start + i) += g(storage[i]);
90template<
class F,
class G,
class M,
class V,
class Orientation,
class Triangular>
92 matrix_expression<M, cpu_tag>
const& A,
93 vector_expression<V, cpu_tag>& v,
96 triangular<Orientation,Triangular>
98 fold_rows(A,v, f, g, Orientation());