fold_rows.hpp
Go to the documentation of this file.
1/*!
2 *
3 *
4 * \brief Folds the rows of a row-major or column major matrix.
5 *
6 * \author O. Krause
7 * \date 2018
8 *
9 *
10 * \par Copyright 1995-2015 Shark Development Team
11 *
12 * <BR><HR>
13 * This file is part of Shark.
14 * <http://image.diku.dk/shark/>
15 *
16 * Shark is free software: you can redistribute it and/or modify
17 * it under the terms of the GNU Lesser General Public License as published
18 * by the Free Software Foundation, either version 3 of the License, or
19 * (at your option) any later version.
20 *
21 * Shark is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU Lesser General Public License for more details.
25 *
26 * You should have received a copy of the GNU Lesser General Public License
27 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28 *
29 */
30
31#ifndef REMORA_KERNELS_DEFAULT_FOLD_ROWS_HPP
32#define REMORA_KERNELS_DEFAULT_FOLD_ROWS_HPP
33
34#include "../../expression_types.hpp"//for vector/matrix_expression
35#include "../../detail/traits.hpp"
36
37namespace remora{namespace bindings{
38
39template<class F, class G, class M,class V>
40void fold_rows(
41 matrix_expression<M, cpu_tag> const& A,
42 vector_expression<V, cpu_tag>& v,
43 F f,
44 G g,
45 row_major
46){
47 for(std::size_t i = 0; i != v().size(); ++i){
48 auto end = A().major_end(i);
49 auto pos = A().major_begin(i);
50 typename V::value_type s = *pos;
51 ++pos;
52 for(; pos != end; ++pos){
53 s = f(s,*pos);
54 }
55 v()(i) += g(s);
56 }
57}
58
59template<class F, class G, class M,class V>
60void fold_rows(
61 matrix_expression<M, cpu_tag> const& A,
62 vector_expression<V, cpu_tag>& v,
63 F f,
64 G g,
65 column_major
66){
67 std::size_t n = v().size();
68 const std::size_t BLOCK_SIZE = 16;
69 typename V::value_type storage[BLOCK_SIZE];
70 std::size_t numBlocks = (n + BLOCK_SIZE - 1) / BLOCK_SIZE;
71
72 for(std::size_t b = 0; b != numBlocks; ++b){
73 std::size_t start = b * BLOCK_SIZE;
74 std::size_t cur_size = std::min(BLOCK_SIZE, n - start);
75 for(std::size_t i = 0; i != cur_size; ++i){
76 storage[i] = A()(start + i, 0);
77 }
78 for(std::size_t j = 1; j != A().size2(); ++j){
79 for(std::size_t i = 0; i != cur_size; ++i){
80 storage[i] = f(storage[i], A()(start + i, j));
81 }
82 }
83 for(std::size_t i = 0; i != cur_size; ++i){
84 v()(start + i) += g(storage[i]);
85 }
86 }
87}
88
89//dispatcher for triangular matrix
90template<class F, class G, class M,class V,class Orientation,class Triangular>
91void fold_rows(
92 matrix_expression<M, cpu_tag> const& A,
93 vector_expression<V, cpu_tag>& v,
94 F f,
95 G g,
96 triangular<Orientation,Triangular>
97){
98 fold_rows(A,v, f, g, Orientation());
99}
100
101}}
102
103#endif