trmv.hpp
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 * \brief -
5 *
6 * \author O. Krause
7 * \date 2010
8 *
9 *
10 * \par Copyright 1995-2015 Shark Development Team
11 *
12 * <BR><HR>
13 * This file is part of Shark.
14 * <http://image.diku.dk/shark/>
15 *
16 * Shark is free software: you can redistribute it and/or modify
17 * it under the terms of the GNU Lesser General Public License as published
18 * by the Free Software Foundation, either version 3 of the License, or
19 * (at your option) any later version.
20 *
21 * Shark is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU Lesser General Public License for more details.
25 *
26 * You should have received a copy of the GNU Lesser General Public License
27 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28 *
29 */
30//===========================================================================
31#ifndef REMORA_KERNELS_DEFAULT_TRMV_HPP
32#define REMORA_KERNELS_DEFAULT_TRMV_HPP
33
34#include "../../expression_types.hpp" //vector,matrix_expression
35#include "../../proxy_expressions.hpp" //matrix and vector proxies
36#include "../../assignment.hpp" //assignment
37#include "../../dense.hpp" //adapt_vector
38#include "../gemv.hpp" //gemv kernel
39#include "../dot.hpp" //dot kernel
40#include <type_traits> //std::false_type marker for unoptimized
41namespace remora{ namespace bindings{
42
43//Lower triangular(row-major) - vector
44template<bool Unit, class MatA, class V>
45void trmv_impl(
46 matrix_expression<MatA, cpu_tag> const& A,
47 vector_expression<V, cpu_tag> &b,
48 lower, row_major
49){
50 typedef typename MatA::value_type value_typeA;
51 typedef typename V::value_type value_typeV;
52 std::size_t size = A().size1();
53 std::size_t const blockSize = 128;
54 std::size_t numBlocks = size/blockSize;
55 if(numBlocks*blockSize < size) ++numBlocks;
56
57 //this implementation partitions A into
58 //a set of panels, where a Panel is a set
59 // of columns. We start with the last panel
60 //and compute the product of it with the part of the vector
61 // and than just add the previous panel on top etc.
62
63 //tmporary storage for subblocks of b
64 value_typeV valueStorage[blockSize];
65
66 for(std::size_t bi = 1; bi <= numBlocks; ++bi){
67 std::size_t startbi = blockSize*(numBlocks-bi);
68 std::size_t sizebi = std::min(blockSize,size-startbi);
69 auto values = adapt_vector(sizebi, valueStorage);
70
71 //store and save the values of b we are now changing
72 auto blower = subrange(b,startbi+sizebi,size);
73 noalias(values) = subrange(b,startbi,startbi+sizebi);
74
75 //multiply with triangular part of the block
76 for (std::size_t i = 0; i != sizebi; ++i) {
77 std::size_t posi = startbi+i;
78 b()(posi) = 0;
79 for(std::size_t j = 0; j < i; ++j){
80 b()(posi) += A()(posi,startbi+j)*values(j);
81 }
82 b()(posi) += values(i)*(Unit? value_typeA(1):A()(posi,posi));
83 }
84 //now compute the lower rectangular part of the current block of A
85 kernels::gemv(subrange(A, startbi+sizebi,size,startbi,startbi+sizebi),values,blower,1);
86 }
87}
88
89//upper triangular(row-major)-vector
90template<bool Unit, class MatA, class V>
91void trmv_impl(
92 matrix_expression<MatA, cpu_tag> const& A,
93 vector_expression<V, cpu_tag>& b,
94 upper, row_major
95){
96 std::size_t size = A().size1();
97 for (std::size_t i = 0; i < size; ++ i) {
98 if(!Unit){
99 b()(i) *= A()(i,i);
100 }
101 typename V::value_type result;
102 kernels::dot(subrange(row(A,i),i+1,size),subrange(b(),i+1,size),result);
103 b()(i) += result;
104 }
105}
106
107//Lower triangular(column-major) - vector
108template<bool Unit, class MatA, class V>
109void trmv_impl(
110 matrix_expression<MatA, cpu_tag> const& A,
111 vector_expression<V, cpu_tag> &b,
112 lower, column_major
113){
114
115 std::size_t size = A().size1();
116 for (std::size_t n = 1; n <= size; ++n) {
117 std::size_t i = size-n;
118 double bi = b()(i);
119 if(!Unit){
120 b()(i) *= A()(i,i);
121 }
122 for(std::size_t k = i+1; k != size; ++k)
123 b()(k) += bi * A()(k,i);
124 }
125}
126
127//upper triangular(column-major)-vector
128template<bool Unit, class MatA, class V>
129void trmv_impl(
130 matrix_expression<MatA, cpu_tag> const& A,
131 vector_expression<V, cpu_tag>& b,
132 upper, column_major
133){
134 typedef typename V::value_type value_typeV;
135 std::size_t size = A().size1();
136 std::size_t const blockSize = 128;
137 std::size_t numBlocks = size/blockSize;
138 if(numBlocks*blockSize < size) ++numBlocks;
139
140 //this implementation partitions A into
141 //a set of panels, where a Panel is a set
142 // of rows. We start with the first panel
143 //and compute the product of it with the part of the vector
144 // and than just add the next panel on top etc.
145
146 //tmporary storage for subblocks of b
147 value_typeV valueStorage[blockSize];
148
149 for(std::size_t bj = 0; bj != numBlocks; ++bj){
150 std::size_t startbj = blockSize*bj;
151 std::size_t sizebj = std::min(blockSize,size-startbj);
152 auto values = adapt_vector(sizebj, valueStorage);
153
154 //store and save the values of b we are now changing
155 auto bupper = subrange(b,startbj,startbj+sizebj);
156 auto blower = subrange(b,startbj+sizebj,size);
157 noalias(values) = bupper;
158 bupper.clear();
159 //multiply with triangular element
160 for (std::size_t j = 0; j != sizebj; ++j) {
161 std::size_t posj = startbj+j;
162 for(std::size_t i = 0; i < j; ++i){
163 b()(startbj+i) += A()(startbj+i,posj)*values(j);
164 }
165 b()(posj) += values(j)*(Unit? 1.0:A()(posj,posj));
166 }
167 //now compute the right rectangular part of the current block of A
168 kernels::gemv(subrange(A, startbj,startbj+sizebj, startbj+sizebj,size),blower,bupper,1);
169 }
170}
171
172//dispatcher
173template <bool Upper,bool Unit,typename MatA, typename V>
174void trmv(
175 matrix_expression<MatA, cpu_tag> const& A,
176 vector_expression<V, cpu_tag> & b,
177 std::false_type//unoptimized
178){
179 REMORA_SIZE_CHECK(A().size1() == A().size2());
180 REMORA_SIZE_CHECK(A().size2() == b().size());
181 trmv_impl<Unit>(A, b, triangular_tag<Upper,false>(), typename MatA::orientation());
182}
183
184}}
185#endif