trsm.hpp
Go to the documentation of this file.
1/*!
2 *
3 *
4 * \brief -
5 *
6 * \author O. Krause
7 * \date 2016
8 *
9 *
10 * \par Copyright 1995-2015 Shark Development Team
11 *
12 * <BR><HR>
13 * This file is part of Shark.
14 * <http://image.diku.dk/shark/>
15 *
16 * Shark is free software: you can redistribute it and/or modify
17 * it under the terms of the GNU Lesser General Public License as published
18 * by the Free Software Foundation, either version 3 of the License, or
19 * (at your option) any later version.
20 *
21 * Shark is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU Lesser General Public License for more details.
25 *
26 * You should have received a copy of the GNU Lesser General Public License
27 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28 *
29 */
30
31#ifndef REMORA_KERNELS_DEFAULT_TRSM_HPP
32#define REMORA_KERNELS_DEFAULT_TRSM_HPP
33
34#include "../../expression_types.hpp" //matrix_expression
35#include "../../proxy_expressions.hpp" //matrix proxies for blocking
36#include "../../detail/structure.hpp" //structure tags
37#include "../gemm.hpp" //gemm kernel
38
39#include <stdexcept> //exception when matrix is singular
40#include <type_traits> //std::false_type marker for unoptimized
41
42namespace remora{namespace bindings {
43
44//Lower triangular - matrix(row-major)
45template<std::size_t maxBlockSize1,std::size_t maxBlockSize2, bool Unit, class MatA, class MatB>
46void trsm_block(
47 matrix_expression<MatA, cpu_tag> const& A,
48 matrix_expression<MatB, cpu_tag> &B,
49 lower,
50 row_major // B is row-major
51){
52 REMORA_SIZE_CHECK(A().size1() <= maxBlockSize1);
53 typedef typename MatA::value_type value_typeA;
54 typedef typename MatB::value_type value_typeB;
55
56
57 //evaluate and copy block of A
58 std::size_t size = A().size1();
59 value_typeA blockA[maxBlockSize1][maxBlockSize1];
60 for(std::size_t i = 0; i != size; ++i){
61 for(std::size_t j = 0; j <= i; ++j){
62 blockA[i][j] = A()(i,j);
63 }
64 }
65
66
67 value_typeB blockB[maxBlockSize2][maxBlockSize1];
68 std::size_t numBlocks = (B().size2()+maxBlockSize2-1)/maxBlockSize2;
69 for(std::size_t i = 0; i != numBlocks; ++i){
70 std::size_t startB= i*maxBlockSize2;
71 std::size_t curBlockSize2 =std::min(maxBlockSize2, B().size2() - startB);
72
73 //copy blockB transposed in memory
74 for(std::size_t i = 0; i != size; ++i){
75 for(std::size_t k = 0; k != curBlockSize2; ++k){
76 blockB[k][i] = B()(i,startB+k);
77 }
78 }
79 //compute trsv kernel for each row in blockB
80 for(std::size_t k = 0; k != curBlockSize2; ++k){
81 for (std::size_t i = 0; i != size; ++i) {
82 for (std::size_t j = 0; j != i; ++j) {
83 blockB[k][i] -= blockA[i][j]*blockB[k][j];
84 }
85 if(!Unit){
86 if(blockA[i][i] == value_typeA())
87 throw std::invalid_argument("[TRSM] Matrix is singular!");
88 blockB[k][i] /= blockA[i][i];
89 }
90 }
91 }
92 //copy blockB back
93 for(std::size_t i = 0; i != size; ++i){
94 for(std::size_t k = 0; k != curBlockSize2; ++k){
95 B()(i,startB+k) = blockB[k][i];
96 }
97 }
98 }
99}
100
101// Lower triangular - matrix(column-major)
102template<std::size_t maxBlockSize1,std::size_t maxBlockSize2, bool Unit, class MatA, class MatB>
103void trsm_block(
104 matrix_expression<MatA, cpu_tag> const& A,
105 matrix_expression<MatB, cpu_tag>& B,
106 lower,
107 column_major // B is column-major
108) {
109 typedef typename MatA::value_type value_type;
110 //evaluate and copy block of A
111 std::size_t size = A().size1();
112 value_type blockA[maxBlockSize1][maxBlockSize1];
113 for(std::size_t i = 0; i != size; ++i){
114 for(std::size_t j = 0; j <= i; ++j){
115 blockA[i][j] = A()(i,j);
116 }
117 }
118
119 //compute trsv kernel for each column in B
120 for(std::size_t k = 0; k != B().size2(); ++k){
121 for (std::size_t i = 0; i != size; ++i) {
122 for (std::size_t j = 0; j != i; ++j) {
123 B()(i,k) -= blockA[i][j] * B()(j,k);
124 }
125 if(!Unit){
126 if(blockA[i][i] == value_type())
127 throw std::invalid_argument("[TRSM] Matrix is singular!");
128 B()(i,k) /= blockA[i][i];
129 }
130 }
131 }
132}
133
134
135//Upper triangular - matrix(row-major)
136template<std::size_t maxBlockSize1, std::size_t maxBlockSize2, bool Unit, class MatA, class MatB>
137void trsm_block(
138 matrix_expression<MatA, cpu_tag> const& A,
139 matrix_expression<MatB, cpu_tag> &B,
140 upper,
141 row_major // B is row-major
142){
143 REMORA_SIZE_CHECK(A().size1() <= maxBlockSize1);
144 typedef typename MatA::value_type value_typeA;
145 typedef typename MatB::value_type value_typeB;
146
147 //evaluate and copy block of A
148 std::size_t size = A().size1();
149 value_typeA blockA[maxBlockSize1][maxBlockSize1];
150 for(std::size_t i = 0; i != size; ++i){
151 for(std::size_t j = i; j != size; ++j){
152 blockA[i][j] = A()(i,j);
153 }
154 }
155
156 value_typeB blockB[maxBlockSize2][maxBlockSize1];
157 std::size_t numBlocks = (B().size2()+maxBlockSize2-1)/maxBlockSize2;
158 for(std::size_t i = 0; i != numBlocks; ++i){
159 std::size_t startB= i*maxBlockSize2;
160 std::size_t curBlockSize2 =std::min(maxBlockSize2, B().size2() - startB);
161
162 //copy blockB transposed in memory
163 for(std::size_t i = 0; i != size; ++i){
164 for(std::size_t k = 0; k != curBlockSize2; ++k){
165 blockB[k][i] = B()(i,startB+k);
166 }
167 }
168 //compute trsv kernel for each row in blockB
169 for(std::size_t k = 0; k != curBlockSize2; ++k){
170 for (std::size_t n = 0; n != size; ++n) {
171 std::size_t i = size-n-1;
172 for (std::size_t j = i+1; j != size; ++j) {
173 blockB[k][i] -= blockA[i][j] * blockB[k][j];
174 }
175 if(!Unit){
176 if(blockA[i][i] == value_typeA())
177 throw std::invalid_argument("[TRSM] Matrix is singular!");
178 blockB[k][i] /= blockA[i][i];
179 }
180 }
181 }
182 //copy blockB back
183 for(std::size_t i = 0; i != size; ++i){
184 for(std::size_t j = 0; j != curBlockSize2; ++j){
185 B()(i,startB+j) = blockB[j][i];
186 }
187 }
188 }
189}
190
191// Upper triangular - matrix(column-major)
192template<std::size_t maxBlockSize1,std::size_t maxBlockSize2, bool Unit, class MatA, class MatB>
193void trsm_block(
194 matrix_expression<MatA, cpu_tag> const& A,
195 matrix_expression<MatB, cpu_tag>& B,
196 upper,
197 column_major // B is column-major
198) {
199 typedef typename MatA::value_type value_type;
200 //evaluate and copy block of A
201 std::size_t size = A().size1();
202 value_type blockA[maxBlockSize1][maxBlockSize1];
203 for(std::size_t i = 0; i != size; ++i){
204 for(std::size_t j = i; j != size; ++j){
205 blockA[i][j] = A()(i,j);
206 }
207 }
208
209 //compute trsv kernel for each column in B
210 for(std::size_t k = 0; k != B().size2(); ++k){
211 for (std::size_t n = 0; n != size; ++n) {
212 std::size_t i = size-n-1;
213 for (std::size_t j = i+1; j != size; ++j) {
214 B()(i,k) -= blockA[i][j] * B()(j,k);
215 }
216 if(!Unit){
217 if(blockA[i][i] == value_type())
218 throw std::invalid_argument("[TRSM] Matrix is singular!");
219 B()(i,k) /= blockA[i][i];
220 }
221 }
222 }
223}
224
225template <typename MatA, typename MatB, class Triangular>
226void trsm_recursive(
227 matrix_expression<MatA, cpu_tag> const& Afull,
228 matrix_expression<MatB, cpu_tag> & Bfull,
229 std::size_t start,
230 std::size_t end,
231 Triangular t,
232 left l
233){
234 static std::size_t const Block_Size = 32;
235 std::size_t num_rhs = Bfull().size2();
236 auto A = subrange(Afull,start,end,start,end);
237 auto B = subrange(Bfull,start,end,0,num_rhs);
238 //if the matrix is small enough call the computation kernel directly for the block
239 if(A.size1() <= Block_Size){
240 trsm_block<Block_Size,16,Triangular::is_unit>(A,B,triangular_tag<Triangular::is_upper,false>(), typename MatB::orientation());
241 return;
242 }
243 std::size_t size = A.size1();
244 std::size_t numBlocks =(A.size1()+Block_Size-1)/Block_Size;
245 std::size_t split = numBlocks/2*Block_Size;
246 auto Bfront = subrange(B,0,split,0,num_rhs);
247 auto Bback = subrange(B,split,size,0,num_rhs);
248
249 //otherwise run the kernel recursively
250 if(Triangular::is_upper){ //Upper triangular case
251 trsm_recursive(Afull, Bfull,start+split,end, t, l);
252 kernels::gemm(subrange(A,0,split,split,size), Bback, Bfront, -1.0);
253 trsm_recursive(Afull, Bfull,start,start+split, t, l);
254 }else{// Lower triangular caste
255 trsm_recursive(Afull, Bfull,start,start+split, t, l);
256 kernels::gemm(subrange(A,split,size,0,split), Bfront, Bback, -1.0);
257 trsm_recursive(Afull, Bfull,start+split,end, t, l);
258 }
259}
260
261template <typename MatA, typename MatB, class Triangular>
262void trsm_recursive(
263 matrix_expression<MatA, cpu_tag> const& Afull,
264 matrix_expression<MatB, cpu_tag> & Bfull,
265 std::size_t start,
266 std::size_t end,
267 Triangular,
268 right
269){
270 auto transA = trans(Afull);
271 auto transB = trans(Bfull);
272 trsm_recursive(transA,transB,start,end,typename Triangular::transposed_orientation(),left());
273}
274
275//main kernel runs the kernel above recursively and calls gemv
276template <class Triangular, class Side, typename MatA, typename MatB>
277void trsm(
278 matrix_expression<MatA, cpu_tag> const& A,
279 matrix_expression<MatB, cpu_tag>& B,
280 std::false_type //unoptimized
281){
282
283 bindings::trsm_recursive(A,B,0,A().size1(), Triangular(), Side());
284}
285}}
286#endif