potrf.hpp
Go to the documentation of this file.
1/*!
2 *
3 *
4 * \brief Implements the default implementation of the POTRF algorithm
5 *
6 * \author O. Krause
7 * \date 2016
8 *
9 *
10 * \par Copyright 1995-2014 Shark Development Team
11 *
12 * <BR><HR>
13 * This file is part of Shark.
14 * <http://image.diku.dk/shark/>
15 *
16 * Shark is free software: you can redistribute it and/or modify
17 * it under the terms of the GNU Lesser General Public License as published
18 * by the Free Software Foundation, either version 3 of the License, or
19 * (at your option) any later version.
20 *
21 * Shark is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU Lesser General Public License for more details.
25 *
26 * You should have received a copy of the GNU Lesser General Public License
27 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28 *
29 */
30#ifndef REMORA_KERNELS_DEFAULT_POTRF_HPP
31#define REMORA_KERNELS_DEFAULT_POTRF_HPP
32
33#include "../../proxy_expressions.hpp"
34#include "../trsm.hpp" //trsm kernel
35#include "../syrk.hpp" //syrk kernel
36#include <type_traits> //std::false_type marker for unoptimized
37
38namespace remora{namespace bindings {
39
40//diagonal block kernels
41//upper potrf(row-major)
42template<class MatA>
43std::size_t potrf_block(
44 matrix_expression<MatA, cpu_tag>& A,
45 row_major, lower
46) {
47 std::size_t m = A().size1();
48 for(size_t j = 0; j < m; j++) {
49 for(size_t i = j; i < m; i++) {
50 double s = A()(i, j);
51 for(size_t k = 0; k < j; k++) {
52 s -= A()(i, k) * A()(j, k);
53 }
54 if(i == j) {
55 if(s <= 0)
56 return i+1;
57 A()(i, j) = std::sqrt(s);
58 } else {
59 A()(i, j) = s / A()(j , j);
60 }
61 }
62 }
63 return 0;
64}
65
66//lower potrf(row-major)
67template<class MatA>
68std::size_t potrf_block(
69 matrix_expression<MatA, cpu_tag>& A,
70 row_major, upper
71) {
72 std::size_t m = A().size1();
73 for(size_t i = 0; i < m; i++) {
74 double& Aii = A()(i, i);
75 if(Aii < 0)
76 return i+1;
77 using std::sqrt;
78 Aii = sqrt(Aii);
79 //update row
80
81 for(std::size_t j = i + 1; j < m; ++j) {
82 A()(i, j) /= Aii;
83 }
84 //rank-one update
85 for(size_t k = i + 1; k < m; k++) {
86 for(std::size_t j = k; j < m; ++j) {
87 A()(k, j) -= A()(i, k) * A()(i, j);
88 }
89 }
90 }
91 return 0;
92}
93
94
95//dispatcher for column major
96template<class MatA, class Triangular>
97std::size_t potrf_block(
98 matrix_expression<MatA, cpu_tag>& A,
99 column_major, Triangular
100) {
101 auto Atrans = trans(A);
102 return potrf_block(Atrans, row_major(), typename Triangular::transposed_orientation());
103}
104
105//main kernel for large matrices
106template <typename MatA>
107std::size_t potrf_recursive(
108 matrix_expression<MatA, cpu_tag>& Afull,
109 std::size_t start,
110 std::size_t end,
111 lower
112){
113 std::size_t block_size = 32;
114 auto A = subrange(Afull,start,end,start,end);
115 std::size_t size = A.size1();
116 //if the matrix is small enough call the computation kernel directly for the block
117 if(size <= block_size){
118 return potrf_block(A,typename MatA::orientation(), lower());
119 }
120 std::size_t numBlocks = (A.size1()+block_size-1)/block_size;
121 std::size_t split = numBlocks/2*block_size;
122
123
124 //otherwise run the kernel recursively
125 std::size_t result = potrf_recursive(Afull,start,start+split,lower());
126 if(result) return result;
127
128 auto Aul = subrange(A,0,split,0,split);
129 auto All = subrange(A,split,size,0,split);
130 auto Alr = subrange(A,split,size,split,size);
131 kernels::trsm<upper,right>(trans(Aul), All );
132 kernels::syrk<false>(All,Alr, -1.0);
133 return potrf_recursive(Afull,start+split,end,lower());
134}
135
136template <typename MatA>
137std::size_t potrf_recursive(
138 matrix_expression<MatA, cpu_tag>& A,
139 std::size_t start,
140 std::size_t end,
141 upper
142){
143 auto Atrans = trans(A);
144 return potrf_recursive(Atrans,start,end,lower());
145}
146
147//dispatcher
148template <class Triangular, typename MatA>
149std::size_t potrf(
150 matrix_container<MatA, cpu_tag>& A,
151 std::false_type//unoptimized
152){
153 REMORA_SIZE_CHECK(A().size1() == A().size2());
154 return potrf_recursive(A,0,A().size1(), Triangular());
155}
156
157}}
158#endif