pstrf.hpp
Go to the documentation of this file.
1/*!
2 * \brief Cholesky Decompositions for a positive semi-definite Matrix A = LL^T
3 *
4 *
5 * \author O. Krause
6 * \date 2016
7 *
8 * \par Copyright 1995-2015 Shark Development Team
9 *
10 * <BR><HR>
11 * This file is part of Shark.
12 * <http://image.diku.dk/shark/>
13 *
14 * Shark is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License as published
16 * by the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * Shark is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Lesser General Public License for more details.
23 *
24 * You should have received a copy of the GNU Lesser General Public License
25 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
26 *
27 */
28
29#ifndef REMORA_KERNELS_DEFAULT_PSTRF_HPP
30#define REMORA_KERNELS_DEFAULT_PSTRF_HPP
31
32#include "../gemm.hpp" //gemm kernel
33#include "../gemv.hpp" //gemv kernel
34#include "../../proxy_expressions.hpp"
35#include "../../dense.hpp"
36#include <algorithm>
37namespace remora{namespace bindings {
38
39template<class MatA, class VecP>
40std::size_t pstrf(
41 matrix_expression<MatA, cpu_tag> &A,
42 vector_expression<VecP, cpu_tag>& P,
43 lower
44){
45 //The normal cholesky decomposition works by
46 // partitioning A in 4 blocks.
47 // |A11 | A12
48 //A^(k)=|-----------
49 // |A21 | A^(k+1)
50 // First, the cholesky decomposition of A is computed, after which
51 // bloc A12 is computed by solving a system of equations.
52 // Lastly, the (expensive) operation
53 // A^(k+1) -= A21 A21^T
54 // is performed.
55 // When we suspect that A is rank deficient, this does not work
56 // as we might run in the case that A11 is rank deficient which makes
57 // updating A21 impossible.
58 // instead, in every iteration we first pick the row/column with the
59 // current largest diagonal element, permute the matrix, so that
60 // this row/column is in the blocks A11 and A12 and update everything
61 //step by step until we have a full block, which makes it possible
62 // to perform the expensive
63 // // A^(k+1) -= A21 A21^T
64 // using efficient routines.
65
66
67 //todo: experiment a bit with the sizes
68 std::size_t block_size = 20;
69
70
71 size_t m = A().size1();
72 //storage for pivot values
73 vector<typename MatA::value_type> pivotValues(m);
74
75 //stopping criterion
76 double max_diag = A()(0,0);
77 for(std::size_t i = 1; i < m; ++i)
78 max_diag = std::max(max_diag,std::abs(A()(i,i)));
79 double epsilon = m * m * std::numeric_limits<typename MatA::value_type>::epsilon() * max_diag;
80
81 for(std::size_t k = 0; k < m; k += block_size){
82 std::size_t currentSize = std::min(m-k,block_size);//last iteration is smaller
83 //partition of the matrix
84 auto Ak = subrange(A,k,m,k,m);
85 auto pivots = subrange(pivotValues,k,m);
86
87 //update current block L11
88 for(std::size_t j = 0; j != currentSize; ++j){
89 //we have to dynamically update the pivot values
90 //we start every block anew to prevent accumulation of rounding errors
91 if(j == 0){
92 for(std::size_t i = 0; i != m-k; ++i)
93 pivots(i) = Ak(i,i);
94 }else{//update pivot values
95 for(std::size_t i = j; i != m-k; ++i)
96 pivots(i) -= Ak(i,j-1) * Ak(i,j-1);
97 }
98 //get current pivot. if it is not equal to the j-th, we swap rows and columns
99 //such that j == pivot and Ak(j,j) = pivots(pivot)
100 std::size_t pivot = std::max_element(pivots.begin()+j,pivots.end())-pivots.begin();
101 if(pivot != j){
102 P()(k+j) = (int)(pivot+k);
103 A().swap_rows(k+j,k+pivot);
104 A().swap_columns(k+j,k+pivot);
105 std::swap(pivots(j),pivots(pivot));
106 }
107
108 //check whether we are finished
109 auto pivotValue = pivots(j);
110 if(pivotValue < epsilon){
111 //the remaining part is so near 0, we can just ignore it
112 subrange(Ak,j,m-k,j,m-k).clear();
113 return k+j;
114 }
115
116 //update current column
117 Ak(j,j) = std::sqrt(pivotValue);
118 //the last updates of columns k...k+j-1 did not change
119 //this row, so do it now
120 auto colLowerPart = subrange(column(Ak,j),j+1,m-k);
121 if(j > 0){
122 //the last updates of columns 0,1,...,j-1 did not change
123 //this column, so do it now
124 auto blockLL = subrange(Ak,j+1,m-k,0,j);
125 auto curRow = row(Ak,j);
126 auto rowLeftPart = subrange(curRow,0,j);
127
128 //suppose you are the j-th column
129 //than you want to get updated by the last
130 //outer products which are induced by your column friends
131 //Li...Lj-1
132 //so you get the effect of
133 //(L1L1T)^(j)+...+(Lj-1Lj-1)^(j)
134 //=L1*L1j+L2*L2j+L3*L3j...
135 //which is a matrix-vector product
136 kernels::gemv(blockLL,rowLeftPart,colLowerPart,-1);
137 }
138 colLowerPart /= Ak(j,j);
139 //set block L12 to 0
140 subrange(Ak,j,j+1,j+1,Ak.size2()).clear();
141 }
142 if(k+currentSize < m){
143 auto blockLL = subrange(Ak, block_size, m-k, 0, block_size);
144 auto blockLR = subrange(Ak, block_size, m-k, block_size, m-k);
145 kernels::gemm(blockLL,trans(blockLL), blockLR, -1);
146 }
147 }
148 return m;
149
150}
151
152template<class MatA, class VecP>
153std::size_t pstrf(
154 matrix_expression<MatA, cpu_tag> &A,
155 vector_expression<VecP, cpu_tag>& P,
156 upper
157){
158 auto transA = trans(A);
159 return pstrf(transA,P,lower());
160}
161
162}}
163#endif