28#ifndef REMORA_DECOMPOSITIONS_HPP
29#define REMORA_DECOMPOSITIONS_HPP
44template<
class D,
class Device>
45struct solver_expression{
46 typedef Device device_type;
48 D
const& operator()()
const {
49 return *
static_cast<D const*
>(
this);
53 return *
static_cast<D*
>(
this);
70template<
class MatrixStorage>
71class cholesky_decomposition:
72 public solver_expression<
73 cholesky_decomposition<MatrixStorage>,
74 typename MatrixStorage::device_type
77 typedef typename MatrixStorage::value_type value_type;
79 typedef typename MatrixStorage::device_type device_type;
80 cholesky_decomposition(){}
82 cholesky_decomposition(matrix_expression<E,device_type>
const& e):m_cholesky(e){
83 kernels::potrf<lower>(m_cholesky);
87 void decompose(matrix_expression<E,device_type>
const& e){
88 m_cholesky.resize(e().size1(), e().size2());
89 noalias(m_cholesky) = e;
90 kernels::potrf<lower>(m_cholesky);
93 MatrixStorage
const& lower_factor()
const{
97 auto upper_factor()const -> decltype(trans(std::declval<MatrixStorage const&>())){
98 return trans(m_cholesky);
102 void solve(matrix_expression<MatB,device_type>& B, left)
const{
103 kernels::trsm<lower,left >(lower_factor(),B);
104 kernels::trsm<upper,left >(upper_factor(),B);
107 void solve(matrix_expression<MatB,device_type>& B, right)
const{
108 kernels::trsm<upper,right >(upper_factor(),B);
109 kernels::trsm<lower,right >(lower_factor(),B);
111 template<
class VecB,
bool Left>
112 void solve(vector_expression<VecB,device_type>&b, system_tag<Left>)
const{
113 kernels::trsv<lower, left>(lower_factor(),b);
114 kernels::trsv<upper,left>(upper_factor(),b);
133 void update(value_type alpha, value_type beta, vector_expression<VecV,device_type>
const& v){
135 m_cholesky *= std::sqrt(alpha);
139 std::size_t n = v().size();
140 auto& L = m_cholesky;
141 typename vector_temporary<VecV>::type temp = v;
142 double beta_prime = 1;
143 double a = std::sqrt(alpha);
144 for(std::size_t j=0; j != n; ++j)
146 double Ljj = a * L(j,j);
147 double dj = Ljj * Ljj;
149 double swj2 = beta * wj * wj;
150 double gamma = dj * beta_prime + swj2;
152 double x = dj + swj2/beta_prime;
154 throw std::invalid_argument(
"[cholesky_decomposition::update] update makes matrix indefinite, no update available");
155 double nLjj = std::sqrt(x);
157 beta_prime += swj2/dj;
161 subrange(column(L,j),j+1,n) *= a;
162 noalias(subrange(temp,j+1,n)) -= (wj/Ljj) * subrange(column(L,j),j+1,n);
165 subrange(column(L,j),j+1,n) *= nLjj/Ljj;
166 noalias(subrange(column(L,j),j+1,n))+= (nLjj * beta*wj/gamma)*subrange(temp,j+1,n);
171 template<
typename Archive>
172 void serialize( Archive & ar,
const std::size_t version ) {
176 MatrixStorage m_cholesky;
185template<
class MatrixStorage>
186class symm_eigenvalue_decomposition:
187 public solver_expression<
188 symm_eigenvalue_decomposition<MatrixStorage>,
189 typename MatrixStorage::device_type
192 typedef typename MatrixStorage::value_type value_type;
193 typedef typename vector_temporary<MatrixStorage>::type VectorStorage;
195 typedef typename MatrixStorage::device_type device_type;
196 symm_eigenvalue_decomposition(){}
198 symm_eigenvalue_decomposition(matrix_expression<E,device_type>
const& e){
203 void decompose(matrix_expression<E,device_type>
const& e){
204 REMORA_SIZE_CHECK(e().size1() == e().size2());
205 m_eigenvectors.resize(e().size1(),e().size1());
206 m_eigenvalues.resize(e().size1());
207 noalias(m_eigenvectors) = e;
209 kernels::syev(m_eigenvectors,m_eigenvalues);
212 MatrixStorage
const& Q()
const{
213 return m_eigenvectors;
215 VectorStorage
const& D()
const{
216 return m_eigenvalues;
221 void solve(matrix_expression<MatB,device_type>& B, left)
const{
222 B() = Q() % to_diagonal(elem_inv(D()))% trans(Q()) % B;
225 void solve(matrix_expression<MatB,device_type>& B, right)
const{
226 auto transB = trans(B);
227 solve(transB,left());
230 void solve(vector_expression<VecB,device_type>&b, left)
const{
231 b() = Q() % safe_div(trans(Q()) % b,D() ,0.0);
235 void solve(vector_expression<VecB,device_type>&b, right)
const{
239 template<
typename Archive>
240 void serialize( Archive & ar,
const std::size_t version ) {
245 MatrixStorage m_eigenvectors;
246 VectorStorage m_eigenvalues;
252template<
class MatrixStorage>
253class pivoting_lu_decomposition:
254public solver_expression<
255 pivoting_lu_decomposition<MatrixStorage>,
256 typename MatrixStorage::device_type
259 typedef typename MatrixStorage::device_type device_type;
261 pivoting_lu_decomposition(matrix_expression<E,device_type>
const& e)
262 :m_factor(e), m_permutation(e().size1()){
263 kernels::getrf(m_factor,m_permutation);
266 MatrixStorage
const& factor()
const{
270 permutation_matrix
const& permutation()
const{
271 return m_permutation;
275 void solve(matrix_expression<MatB,device_type>& B, left)
const{
276 swap_rows(m_permutation,B);
277 kernels::trsm<unit_lower,left>(m_factor,B);
278 kernels::trsm<upper,left>(m_factor,B);
281 void solve(matrix_expression<MatB,device_type>& B, right)
const{
282 kernels::trsm<upper,right>(m_factor,B);
283 kernels::trsm<unit_lower,right>(m_factor,B);
284 swap_columns_inverted(m_permutation,B);
287 void solve(vector_expression<VecB,device_type>& b, left)
const{
288 swap_rows(m_permutation,b);
289 kernels::trsv<unit_lower,left>(m_factor,b);
290 kernels::trsv<upper,left>(m_factor,b);
293 void solve(vector_expression<VecB,device_type>& b, right)
const{
294 kernels::trsv<upper,right>(m_factor,b);
295 kernels::trsv<unit_lower,right>(m_factor,b);
296 swap_rows_inverted(m_permutation,b);
299 MatrixStorage m_factor;
300 permutation_matrix m_permutation;
320template<
class MatrixStorage>
321class symm_pos_semi_definite_solver:
322 public solver_expression<symm_pos_semi_definite_solver<MatrixStorage>, typename MatrixStorage::device_type>{
324 typedef typename MatrixStorage::device_type device_type;
326 symm_pos_semi_definite_solver(matrix_expression<E,device_type>
const& e)
327 :m_factor(e), m_permutation(e().size1()){
328 m_rank = kernels::pstrf<lower>(m_factor,m_permutation);
329 if(m_rank == e().size1())
return;
331 auto L = columns(m_factor,0,m_rank);
332 m_cholesky.decompose(prod(trans(L),L));
335 std::size_t rank()
const{
343 void compute_inverse_factor(matrix_expression<Mat,device_type>& C)
const{
344 REMORA_SIZE_CHECK(C().size1() == m_rank);
345 REMORA_SIZE_CHECK(C().size2() == m_factor.size1());
346 if(m_rank == m_factor.size1()){
348 noalias(C) = identity_matrix<double>( m_factor.size1());
349 swap_columns_inverted(m_permutation,C);
350 kernels::trsm<lower,left>(m_factor,C);
352 auto L = columns(m_factor,0,m_rank);
353 noalias(C) = trans(L);
354 m_cholesky.solve(C,left());
355 swap_columns_inverted(m_permutation,C);
360 void solve(matrix_expression<MatB,device_type>& B, left)
const{
361 swap_rows(m_permutation,B);
364 }
else if(m_rank == m_factor.size1()){
365 kernels::trsm<lower,left>(m_factor,B);
366 kernels::trsm<upper,left>(trans(m_factor),B);
368 auto L = columns(m_factor,0,m_rank);
369 auto Z = eval_block(prod(trans(L),B));
370 m_cholesky.solve(Z,left());
371 m_cholesky.solve(Z,left());
372 noalias(B) = prod(L,Z);
374 swap_rows_inverted(m_permutation,B);
377 void solve(matrix_expression<MatB,device_type>& B, right)
const{
379 auto transB = trans(B);
380 solve(transB, left());
382 template<
class VecB,
bool Left>
383 void solve(vector_expression<VecB,device_type>& b, system_tag<Left>)
const{
384 swap_rows(m_permutation,b);
387 }
else if(m_rank == m_factor.size1()){
388 kernels::trsv<lower,left >(m_factor,b);
389 kernels::trsv<upper,left >(trans(m_factor),b);
391 auto L = columns(m_factor,0,m_rank);
392 auto z = eval_block(prod(trans(L),b));
393 m_cholesky.solve(z,left());
394 m_cholesky.solve(z,left());
395 noalias(b) = prod(L,z);
397 swap_rows_inverted(m_permutation,b);
401 MatrixStorage m_factor;
402 cholesky_decomposition<MatrixStorage> m_cholesky;
403 permutation_matrix m_permutation;
407class cg_solver:
public solver_expression<cg_solver<MatA>, typename MatA::device_type>{
409 typedef typename MatA::const_closure_type matrix_closure_type;
410 typedef typename MatA::device_type device_type;
411 cg_solver(matrix_closure_type
const& e,
double epsilon = 1.e-10,
unsigned int max_iterations = 0)
412 :m_expression(e), m_epsilon(epsilon), m_max_iterations(max_iterations){}
416 matrix_expression<MatB,device_type>& B, left,
417 double epsilon,
unsigned max_iterations
419 typename matrix_temporary<MatB>::type X = B;
420 cg(m_expression,X, B, epsilon, max_iterations);
425 matrix_expression<MatB,device_type>& B, right,
426 double epsilon,
unsigned max_iterations
428 auto transB = trans(B);
429 typename transposed_matrix_temporary<MatB>::type X = transB;
430 cg(m_expression,X,transB, epsilon, max_iterations);
433 template<
class VecB,
bool Left>
435 vector_expression<VecB,device_type>&b, system_tag<Left>,
436 double epsilon,
unsigned max_iterations
438 typename vector_temporary<VecB>::type x = b;
439 cg(m_expression,x,b,epsilon,max_iterations);
443 template<
class VecB,
bool Left>
444 void solve(vector_expression<VecB,device_type>&b, system_tag<Left>
tag)
const{
445 solve(b,
tag, m_epsilon, m_max_iterations);
447 template<
class MatB,
bool Left>
448 void solve(matrix_expression<MatB,device_type>&B, system_tag<Left>
tag)
const{
449 solve(B,
tag, m_epsilon, m_max_iterations);
452 template<
class MatT,
class VecB,
class VecX>
454 matrix_expression<MatT, device_type>
const& A,
455 vector_expression<VecX, device_type>& x,
456 vector_expression<VecB, device_type>
const& b,
458 unsigned int max_iterations
460 REMORA_SIZE_CHECK(A().size1() == A().size2());
461 REMORA_SIZE_CHECK(A().size1() == b().size());
462 REMORA_SIZE_CHECK(A().size1() == x().size());
463 typedef typename vector_temporary<VecX>::type vector_type;
465 std::size_t dim = b().size();
468 vector_type residual = b - prod(A,x);
470 if(norm_inf(residual) > norm_inf(b)){
474 vector_type next_residual(dim);
475 vector_type p = residual;
478 for(std::size_t iter = 0;; ++iter){
479 if(max_iterations != 0 && iter >= max_iterations)
break;
480 noalias(Ap) = prod(A,p);
481 double rsqr = norm_sqr(residual);
482 double alpha = rsqr/inner_prod(p,Ap);
483 noalias(x) += alpha * p;
484 noalias(next_residual) = residual - alpha * Ap;
485 if(norm_inf(next_residual) < epsilon)
488 double beta = inner_prod(next_residual,next_residual)/rsqr;
490 noalias(p) += next_residual;
491 swap(residual,next_residual);
495 template<
class MatT,
class MatB,
class MatX>
497 matrix_expression<MatT, device_type>
const& A,
498 matrix_expression<MatX, device_type>& X,
499 matrix_expression<MatB, device_type>
const& B,
501 unsigned int max_iterations
503 REMORA_SIZE_CHECK(A().size1() == A().size2());
504 REMORA_SIZE_CHECK(A().size1() == B().size1());
505 REMORA_SIZE_CHECK(A().size1() == X().size1());
506 REMORA_SIZE_CHECK(B().size2() == X().size2());
507 typedef typename vector_temporary<MatX>::type vector_type;
508 typedef typename matrix_temporary<MatX>::type matrix_type;
510 std::size_t dim = B().size1();
511 std::size_t num_rhs = B().size2();
514 matrix_type residual = B - prod(A,X);
516 for(std::size_t i = 0; i != num_rhs; ++i){
517 if(norm_inf(column(residual,i)) <= norm_inf(column(residual,i))){
519 noalias(column(residual,i)) = column(B,i);
523 vector_type next_residual(dim);
524 matrix_type P = residual;
525 matrix_type AP(dim, num_rhs);
527 for(std::size_t iter = 0;; ++iter){
528 if(max_iterations != 0 && iter >= max_iterations)
break;
530 noalias(AP) = prod(A,P);
532 for(std::size_t i = 0; i != num_rhs; ++i){
533 auto r = column(residual,i);
536 if(norm_inf(r) < epsilon)
continue;
538 auto x = column(X,i);
539 auto p = column(P,i);
540 auto Ap = column(AP,i);
541 double rsqr = norm_sqr(r);
542 double alpha = rsqr/inner_prod(p,Ap);
543 noalias(x) += alpha * p;
544 noalias(next_residual) = r - alpha * Ap;
545 double beta = inner_prod(next_residual,next_residual)/rsqr;
547 noalias(p) += next_residual;
548 noalias(r) = next_residual;
551 if(max(abs(residual)) < epsilon)
556 matrix_closure_type m_expression;
558 unsigned int m_max_iterations;
565struct symm_pos_def{
typedef symm_pos_def transposed_orientation;};
566struct symm_semi_pos_def{
typedef symm_semi_pos_def transposed_orientation;};
567struct indefinite_full_rank{
typedef indefinite_full_rank transposed_orientation;};
568struct conjugate_gradient{
569 typedef conjugate_gradient transposed_orientation;
571 unsigned max_iterations;
572 conjugate_gradient(
double epsilon = 1.e-10,
unsigned max_iterations = 0)
573 :epsilon(epsilon), max_iterations(max_iterations){}
578template<
class MatA,
class SolverTag>
581template<
class MatA,
bool Upper,
bool Unit>
582struct solver_traits<MatA,triangular_tag<Upper,Unit> >{
583 class type:
public solver_expression<type,typename MatA::device_type>{
585 typedef typename MatA::const_closure_type matrix_closure_type;
586 typedef typename MatA::device_type device_type;
587 type(matrix_closure_type
const& e, triangular_tag<Upper,Unit>):m_matrix(e){}
589 template<
class MatB,
bool Left>
590 void solve(matrix_expression<MatB, device_type>& B, system_tag<Left> ){
591 kernels::trsm<triangular_tag<Upper,Unit>,system_tag<Left> >(m_matrix,B);
593 template<
class VecB,
bool Left>
594 void solve(vector_expression<VecB, device_type>& b, system_tag<Left> ){
595 kernels::trsv<triangular_tag<Upper,Unit>,system_tag<Left> >(m_matrix,b);
598 matrix_closure_type m_matrix;
603struct solver_traits<MatA,symm_pos_def>{
604 struct type :
public cholesky_decomposition<typename matrix_temporary<MatA>::type>{
606 type(M
const& m, symm_pos_def)
607 :cholesky_decomposition<typename matrix_temporary<MatA>::type>(m){}
612struct solver_traits<MatA,indefinite_full_rank>{
613 struct type :
public pivoting_lu_decomposition<typename matrix_temporary<MatA>::type>{
615 type(M
const& m, indefinite_full_rank)
616 :pivoting_lu_decomposition<typename matrix_temporary<MatA>::type>(m){}
621struct solver_traits<MatA,symm_semi_pos_def>{
622 struct type :
public symm_pos_semi_definite_solver<typename matrix_temporary<MatA>::type>{
624 type(M
const& m, symm_semi_pos_def)
625 :symm_pos_semi_definite_solver<typename matrix_temporary<MatA>::type>(m){}
630struct solver_traits<MatA,conjugate_gradient>{
631 struct type :
public cg_solver<MatA>{
633 type(M
const& m, conjugate_gradient t):cg_solver<MatA>(m,t.epsilon,t.max_iterations){}
639template<
class MatA,
class SolverType>
640struct solver:
public detail::solver_traits<MatA,SolverType>::type{
642 solver(M
const& m, SolverType t = SolverType()): detail::solver_traits<MatA,SolverType>::type(m,t){}