31#ifndef REMORA_KERNELS_DEFAULT_TRMV_HPP
32#define REMORA_KERNELS_DEFAULT_TRMV_HPP
34#include "../../expression_types.hpp"
35#include "../../proxy_expressions.hpp"
36#include "../../assignment.hpp"
37#include "../../dense.hpp"
41namespace remora{
namespace bindings{
44template<
bool Unit,
class MatA,
class V>
46 matrix_expression<MatA, cpu_tag>
const& A,
47 vector_expression<V, cpu_tag> &b,
50 typedef typename MatA::value_type value_typeA;
51 typedef typename V::value_type value_typeV;
52 std::size_t size = A().size1();
53 std::size_t
const blockSize = 128;
54 std::size_t numBlocks = size/blockSize;
55 if(numBlocks*blockSize < size) ++numBlocks;
64 value_typeV valueStorage[blockSize];
66 for(std::size_t bi = 1; bi <= numBlocks; ++bi){
67 std::size_t startbi = blockSize*(numBlocks-bi);
68 std::size_t sizebi = std::min(blockSize,size-startbi);
69 auto values = adapt_vector(sizebi, valueStorage);
72 auto blower = subrange(b,startbi+sizebi,size);
73 noalias(values) = subrange(b,startbi,startbi+sizebi);
76 for (std::size_t i = 0; i != sizebi; ++i) {
77 std::size_t posi = startbi+i;
79 for(std::size_t j = 0; j < i; ++j){
80 b()(posi) += A()(posi,startbi+j)*values(j);
82 b()(posi) += values(i)*(Unit? value_typeA(1):A()(posi,posi));
85 kernels::gemv(subrange(A, startbi+sizebi,size,startbi,startbi+sizebi),values,blower,1);
90template<
bool Unit,
class MatA,
class V>
92 matrix_expression<MatA, cpu_tag>
const& A,
93 vector_expression<V, cpu_tag>& b,
96 std::size_t size = A().size1();
97 for (std::size_t i = 0; i < size; ++ i) {
101 typename V::value_type result;
102 kernels::dot(subrange(row(A,i),i+1,size),subrange(b(),i+1,size),result);
108template<
bool Unit,
class MatA,
class V>
110 matrix_expression<MatA, cpu_tag>
const& A,
111 vector_expression<V, cpu_tag> &b,
115 std::size_t size = A().size1();
116 for (std::size_t n = 1; n <= size; ++n) {
117 std::size_t i = size-n;
122 for(std::size_t k = i+1; k != size; ++k)
123 b()(k) += bi * A()(k,i);
128template<
bool Unit,
class MatA,
class V>
130 matrix_expression<MatA, cpu_tag>
const& A,
131 vector_expression<V, cpu_tag>& b,
134 typedef typename V::value_type value_typeV;
135 std::size_t size = A().size1();
136 std::size_t
const blockSize = 128;
137 std::size_t numBlocks = size/blockSize;
138 if(numBlocks*blockSize < size) ++numBlocks;
147 value_typeV valueStorage[blockSize];
149 for(std::size_t bj = 0; bj != numBlocks; ++bj){
150 std::size_t startbj = blockSize*bj;
151 std::size_t sizebj = std::min(blockSize,size-startbj);
152 auto values = adapt_vector(sizebj, valueStorage);
155 auto bupper = subrange(b,startbj,startbj+sizebj);
156 auto blower = subrange(b,startbj+sizebj,size);
157 noalias(values) = bupper;
160 for (std::size_t j = 0; j != sizebj; ++j) {
161 std::size_t posj = startbj+j;
162 for(std::size_t i = 0; i < j; ++i){
163 b()(startbj+i) += A()(startbj+i,posj)*values(j);
165 b()(posj) += values(j)*(Unit? 1.0:A()(posj,posj));
168 kernels::gemv(subrange(A, startbj,startbj+sizebj, startbj+sizebj,size),blower,bupper,1);
173template <
bool Upper,
bool Unit,
typename MatA,
typename V>
175 matrix_expression<MatA, cpu_tag>
const& A,
176 vector_expression<V, cpu_tag> & b,
179 REMORA_SIZE_CHECK(A().size1() == A().size2());
180 REMORA_SIZE_CHECK(A().size2() == b().size());
181 trmv_impl<Unit>(A, b, triangular_tag<Upper,false>(),
typename MatA::orientation());