31#ifndef REMORA_KERNELS_DEFAULT_GEMM_HPP
32#define REMORA_KERNELS_DEFAULT_GEMM_HPP
35#include "../vector_assign.hpp"
36#include "../../dense.hpp"
37#include "../../proxy_expressions.hpp"
40namespace remora{
namespace bindings {
44template <
class E1,
class E2,
class M,
class Orientation>
46 matrix_expression<E1, cpu_tag>
const& e1,
47 matrix_expression<E2, cpu_tag>
const& e2,
48 matrix_expression<M, cpu_tag>& m,
49 typename M::value_type alpha,
50 row_major, row_major, Orientation,
53 for (std::size_t i = 0; i != e1().size1(); ++i) {
54 auto row_m = row(m,i);
55 kernels::gemv(trans(e2),row(e1,i),row_m,alpha);
59template <
class E1,
class E2,
class M>
61 matrix_expression<E1, cpu_tag>
const& e1,
62 matrix_expression<E2, cpu_tag>
const& e2,
63 matrix_expression<M, cpu_tag>& m,
64 typename M::value_type alpha,
65 row_major, column_major, column_major,
68 for (std::size_t j = 0; j != e2().size2(); ++j) {
69 auto column_m = column(m,j);
70 kernels::gemv(e1,column(e2,j),column_m,alpha);
74template <
class E1,
class E2,
class M>
76 matrix_expression<E1, cpu_tag>
const& e1,
77 matrix_expression<E2, cpu_tag>
const& e2,
78 matrix_expression<M, cpu_tag>& m,
79 typename M::value_type alpha,
80 row_major, column_major, row_major,
83 typedef typename M::value_type value_type;
84 typedef device_traits<cpu_tag>::multiply_and_add<value_type> MultAdd;
85 for (std::size_t k = 0; k != e1().size2(); ++k) {
86 for(std::size_t i = 0; i != e1().size1(); ++i){
87 auto row_m = row(m,i);
88 kernels::assign(row_m, row(e2,k), MultAdd(alpha * e1()(i,k)));
94template <
class E1,
class E2,
class M,
class Orientation>
96 matrix_expression<E1, cpu_tag>
const& e1,
97 matrix_expression<E2, cpu_tag>
const& e2,
98 matrix_expression<M, cpu_tag>& m,
99 typename M::value_type alpha,
100 row_major, row_major, Orientation,
101 sparse_tag, dense_tag
103 for (std::size_t i = 0; i != e1().size1(); ++i) {
104 auto row_m = row(m,i);
105 kernels::gemv(trans(e2),row(e1,i),row_m,alpha);
109template <
class E1,
class E2,
class M>
111 matrix_expression<E1, cpu_tag>
const& e1,
112 matrix_expression<E2, cpu_tag>
const& e2,
113 matrix_expression<M, cpu_tag>& m,
114 typename M::value_type alpha,
115 row_major, column_major, column_major,
116 sparse_tag, dense_tag
118 for (std::size_t j = 0; j != e2().size2(); ++j) {
119 auto column_m = column(m,j);
120 kernels::gemv(e1,column(e2,j),column_m,alpha);
124template <
class E1,
class E2,
class M>
126 matrix_expression<E1, cpu_tag>
const& e1,
127 matrix_expression<E2, cpu_tag>
const& e2,
128 matrix_expression<M, cpu_tag>& m,
129 typename M::value_type alpha,
130 row_major, column_major, row_major,
131 sparse_tag, dense_tag
133 typedef typename M::value_type value_type;
134 typedef device_traits<cpu_tag>::multiply_and_add<value_type> MultAdd;
135 for (std::size_t k = 0; k != e1().size2(); ++k) {
136 auto e1end = e1().major_end(k);
137 for(
auto e1pos = e1().major_begin(k); e1pos != e1end; ++e1pos){
138 std::size_t i = e1pos.index();
139 auto row_m = row(m,i);
140 kernels::assign(row_m, row(e2,k), MultAdd(alpha * (*e1pos)));
146template<
class M,
class E1,
class E2>
148 matrix_expression<E1, cpu_tag>
const& e1,
149 matrix_expression<E2, cpu_tag>
const& e2,
150 matrix_expression<M, cpu_tag>& m,
151 typename M::value_type alpha,
152 row_major, row_major, row_major,
153 sparse_tag, sparse_tag
155 typedef typename M::value_type value_type;
156 value_type zero = value_type();
157 vector<value_type> temporary(e2().size2(), zero);
158 for (std::size_t i = 0; i != e1().size1(); ++i) {
159 kernels::gemv(trans(e2),row(e1,i),temporary,alpha);
160 auto insert_pos = m().major_begin(i);
161 for (std::size_t j = 0; j != temporary.size(); ++ j) {
162 if (temporary(j) != zero) {
164 auto row_end = m().major_end(i);
165 while(insert_pos != row_end && insert_pos.index() < j)
168 if(insert_pos != row_end && insert_pos.index() == j){
169 *insert_pos += temporary(j);
171 insert_pos = m().set_element(insert_pos,j,temporary(j));
179template<
class M,
class E1,
class E2>
181 matrix_expression<E1, cpu_tag>
const& e1,
182 matrix_expression<E2, cpu_tag>
const& e2,
183 matrix_expression<M, cpu_tag>& m,
184 typename M::value_type alpha,
185 row_major, row_major, column_major,
186 sparse_tag, sparse_tag
188 for (std::size_t j = 0; j != e2().size2(); ++j) {
189 auto column_m = column(m,j);
190 kernels::gemv(e1,column(e2,j),column_m,alpha);
194template <
class E1,
class E2,
class M,
class Orientation>
196 matrix_expression<E1, cpu_tag>
const& e1,
197 matrix_expression<E2, cpu_tag>
const& e2,
198 matrix_expression<M, cpu_tag>& m,
199 typename M::value_type alpha,
200 row_major, column_major, Orientation o,
201 sparse_tag t1, sparse_tag t2
205 typename transposed_matrix_temporary<E1>::type e1_trans(e1);
206 gemm(e1_trans,e2,m,alpha,row_major(),row_major(),o,t1,t2);