trsm.hpp
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief -
6 *
7 * \author O. Krause
8 * \date 2017
9 *
10 *
11 * \par Copyright 1995-2015 Shark Development Team
12 *
13 * <BR><HR>
14 * This file is part of Shark.
15 * <http://image.diku.dk/shark/>
16 *
17 * Shark is free software: you can redistribute it and/or modify
18 * it under the terms of the GNU Lesser General Public License as published
19 * by the Free Software Foundation, either version 3 of the License, or
20 * (at your option) any later version.
21 *
22 * Shark is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU Lesser General Public License for more details.
26 *
27 * You should have received a copy of the GNU Lesser General Public License
28 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
29 *
30 */
31//===========================================================================
32#ifndef REMORA_KERNELS_CLBLAST_TRSM_HPP
33#define REMORA_KERNELS_CLBLAST_TRSM_HPP
34
35#include "../../expression_types.hpp"
36#include "../../detail/traits.hpp"
37#include "../../proxy_expressions.hpp"
38#include <clblast.h>
39namespace remora{ namespace kernels{
40
41
42template <class Triangular, typename MatA, typename MatB>
43void trsm_impl(
44 matrix_expression<MatA, gpu_tag> const& A,
45 matrix_expression<MatB, gpu_tag>& B,
46 Triangular,
47 left
48){
49 REMORA_SIZE_CHECK(A().size1() == A().size2());
50 REMORA_SIZE_CHECK(A().size2() == B().size1());
51
52 static_assert(std::is_same<typename MatA::value_type, typename MatB::value_type>::value, "[trsm] Arguments do not have same element type");
53 static_assert(std::is_same<typename MatA::evaluation_category::tag, dense_tag>::value, "[trsm] A is not dense");
54 static_assert(std::is_base_of<dense_tag, typename MatB::storage_type::storage_tag>::value, "[trsm] B does not have dense storage layout");
55
56 //pre-evaluate A into a temporary if necessary
57 auto const& Aeval = eval_expression(A);
58
59 using namespace clblast;
60
61 //obtain geometry information
62 auto transA = std::is_same<typename MatA::orientation,typename MatB::orientation>::value? Transpose::kNo : Transpose::kYes;
63 auto layout = std::is_same<typename MatB::orientation::orientation, row_major>::value? Layout::kRowMajor : Layout::kColMajor;
64 auto diagonal = Triangular::is_unit? Diagonal::kUnit : Diagonal::kNonUnit;
65 auto triangular = Triangular::is_upper? Triangle::kUpper : Triangle::kLower;
66 if(transA == Transpose::kYes){//when we transpose the matrix, we also have to change its Triangular type
67 triangular = Triangular::is_upper? Triangle::kLower : Triangle::kUpper;
68 }
69 std::size_t m = B().size1();
70 std::size_t n = B().size2();
71
72 //obtain raw storage
73 auto storageA = Aeval.raw_storage();
74 auto storageB = B().raw_storage();
75
76 cl_event* event = nullptr;//todo: store events for out-of-order queues
77 auto code = Trsm(layout, Side::kLeft, triangular, transA, diagonal,
78 m, n, typename MatB::value_type(1),
79 storageA.buffer.get(), storageA.offset, storageA.leading_dimension,
80 storageB.buffer.get(), storageB.offset, storageB.leading_dimension,
81 &B().queue().get(), event
82 );
83 assert(code == StatusCode::kSuccess);
84}
85
86
87template <class Triangular, typename MatA, typename MatB>
88void trsm_impl(
89 matrix_expression<MatA, gpu_tag> const& A,
90 matrix_expression<MatB, gpu_tag>& B,
91 Triangular,
92 right
93){
94 auto transB = trans(B);
95 trsm_impl(trans(A), transB, typename Triangular::transposed_orientation(), left());
96}
97
98// solve AX = B or XA=B with A being triangular
99template <class Triangular, class Side, typename MatA, typename MatB>
100void trsm(
101 matrix_expression<MatA, gpu_tag> const& A,
102 matrix_expression<MatB, gpu_tag>& B
103){
104 trsm_impl(A,B,Triangular(),Side());
105}
106
107}}
108
109#endif