SplineInterpolation2D.h
Go to the documentation of this file.
1#ifndef SHARK_CORE_IMAGES_GPU_SPLINE_INTERPOLATION_2D_H
2#define SHARK_CORE_IMAGES_GPU_SPLINE_INTERPOLATION_2D_H
3
4#include <shark/LinAlg/Base.h>
5#include <shark/Core/Shape.h>
6namespace shark{
7namespace image{
8template<class T>
10 blas::dense_matrix_adaptor<T const, blas::row_major, blas::continuous_dense_tag, blas::gpu_tag> images_unreg,
11 Shape const& shape,
12 blas::dense_matrix_adaptor<T const, blas::row_major, blas::continuous_dense_tag, blas::gpu_tag> points_unreg, std::size_t pointsPerImage,
13 blas::dense_matrix_adaptor<T, blas::row_major, blas::continuous_dense_tag, blas::gpu_tag> values_unreg
14){
15 values_unreg.clear();
16
17 //generate kernel source
18 blas::gpu::detail::meta_kernel k("shark_spline_interpolation_2D");
19 std::size_t width_index = k.add_arg<std::size_t>("width");
20 std::size_t height_index = k.add_arg<std::size_t>("height");
21 std::size_t depth_index = k.add_arg<std::size_t>("depth");
22 std::size_t stride_index = k.add_arg<std::size_t>("stride");
23 auto images = k.register_args(to_functor(images_unreg));
24 auto points = k.register_args(to_functor(points_unreg));
25 auto values = k.register_args(to_functor(values_unreg));
26
27 k << "const ulong im = get_global_id(0);\n";
28 k << "const ulong p = get_global_id(1);\n";
29 k << "const ulong pointIdx = p + im * stride;\n";
30
31 //integer pixel positions (rounded down)
32 auto pointx = points(k.expr<cl_ulong>("pointIdx"),1);
33 auto pointy = points(k.expr<cl_ulong>("pointIdx"),0);
34 k << "int basex = (int) ("<<pointx<<" * width);\n";
35 k << "int basey = (int) ("<<pointy<<" * height);\n";
36 //the sets of indices accessed. this also incldues clamping border condition
37 k << "int px[4] = {clamp(basex - 1, 0, (int)width - 1), clamp(basex, 0, (int)width - 1), clamp(basex + 1, 0, (int)width - 1), clamp(basex + 2, 0, (int)width - 1)};\n";
38 k << "int py[4] = {clamp(basey - 1, 0, (int)height - 1), clamp(basey, 0, (int)height - 1), clamp(basey + 1, 0, (int)height - 1), clamp(basey + 2, 0, (int)height - 1)};\n";
39
40 //compute the interpolation constants for the x-coordinate
41 k << k.decl<T>("t") << "=" << pointx << " * width - basex;\n";
42 k << k.decl<T>("t2") << " = t * t;\n";
43 k << k.decl<T>("t3") << " = t2 * t;\n";
44 k << k.decl<T>("x")<<"[4]={-t3+3*t2-3*t+1, 3*t3-6*t2+4, -3*t3+3*t2+3*t+1, t3};\n";
45
46 //compute the interpolation constants for the y-coordinate
47 k << "t = " << pointy << " * height - basey;\n";
48 k << "t2 = t * t;\n";
49 k << "t3 = t2 * t;\n";
50 k << k.decl<T>("y")<<"[4]={-t3+3*t2-3*t+1, 3*t3-6*t2+4, -3*t3+3*t2+3*t+1, t3};\n";
51
52 //perform actual interpolation
53 k << "for(int k = 0;k < 4; ++k){\n";
54 k << " for(int l = 0;l < 4; ++l){\n";
55 k << " const ulong imIdx = (px[l] + width * py[k]) * depth;\n";
56 k << " const ulong valIdx = p * depth;\n";
57 k << " for(int c = 0;c < depth; ++c){\n";
58 k << " "<<values(k.expr<cl_ulong>("im"), k.expr<cl_ulong>("valIdx + c") )
59 <<" += (x[l]*y[k]/36) * "<<images(k.expr<cl_ulong>("im"), k.expr<cl_ulong>("imIdx + c" ))<<";\n";
60 k << " }\n";
61 k << " }\n";
62 k << "}\n";
63
64 //compile kernel
65 boost::compute::kernel kernel = k.compile(values_unreg.queue().get_context());
66
67 //enqueue kernel with kernel args
68 std::size_t stride = (pointsPerImage == points_unreg.size1())? 0: pointsPerImage;
69 kernel.set_arg(height_index, shape[0]);
70 kernel.set_arg(width_index, shape[1]);
71 kernel.set_arg(depth_index, shape[2]);
72 kernel.set_arg(stride_index, stride);
73
74 std::size_t global_work_size[2] = {images_unreg.size1(), pointsPerImage};
75 values_unreg.queue().enqueue_nd_range_kernel(kernel, 2, nullptr, global_work_size, nullptr);
76}
77
78template<class T>
80 blas::dense_matrix_adaptor<T const, blas::row_major, blas::continuous_dense_tag, blas::gpu_tag> imageDerivatives_unreg,
81 Shape const& shape,
82 blas::dense_matrix_adaptor<T const, blas::row_major, blas::continuous_dense_tag, blas::gpu_tag> points_unreg, std::size_t pointsPerImage,
83 blas::dense_matrix_adaptor<T, blas::row_major, blas::continuous_dense_tag, blas::gpu_tag> results_unreg
84){
85
86 results_unreg.clear();
87 // implementationd etail: because we can not assume the points to lie ona grid, we can not parallelize computation of the derivatives
88 // in the number of points. reason is that several points share the same pixels in the image and we do not know which and how many.
89 // instead we will parallelize over the image pixel lookup, so the parallelisation is numImages x (4*4)
90
91 //generate kernel source
92 blas::gpu::detail::meta_kernel k("shark_spline_interpolation_2D_derivative");
93 std::size_t width_index = k.add_arg<std::size_t>("width");
94 std::size_t height_index = k.add_arg<std::size_t>("height");
95 std::size_t depth_index = k.add_arg<std::size_t>("depth");
96 std::size_t stride_index = k.add_arg<std::size_t>("stride");
97 std::size_t numPoints_index = k.add_arg<std::size_t>("numPoints");
98 auto imageDerivatives = k.register_args(to_functor(imageDerivatives_unreg));
99 auto points = k.register_args(to_functor(points_unreg));
100 auto results = k.register_args(to_functor(results_unreg));
101
102 k << "const ulong im = get_group_id(0);\n";
103 k << "const ulong c = get_local_id(1);\n";
104 //integer pixel positions (rounded down)
105 k << "for(int p = 0;p < numPoints; ++p){\n";
106 k << " const ulong pointIdx = p + im * stride;\n";
107 auto pointx = points(k.expr<cl_ulong>("pointIdx"),1);
108 auto pointy = points(k.expr<cl_ulong>("pointIdx"),0);
109 k << " int basex = (int) ("<<pointx<<" * width);\n";
110 k << " int basey = (int) ("<<pointy<<" * height);\n";
111 //the sets of indices accessed. this also incldues clamping border condition
112 k << " int px[4] = {clamp(basex - 1, 0, (int)width - 1), clamp(basex, 0, (int)width - 1), clamp(basex + 1, 0, (int)width - 1), clamp(basex + 2, 0, (int)width - 1)};\n";
113 k << " int py[4] = {clamp(basey - 1, 0, (int)height - 1), clamp(basey, 0, (int)height - 1), clamp(basey + 1, 0, (int)height - 1), clamp(basey + 2, 0, (int)height - 1)};\n";
114
115 //compute the interpolation constants for the x-coordinate
116 k << k.decl<T>("t") << "=" << pointx << " * width - basex;\n";
117 k << k.decl<T>("t2") << " = t * t;\n";
118 k << k.decl<T>("t3") << " = t2 * t;\n";
119 k << k.decl<T>("x")<<"[4]={-t3+3*t2-3*t+1, 3*t3-6*t2+4, -3*t3+3*t2+3*t+1, t3};\n";
120
121 //compute the interpolation constants for the y-coordinate
122 k << " t = " << pointy << " * height - basey;\n";
123 k << " t2 = t * t;\n";
124 k << " t3 = t2 * t;\n";
125 k << k.decl<T>("y")<<"[4]={-t3+3*t2-3*t+1, 3*t3-6*t2+4, -3*t3+3*t2+3*t+1, t3};\n";
126
127 //compute derivative
128 k << " for(int k = 0;k < 4; ++k){\n";
129 k << " for(int l = 0;l < 4; ++l){\n";
130 k << " const ulong imIdx = (px[l] + width * py[k]) * depth;\n";
131 k << " const ulong valIdx = p * depth;\n";
132 k << " "<<results(k.expr<cl_ulong>("im"), k.expr<cl_ulong>("imIdx + c" ))
133 <<" += (x[l]*y[k]/36) * "<<imageDerivatives(k.expr<cl_ulong>("im"), k.expr<cl_ulong>("valIdx + c" ))<<";\n";
134 k << " }\n";
135 k << " }\n";
136 k << "}\n";
137 //compile kernel
138 boost::compute::kernel kernel = k.compile(results_unreg.queue().get_context());
139
140 //enqueue kernel with kernel args
141 std::size_t stride = (pointsPerImage == points_unreg.size1())? 0: pointsPerImage;
142 kernel.set_arg(height_index, shape[0]);
143 kernel.set_arg(width_index, shape[1]);
144 kernel.set_arg(depth_index, shape[2]);
145 kernel.set_arg(stride_index, stride);
146 kernel.set_arg(numPoints_index, pointsPerImage);
147
148 std::size_t global_work_size[2] = {imageDerivatives_unreg.size1() * 1, shape[2]};
149 std::size_t local_work_size[2] = {1,shape[2]};
150 results_unreg.queue().enqueue_nd_range_kernel(kernel, 2, nullptr, global_work_size, local_work_size);
151}
152
153}}
154
155#endif