LCTree.h
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief Tree for nearest neighbor search in data with low embedding dimension.
6 *
7 *
8 *
9 * \author T. Glasmachers
10 * \date 2011
11 *
12 *
13 * \par Copyright 1995-2017 Shark Development Team
14 *
15 * <BR><HR>
16 * This file is part of Shark.
17 * <https://shark-ml.github.io/Shark/>
18 *
19 * Shark is free software: you can redistribute it and/or modify
20 * it under the terms of the GNU Lesser General Public License as published
21 * by the Free Software Foundation, either version 3 of the License, or
22 * (at your option) any later version.
23 *
24 * Shark is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU Lesser General Public License for more details.
28 *
29 * You should have received a copy of the GNU Lesser General Public License
30 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31 *
32 */
33//===========================================================================
34
35#ifndef SHARK_ALGORITHMS_NEARESTNEIGHBORS_LCTREE_H
36#define SHARK_ALGORITHMS_NEARESTNEIGHBORS_LCTREE_H
37
38
40#include <shark/Data/DataView.h>
41#include <shark/LinAlg/Base.h>
42#include <boost/array.hpp>
43
44namespace shark {
45
46
47///
48/// \brief LC-tree, a binary space-partitioning tree
49///
50/// \par
51/// LC-tree data structure for efficient nearest
52/// neighbor searches in possibly high-dimensional
53/// spaces, but with low-dimensional effective data
54/// dimension (means << 10 dimensions).
55/// "LC" stands for Linear Cut.
56///
57/// This tree requires the existence of a function
58/// inner_prod computing the standard inner product
59/// of two objects of type VectorType, and a function
60/// distanceSqr computing the squared Euclidean
61/// distance between two vectors.
62///
63/// \par
64/// The tree is constructed from data by splitting
65/// the direction with largest extent (in the data
66/// covered, not space represented by the cell),
67/// recursively. Approximate direction and median
68/// are used to determine the cutting hyperplane,
69/// where the maximal number of points used to
70/// estimate the median can be controlled with the
71/// template parameter CuttingAccuracy.
72///
73/// \par
74/// The bucket size for the tree is always one,
75/// such that there is a unique point in each leaf
76/// cell.
77///
78/// \ingroup space_trees
79template <class VectorType = RealVector, int CuttingAccuracy = 25>
80class LCTree : public BinaryTree<VectorType>
81{
83public:
84 /// Construct the tree from data.
85 /// It is assumed that the container exceeds
86 /// the lifetime of the LCTree (which holds
87 /// only references to the points), and that
88 /// the memory locations of the points remain
89 /// unchanged.
91 : base_type(dataset.numberOfElements())
92 , m_normal(dataDimension(dataset)){
93 typedef DataView<Data<RealVector> const> PointSet;
94 PointSet points(dataset);
95 //create a list to the iterator elements as temporary storage
96 //we need indexed operators to have a fast lookup of the position of the elements in the container
98 std::vector<iterator> elements(m_size);
99 boost::iota(elements,iterator(boost::begin(points),0));
100
101 buildTree(tc,elements);
102 //after the creation of the trees, the iterators in the array are sorted in the order,
103 //they are referenced by the nodes.so we can create the indexList using the indizes of the iterators
104 for(std::size_t i = 0; i != m_size; ++i){
105 mp_indexList[i] = elements[i].index();
106 }
107 }
108
109
110 /// \par
111 /// Compute the squared Euclidean distance of
112 /// this cell to the given reference point, or
113 /// alternatively a lower bound on this value.
114 double squaredDistanceLowerBound(VectorType const& reference) const{
115 double dist = 0.0;
116 LCTree const* t = this;
117 LCTree const* p = (LCTree const*)mep_parent;
118 while (p != NULL)
119 {
120 double v = p->distanceFromPlane(reference);
121 if (t == p->mp_right)
122 v = -v;
123 if (v > dist)
124 dist = v;
125 t = p;
126 p = (LCTree const*)p->mep_parent;
127 }
128 return dist * dist;
129 }
130
131protected:
133 using base_type::mp_left;
136 using base_type::m_size;
137 using base_type::m_nodes;
138
139 /// (internal) construction of a non-root node
140 LCTree(LCTree* parent, std::size_t* list, std::size_t size)
141 : base_type(parent, list, size){}
142
143 /// (internal) construction method:
144 /// median-cuts of the dimension with widest spread
145 template<class Range>
146 void buildTree(TreeConstruction tc, Range& points){
147 typedef typename Range::value_type pointIterator;
148 typedef typename Range::iterator iterator;
149
150 //check whether we are finished
151 if (tc.maxDepth() == 0 || m_size <= tc.maxBucketSize()) {
152 m_nodes = 1;
153 return;
154 }
155
156 // use only a subset of size at most CuttingAccuracy
157 // to estimate the vector along the longest
158 // distance
159 if (m_size <= CuttingAccuracy){
160 calculateNormal(points);
161 }
162 else{
163 boost::array<pointIterator,CuttingAccuracy> samples;
164 for(std::size_t i = 0; i != CuttingAccuracy; i++)
165 samples[i] = points[m_size * (2*i+1) / (2*CuttingAccuracy)];
166 calculateNormal(samples);
167 }
168
169 //calculate the distance from the plane for every point in the list
170 std::vector<double> distance(m_size);
171 for(std::size_t i = 0; i != m_size; ++i){
172 distance[i] = inner_prod(m_normal, *points[i]);
173 }
174
175
176 // split the list into sub-cells
177 iterator split = this->splitList(distance,points);
178 iterator begin = boost::begin(points);
179 iterator end = boost::end(points);
180
181 if (split == end) {//can't split points.
182 m_nodes = 1;
183 return;
184 }
185
186 // create sub-nodes
187 std::size_t leftSize = split-begin;
188 mp_left = new LCTree(this, mp_indexList, leftSize);
189 mp_right = new LCTree(this, mp_indexList + leftSize, m_size - leftSize);
190
191 // recurse
192 boost::iterator_range<iterator> left(begin,split);
193 boost::iterator_range<iterator> right(split,end);
194 ((LCTree*)mp_left)->buildTree(tc.nextDepthLevel(),left);
195 ((LCTree*)mp_right)->buildTree(tc.nextDepthLevel(),right);
196 m_nodes = 1 + mp_left->nodes() + mp_right->nodes();
197 }
198
199 /// function describing the separating hyperplane
200 double funct(VectorType const& reference) const{
201 return inner_prod(m_normal, reference);
202 }
203
204 //find the longest distance between vectors in the sample set and calculate
205 //the normal along this direction
206 template<class Range>
207 void calculateNormal(Range const& samples){
208 std::size_t numSamples = samples.size();
209 std::size_t besti = 0;
210 std::size_t bestj = 0;
211 double best_dist2 = -1.0;
212 for (std::size_t i = 1; i != numSamples; i++){
213 for (std::size_t j = 0; j != i; j++){
214 double dist2 = distanceSqr(*samples[i], *samples[j]);
215 if (dist2 > best_dist2){
216 best_dist2 = dist2;
217 besti = i;
218 bestj = j;
219 }
220 }
221 }
222 double factor = 1.0 / std::sqrt(best_dist2);
223 if (! (boost::math::isfinite)(factor))
224 factor = 1.0;
225
226 m_normal = factor * (*samples[besti] - *samples[bestj]);
227 }
228
229 /// split/cut normal vector of this node
231};
232
233
234}
235#endif