LabelOrder.h
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief This will relabel a given dataset to have labels 0..N-1 (and vice versa)
6 *
7 *
8 *
9 * \author Aydin Demircioglu
10 * \date 2014
11 *
12 *
13 * \par Copyright 1995-2017 Shark Development Team
14 *
15 * <BR><HR>
16 * This file is part of Shark.
17 * <https://shark-ml.github.io/Shark/>
18 *
19 * Shark is free software: you can redistribute it and/or modify
20 * it under the terms of the GNU Lesser General Public License as published
21 * by the Free Software Foundation, either version 3 of the License, or
22 * (at your option) any later version.
23 *
24 * Shark is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU Lesser General Public License for more details.
28 *
29 * You should have received a copy of the GNU Lesser General Public License
30 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31 *
32 */
33//===========================================================================
34
35
36#ifndef SHARK_LABELORDER_H
37#define SHARK_LABELORDER_H
38
41
#include <shark/Data/Dataset.h>

#include <cstddef>
#include <map>
#include <string>
#include <vector>
43
44
45
46
47namespace shark
48{
49
50
51/// \brief This will normalize the labels of a given dataset to 0..N-1
52///
53/// \par This will normalize the labels of a given dataset to 0..N-1
54/// and store the ordering in a member variable.
55/// After processing, the dataset will afterwards have labels ranging
56/// from 0 to N-1, with N the number of classes, so usual Shark
57/// trainers can work with it.
58/// One can then revert the original labeling just by calling restoreOriginalLabels
59class LabelOrder : public INameable
60{
61private:
62
63public:
64
65
67
68
69 virtual ~LabelOrder() {};
70
71
72 /// \brief From INameable: return the class name.
73 std::string name() const
74 { return "LabelOrder"; }
75
76
77 /// \brief This will normalize the labels and store the ordering in the
78 /// member variables. The dataset will afterwards have labels ranging
79 /// from 0 to N-1, with N the number of classes.
80 /// This will overwrite any previously stored label ordering in the object.
81 ///
82 /// \param[in,out] dataset dataset that will be relabeled
83
85 {
86 // determine the min and max labels of the given dataset
87 unsigned int minLabel = std::numeric_limits<unsigned int>::max();
88 unsigned int maxLabel = 0;
89 for(std::size_t i = 0; i < dataset.numberOfElements(); ++i)
90 {
91 unsigned int label = dataset.labels().element(i);
92
93 if(label < minLabel)
94 minLabel = label;
95 if(label > maxLabel)
96 maxLabel = label;
97 }
98
99 // now we create an vector that can hold the label ordering
100 m_labelOrder.clear();
101
102 // and one array that tracks what we already encountered
103 unsigned int maxval = std::numeric_limits<unsigned int>::max();
104 std::vector<unsigned int> foundLabels(maxLabel - minLabel + 1, maxval);
105
106 // and insert all labels we encounter
107 unsigned int currentPosition = 0;
108 for(std::size_t i = 0; i < dataset.numberOfElements(); i++)
109 {
110 // is it a new label?
111 unsigned int label = dataset.labels().element(i);
112 if(foundLabels[label - minLabel] == maxval)
113 {
114 foundLabels[label - minLabel] = currentPosition;
115 m_labelOrder.push_back(label);
116 currentPosition++;
117 }
118 }
119
120 // now map every label
121 for(std::size_t i = 0; i < dataset.numberOfElements(); i++)
122 {
123 unsigned int label = dataset.labels().element(i);
124 dataset.labels().element(i) = foundLabels[label - minLabel];
125 }
126 }
127
128
129
130 /// \brief This will restore the original labels of the dataset. This
131 /// must be called with data compatible the original dataset, so that the labels will
132 /// fit. The label ordering will not be destroyed after calling this function, so
133 /// it can be called multiple times, e.g. to testsets or similar data.
134 ///
135 /// \param[in,out] dataset dataset to relabel (restore labels)
136
138 {
139 // now map every label
140 for(std::size_t i = 0; i < dataset.numberOfElements(); ++i)
141 {
142 unsigned int label = dataset.labels().element(i);
143
144 // check if the reordering fit the data
145 SHARK_RUNTIME_CHECK(label < m_labelOrder.size(),"Dataset labels does not fit to the stored ordering!");
146
147 // relabel
148 label = m_labelOrder[label];
149 dataset.labels().element(i) = label;
150 }
151 }
152
153
154
155 /// \brief Get label ordering directly
156 ///
157 /// \param[out] labelOrder vector to store the current label order.
158
159 void getLabelOrder(std::vector<unsigned int>& labelOrder)
160 {
161 labelOrder = m_labelOrder;
162 }
163
164
165 /// \brief Set label ordering directly
166 ///
167 /// \param[in] labelOrder vector with the new label order
168
169 void setLabelOrder(std::vector<unsigned int> const& labelOrder)
170 {
171 m_labelOrder = labelOrder;
172 }
173
174
175protected:
176
177 std::vector<unsigned int> m_labelOrder;
178};
179
180}
181
182#endif
183