Datasets.cpp
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief Data Normalization
6 *
7 * This file is part of the tutorial "Data Containers".
8 * By itself, it does not do anything particularly useful.
9 *
10 * \author T. Glasmachers
11 * \date 2014
12 *
13 *
14 * \par Copyright 1995-2017 Shark Development Team
15 *
16 * <BR><HR>
17 * This file is part of Shark.
18 * <https://shark-ml.github.io/Shark/>
19 *
20 * Shark is free software: you can redistribute it and/or modify
21 * it under the terms of the GNU Lesser General Public License as published
22 * by the Free Software Foundation, either version 3 of the License, or
23 * (at your option) any later version.
24 *
25 * Shark is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU Lesser General Public License for more details.
29 *
30 * You should have received a copy of the GNU Lesser General Public License
31 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32 *
33 */
34//===========================================================================
35
#include <shark/Data/Dataset.h>

#include <shark/Data/DataView.h>
#include <shark/Models/LinearModel.h>

#include <cstddef>
#include <iostream>
#include <vector>
39
41
42using namespace shark;
43
44
45class F
46{
47public:
48 typedef RealVector result_type;
49 RealVector operator () (RealVector x) const
50 { return (2.0 * x); }
51};
52
/*!
 * \brief Example functor that maps a label to its successor.
 *
 * main() hands an instance of this class to transformLabels() to shift
 * every label of a labeled data set by one.
 */
class G
{
public:
	//! Type produced by operator().
	using result_type = unsigned int;

	//! Returns y incremented by one (plain unsigned arithmetic).
	unsigned int operator () (unsigned int y) const
	{
		return y + 1;
	}
};
60
61 class Add
62 {
63 public:
64 Add(RealVector offset) : m_offset(offset) {}
65
66 typedef RealVector result_type; // do not forget to specify the result type
67
68 RealVector operator () (RealVector input) const { // const is important
69 return (input + m_offset);
70 }
71
72 private:
73 RealVector m_offset;
74 };
75
76
77int main()
78{
79
80{
81 std::vector<RealVector> points;
83}
84{
85 std::vector<RealVector> inputs;
86 std::vector<unsigned int> labels;
88}
89{
90 Data<RealVector> data(1000, RealVector(5));
91}
92{
93 Data<RealVector> data(1000, RealVector(5), 100);
94}
95{
97 Data<RealVector> data2(data);
98 data = data2;
99 data.makeIndependent();
100}
101{
102 Data<RealVector> data;
103 typedef Data<RealVector>::batch_range Batches;
104 Batches batches = data.batches();
105
106 std::cout << batches.size() << std::endl;
107 for (auto pos = batches.begin(); pos != batches.end(); ++pos) {
108 std::cout << *pos << std::endl;
109 }
110}
111{
112 Data<RealVector> data;
114 for(auto const& batch: data.batches()) {
115 std::cout << batch << std::endl;
116 }
117 for (std::size_t i = 0; i != data.numberOfBatches(); ++i) {
118 std::cout << data.batch(i) << std::endl;
119 }
120 for(auto const& batch: data.batches()) {
121 for(std::size_t i=0; i != batchSize(batch); ++i) {
122 std::cout << getBatchElement(batch,i ); // prints element i of the batch
123 }
124 }
125 typedef Data<RealVector>::element_range Elements;
126
127 // 1: explicit iterator loop using the range over the elements
128 Elements elements = data.elements();
129 for (auto pos = elements.begin(); pos != elements.end(); ++pos) {
130 std::cout << *pos << std::endl;
131 }
132
133 // 2: foreach
134 //note pass by value, the range returns proxy elements instead of references
135 for(auto element: data.elements()) {
136 std::cout << element << std::endl;
137 }
138}
139{
141 std::size_t classes = numberOfClasses(data); // maximal class label minus one
142 std::vector<std::size_t> sizes = classSizes(data); // number of occurrences of every class label
143
144 Data<RealVector> dataVec;
145 std::size_t dim = dataDimension(dataVec); // dimensionality of the data points
146}
147{
149 std::size_t classes = numberOfClasses(data); // maximal class label minus one
150 std::vector<std::size_t> sizes = classSizes(data); // number of occurrences of every class label
151 std::size_t dim = inputDimension(data); // dimensionality of the data points
152}
153{
154 F f;
155 G g;
156 Data<RealVector> data; // initial data set
157 data = transform(data, f); // applies f to each element
158
159 LabeledData<RealVector, unsigned int> labeledData; // initial labeled dataset
160 labeledData = transformInputs(labeledData, f); // applies f to each input
161 labeledData = transformLabels(labeledData, g); // applies g to each label
162
163 // a linear model, for example for whitening
164 LinearModel<> model;
165 // application of the model to the data
166 labeledData = transformInputs(labeledData, model);
167 // or an alternate shortcut:
168 data = model(data);
169}
170{
171 Data<RealVector> data;
172 RealVector v(3); v(0) = 1.0; v(1) = 3.0; v(2) = -0.5;
173 data = transform(data, Add(v));
174}
175{
176 Data<unsigned int> dataset;
177 DataView<Data<unsigned int> > view(dataset);
178 for (std::size_t i=0; i != view.size(); ++i) {
179 std::cout << view[i] << std::endl;
180 }
181 std::vector<std::size_t> indices;
182 // somehow choose a set of indices
183 Data<unsigned int> subsetData = toDataset(subset(view, indices));
184}
185{
186 Data<unsigned int> dataset;
187 DataView<Data<unsigned int> > view(dataset);
188 std::vector<std::size_t> indices;
189 std::size_t maximumBatchSize = 100;
190 Data<unsigned int> subsetData = toDataset(subset(view, indices), maximumBatchSize);
191}
192{
195 std::cout << numberOfClasses(view) << " " << inputDimension(view) << std::endl;
196}
197
198}