Import.cpp
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief Data Import
6 *
7 * This file is part of the tutorial "Importing Data".
8 * By itself, it does not do anything particularly useful.
9 *
10 * \author T. Glasmachers
11 * \date 2014-2018
12 *
13 *
14 * \par Copyright 1995-2018 Shark Development Team
15 *
16 * <BR><HR>
17 * This file is part of Shark.
18 * <https://shark-ml.github.io/Shark/>
19 *
20 * Shark is free software: you can redistribute it and/or modify
21 * it under the terms of the GNU Lesser General Public License as published
22 * by the Free Software Foundation, either version 3 of the License, or
23 * (at your option) any later version.
24 *
25 * Shark is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU Lesser General Public License for more details.
29 *
30 * You should have received a copy of the GNU Lesser General Public License
31 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32 *
33 */
34//===========================================================================
35
#include <shark/Data/Dataset.h>
#include <shark/Data/DataDistribution.h>
#include <shark/Data/Csv.h>
#include <shark/Data/SparseData.h>
#include <shark/Data/Download.h>
#include <iostream>
42using namespace shark;
43
44
45class YourDistribution : public LabeledDataDistribution<RealVector, unsigned int>
46{
47public:
48 void draw(RealVector& input, unsigned int& label) const
49 {
50 input.resize(2);
52 input(0) = random::uni(random::globalRng, -1,1);
53 input(1) = random::uni(random::globalRng, -1,1) + label;
54 }
55};
56
57
58int main(int argc, char** argv)
59{
60 std::cout <<
61 "\n"
62 "WARNING: This program loads several data sets from disk.\n"
63 " If the files are not found then it will terminate\n"
64 " with an exception.\n"
65 "\n";
66
67 Data<RealVector> points;
69
70 YourDistribution distribution;
71 unsigned int numberOfSamples = 1000;
72 dataset = distribution.generateDataset(numberOfSamples);
73
74 importCSV(points, "inputs.csv", ',', '#');
75 importCSV(dataset, "data.csv", LAST_COLUMN, ',', '#');
76
77{
78 Data<RealVector> inputs;
79 Data<RealVector> labels;
80 importCSV(inputs, "inputs.csv");
81 importCSV(labels, "labels.csv");
82 RegressionDataset dataset(inputs, labels);
83}
84
85{
86 importSparseData(dataset, "data.libsvm");
87
89 importSparseData(sparse_dataset, "data.libsvm");
90}
91
92{
94 // download dense data
95 downloadCsvData(dataset, "http://www.shark-ml.org/data/quickstart-train.csv", LAST_COLUMN, ' ');
96
97 // download sparse data
98 downloadSparseData(dataset, "http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/svmguide1");
99}
100}