Shark machine learning library
Installation
Tutorials
Benchmarks
Documentation
Quick references
Class list
Global functions
examples
Data
Import.cpp
Go to the documentation of this file.
1
//===========================================================================
2
/*!
3
*
4
*
5
* \brief Data Import
6
*
7
* This file is part of the tutorial "Importing Data".
8
* By itself, it does not do anything particularly useful.
9
*
10
* \author T. Glasmachers
11
* \date 2014-2018
12
*
13
*
14
* \par Copyright 1995-2018 Shark Development Team
15
*
16
* <BR><HR>
17
* This file is part of Shark.
18
* <https://shark-ml.github.io/Shark/>
19
*
20
* Shark is free software: you can redistribute it and/or modify
21
* it under the terms of the GNU Lesser General Public License as published
22
* by the Free Software Foundation, either version 3 of the License, or
23
* (at your option) any later version.
24
*
25
* Shark is distributed in the hope that it will be useful,
26
* but WITHOUT ANY WARRANTY; without even the implied warranty of
27
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28
* GNU Lesser General Public License for more details.
29
*
30
* You should have received a copy of the GNU Lesser General Public License
31
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
32
*
33
*/
34
//===========================================================================
35
36
#include <
shark/Data/Dataset.h
>
37
#include <
shark/Data/DataDistribution.h
>
38
#include <
shark/Data/Csv.h
>
39
#include <
shark/Data/SparseData.h
>
40
#include <
shark/Data/Download.h
>
41
#include <iostream>
42
using namespace
shark
;
43
44
45
/// \brief Example data source used by the "Importing Data" tutorial.
///
/// Every call to draw() yields one labeled sample: a two-component input
/// vector and an unsigned integer label taken from random::coinToss.
class YourDistribution : public LabeledDataDistribution<RealVector, unsigned int>
{
public:
	/// \brief Writes one freshly drawn sample into \p input and \p label.
	///
	/// \param input  resized to two components and then overwritten
	/// \param label  overwritten with the result of random::coinToss
	///
	/// Both components come from random::uni called with the bounds -1 and 1
	/// (presumably a uniform draw on that range -- confirm against the
	/// library documentation); the label is added to the second component.
	void draw(RealVector& input, unsigned int& label) const
	{
		input.resize(2);
		label = random::coinToss(random::globalRng);

		// The generator is queried in a fixed order: label, first
		// component, second component.
		const auto first = random::uni(random::globalRng, -1, 1);
		const auto second = random::uni(random::globalRng, -1, 1);

		input(0) = first;
		input(1) = second + label;
	}
};
56
57
58
int
main
(
int
argc,
char
** argv)
59
{
60
std::cout <<
61
"\n"
62
"WARNING: This program loads several data sets from disk.\n"
63
" If the files are not found then it will terminate\n"
64
" with an exception.\n"
65
"\n"
;
66
67
Data<RealVector>
points;
68
ClassificationDataset
dataset;
69
70
YourDistribution distribution;
71
unsigned
int
numberOfSamples = 1000;
72
dataset = distribution.generateDataset(numberOfSamples);
73
74
importCSV
(points,
"inputs.csv"
,
','
,
'#'
);
75
importCSV
(dataset,
"data.csv"
,
LAST_COLUMN
,
','
,
'#'
);
76
77
{
78
Data<RealVector>
inputs;
79
Data<RealVector>
labels;
80
importCSV
(inputs,
"inputs.csv"
);
81
importCSV
(labels,
"labels.csv"
);
82
RegressionDataset
dataset(inputs, labels);
83
}
84
85
{
86
importSparseData
(dataset,
"data.libsvm"
);
87
88
LabeledData<CompressedRealVector, unsigned int>
sparse_dataset;
89
importSparseData
(sparse_dataset,
"data.libsvm"
);
90
}
91
92
{
93
ClassificationDataset
dataset;
94
// download dense data
95
downloadCsvData
(dataset,
"http://www.shark-ml.org/data/quickstart-train.csv"
,
LAST_COLUMN
,
' '
);
96
97
// download sparse data
98
downloadSparseData
(dataset,
"http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/svmguide1"
);
99
}
100
}