KernelBudgetedSGDTutorial.cpp
Go to the documentation of this file.
1#include <shark/Algorithms/Trainers/Budgeted/KernelBudgetedSGDTrainer.h> // the KernelBudgetedSGD trainer
2#include <shark/Algorithms/Trainers/Budgeted/MergeBudgetMaintenanceStrategy.h> // the strategy the trainer will use
3#include <shark/Data/DataDistribution.h> //includes small toy distributions
4#include <shark/Models/Kernels/GaussianRbfKernel.h> //the used kernel for the SVM
5#include <shark/ObjectiveFunctions/Loss/HingeLoss.h> // the loss we want to use for the SGD machine
6#include <shark/ObjectiveFunctions/Loss/ZeroOneLoss.h> //used for evaluation of the classifier
7
8using namespace shark;
9using namespace std;
10
11
12// data generating distribution for our toy
13// multi-category classification problem
14class myProblem : public LabeledDataDistribution<RealVector, unsigned int>
15{
16public:
17 void draw(RealVector& input, unsigned int& label)const
18 {
20 input.resize(1);
21 input(0) = random::gauss(random::globalRng) + 3.0 * label;
22 }
23};
24
25
26int main(int argc, char** argv)
27{
28 // experiment settings
29 unsigned int ell = 500; // number of training data point
30 unsigned int tests = 10000; // number of test data points
31 double gamma = 0.5; // kernel bandwidth parameter
32 double C = 1.0; // regularization parameter
33 bool bias = false; // use bias/offset parameter
34 size_t budgetSize = 16; // our model shall contain at most 16 vectors
35 size_t epochs = 5; // we want to run 5 epochs
36
37
38 GaussianRbfKernel<> kernel(gamma); // Gaussian kernel
39 KernelClassifier<RealVector> kernelClassifier; // (affine) linear function in kernel-induced feature space
40
41 // generate dataset
42 Chessboard problem; // artificial benchmark data
43 ClassificationDataset trainingData = problem.generateDataset(ell);
44 ClassificationDataset testData = problem.generateDataset(tests);
45
46 // define the machine
47 HingeLoss hingeLoss; // define the loss we want to use while training
48 // as the budget maintenance strategy we choose the merge strategy
50 KernelBudgetedSGDTrainer<RealVector> kernelBudgetedSGDtrainer(&kernel, &hingeLoss, C, bias, false, budgetSize, strategy); // create the trainer
51 kernelBudgetedSGDtrainer.setEpochs(epochs); // set the epochs number
52
53 // train the machine
54 std::cout << "Training the " << kernelBudgetedSGDtrainer.name() << " on the problem with a budget of " << budgetSize << " and " << epochs << " Epochs..." << std::endl; // Shark algorithms know their names
55 kernelBudgetedSGDtrainer.train(kernelClassifier, trainingData);
56 Data<RealVector> supportVectors = kernelClassifier.decisionFunction().basis(); // get a pointer to the support vectors of the model
57 size_t nSupportVectors = supportVectors.numberOfElements(); // get number of support vectors
58 std::cout << "We have " << nSupportVectors << " support vectors in our model.\n"; // report
59
60 // evaluate
61 ZeroOneLoss<unsigned int> loss; // 0-1 loss
62 Data<unsigned int> output = kernelClassifier(trainingData.inputs()); // evaluate on training set
63 double train_error = loss.eval(trainingData.labels(), output);
64 cout << "training error:\t" << train_error << endl;
65 output = kernelClassifier(testData.inputs()); // evaluate on test set
66 double test_error = loss.eval(testData.labels(), output);
67 cout << "test error:\t" << test_error << endl;
68}
69