LassoRegression.cpp
Go to the documentation of this file.
1//===========================================================================
2/*!
3 *
4 *
5 * \brief LASSO Regression
6 *
7 * This program demonstrates LASSO regression for the identification
8 * of sparse coefficient vectors.
9 *
10 *
11 *
12 * \author T. Glasmachers
13 * \date 2013
14 *
15 *
16 * \par Copyright 1995-2017 Shark Development Team
17 *
18 * <BR><HR>
19 * This file is part of Shark.
20 * <https://shark-ml.github.io/Shark/>
21 *
22 * Shark is free software: you can redistribute it and/or modify
23 * it under the terms of the GNU Lesser General Public License as published
24 * by the Free Software Foundation, either version 3 of the License, or
25 * (at your option) any later version.
26 *
27 * Shark is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU Lesser General Public License for more details.
31 *
32 * You should have received a copy of the GNU Lesser General Public License
33 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
34 *
35 */
36//===========================================================================
37
41
42#include <iostream>
43#include <fstream>
44
45using namespace shark;
46using namespace std;
47
48
49class TestProblem : public LabeledDataDistribution<RealVector, RealVector>
50{
51public:
52 TestProblem(size_t informative, size_t nnz, size_t dim)
53 : m_informative(informative)
54 , m_nnz(nnz)
55 , m_dim(dim)
56 { }
57
58
59 void draw(RealVector& input, RealVector& label) const
60 {
61 input.resize(m_dim);
62 input.clear();
63 label.resize(1);
64
65 // we have one informative component per example
67 size_t i = random::discrete(random::globalRng, std::size_t(0), m_informative-1);
68 input(i) = g;
69 label(0) = g;
70
71 // the rest is non-informative
72 for (size_t n=1; n<m_nnz; n++)
73 {
74 size_t i = random::discrete(random::globalRng, m_informative, m_dim-1);
76 }
77 }
78
79protected:
80 size_t m_informative;
81 size_t m_nnz;
82 size_t m_dim;
83};
84
85
86int main(int argc, char** argv)
87{
88 // Define a test problem with 10 out of 1000 informative
89 // components. Each instance contains one informative and
90 // 49 noise components. 10000 instances are drawn.
91 TestProblem prob(10, 50, 1000);
92 cout << "generating 100000 points ..." << flush;
93 RegressionDataset data = prob.generateDataset(100000);
94 cout << " done." << endl;
95
96 // Set the regularization parameter.
97 // For this problem the LASSO method identifies the correct
98 // subset of 10 informative coefficients for a large range
99 // of parameter values.
100 double lambda = 1.0;
101
102 // trainer and model
103 LinearModel<> model;
104 LassoRegression<> trainer(lambda);
105
106 // train the model
107 cout << "LASSO training ..." << flush;
108 trainer.train(model, data);
109 cout << " done." << endl;
110
111 // check non-zero coefficients
112 RealMatrix m = model.matrix();
113 size_t nnz = 0;
114 size_t correct = 0;
115 size_t wrong = 0;
116 for (size_t j=0; j<m.size2(); j++)
117 {
118 if (m(0, j) != 0.0)
119 {
120 nnz++;
121 if (j < 10) correct++;
122 else wrong++;
123 }
124 }
125 cout << "solution statistics:" << endl;
126 cout << " number of non-zero coefficients: " << nnz << endl;
127 cout << " correctly identified coefficients: " << correct << endl;
128 cout << " wrongly identified coefficients: " << wrong << endl;
129}