MissingFeatureSvmTrainer.h
//===========================================================================
/*!
 *
 *
 * \brief Trainer for binary SVMs natively supporting missing features.
 *
 *
 *
 * \author B. Li
 * \date 2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_ALGORITHMS_TRAINERS_MISSING_FEATURE_SVM_H
#define SHARK_ALGORITHMS_TRAINERS_MISSING_FEATURE_SVM_H

#include <shark/Algorithms/Trainers/AbstractSvmTrainer.h>
#include <shark/LinAlg/CachedMatrix.h>
#include <shark/LinAlg/ExampleModifiedKernelMatrix.h>
#include <shark/Models/Kernels/EvalSkipMissingFeatures.h>
#include <shark/Models/Kernels/KernelExpansion.h>
#include <shark/Models/Kernels/MissingFeaturesKernelExpansion.h>

namespace shark {

/// \brief Trainer for binary SVMs natively supporting missing features.
///
/// This is a specialized variant of the standard binary C-SVM which can be used
/// to learn from data with missing features, without the need for prior imputation
/// of missing values. The key idea is that each example is considered as having an
/// instance-specific margin value, which is computed in the lower-dimensional subspace
/// for which all features of that example are present.
///
/// The resulting optimization problem has the drawback that it is no longer convex.
/// However, a local minimum can easily be reached by an iterative wrapper algorithm
/// around a standard C-SVM solver. In detail, example-wise weights \f$ s_i \f$ are
/// incorporated into the quadratic term of the standard SVM optimization problem.
/// These take initial values of 1 and are then iteratively updated according to the
/// instance-specific margin values. After each such update, the SVM solver is called
/// again, and so on. In practice, between 2 and 5 iterations are usually sufficient
/// for an acceptable level of convergence.
///
/// For details, see the paper:<br/>
/// <p>Max-margin Classification of %Data with Absent Features.
/// Gal Chechik, Geremy Heitz, Gal Elidan, Pieter Abbeel and Daphne Koller. JMLR 2008.</p>
///
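/// Usage sketch (the kernel type and all parameter values here are only
/// illustrative, not prescribed by this class):
/// \code
/// GaussianRbfKernel<RealVector> kernel(0.5);                         // gamma = 0.5
/// MissingFeatureSvmTrainer<RealVector> trainer(&kernel, 1.0, true);  // C = 1, with offset
/// trainer.setMaxIterations(4);                                       // outer iterations
/// MissingFeaturesKernelExpansion<RealVector> svm;
/// trainer.train(svm, trainingData);  // trainingData: LabeledData<RealVector, unsigned int>
/// \endcode
///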
/// \note Much of the code in this class is borrowed from the class CSvmTrainer.
/// \ingroup supervised_trainer
template <class InputType, class CacheType = float>
class MissingFeatureSvmTrainer : public AbstractSvmTrainer<InputType, unsigned int, MissingFeaturesKernelExpansion<InputType> >
{
protected:

    typedef CacheType QpFloatType;
    typedef AbstractKernelFunction<InputType> KernelType;
    typedef AbstractSvmTrainer<InputType, unsigned int, MissingFeaturesKernelExpansion<InputType> > base_type;

public:

    /// Constructor
    MissingFeatureSvmTrainer(KernelType* kernel, double C, bool offset, bool unconstrained = false)
    : base_type(kernel, C, offset, unconstrained)
    , m_maxIterations(4u)
    { }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "MissingFeatureSvmTrainer"; }

    void train(MissingFeaturesKernelExpansion<InputType>& svm, LabeledData<InputType, unsigned int> const& dataset){
        // Check prerequisites
        SHARK_RUNTIME_CHECK(numberOfClasses(dataset) == 2, "Not a binary problem");

        svm.setStructure(base_type::m_kernel, dataset.inputs(), this->m_trainOffset);

        if (svm.hasOffset())
            trainWithOffset(svm, dataset);
        else
            trainWithoutOffset(svm, dataset);
    }

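    /// \brief Set the number of iterations of the alternating optimization scheme; the constructor sets a default of 4.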
    void setMaxIterations(std::size_t newIterations)
    { m_maxIterations = newIterations; }

private:

    /// Number of iterations of the alternating scheme to run
    std::size_t m_maxIterations;

    void trainWithoutOffset(MissingFeaturesKernelExpansion<InputType>& svm, LabeledData<InputType, unsigned int> const& dataset)
    {
        // Initialize the scaling coefficients to 1.0
        RealVector scalingCoefficients(dataset.numberOfElements(), 1.0);

        // What the body of this loop does:
        // *) Solve the QP with a normal solver, treating the s_i as constants
        // *) Calculate the norms w_i and w
        // *) Update each s_i to w_i / w
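        // Here w is the norm of the full weight vector and w_i the norm computed
        // in the feature subspace present in example i, so that s_i = w_i / w
        // rescales each example's instance-specific margin.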
        for(std::size_t iteration = 0; iteration != m_maxIterations; ++iteration){
            // Set up the problem
            typedef ExampleModifiedKernelMatrix<InputType, QpFloatType> MatrixType;
            typedef CachedMatrix<MatrixType> CachedMatrixType;
            typedef CSVMProblem<CachedMatrixType> SVMProblemType;
            typedef SvmShrinkingProblem<SVMProblemType> ProblemType;
            MatrixType kernelMatrix(*base_type::m_kernel, dataset.inputs());
            kernelMatrix.setScalingCoefficients(scalingCoefficients);
            CachedMatrixType matrix(&kernelMatrix);
            SVMProblemType svmProblem(matrix, dataset.labels(), this->C());
            ProblemType problem(svmProblem, base_type::m_shrinking);

            // Solve it
            QpSolver<ProblemType> solver(problem);
            solver.solve(base_type::stoppingCondition(), &base_type::solutionProperties());
            RealVector alpha = problem.getUnpermutedAlpha();

            // Update the s_i using the freshly computed norms
            const double classifierNorm = svm.computeNorm(alpha, scalingCoefficients);
            SHARK_ASSERT(classifierNorm > 0.0);
            for (std::size_t i = 0; i < scalingCoefficients.size(); ++i)
            {
                // Update the scaling coefficient of example i to w_i / w
                scalingCoefficients(i) = svm.computeNorm(
                    alpha,
                    scalingCoefficients,
                    dataset.element(i).input)
                    / classifierNorm;
            }

            // Store the alpha of the last iteration inside the SVM
            if(iteration == m_maxIterations - 1)
                column(svm.alpha(), 0) = alpha;

            // Keep track of the number of kernel evaluations
            base_type::m_accessCount += kernelMatrix.getAccessCount();
        }
        svm.setScalingCoefficients(scalingCoefficients);
    }
    void trainWithOffset(MissingFeaturesKernelExpansion<InputType>& svm, LabeledData<InputType, unsigned int> const& dataset)
    {
        // Initialize the scaling coefficients to 1.0
        std::size_t datasetSize = dataset.numberOfElements();
        RealVector scalingCoefficients(datasetSize, 1.0);

        // What the body of this loop does:
        // *) Solve the QP with a normal solver, treating the s_i as constants
        // *) Calculate the norms w_i and w
        // *) Update each s_i to w_i / w
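        // The loop mirrors trainWithoutOffset(); the only addition is the
        // computation of the offset b in the final iteration.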
        for(std::size_t iteration = 0; iteration != m_maxIterations; ++iteration){
            // Set up the problem
            typedef ExampleModifiedKernelMatrix<InputType, QpFloatType> MatrixType;
            typedef CachedMatrix<MatrixType> CachedMatrixType;
            typedef CSVMProblem<CachedMatrixType> SVMProblemType;
            typedef SvmShrinkingProblem<SVMProblemType> ProblemType;
            MatrixType kernelMatrix(*base_type::m_kernel, dataset.inputs());
            kernelMatrix.setScalingCoefficients(scalingCoefficients);
            CachedMatrixType matrix(&kernelMatrix);
            SVMProblemType svmProblem(matrix, dataset.labels(), this->C());
            ProblemType problem(svmProblem, base_type::m_shrinking);

            // Solve it
            QpSolver<ProblemType> solver(problem);
            solver.solve(base_type::stoppingCondition(), &base_type::solutionProperties());
            RealVector unpermutedAlpha = problem.getUnpermutedAlpha();

            // Update the s_i using the freshly computed norms
            const double classifierNorm = svm.computeNorm(unpermutedAlpha, scalingCoefficients);
            SHARK_ASSERT(classifierNorm > 0.0);
            for (std::size_t i = 0; i < scalingCoefficients.size(); ++i)
            {
                // Update the scaling coefficient of example i to w_i / w
                scalingCoefficients(i) = svm.computeNorm(
                    unpermutedAlpha,
                    scalingCoefficients,
                    dataset.element(i).input)
                    / classifierNorm;
            }

            if(iteration == m_maxIterations - 1){
                // In the last iteration, compute the offset (i.e., b, the bias)
                // and push it along with alpha into the SVM
                column(svm.alpha(), 0) = unpermutedAlpha;
                double lowerBound = -1e100;
                double upperBound = 1e100;
                double sum = 0.0;
                std::size_t freeVars = 0;

                // No particular reason to initialize these, except to avoid compiler warnings
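                // For a free variable, the KKT conditions make its gradient equal
                // the offset b exactly, so b is recovered by averaging over all
                // free variables; if none are free, b is only bracketed between
                // the gradients at the box constraints.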
                for (std::size_t i = 0; i < datasetSize; i++)
                {
                    // In case of no free SVs, we are looking for the largest gradient of all alphas at the lower bound
                    // and the smallest gradient of all alphas at the upper bound
                    const double value = problem.gradient(i);
                    if (problem.alpha(i) == problem.boxMin(i)){
                        lowerBound = std::max(value, lowerBound);
                    }
                    else if (problem.alpha(i) == problem.boxMax(i)){
                        upperBound = std::min(value, upperBound);
                    }
                    else{
                        sum += value;
                        freeVars++;
                    }
                }

                if (freeVars > 0) {
                    // Stabilized (averaged) exact value
                    svm.offset(0) = sum / freeVars;
                } else {
                    // TODO: work out how to calculate the derivative with missing features

                    // Return the best estimate
                    svm.offset(0) = 0.5 * (lowerBound + upperBound);
                }
            }

            // Keep track of the number of kernel evaluations
            base_type::m_accessCount += kernelMatrix.getAccessCount();
        }
        svm.setScalingCoefficients(scalingCoefficients);
    }

};

} // namespace shark

#endif