226 std::vector<WeightType>& w,
227 std::size_t batchsize = 1,
228 double varepsilon = 0.001)
232 unsigned int classes = w.size();
234 double lambda = 1.0 / (ell * C);
236 double initialPrimal = -1.0;
249 initialPrimal = classes - 1.0;
270 initialPrimal = classes - 1.0;
282 initialPrimal = classes;
286 SHARK_RUNTIME_CHECK(initialPrimal > 0 && lg,
"The combination of margin and loss is not implemented");
288 double normbound2 = initialPrimal / lambda;
289 double norm_w2 = 0.0;
291 double target = initialPrimal * varepsilon;
292 std::vector<VectorType> gradient(classes);
293 RealVector f(classes);
294 for (
unsigned int c=0; c<classes; c++)
296 gradient[c].resize(w[c].size());
297 w[c] = RealZeroVector(w[c].size());
301 std::size_t start = 10;
302 std::size_t checkinterval = (2 * ell) / batchsize;
303 std::size_t nextcheck = start + ell / batchsize;
304 std::size_t predictions = 0;
305 for (std::size_t t=start; ; t++)
311 for (
unsigned int c=0; c<classes; c++) gradient[c] = (lambda * sigma * (
double)ell) * w[c];
312 for (std::size_t i=0; i<ell; i++)
315 unsigned int y = data(i).label;
316 for (
unsigned int c=0; c<classes; c++) f(c) = sigma * inner_prod(w[c], x);
317 lg(x, y, f, gradient, sumToZero);
323 for (
unsigned int c=0; c<classes; c++) n2 += inner_prod(gradient[c], gradient[c]);
324 double n = std::sqrt(n2) / (double)ell;
335 nextcheck = t + checkinterval;
339 for (
unsigned int c=0; c<classes; c++) gradient[c].clear();
341 for (
unsigned int i=0; i<batchsize; i++)
346 unsigned int y = data(active).label;
350 for (
unsigned int c=0; c<classes; c++) f(c) = sigma * inner_prod(w[c], x);
354 lg(x, y, f, gradient, sumToZero);
358 sigma *= (1.0 - 1.0 / (double)t);
361 double eta = 1.0 / (sigma * lambda * t * batchsize);
362 for (
unsigned int c=0; c<classes; c++)
365 norm_w2 += inner_prod(gradient[c], gradient[c]) - 2.0 * inner_prod(w[c], gradient[c]);
366 noalias(w[c]) -= gradient[c];
370 double n2 = sigma * sigma * norm_w2;
371 if (n2 > normbound2) sigma *= std::sqrt(normbound2 / n2);
376 for (
unsigned int c=0; c<classes; c++) w[c] *= sigma;
487 std::vector<VectorType>& gradient,
491 std::size_t argmax = 0;
492 for (std::size_t c=0; c<f.size(); c++)
494 if (c == y)
continue;
503 gradient[argmax] += x;
507 for (std::size_t c=0; c<f.size(); c++)
if (c != argmax) gradient[c] -= xx;
557 std::vector<VectorType>& gradient,
561 std::size_t argmax = 0;
562 for (std::size_t c=0; c<f.size(); c++)
585 gradient[argmax] -= x;
589 for (std::size_t c=0; c<f.size(); c++)
if (c != argmax) gradient[c] += xx;
594 gradient[argmax] += x;
598 for (std::size_t c=0; c<f.size(); c++)
if (c != argmax) gradient[c] -= xx;