// Fragment of a multi-class Pegasos-style stochastic subgradient SVM solver.
// NOTE(review): the leading numbers (226, 227, ...) are line numbers from the
// original file embedded by an extraction step; many intermediate lines are
// elided, so the code below is left byte-identical and only annotated.
//
// Visible contract: trains one weight vector per class (w[c]), processing
// `batchsize` examples per iteration, until an accuracy target derived from
// `varepsilon` is presumably met (the actual stopping comparison is elided).
226            std::vector<WeightType>& w,                         

  227            std::size_t batchsize = 1,

  228            double varepsilon = 0.001)

// Number of classes is taken from the caller-supplied weight-vector list.
  232        unsigned int classes = w.size();

// Pegasos regularization parameter: lambda = 1 / (ell * C), translating the
// SVM cost parameter C into the regularized-ERM form the solver uses.
  234        double lambda = 1.0 / (ell * C);

// initialPrimal is an upper bound on the initial primal objective; -1.0 marks
// "not set". The elided branches (lines 237..281 in the original) pick a value
// per margin/loss combination -- presumably also selecting the gradient
// functor `lg` used below; TODO confirm against the full source.
  236        double initialPrimal = -1.0;

  249                initialPrimal = classes - 1.0;

  270                initialPrimal = classes - 1.0;

  282                initialPrimal = classes;

// Reject unsupported margin/loss combinations: both a positive primal bound
// and a loss-gradient functor `lg` must have been selected above.
  286        SHARK_RUNTIME_CHECK(initialPrimal > 0 && lg, 
"The combination of margin and loss is not implemented");

// Pegasos ball constraint: the optimum satisfies ||w||^2 <= initialPrimal/lambda,
// used further down to project the iterate back onto this ball.
  288        double normbound2 = initialPrimal / lambda;     

// Incrementally maintained squared norm of the (unscaled) weight iterate.
  289        double norm_w2 = 0.0;                           

// Convergence target scaled relative to the initial primal value.
  291        double target = initialPrimal * varepsilon;     

// Per-class gradient accumulators and per-class scores f(c).
  292        std::vector<VectorType> gradient(classes);      

  293        RealVector f(classes);                          

// Size gradient buffers to match w and zero-initialize the weights.
  294        for (
unsigned int c=0; c<classes; c++)

  296            gradient[c].resize(w[c].size());

  297            w[c] = RealZeroVector(w[c].size());

// Iteration counter starts at 10 (avoids huge early steps since the step size
// below is ~1/t); a full convergence check runs every `checkinterval` rounds.
  301        std::size_t start = 10;

  302        std::size_t checkinterval = (2 * ell) / batchsize;

  303        std::size_t nextcheck = start + ell / batchsize;

  304        std::size_t predictions = 0;

// Main loop; termination is handled inside (the break/return is elided).
  305        for (std::size_t t=start; ; t++)

// --- Periodic full-gradient convergence check (body partially elided) ---
// The weights are stored implicitly scaled: the true iterate is sigma * w.
// Start from the regularization term's gradient, lambda * (sigma*w) * ell.
  311                for (
unsigned int c=0; c<classes; c++) gradient[c] = (lambda * sigma * (
double)ell) * w[c];

// Accumulate the loss gradient over the whole dataset.
  312                for (std::size_t i=0; i<ell; i++)

  315                    unsigned int y = data(i).label;

// Per-class scores of the current (sigma-scaled) model on example x.
  316                    for (
unsigned int c=0; c<classes; c++) f(c) = sigma * inner_prod(w[c], x);

// Loss-gradient functor adds this example's subgradient contribution.
  317                    lg(x, y, f, gradient, sumToZero);

// Norm of the averaged full gradient; presumably compared against `target`
// to decide termination (comparison elided) -- TODO confirm.
  323                for (
unsigned int c=0; c<classes; c++) n2 += inner_prod(gradient[c], gradient[c]);

  324                double n = std::sqrt(n2) / (double)ell;

  335                nextcheck = t + checkinterval;

// --- One stochastic step: reset accumulators, sample a mini-batch ---
  339            for (
unsigned int c=0; c<classes; c++) gradient[c].clear();

  341            for (
unsigned int i=0; i<batchsize; i++)

// `active` is the sampled example index (selection logic elided).
  346                unsigned int y = data(active).label;

  350                for (
unsigned int c=0; c<classes; c++) f(c) = sigma * inner_prod(w[c], x);

  354                lg(x, y, f, gradient, sumToZero);

// Lazy-scaling trick: the (1 - 1/t) Pegasos shrinkage of w is applied to the
// scalar sigma in O(1) instead of rescaling every weight vector.
  358            sigma *= (1.0 - 1.0 / (double)t);

// Step size eta ~ 1/(lambda*t); divided by sigma because w is stored unscaled,
// and by batchsize to average the accumulated batch gradient. The gradient is
// presumably multiplied by eta in the elided lines before the update below.
  361                double eta = 1.0 / (sigma * lambda * t * batchsize);

  362                for (
unsigned int c=0; c<classes; c++)

// Incremental norm update: ||w - g||^2 = ||w||^2 + ||g||^2 - 2<w,g>,
// keeping norm_w2 current without an O(dim) re-computation per class... which
// this still is per term; it avoids recomputing ||w||^2 from scratch.
  365                    norm_w2 += inner_prod(gradient[c], gradient[c]) - 2.0 * inner_prod(w[c], gradient[c]);

  366                    noalias(w[c]) -= gradient[c];

// Project onto the Pegasos ball: if the true (sigma-scaled) squared norm
// exceeds the bound, shrink sigma so that ||sigma*w||^2 == normbound2.
  370                double n2 = sigma * sigma * norm_w2;

  371                if (n2 > normbound2) sigma *= std::sqrt(normbound2 / n2);

// Fold the implicit scale back into the weights before returning them.
  376        for (
unsigned int c=0; c<classes; c++) w[c] *= sigma;
 
 
// Fragment of a loss-gradient functor (original lines 487..507; interior
// lines elided -- leading numbers are original-file line numbers).
//
// Visible behavior: finds the highest-scoring class other than the true
// label y (the argmax-selection body is elided) and, presumably when the
// margin is violated, adds the input x to that class's gradient. Under the
// sum-to-zero constraint it also spreads a compensating term xx (definition
// elided; presumably x scaled by 1/(classes-1) -- TODO confirm) across all
// remaining classes so the per-class gradients sum to zero.
  487            std::vector<VectorType>& gradient,

// Index of the strongest competing (non-true) class.
  491        std::size_t argmax = 0;

  492        for (std::size_t c=0; c<f.size(); c++)

// Skip the true label: only competing classes participate in the argmax.
  494            if (c == y) 
continue;

// Subgradient contribution: push the worst violator's score down
// (the surrounding condition, if any, is elided).
  503            gradient[argmax] += x;

// Sum-to-zero correction applied to every other class.
  507                for (std::size_t c=0; c<f.size(); c++) 
if (c != argmax) gradient[c] -= xx;
 
 
// Fragment of a second loss-gradient functor (original lines 557..598;
// interior lines elided -- leading numbers are original-file line numbers).
//
// Visible behavior: two symmetric branches (their guarding conditions are
// elided). One subtracts x from the argmax class and adds the compensation
// term xx to every other class; the other does the mirror-image update.
// Presumably the branches correspond to whether the argmax is the true
// label or a violating competitor -- TODO confirm against the full source.
  557            std::vector<VectorType>& gradient,

// Index of the selected class (selection loop body elided).
  561        std::size_t argmax = 0;

  562        for (std::size_t c=0; c<f.size(); c++)

// Branch 1: pull the selected class's score up (negative gradient entry),
// with the sum-to-zero spread added to the remaining classes.
  585                gradient[argmax] -= x;

  589                    for (std::size_t c=0; c<f.size(); c++) 
if (c != argmax) gradient[c] += xx;

// Branch 2: mirror update -- push the selected class's score down and
// subtract the compensation term from the remaining classes.
  594                gradient[argmax] += x;

  598                    for (std::size_t c=0; c<f.size(); c++) 
if (c != argmax) gradient[c] -= xx;