HingeLoss.h
/*!
 *
 * \brief Implements the Hinge Loss function for maximum margin classification.
 *
 *
 * \author Oswin Krause
 * \date 2014
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#ifndef SHARK_OBJECTIVEFUNCTIONS_LOSS_HINGELOSS_H
#define SHARK_OBJECTIVEFUNCTIONS_LOSS_HINGELOSS_H

#include <shark/ObjectiveFunctions/Loss/AbstractLoss.h>

namespace shark {

///
/// \brief Hinge loss for large margin classification
///
/// The hinge loss for two-class problems is defined as \f$ L_i = \max \{ 0, 1 - y_i f(x_i) \} \f$, where \f$ y_i \in \{-1,1\} \f$ is the label
/// and \f$ f(x_i) \f$ is the prediction of the model for the i-th input. The loss introduces the concept of
/// a margin: a point should not only be classified correctly, but should also not lie too close to the
/// decision boundary. Therefore even correctly classified points are penalized if they violate the margin.
///
/// For multi-class problems, the sum of the relative margins is used:
/// \f$ L_i = \sum_{c \neq y_i} \max \{ 0, 1 - \frac{1}{2} (f_{y_i}(x_i) - f_c(x_i)) \} \f$. This loss requires a margin
/// between the outputs for the different classes, so the function needs as many outputs as there are classes. The pre-factor
/// \f$ \frac{1}{2} \f$ ensures that in the two-class case with two outputs and a linear function, the value of the loss is the same as in the
/// single-output version.
///
/// The loss is implemented for class labels 0,1,...,n, even in the binary case.
///
/// The hinge loss is differentiable everywhere except at one point.
/// For points violating the margin, the derivative with respect to the margin is -1;
/// for points not violating it, it is 0. Points exactly on the margin boundary count as non-violating.
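///
/// A minimal usage sketch follows (an illustrative example, assuming Shark's
/// UIntVector and RealMatrix as the batch types for labels and model outputs;
/// the numeric values are made up):
/// \code
/// HingeLoss loss;
/// UIntVector labels(2);
/// labels(0) = 0; labels(1) = 1;
/// RealMatrix predictions(2, 1);   // single-output binary case
/// predictions(0, 0) = -0.5;       // label 0 -> y = -1, margin 0.5, loss 0.5
/// predictions(1, 0) = 2.0;        // label 1 -> y = +1, margin 2.0, loss 0
/// double error = loss.eval(labels, predictions);       // 0.5
/// RealMatrix gradient;            // filled by evalDerivative
/// loss.evalDerivative(labels, predictions, gradient);  // gradient(0,0) == 1, gradient(1,0) == 0
/// \endcode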
/// \ingroup lossfunctions
class HingeLoss : public AbstractLoss<unsigned int, RealVector>
{
public:
	/// constructor
	HingeLoss(){
		m_features |= base_type::HAS_FIRST_DERIVATIVE;
	}

	/// \brief Returns class name "HingeLoss"
	std::string name() const
	{ return "HingeLoss"; }


	///\brief Calculates the sum of the hinge losses over a batch of labels and predictions.
	double eval(BatchLabelType const& labels, BatchOutputType const& predictions) const{
		std::size_t numInputs = labels.size();
		SIZE_CHECK(numInputs == predictions.size1());

		double error = 0;
		//binary case for models with a single output
		if(predictions.size2() == 1){
			for(std::size_t i = 0; i != numInputs; ++i){
				SIZE_CHECK(labels(i) < 2);
				double y = 2.0 * labels(i) - 1.0; //map label {0,1} to y in {-1,+1}
				error += std::max(0.0, 1.0 - y * predictions(i, 0));
			}
		}
		else
		{//multi-class or multiple-output case
			for(std::size_t i = 0; i != numInputs; ++i){
				SIZE_CHECK(labels(i) < predictions.size2());
				for(std::size_t o = 0; o != predictions.size2(); ++o){
					if(o == labels(i)) continue;
					error += std::max(0.0, 2.0 - predictions(i, labels(i)) + predictions(i, o));
				}
			}
			error /= 2; //account for the pre-factor 1/2 of the relative margin
		}

		return error;
	}

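	///\brief Calculates the sum of the hinge losses and the derivative of the loss with respect to each prediction, written into gradient.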
	double evalDerivative(BatchLabelType const& labels, BatchOutputType const& predictions, BatchOutputType& gradient)const{
		std::size_t numInputs = labels.size();
		std::size_t outputDim = predictions.size2();
		SIZE_CHECK(numInputs == predictions.size1());

		gradient.resize(numInputs, outputDim);
		gradient.clear();
		double error = 0;
		//binary case for models with a single output
		if(outputDim == 1){
			for(std::size_t i = 0; i != numInputs; ++i){
				double y = 2.0 * labels(i) - 1.0; //map label {0,1} to y in {-1,+1}
				double sampleLoss = std::max(0.0, 1.0 - y * predictions(i, 0));
				if(sampleLoss > 0) //derivative is -y for margin violations, 0 otherwise
					gradient(i, 0) = -y;
				error += sampleLoss;
			}
		}
		else
		{//multi-class or multiple-output case
			for(std::size_t i = 0; i != numInputs; ++i){
				SIZE_CHECK(labels(i) < predictions.size2());
				for(std::size_t o = 0; o != predictions.size2(); ++o){
					if(o == labels(i)) continue;
					double sampleLoss = std::max(0.0, 2.0 - predictions(i, labels(i)) + predictions(i, o));
					if(sampleLoss > 0){//each violating class pair contributes +-1/2
						gradient(i, o) = 0.5;
						gradient(i, labels(i)) -= 0.5;
					}
					error += sampleLoss;
				}
			}
			error /= 2; //account for the pre-factor 1/2 of the relative margin
		}

		return error;
	}

};

}
#endif