/*!
 *
 *
 * \brief Implements the Squared Error Loss function for regression.
 *
 *
 *
 *
 * \author Oswin Krause, Christian Igel
 * \date 2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_OBJECTIVEFUNCTIONS_LOSS_SQUAREDLOSS_H
#define SHARK_OBJECTIVEFUNCTIONS_LOSS_SQUAREDLOSS_H

#include <shark/ObjectiveFunctions/Loss/AbstractLoss.h>

namespace shark{
40/// \brief squared loss for regression and classification
41///
42/// The SquaredLoss computes the squared distance
43/// between target and prediction. It is defined for both
44/// vectorial as well as integral labels. In the case of integral labels,
45/// the label c is interpreted as unit-vector having the c-th component activated.
46/// \ingroup lossfunctions
47template<class OutputType = RealVector, class LabelType = OutputType >
48class SquaredLoss : public AbstractLoss<LabelType,OutputType>
49{
50public:
54
55 /// Constructor.
60
61
62 /// \brief From INameable: return the class name.
63 std::string name() const
64 { return "SquaredLoss"; }
65
66 using base_type::eval;
67
68 /// Evaluate the squared loss \f$ (label - prediction)^2 \f$.
69 double eval(BatchLabelType const& labels, BatchOutputType const& predictions) const {
70 SIZE_CHECK(labels.size1()==predictions.size1());
71 SIZE_CHECK(labels.size2()==predictions.size2());
72
73 double error = sum(sqr(labels - predictions));
74 return 0.5 * error;
75 }
76
77 /// Evaluate the squared loss \f$ (label - prediction)^2 \f$
78 /// and its deriative \f$ \frac{\partial}{\partial prediction} 1/2 (label - prediction)^2 = prediction - label \f$.
79 double evalDerivative(BatchLabelType const& label, BatchOutputType const& prediction, BatchOutputType& gradient) const {
80 gradient.resize(prediction.size1(),prediction.size2());
81 noalias(gradient) = (prediction - label);
82 return SquaredLoss::eval(label,prediction);
83 }
84};
85
86//specialisation for classification case.
87template<class OutputType>
88class SquaredLoss<OutputType,unsigned int> : public AbstractLoss<unsigned int,OutputType>
89{
90public:
94
95 /// Constructor.
100
101
102 /// \brief From INameable: return the class name.
103 std::string name() const
104 { return "SquaredLoss"; }
105
106 using base_type::eval;
107
108 /// Evaluate the squared loss \f$ (label - prediction)^2 \f$.
109 double eval(BatchLabelType const& labels, BatchOutputType const& predictions) const {
110 SIZE_CHECK(labels.size()==predictions.size1());
111
112 double error = 0;
113 for(std::size_t i = 0; i != labels.size(); ++i){
114 unsigned int c = labels(i);
115 SIZE_CHECK(c < predictions.size2());
116 error+=norm_sqr(row(predictions,i))+1.0-2.0*predictions(i,c);
117 }
118 return 0.5 * error;
119 }
120
121 /// Evaluate the squared loss \f$ (label - prediction)^2 \f$
122 /// and its deriative \f$ \frac{\partial}{\partial prediction} 1/2 (label - prediction)^2 = prediction - label \f$.
123 double evalDerivative(BatchLabelType const& labels, BatchOutputType const& predictions, BatchOutputType& gradient) const {
124 gradient.resize(predictions.size1(),predictions.size2());
125 noalias(gradient) = predictions;
126 for(std::size_t i = 0; i != labels.size(); ++i){
127 unsigned int c = labels(i);
128 SIZE_CHECK(c < predictions.size2());
129 gradient(i,c)-=1.0;
130 }
131 return SquaredLoss::eval(labels,predictions);
132 }
133};
134
135//spcialisation for sequence data
136template<>
137class SquaredLoss<Sequence,Sequence> : public AbstractLoss<Sequence,Sequence>
138{
139public:
140 /// \brief Constructor.
141 ///
142 /// \param ignore Specifies how many elements of the sequence are to be ignored during evaluation
143 /// must be strictly smaller than the smalles sequnce to evaluate.
144 SquaredLoss(std::size_t ignore=0)
145 :m_ignore(ignore){
147 }
148
149
150 /// \brief From INameable: return the class name.
151 std::string name() const
152 { return "SquaredLoss"; }
153
154 using base_type::eval;
155
156 /// \brief Evaluate the squared loss \f$ (label - prediction)^2 \f$.
157 ///
158 /// For Sequences this is:
159 /// \f[ sum_{i=i_0} (label_i-prediction_i)^2\f]
160 /// where \f$ i_0 \f$ is the first element to be evaluated. By default it is 0
161 double eval(BatchLabelType const& labels, BatchOutputType const& predictions) const {
162 SIZE_CHECK(labels.size()==predictions.size());
163
164 double error = 0;
165 for(std::size_t i = 0; i != labels.size(); ++i){
166 SIZE_CHECK(labels[i].size()==predictions[i].size());
167 SHARK_RUNTIME_CHECK(labels[i].size() > m_ignore,"Number of sequence elements to ignore is too large");
168
169 for(std::size_t j = m_ignore; j != labels[i].size(); ++j){
170 error += distanceSqr(predictions[i][j],labels[i][j]);
171 }
172 }
173 return 0.5 * error;
174 }
175
176 /// Evaluate the squared loss \f$ (label - prediction)^2 \f$
177 /// and its deriative \f$ \frac{\partial}{\partial prediction} 1/2 (label - prediction)^2 = prediction - label \f$.
178 double evalDerivative(BatchLabelType const& labels, BatchOutputType const& predictions, BatchOutputType& gradient) const {
179 SIZE_CHECK(labels.size()==predictions.size());
180 gradient.resize(labels.size());
181
182 double error = 0;
183 for(std::size_t i = 0; i != labels.size(); ++i){
184 SIZE_CHECK(labels[i].size()==predictions[i].size());
185 SHARK_RUNTIME_CHECK(labels[i].size() > m_ignore,"Number of sequence elements to ignore is too large");
186 for(std::size_t j = 0; j != m_ignore; ++j){
187 gradient[i].push_back(RealVector(predictions[i][j].size(),0.0));
188 }
189 for(std::size_t j = m_ignore; j != labels[i].size(); ++j){
190 error += 0.5 * distanceSqr(predictions[i][j],labels[i][j]);
191 gradient[i].push_back(predictions[i][j] - labels[i][j]);
192
193 }
194 }
195 return error;
196 }
197private:
198 std::size_t m_ignore;
199};

}
#endif