NegativeLogLikelihood.h
/*!
 *
 *
 * \brief Negative Log Likelihood error function
 *
 *
 *
 * \author O.Krause
 * \date 2014
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_OBJECTIVEFUNCTIONS_NEGATIVE_LOG_LIKELIHOOD_H
#define SHARK_OBJECTIVEFUNCTIONS_NEGATIVE_LOG_LIKELIHOOD_H

#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>
#include <shark/Models/AbstractModel.h>
#include <shark/Core/Random.h>

#include <boost/range/algorithm_ext/iota.hpp>
#include <boost/range/algorithm/random_shuffle.hpp>
namespace shark{

/// \brief Computes the negative log likelihood of a dataset under a model
///
/// The negative log likelihood is defined as
/// \f[ L(\theta) = -\frac 1 N \sum_{i=1}^N \log(p_{\theta}(x_i)) \f]
/// where \f$ \theta \f$ is the vector of parameters of the model \f$ p \f$ and \f$ x_i \f$ are the
/// datapoints of the training set. Minimizing this error
/// maximizes the likelihood of the dataset under p. This error measure is
/// closely related to the Kullback-Leibler divergence.
///
/// For this error function, the model is only allowed to have a single output
/// - the probability of the sample. The distribution must be normalized, as otherwise
/// the likelihood is not meaningful.
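///
/// \par Example
/// A minimal usage sketch; \c MyDensityModel and \c createMyData are hypothetical
/// stand-ins for any single-output model and a data source:
/// \code
/// Data<RealVector> data = createMyData();        // unlabeled training samples
/// MyDensityModel model;                          // normalized density with one output
/// NegativeLogLikelihood nll(data, &model);
/// RealVector point = nll.proposeStartingPoint(); // current model parameters
/// double error = nll.eval(point);                // mean negative log likelihood
/// \endcode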
/// \ingroup objfunctions
class NegativeLogLikelihood : public AbstractObjectiveFunction< RealVector, double >
{
public:
	typedef Data<RealVector> DatasetType;

	NegativeLogLikelihood(
		DatasetType const& data,
		AbstractModel<RealVector,RealVector>* model
	):mep_model(model),m_data(data){
		if(mep_model->hasFirstParameterDerivative())
			m_features |= HAS_FIRST_DERIVATIVE;
		m_features |= CAN_PROPOSE_STARTING_POINT;
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "NegativeLogLikelihood"; }

	SearchPointType proposeStartingPoint() const{
		return mep_model->parameterVector();
	}

	std::size_t numberOfVariables()const{
		return mep_model->numberOfParameters();
	}

	ResultType eval(RealVector const& input) const{
		SIZE_CHECK(input.size() == numberOfVariables());
		m_evaluationCounter++;
		mep_model->setParameterVector(input);

		double error = 0;
		double minProb = 1e-100;//numerical stability is only guaranteed for lower bounded probabilities
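		//clamping at minProb bounds each term: log(1e-100) is roughly -230, so a sample
		//with vanishing probability adds a large but finite penalty instead of -infinity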
		SHARK_PARALLEL_FOR(int i = 0; i < (int)m_data.numberOfBatches(); ++i){
			RealMatrix predictions = (*mep_model)(m_data.batch(i));
			SIZE_CHECK(predictions.size2() == 1);
			double logLikelihoodOfSamples = sum(log(max(predictions,minProb)));
			SHARK_CRITICAL_REGION{
				error += logLikelihoodOfSamples;
			}
		}
		error /= m_data.numberOfElements();//compute mean
		return -error;//negative log likelihood
	}
	ResultType evalDerivative(
		SearchPointType const& input,
		FirstOrderDerivative & derivative
	) const{
		SIZE_CHECK(input.size() == numberOfVariables());
		m_evaluationCounter++;
		mep_model->setParameterVector(input);
		derivative.resize(input.size());
		derivative.clear();

		//compute partitioning on threads
		std::size_t numBatches = m_data.numberOfBatches();
		std::size_t numElements = m_data.numberOfElements();
		std::size_t numThreads = std::min(SHARK_NUM_THREADS,numBatches);
		//calculate optimal partitioning
		std::size_t batchesPerThread = numBatches/numThreads;
		std::size_t leftOver = numBatches - batchesPerThread*numThreads;
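		//the first leftOver threads handle one extra batch; e.g. with numBatches = 10 and
		//numThreads = 4 this yields batchesPerThread = 2, leftOver = 2 and the
		//batch ranges [0,3), [3,6), [6,8) and [8,10)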
		double error = 0;
		double minProb = 1e-100;//numerical stability is only guaranteed for lower bounded probabilities
		SHARK_PARALLEL_FOR(int ti = 0; ti < (int)numThreads; ++ti){//MSVC does not support unsigned integrals in parallel loops
			std::size_t t = ti;
			//get start and end index of batch-range
			std::size_t start = t*batchesPerThread+std::min(t,leftOver);
			std::size_t end = (t+1)*batchesPerThread+std::min(t+1,leftOver);

			//calculate error and derivative of the current thread
			FirstOrderDerivative threadDerivative(input.size(),0.0);
			double threadError = 0;
			boost::shared_ptr<State> state = mep_model->createState();
			RealVector batchDerivative;
			RealMatrix predictions;
			for(std::size_t i = start; i != end; ++i){
				mep_model->eval(m_data.batch(i),predictions,*state);
				SIZE_CHECK(predictions.size2() == 1);
				threadError += sum(log(max(predictions,minProb)));
				RealMatrix coeffs(predictions.size1(),predictions.size2(),0.0);
				//the code below handles numeric instabilities: clamped predictions get a zero coefficient
				for(std::size_t j = 0; j != predictions.size1(); ++j){
					for(std::size_t k = 0; k != predictions.size2(); ++k){
						if(predictions(j,k) >= minProb){
							coeffs(j,k) = 1.0/predictions(j,k);
						}
					}
				}
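				//chain rule: d log p(x)/dtheta = (1/p(x)) * dp(x)/dtheta, so the coefficients
				//1/p turn the weighted model derivative into the log-likelihood gradient;
				//clamped samples got coefficient 0 above and thus contribute no gradient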
				mep_model->weightedParameterDerivative(
					m_data.batch(i),predictions, coeffs,*state,batchDerivative
				);
				threadDerivative += batchDerivative;
			}

			//sum over all threads
			SHARK_CRITICAL_REGION{
				error += threadError;
				noalias(derivative) += threadDerivative;
			}
		}

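		//average over the dataset and flip the sign: the objective is the negative
		//of the accumulated mean log likelihood, so the gradient is negated as well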
		error /= numElements;
		derivative /= numElements;
		derivative *= -1;
		return -error;//negative log likelihood
	}

private:
	AbstractModel<RealVector,RealVector>* mep_model;
	DatasetType m_data;
};

}
#endif