LinearModel.h
/*!
 *
 * \brief Implements a Model using a linear function.
 *
 * \author T. Glasmachers, O. Krause
 * \date 2010-2017
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_MODELS_LINEARMODEL_H
#define SHARK_MODELS_LINEARMODEL_H

#include <shark/Models/AbstractModel.h>
#include <shark/Models/NeuronLayers.h>

namespace shark {

///
/// \brief Linear Prediction with optional activation function
///
/// \par
/// This model computes the result of
/// \f$ y = f(x) = g(A x + b) \f$, where g is an arbitrary activation function, see \ref activations.
/// By default g is the identity, and the model is a simple linear model.
/// Otherwise, it is known as a generalized linear model. There are two important special cases:
/// the output may be a single number, and the offset term b may be dropped.
///
/// The class allows for dense and sparse input vector types. However, it assumes that
/// the weight matrix and the outputs are dense. In some cases this is not
/// desirable; see for example Normalizer for a class designed for sparse
/// inputs and outputs.
///
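/// \par Example
/// A minimal usage sketch; the dimensions and the vector x below are illustrative
/// and not part of this header:
/// \code
/// LinearModel<RealVector> model(3, 2, true); // 3 inputs, 2 outputs, with offset; g defaults to the identity
/// RealVector x(3, 1.0);
/// RealVector y = model(x); // y = A x + b; A and b are zero directly after construction
/// \endcode
///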
/// \ingroup models
template <class InputType = RealVector, class ActivationFunction = LinearNeuron>
class LinearModel : public AbstractModel<
    InputType,
    blas::vector<typename InputType::value_type, typename InputType::device_type>,//type of output uses same device and precision as input
    blas::vector<typename InputType::value_type, typename InputType::device_type>//type of parameters uses same device and precision as input
>{
public:
    typedef blas::vector<typename InputType::value_type, typename InputType::device_type> VectorType;
    typedef blas::matrix<typename InputType::value_type, blas::row_major, typename InputType::device_type> MatrixType;
private:
    typedef AbstractModel<InputType, VectorType, VectorType> base_type;
    Shape m_inputShape;
    Shape m_outputShape;
    MatrixType m_matrix;
    VectorType m_offset;
    ActivationFunction m_activation;
public:
    typedef typename base_type::BatchInputType BatchInputType;
    typedef typename base_type::BatchOutputType BatchOutputType;//same as MatrixType
    typedef typename base_type::ParameterVectorType ParameterVectorType;//same as VectorType

    /// Default constructor; use setStructure later
    LinearModel(){
        this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
        if(std::is_base_of<blas::dense_tag, typename InputType::storage_type::storage_tag>::value){
            this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
        }
    }
    /// Constructor creating a model with given dimensionalities and optional offset term.
    LinearModel(Shape const& inputs, Shape const& outputs = 1, bool offset = false)
    : m_inputShape(inputs)
    , m_outputShape(outputs)
    , m_matrix(outputs.numElements(), inputs.numElements(), 0.0)
    , m_offset(offset? outputs.numElements() : 0, 0.0){
        this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
        if(std::is_base_of<blas::dense_tag, typename InputType::storage_type::storage_tag>::value){
            this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
        }
    }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "LinearModel"; }

    /// Construction from matrix (and optional offset vector)
    LinearModel(MatrixType const& matrix, VectorType const& offset = VectorType())
    : m_inputShape(matrix.size2())
    , m_outputShape(matrix.size1())
    , m_matrix(matrix)
    , m_offset(offset){
        this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
        if(std::is_base_of<blas::dense_tag, typename InputType::storage_type::storage_tag>::value){
            this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
        }
    }

    /// check for the presence of an offset term
    bool hasOffset() const{
        return m_offset.size() != 0;
    }

    ///\brief Returns the expected shape of the input
    Shape inputShape() const{
        return m_inputShape;
    }
    ///\brief Returns the shape of the output
    Shape outputShape() const{
        return m_outputShape;
    }

    /// obtain the parameter vector
    ParameterVectorType parameterVector() const{
        return to_vector(m_matrix) | m_offset;
    }

    /// overwrite the parameter vector
    void setParameterVector(ParameterVectorType const& newParameters){
        std::size_t numInputs = inputShape().numElements();
        std::size_t numOutputs = outputShape().numElements();
        noalias(to_vector(m_matrix)) = subrange(newParameters, 0, numInputs * numOutputs);
        noalias(m_offset) = subrange(newParameters, numInputs * numOutputs, newParameters.size());
    }
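
    // Layout of the parameter vector used by parameterVector()/setParameterVector():
    // the weight matrix A comes first, linearized row by row (row_major), followed by
    // the offset b, if present. Sketch for a hypothetical 2x3 matrix A with offset:
    //   (A(0,0), A(0,1), A(0,2), A(1,0), A(1,1), A(1,2), b(0), b(1))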

    /// return the number of parameters
    size_t numberOfParameters() const{
        return m_matrix.size1() * m_matrix.size2() + m_offset.size();
    }

    /// overwrite structure and parameters
    void setStructure(Shape const& inputs, Shape const& outputs = 1, bool offset = false){
        LinearModel<InputType, ActivationFunction> model(inputs, outputs, offset);
        *this = model;
    }

    /// overwrite structure and parameters
    void setStructure(MatrixType const& matrix, VectorType const& offset = VectorType()){
        LinearModel<InputType, ActivationFunction> model(matrix, offset);
        *this = model;
    }

    /// return the weight matrix
    MatrixType const& matrix() const{
        return m_matrix;
    }

    MatrixType& matrix(){
        return m_matrix;
    }

    /// return the offset
    VectorType const& offset() const{
        return m_offset;
    }

    VectorType& offset(){
        return m_offset;
    }

    /// \brief Returns the activation function.
    ActivationFunction const& activationFunction()const{
        return m_activation;
    }

    /// \brief Returns the activation function.
    ActivationFunction& activationFunction(){
        return m_activation;
    }

    boost::shared_ptr<State> createState()const{
        return boost::shared_ptr<State>(new typename ActivationFunction::State());
    }

    using base_type::eval;

    /// Evaluate the model: output = matrix * input + offset
    void eval(BatchInputType const& inputs, BatchOutputType& outputs)const{
        outputs.resize(inputs.size1(), m_matrix.size1());
        //we multiply with a set of row vectors from the left
        noalias(outputs) = inputs % trans(m_matrix);
        if (hasOffset()){
            noalias(outputs) += repeat(m_offset, inputs.size1());
        }
        m_activation.evalInPlace(outputs);
    }

    void eval(InputType const& input, VectorType& output)const{
        output.resize(m_matrix.size1());
        //single pattern: a plain matrix-vector product
        noalias(output) = m_matrix % input;
        if (hasOffset()){
            noalias(output) += m_offset;
        }
        m_activation.evalInPlace(output);
    }
    /// Evaluate the model: output = matrix * input + offset
    void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
        outputs.resize(inputs.size1(), m_matrix.size1());
        //we multiply with a set of row vectors from the left
        noalias(outputs) = inputs % trans(m_matrix);
        if (hasOffset()){
            noalias(outputs) += repeat(m_offset, inputs.size1());
        }
        m_activation.evalInPlace(outputs, state.toState<typename ActivationFunction::State>());
    }

    ///\brief Calculates the first derivative w.r.t. the parameters, summed over all patterns of the last computed batch
    void weightedParameterDerivative(
        BatchInputType const& patterns,
        BatchOutputType const& outputs,
        BatchOutputType const& coefficients,
        State const& state,
        ParameterVectorType& gradient
    )const{
        SIZE_CHECK(coefficients.size2() == m_matrix.size1());
        SIZE_CHECK(coefficients.size1() == patterns.size1());

        gradient.resize(numberOfParameters());
        std::size_t numInputs = inputShape().numElements();
        std::size_t numOutputs = outputShape().numElements();
        gradient.clear();
        std::size_t matrixParams = numInputs * numOutputs;

        auto weightGradient = blas::to_matrix(subrange(gradient, 0, matrixParams), numOutputs, numInputs);

        BatchOutputType delta = coefficients;
        m_activation.multiplyDerivative(outputs, delta, state.toState<typename ActivationFunction::State>());
        //sum_i coefficients(output,i)*pattern(i)
        noalias(weightGradient) = trans(delta) % patterns;

        if (hasOffset()){
            noalias(subrange(gradient, matrixParams, matrixParams + numOutputs)) = sum(as_columns(delta));
        }
    }
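
    // Sketch of the chain rule implemented above: with y = g(A x + b) and external
    // coefficients c, multiplyDerivative turns c into delta = c * g'(y) (elementwise), so
    //   dE/dA = sum_i delta_i x_i^T   (computed as trans(delta) % patterns)
    //   dE/db = sum_i delta_i         (the column sums of delta)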
    ///\brief Calculates the first derivative w.r.t. the inputs, summed over all patterns of the last computed batch
    void weightedInputDerivative(
        BatchInputType const& patterns,
        BatchOutputType const& outputs,
        BatchOutputType const& coefficients,
        State const& state,
        MatrixType& derivative
    )const{
        SIZE_CHECK(coefficients.size2() == m_matrix.size1());
        SIZE_CHECK(coefficients.size1() == patterns.size1());

        //compute chain rule
        BatchOutputType delta = coefficients;
        m_activation.multiplyDerivative(outputs, delta, state.toState<typename ActivationFunction::State>());

        derivative.resize(patterns.size1(), patterns.size2());
        noalias(derivative) = delta % m_matrix;
    }

    ///\brief Calculates the first derivatives w.r.t. both the parameters and the inputs of the last computed batch
    void weightedDerivatives(
        BatchInputType const& patterns,
        BatchOutputType const& outputs,
        BatchOutputType const& coefficients,
        State const& state,
        ParameterVectorType& parameterDerivative,
        MatrixType& inputDerivative
    )const{
        SIZE_CHECK(coefficients.size2() == m_matrix.size1());
        SIZE_CHECK(coefficients.size1() == patterns.size1());

        std::size_t numInputs = inputShape().numElements();
        std::size_t numOutputs = outputShape().numElements();

        //compute chain rule
        BatchOutputType delta = coefficients;
        m_activation.multiplyDerivative(outputs, delta, state.toState<typename ActivationFunction::State>());

        //compute input derivative
        inputDerivative.resize(patterns.size1(), numInputs);
        noalias(inputDerivative) = delta % m_matrix;

        //compute parameter derivative
        parameterDerivative.resize(numberOfParameters());
        parameterDerivative.clear();
        std::size_t matrixParams = numInputs * numOutputs;
        auto weightGradient = blas::to_matrix(subrange(parameterDerivative, 0, matrixParams), numOutputs, numInputs);
        auto offsetGradient = subrange(parameterDerivative, matrixParams, parameterDerivative.size());

        //sum_i coefficients(output,i)*pattern(i)
        noalias(weightGradient) = trans(delta) % patterns;
        if (hasOffset()){
            noalias(offsetGradient) = sum(as_columns(delta));
        }
    }

    /// From ISerializable
    void read(InArchive& archive){
        archive >> m_matrix;
        archive >> m_offset;
        archive >> m_inputShape;
        archive >> m_outputShape;
    }
    /// From ISerializable
    void write(OutArchive& archive) const{
        archive << m_matrix;
        archive << m_offset;
        archive << m_inputShape;
        archive << m_outputShape;
    }
};

/// \brief Basic linear classifier.
///
/// The LinearClassifier class is a multi class classifier model
/// suited for linear discriminant analysis. For c classes
/// \f$ 0, \dots, c-1 \f$ the model computes
///
/// \f$ \arg \max_i w_i^T x + b_i \f$
///
/// Thus it is a linear model with an arg max computation on top.
/// The internal linear model can be queried using decisionFunction().
///
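/// \par Example
/// A minimal usage sketch; the sizes and the input x are illustrative only:
/// \code
/// LinearClassifier<RealVector> classifier(4, 3, true); // 4 features, 3 classes, with bias
/// RealVector x(4, 0.0);
/// unsigned int label = classifier(x); // index of the largest decision value
/// \endcode
///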
/// \ingroup models
template<class VectorType = RealVector>
class LinearClassifier : public Classifier<LinearModel<VectorType> >
{
public:
    typedef typename LinearModel<VectorType>::MatrixType MatrixType;

    LinearClassifier(){}

    /// Constructor creating a model with given dimensionalities and optional offset term.
    LinearClassifier(Shape const& inputs, std::size_t numClasses, bool offset = false){
        setStructure(inputs, numClasses, offset);
    }

    /// Constructor from weight matrix (and optional offset).
    LinearClassifier(MatrixType const& matrix, VectorType const& offset = VectorType()){
        setStructure(matrix, offset);
    }

    std::string name() const
    { return "LinearClassifier"; }

    /// overwrite structure and parameters
    void setStructure(Shape const& inputs, std::size_t numClasses, bool offset = false){
        this->decisionFunction().setStructure(inputs, numClasses, offset);
    }

    /// overwrite structure and parameters
    void setStructure(MatrixType const& matrix, VectorType const& offset = VectorType()){
        this->decisionFunction().setStructure(matrix, offset);
    }
};

}
#endif