/*!
 *
 *
 * \file NeuronLayers.h
 *
 * \author O.Krause
 * \date 2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef MODELS_NEURONS_H
#define MODELS_NEURONS_H

#include <shark/LinAlg/Base.h>
#include <shark/Models/AbstractModel.h>

namespace shark{


/// \defgroup activations Neuron activation functions
/// \ingroup models
/// Neuron activation functions used for neural network nonlinearities.


///\brief Neuron which computes the hyperbolic tangent with range [-1,1].
///
///The Tanh function is
///\f[ f(x)=\tanh(x) = \frac{2}{1+e^{-2x}}-1 \f]
///Its derivative can be computed as
///\f[ f'(x)= 1-f(x)^2 \f]
///
/// \ingroup activations
struct TanhNeuron{
	typedef EmptyState State;
	template<class Arg>
	void evalInPlace(Arg& arg)const{
		noalias(arg) = tanh(arg);
	}

	template<class Arg>
	void evalInPlace(Arg& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& )const{
		noalias(der) *= typename Output::value_type(1) - sqr(output);
	}
};

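// Added for illustration (not part of the original header): a minimal sketch of
// applying TanhNeuron directly to a RealVector. The function name is made up;
// it assumes RealVector from shark/LinAlg/Base.h and the EmptyState typedef above.
inline void exampleTanhNeuronUsage(){
	TanhNeuron neuron;
	RealVector x(3);
	x(0) = -1.0; x(1) = 0.0; x(2) = 2.0;
	neuron.evalInPlace(x);                        // x(i) becomes tanh(x(i))
	RealVector delta(3, 1.0);                     // incoming error coefficients
	neuron.multiplyDerivative(x, delta, TanhNeuron::State()); // delta(i) *= 1 - x(i)^2 (x already holds tanh values)
}
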
///\brief Neuron which computes the Logistic (sigmoid) function with range [0,1].
///
///The Logistic function is
///\f[ f(x)=\frac{1}{1+e^{-x}} \f]
///Its derivative can be computed as
///\f[ f'(x)= f(x)(1-f(x)) \f]
///
/// \ingroup activations
struct LogisticNeuron{
	typedef EmptyState State;
	template<class Arg>
	void evalInPlace(Arg& arg)const{
		noalias(arg) = sigmoid(arg);
	}

	template<class Arg>
	void evalInPlace(Arg& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{
		noalias(der) *= output * (typename Output::value_type(1) - output);
	}
};

///\brief Fast sigmoidal function, which does not need to compute an exponential function.
///
///It is defined as
///\f[ f(x)=\frac{x}{1+|x|} \f]
///Its derivative can be computed as
///\f[ f'(x)= (1 - |f(x)|)^2 \f]
///
/// \ingroup activations
struct FastSigmoidNeuron{
	typedef EmptyState State;
	template<class Arg>
	void evalInPlace(Arg& arg)const{
		noalias(arg) /= typename Arg::value_type(1)+abs(arg);
	}

	template<class Arg>
	void evalInPlace(Arg& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{
		noalias(der) *= sqr(typename Output::value_type(1) - abs(output));
	}
};

///\brief Linear activation neuron.
///
///It is defined as
///\f[ f(x)=x \f]
///
/// \ingroup activations
struct LinearNeuron{
	typedef EmptyState State;
	template<class Arg>
	void evalInPlace(Arg&)const{}

	template<class Arg>
	void evalInPlace(Arg& arg, State const&)const{}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{}
};

///\brief Rectifier neuron f(x) = max(0,x)
///
/// \ingroup activations
struct RectifierNeuron{
	typedef EmptyState State;
	template<class Arg>
	void evalInPlace(Arg& arg)const{
		noalias(arg) = max(arg,typename Arg::value_type(0));
	}

	template<class Arg>
	void evalInPlace(Arg& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{
		//the derivative is the Heaviside step of the output: 1 where output > 0, 0 otherwise
		noalias(der) *= output > 0;
	}
};

///\brief Normalizes the sum of inputs to one.
///
/// \f[ f_i(x)= \frac{x_i}{\sum_j x_j} \f]
/// Normalization reinterprets the inputs as probabilities; therefore no negative values are allowed.
///
/// \ingroup activations
template<class VectorType = RealVector>
struct NormalizerNeuron{
	struct State: public shark::State{
		VectorType norm;

		void resize(std::size_t patterns){
			norm.resize(patterns);
		}
	};

	template<class Arg, class Device>
	void evalInPlace(blas::vector_expression<Arg,Device>& arg)const{
		noalias(arg) /= sum(arg);
	}

	template<class Arg, class Device>
	void evalInPlace(blas::matrix_expression<Arg,Device>& arg)const{
		noalias(trans(arg)) /= blas::repeat(sum(as_rows(arg)),arg().size2());
	}

	template<class Arg, class Device>
	void evalInPlace(blas::matrix_expression<Arg,Device>& arg, State& state)const{
		state.norm.resize(arg().size1());
		noalias(state.norm) = sum(as_rows(arg));
		noalias(arg) /= trans(blas::repeat(state.norm,arg().size2()));
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& s)const{
		for(std::size_t i = 0; i != output.size1(); ++i){
			double constant=inner_prod(row(der,i),row(output,i));
			noalias(row(der,i))= (row(der,i)-constant)/s.norm(i);
		}
	}
};

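// Added for illustration (not part of the original header): a sketch of how
// NormalizerNeuron might be applied to a batch matrix; the function name is made
// up and RealMatrix comes from shark/LinAlg/Base.h. Inputs must be non-negative.
inline void exampleNormalizerNeuronUsage(){
	NormalizerNeuron<> neuron;
	RealMatrix batch(2, 3, 1.0);        // 2 patterns with 3 non-negative features each
	batch(0, 2) = 2.0;                  // first row becomes (1, 1, 2)
	NormalizerNeuron<>::State state;
	neuron.evalInPlace(batch, state);   // each row now sums to one; the row sums are kept in state.norm
}
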
///\brief Computes the softmax activation function.
///
/// \f[ f_i(x)= \frac{\exp(x_i)}{\sum_j \exp(x_j)} \f]
///
/// Computes the exponential function of the inputs and normalizes the outputs to sum to one. This is
/// the NormalizerNeuron applied to exponentiated inputs, and thus without the constraint that inputs be positive.
///
/// \ingroup activations
template<class VectorType = RealVector>
struct SoftmaxNeuron{
	typedef EmptyState State;

	template<class Arg, class Device>
	void evalInPlace(blas::vector_expression<Arg,Device>& arg)const{
		noalias(arg) = exp(arg);
		noalias(arg) /= sum(arg);
	}

	template<class Arg, class Device>
	void evalInPlace(blas::matrix_expression<Arg,Device>& arg)const{
		noalias(arg) = exp(arg);
		noalias(arg) /= trans(blas::repeat(sum(as_rows(arg)),arg().size2()));
	}

	template<class Arg, class Device>
	void evalInPlace(blas::matrix_expression<Arg,Device>& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& s)const{
		auto mass = eval_block(sum(as_rows(der * output)));
		noalias(der) -= trans(blas::repeat(mass, der.size2()));
		noalias(der) *= output;
	}
};

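// Added for illustration (not part of the original header): a sketch of applying
// SoftmaxNeuron to a batch of activations; the function name is made up. Note that
// the implementation above exponentiates the raw inputs, so very large values can
// overflow; this sketch simply uses moderate inputs.
inline void exampleSoftmaxNeuronUsage(){
	SoftmaxNeuron<> neuron;
	RealMatrix activations(2, 3, 0.0);  // 2 patterns, 3 classes
	activations(0, 1) = 1.0;
	activations(1, 2) = -0.5;
	neuron.evalInPlace(activations);    // each row is now a probability distribution
}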


///\brief Neuron activation layer.
///
/// Applies a nonlinear activation function to the given input. Various choices for activations
/// are given in \ref activations.
///
/// \ingroup models
template <class NeuronType, class VectorType = RealVector>
class NeuronLayer : public AbstractModel<VectorType, VectorType, VectorType>{
private:
	typedef AbstractModel<VectorType, VectorType, VectorType> base_type;

	NeuronType m_neuron;
	Shape m_shape;
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::BatchOutputType BatchOutputType;
	typedef typename base_type::ParameterVectorType ParameterVectorType;

	NeuronLayer(Shape const& shape = Shape()): m_shape(shape){
		base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "NeuronLayer"; }

	NeuronType const& neuron()const{ return m_neuron;}
	NeuronType& neuron(){ return m_neuron;}

	///\brief Returns the expected shape of the input.
	Shape inputShape() const{
		return m_shape;
	}

	///\brief Returns the shape of the output.
	Shape outputShape() const{
		return m_shape;
	}

	/// obtain the parameter vector
	ParameterVectorType parameterVector() const{
		return ParameterVectorType();
	}

	/// overwrite the parameter vector
	void setParameterVector(ParameterVectorType const& newParameters){
		SIZE_CHECK(newParameters.size() == 0);
	}

	/// return the number of parameters
	size_t numberOfParameters() const{
		return 0;
	}

	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new typename NeuronType::State());
	}

	using base_type::eval;

	void eval(BatchInputType const& inputs, BatchOutputType& outputs)const{
		SIZE_CHECK(inputs.size2() == m_shape.numElements());
		outputs.resize(inputs.size1(),inputs.size2());
		noalias(outputs) = inputs;
		m_neuron.evalInPlace(outputs);
	}

	void eval(VectorType const& input, VectorType& output)const{
		SIZE_CHECK(input.size() == m_shape.numElements());
		output.resize(input.size());
		noalias(output) = input;
		m_neuron.evalInPlace(output);
	}
	void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
		SIZE_CHECK(inputs.size2() == m_shape.numElements());
		outputs.resize(inputs.size1(),inputs.size2());
		noalias(outputs) = inputs;
		m_neuron.evalInPlace(outputs, state.toState<typename NeuronType::State>());
	}

	///\brief Calculates the first derivative w.r.t. the parameters and sums them up over all inputs of the last computed batch.
	void weightedParameterDerivative(
		BatchInputType const& inputs,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		ParameterVectorType& gradient
	)const{
		SIZE_CHECK(coefficients.size1()==inputs.size1());
		SIZE_CHECK(coefficients.size2()==inputs.size2());
	}
	///\brief Calculates the first derivative w.r.t. the inputs and sums them up over all inputs of the last computed batch.
	void weightedInputDerivative(
		BatchInputType const & inputs,
		BatchOutputType const & outputs,
		BatchOutputType const & coefficients,
		State const& state,
		BatchInputType& derivative
	)const{
		SIZE_CHECK(coefficients.size1() == inputs.size1());
		SIZE_CHECK(coefficients.size2() == inputs.size2());

		derivative.resize(inputs.size1(),inputs.size2());
		noalias(derivative) = coefficients;
		m_neuron.multiplyDerivative(outputs, derivative, state.toState<typename NeuronType::State>());

	}

	/// From ISerializable
	void read(InArchive& archive){ archive >> m_shape;}
	/// From ISerializable
	void write(OutArchive& archive) const{ archive << m_shape;}
};
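
// Added for illustration (not part of the original header): a sketch of using
// NeuronLayer as a stand-alone model with a rectifier activation; the function
// name is made up. Shape and the eval interface come from AbstractModel.
inline void exampleNeuronLayerUsage(){
	NeuronLayer<RectifierNeuron> layer(Shape({4}));  // 4 inputs, 4 outputs, no parameters
	RealMatrix inputs(2, 4, -1.0);                   // batch of 2 patterns
	inputs(0, 0) = 3.0; inputs(1, 3) = 0.5;

	// forward pass, keeping the state needed for the backward pass
	boost::shared_ptr<State> state = layer.createState();
	RealMatrix outputs;
	layer.eval(inputs, outputs, *state);             // outputs = max(0, inputs), element-wise

	// backward pass: multiply incoming coefficients by the activation derivative
	RealMatrix coefficients(2, 4, 1.0);
	RealMatrix inputDerivative;
	layer.weightedInputDerivative(inputs, outputs, coefficients, *state, inputDerivative);
}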


}

#endif