shark/Models/NeuronLayers.h
/*!
 *
 *
 * \file
 *
 * \author O.Krause
 * \date 2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef MODELS_NEURONS_H
#define MODELS_NEURONS_H

#include <shark/LinAlg/Base.h>
#include <shark/Models/AbstractModel.h>

namespace shark{


/// \defgroup activations Neuron activation functions
/// \ingroup models
/// Neuron activation functions used for neural network nonlinearities.

///\brief Neuron which computes the hyperbolic tangent with range [-1,1].
///
///The Tanh function is
///\f[ f(x)=\tanh(x) = \frac{2}{1+e^{-2x}}-1 \f]
///Its derivative can be computed as
///\f[ f'(x)= 1-f(x)^2 \f]
///
/// \ingroup activations
struct TanhNeuron{
	typedef EmptyState State;
	template<class Arg>
	void evalInPlace(Arg& arg)const{
		noalias(arg) = tanh(arg);
	}

	template<class Arg>
	void evalInPlace(Arg& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const&)const{
		noalias(der) *= typename Output::value_type(1) - sqr(output);
	}
};
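
A minimal sketch of using such an activation functor on its own, assuming the Shark headers above are on the include path; evalInPlace overwrites its argument elementwise:

// Hypothetical standalone example, not part of this header.
#include <shark/Models/NeuronLayers.h>
#include <iostream>

int main(){
	shark::RealVector v(3);
	v(0) = -2.0; v(1) = 0.0; v(2) = 2.0;
	shark::TanhNeuron neuron;
	neuron.evalInPlace(v); // v becomes (tanh(-2), tanh(0), tanh(2))
	for(std::size_t i = 0; i != v.size(); ++i)
		std::cout << v(i) << "\n"; // approximately -0.964, 0, 0.964
}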

///\brief Neuron which computes the Logistic (sigmoid) function with range [0,1].
///
///The Logistic function is
///\f[ f(x)=\frac{1}{1+e^{-x}}\f]
///Its derivative can be computed as
///\f[ f'(x)= f(x)(1-f(x)) \f]
///
/// \ingroup activations
struct LogisticNeuron{
	typedef EmptyState State;
	template<class Arg>
	void evalInPlace(Arg& arg)const{
		noalias(arg) = sigmoid(arg);
	}

	template<class Arg>
	void evalInPlace(Arg& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{
		noalias(der) *= output * (typename Output::value_type(1) - output);
	}
};
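
Note that multiplyDerivative works from the stored output alone. This relies on the identity stated above, which follows directly from the definition:

f'(x) = \frac{e^{-x}}{(1+e^{-x})^2} = \frac{1}{1+e^{-x}} \cdot \frac{e^{-x}}{1+e^{-x}} = f(x)\,(1-f(x)),

since 1-f(x) = e^{-x}/(1+e^{-x}).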

///\brief Fast sigmoidal function which does not need to compute an exponential.
///
///It is defined as
///\f[ f(x)=\frac{x}{1+|x|}\f]
///Its derivative can be computed as
///\f[ f'(x)= (1 - |f(x)|)^2 \f]
///
/// \ingroup activations
struct FastSigmoidNeuron{
	typedef EmptyState State;
	template<class Arg>
	void evalInPlace(Arg& arg)const{
		noalias(arg) /= typename Arg::value_type(1)+abs(arg);
	}

	template<class Arg>
	void evalInPlace(Arg& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{
		noalias(der) *= sqr(typename Output::value_type(1) - abs(output));
	}
};
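
The stated derivative identity can be checked in one line: for x \neq 0,

f'(x) = \frac{(1+|x|) - x\,\mathrm{sgn}(x)}{(1+|x|)^2} = \frac{1}{(1+|x|)^2} = \bigl(1-|f(x)|\bigr)^2,

because 1-|f(x)| = 1/(1+|x|). Again the derivative is a function of the output only, which is what multiplyDerivative exploits.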

///\brief Linear activation neuron.
///
///It is defined as
///\f[ f(x)=x\f]
///
/// \ingroup activations
struct LinearNeuron{
	typedef EmptyState State;
	template<class Arg>
	void evalInPlace(Arg&)const{}

	template<class Arg>
	void evalInPlace(Arg& arg, State const&)const{}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{}
};

///\brief Rectifier neuron f(x) = max(0,x)
///
/// \ingroup activations
struct RectifierNeuron{
	typedef EmptyState State;
	template<class Arg>
	void evalInPlace(Arg& arg)const{
		noalias(arg) = max(arg, typename Arg::value_type(0));
	}

	template<class Arg>
	void evalInPlace(Arg& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& state)const{
		//the rectifier derivative is the Heaviside step of the input,
		//computed here from the output since f(x) > 0 iff x > 0
		noalias(der) *= output > 0;
	}
};
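
Away from x = 0 the rectifier's derivative is the Heaviside step,

f'(x) = \begin{cases} 1 & x > 0 \\ 0 & x < 0, \end{cases}

and since f(x) > 0 exactly when x > 0, the mask can be computed from the stored output; the expression output > 0 yields an elementwise 0/1 mask that zeroes the gradient entries of inactive units.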

///\brief Normalizes the sum of inputs to one.
///
/// \f[ f_i(x)= \frac{x_i}{\sum_j x_j} \f]
/// Normalization reinterprets the inputs as probabilities; therefore no negative values are allowed.
///
/// \ingroup activations
template<class VectorType = RealVector>
struct NormalizerNeuron{
	struct State: public shark::State{
		VectorType norm;

		void resize(std::size_t patterns){
			norm.resize(patterns);
		}
	};

	template<class Arg, class Device>
	void evalInPlace(blas::vector_expression<Arg,Device>& arg)const{
		noalias(arg) /= sum(arg);
	}

	template<class Arg, class Device>
	void evalInPlace(blas::matrix_expression<Arg,Device>& arg)const{
		noalias(trans(arg)) /= blas::repeat(sum(as_rows(arg)),arg().size2());
	}

	template<class Arg, class Device>
	void evalInPlace(blas::matrix_expression<Arg,Device>& arg, State& state)const{
		state.norm.resize(arg().size1());
		noalias(state.norm) = sum(as_rows(arg));
		noalias(arg) /= trans(blas::repeat(state.norm,arg().size2()));
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& s)const{
		for(std::size_t i = 0; i != output.size1(); ++i){
			double constant=inner_prod(row(der,i),row(output,i));
			noalias(row(der,i))= (row(der,i)-constant)/s.norm(i);
		}
	}
};
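
The loop in multiplyDerivative implements the Jacobian-vector product of this normalization. Writing N = \sum_j x_j, so f_i(x) = x_i/N, the partial derivatives are

\frac{\partial f_k}{\partial x_i} = \frac{\delta_{ki}}{N} - \frac{x_k}{N^2},

and for a row d of incoming coefficients

(J^\top d)_i = \sum_k d_k \frac{\partial f_k}{\partial x_i} = \frac{d_i - \sum_k d_k f_k(x)}{N},

which is exactly (row(der,i) - constant)/s.norm(i) with constant = inner_prod(row(der,i), row(output,i)).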

///\brief Computes the softmax activation function.
///
/// \f[ f_i(x)= \frac{\exp(x_i)}{\sum_j \exp(x_j)} \f]
///
/// Computes the exponential of the inputs and normalizes the outputs to sum to one.
/// This is the NormalizerNeuron without the constraint that the inputs must be positive.
///
/// \ingroup activations
template<class VectorType = RealVector>
struct SoftmaxNeuron{
	typedef EmptyState State;

	template<class Arg, class Device>
	void evalInPlace(blas::vector_expression<Arg,Device>& arg)const{
		noalias(arg) = exp(arg);
		noalias(arg) /= sum(arg);
	}

	template<class Arg, class Device>
	void evalInPlace(blas::matrix_expression<Arg,Device>& arg)const{
		noalias(arg) = exp(arg);
		noalias(arg) /= trans(blas::repeat(sum(as_rows(arg)),arg().size2()));
	}

	template<class Arg, class Device>
	void evalInPlace(blas::matrix_expression<Arg,Device>& arg, State&)const{
		evalInPlace(arg);
	}

	template<class Output, class Derivative>
	void multiplyDerivative(Output const& output, Derivative& der, State const& s)const{
		auto mass = eval_block(sum(as_rows(der * output)));
		noalias(der) -= trans(blas::repeat(mass, der.size2()));
		noalias(der) *= output;
	}
};
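
multiplyDerivative again computes a Jacobian-vector product. The softmax Jacobian is

\frac{\partial f_k}{\partial x_i} = f_k(\delta_{ki} - f_i),

so for a row d of coefficients

(J^\top d)_i = f_i\Bigl(d_i - \sum_k d_k f_k\Bigr);

mass holds the row-wise sums \sum_k d_k f_k, which are subtracted from der before the elementwise multiplication with output.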



///\brief Neuron activation layer.
///
/// Applies a nonlinear activation function to the given input. Various choices for activations
/// are given in \ref activations.
///
/// \ingroup models
template<class NeuronType, class VectorType = RealVector>
class NeuronLayer : public AbstractModel<VectorType, VectorType, VectorType>{
private:
	typedef AbstractModel<VectorType,VectorType, VectorType> base_type;

	NeuronType m_neuron;
	Shape m_shape;
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::BatchOutputType BatchOutputType;
	typedef typename base_type::ParameterVectorType ParameterVectorType;

	NeuronLayer(Shape const& shape = Shape()): m_shape(shape){
		base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "NeuronLayer"; }

	NeuronType const& neuron()const{ return m_neuron;}
	NeuronType& neuron(){ return m_neuron;}

	Shape inputShape() const{
		return m_shape;
	}

	Shape outputShape() const{
		return m_shape;
	}

	/// obtain the parameter vector
	ParameterVectorType parameterVector() const{
		return ParameterVectorType();
	}

	/// overwrite the parameter vector
	void setParameterVector(ParameterVectorType const& newParameters){
		SIZE_CHECK(newParameters.size() == 0);
	}

	/// return the number of parameters
	size_t numberOfParameters() const{
		return 0;
	}

	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new typename NeuronType::State());
	}

	using base_type::eval;

	void eval(BatchInputType const& inputs, BatchOutputType& outputs)const{
		SIZE_CHECK(inputs.size2() == m_shape.numElements());
		outputs.resize(inputs.size1(),inputs.size2());
		noalias(outputs) = inputs;
		m_neuron.evalInPlace(outputs);
	}

	void eval(VectorType const& input, VectorType& output)const{
		SIZE_CHECK(input.size() == m_shape.numElements());
		output.resize(input.size());
		noalias(output) = input;
		m_neuron.evalInPlace(output);
	}
	void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
		SIZE_CHECK(inputs.size2() == m_shape.numElements());
		outputs.resize(inputs.size1(),inputs.size2());
		noalias(outputs) = inputs;
		m_neuron.evalInPlace(outputs, state.toState<typename NeuronType::State>());
	}

	///\brief Calculates the first derivative w.r.t. the parameters, summed over all inputs of the last computed batch.
	/// The layer has no parameters, so only the sizes are checked.
	void weightedParameterDerivative(
		BatchInputType const& inputs,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		ParameterVectorType& gradient
	)const{
		SIZE_CHECK(coefficients.size1()==inputs.size1());
		SIZE_CHECK(coefficients.size2()==inputs.size2());
	}
	///\brief Calculates the first derivative w.r.t. the inputs, summed over all inputs of the last computed batch.
	void weightedInputDerivative(
		BatchInputType const& inputs,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		BatchInputType& derivative
	)const{
		SIZE_CHECK(coefficients.size1() == inputs.size1());
		SIZE_CHECK(coefficients.size2() == inputs.size2());

		derivative.resize(inputs.size1(),inputs.size2());
		noalias(derivative) = coefficients;
		m_neuron.multiplyDerivative(outputs, derivative, state.toState<typename NeuronType::State>());
	}

	/// From ISerializable
	void read(InArchive& archive){ archive >> m_shape;}
	/// From ISerializable
	void write(OutArchive& archive) const{ archive << m_shape;}
};


}
#endif
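
A minimal sketch of using NeuronLayer as a model, assuming the header compiles as above (Shape's initializer-list constructor is assumed here). The layer is parameter-free, so only its shape must be chosen:

// Hypothetical standalone example, not part of this header.
#include <shark/Models/NeuronLayers.h>
#include <iostream>

int main(){
	// a rectifier layer over 3-dimensional inputs
	shark::NeuronLayer<shark::RectifierNeuron> layer(shark::Shape({3}));

	shark::RealVector x(3);
	x(0) = -1.0; x(1) = 0.5; x(2) = 2.0;

	shark::RealVector y;
	layer.eval(x, y); // applies max(0,x) elementwise: y = (0, 0.5, 2)

	for(std::size_t i = 0; i != y.size(); ++i)
		std::cout << y(i) << "\n";
}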