DropoutLayer.h
/*!
 *
 *
 * \brief Implementation of a dropout layer.
 *
 * \author O.Krause
 * \date 2017
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef MODELS_DROPOUTLAYER_H
#define MODELS_DROPOUTLAYER_H

#include <shark/Core/Random.h>
#include <shark/LinAlg/Base.h>
#include <shark/Models/AbstractModel.h>
namespace shark{

/// \brief Implements dropout layer semantics.
///
/// A dropout layer drops its input, i.e. sets it to 0 with a given probability. This is done for each
/// element separately. As a consequence, model predictions are no longer deterministic; after training,
/// the outputs of several evaluations should therefore be averaged.
///
/// Dropout during training often leads to better regularized solutions in deep neural networks.
///
/// \ingroup models
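///
/// \par Usage sketch
/// A minimal, illustrative example (not part of the original header); it assumes the layer is
/// applied through the usual model operator() and averages several stochastic evaluations as
/// suggested above:
/// \code
/// DropoutLayer<RealVector> layer(Shape({10}), 0.25);  // drop each element with probability 0.25
/// RealMatrix batch(100, 10, 1.0);                     // a batch of 100 inputs
/// RealMatrix average(100, 10, 0.0);
/// std::size_t trials = 32;
/// for(std::size_t t = 0; t != trials; ++t){
///     noalias(average) += layer(batch);               // every call samples a fresh dropout mask
/// }
/// average /= double(trials);                          // averaged prediction after training
/// \endcode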
template <class VectorType = RealVector>
class DropoutLayer : public AbstractModel<VectorType, VectorType, VectorType>{
private:
	typedef AbstractModel<VectorType, VectorType, VectorType> base_type;
	typedef blas::matrix<int, blas::row_major, typename VectorType::device_type> MatrixType;
	struct InternalState: public State{
		MatrixType mask;
	};
	Shape m_shape;
	random::rng_type* mep_rng;
	double m_dropoutProbability;

public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::BatchOutputType BatchOutputType;
	typedef typename base_type::ParameterVectorType ParameterVectorType;

	DropoutLayer(Shape const& inputShape, double probability = 0.5, random::rng_type& rng = random::globalRng)
	: m_shape(inputShape), mep_rng(&rng), m_dropoutProbability(probability){
		// advertise that both derivative routines below are implemented
		base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "DropoutLayer"; }

	/// obtain the parameter vector (empty, as the layer has no parameters)
	ParameterVectorType parameterVector() const{
		return ParameterVectorType();
	}

	/// overwrite the parameter vector
	void setParameterVector(ParameterVectorType const& newParameters){
		SIZE_CHECK(newParameters.size() == 0);
	}

	/// return the number of parameters
	size_t numberOfParameters() const{
		return 0;
	}

	///\brief Returns the expected shape of the input
	Shape inputShape() const{
		return m_shape;
	}
	///\brief Returns the shape of the output
	Shape outputShape() const{
		return m_shape;
	}

	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new InternalState());
	}

	// bring the base-class eval overloads into scope; the overrides below would otherwise hide them
	using base_type::eval;

	void eval(BatchInputType const& inputs, BatchOutputType& outputs)const{
		outputs.resize(inputs.size1(),inputs.size2());
		noalias(outputs) = inputs;
		// drop every element independently with probability m_dropoutProbability
		for(std::size_t i = 0; i != outputs.size1(); ++i){
			for(std::size_t j = 0; j != outputs.size2(); ++j){
				if(random::coinToss(*mep_rng, m_dropoutProbability)){
					outputs(i,j) = 0;
				}
			}
		}
	}

	void eval(VectorType const& input, VectorType& output)const {
		output.resize(input.size());
		noalias(output) = input;
		// same per-element dropout as the batch version
		for(std::size_t j = 0; j != output.size(); ++j){
			if(random::coinToss(*mep_rng, m_dropoutProbability)){
				output(j) = 0;
			}
		}
	}
	void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
		MatrixType& mask = state.toState<InternalState>().mask;
		outputs.resize(inputs.size1(),inputs.size2());
		mask.resize(inputs.size1(),inputs.size2());
		// sample a 0/1 mask; an entry is 0 (dropped) with probability m_dropoutProbability
		for(std::size_t i = 0; i != outputs.size1(); ++i){
			for(std::size_t j = 0; j != outputs.size2(); ++j){
				mask(i,j) = !random::coinToss(*mep_rng, m_dropoutProbability);
			}
		}
		// element-wise product zeroes out the dropped entries; the mask stays in the state for backpropagation
		noalias(outputs) = inputs * mask;
	}

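	// During training, the stateful eval above is meant to be paired with the derivatives below:
	// the sampled mask is kept in the State object so gradients flow only through the kept units.
	// A minimal sketch of that flow (illustrative; all variable names are assumptions):
	//
	//   DropoutLayer<RealVector> layer(Shape({5}));
	//   boost::shared_ptr<State> state = layer.createState();
	//   RealMatrix batch(8, 5, 1.0), outputs;
	//   layer.eval(batch, outputs, *state);
	//   RealMatrix coeffs(8, 5, 1.0), inputDerivative;
	//   layer.weightedInputDerivative(batch, outputs, coeffs, *state, inputDerivative);
	//   // inputDerivative(i,j) == coeffs(i,j) where the mask kept element (i,j), and 0 otherwise
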
	///\brief Calculates the first derivative w.r.t. the parameters, summed over all patterns of the last computed batch.
	///
	/// The layer has no parameters, so there is nothing to compute beyond checking the arguments.
	void weightedParameterDerivative(
		BatchInputType const& patterns,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		ParameterVectorType& gradient
	)const{
		SIZE_CHECK(coefficients.size1()==patterns.size1());
		SIZE_CHECK(coefficients.size2()==patterns.size2());
	}
	///\brief Calculates the first derivative w.r.t. the inputs, summed over all patterns of the last computed batch.
	void weightedInputDerivative(
		BatchInputType const & patterns,
		BatchOutputType const & outputs,
		BatchOutputType const & coefficients,
		State const& state,
		BatchInputType& derivative
	)const{
		SIZE_CHECK(coefficients.size1() == patterns.size1());
		SIZE_CHECK(coefficients.size2() == patterns.size2());

		// reuse the mask sampled in the forward pass: dropped units receive zero gradient
		MatrixType const& mask = state.toState<InternalState>().mask;
		derivative.resize(coefficients.size1(),coefficients.size2());
		noalias(derivative) = coefficients * mask;
	}

	/// From ISerializable
	void read(InArchive& archive){ archive >> m_dropoutProbability;}
	/// From ISerializable
	void write(OutArchive& archive) const{ archive << m_dropoutProbability;}
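
	// Serialization sketch (illustrative; assumes Shark's OutArchive is a Boost polymorphic archive):
	// only the dropout probability is persisted, so shape and RNG must be supplied again on construction.
	//
	//   std::ofstream ofs("dropout.model");              // hypothetical file name
	//   boost::archive::polymorphic_text_oarchive oa(ofs);
	//   layer.write(oa);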
};


}

#endif