93 m_layers[index].optimize = opt;
95 bool inputDerivative =
true;
96 bool parameterDerivative =
true;
97 for(std::size_t k = 0; k != m_layers.size(); ++k){
98 auto const& layer = m_layers[m_layers.size() - k -1];
99 if( layer.optimize && (!layer.model->hasFirstParameterDerivative() || !inputDerivative)){
100 parameterDerivative =
false;
102 if( !layer.model->hasFirstInputDerivative()){
103 inputDerivative =
false;
107 if (parameterDerivative){
111 if (inputDerivative){
176 InternalState& s = state.
toState<InternalState>();
178 for(std::size_t i = 0; i != m_layers.size(); ++i){
180 m_layers[i].model->eval(patterns,s.intermediates[i], *s.state[i]);
182 m_layers[i].model->eval(s.intermediates[i-1],s.intermediates[i], *s.state[i]);
184 outputs = s.intermediates.back();
194 InternalState
const& s = state.
toState<InternalState>();
198 std::size_t paramEnd = gradient.size();
199 for(std::size_t k = 0; k != m_layers.size(); ++k){
200 std::size_t i = m_layers.size() - k -1;
203 pInput = &s.intermediates[i-1];
205 swap(inputDerivativeLast,inputDerivative);
207 if(!m_layers[i].optimize || m_layers[i].model->numberOfParameters() == 0){
209 m_layers[i].model->weightedInputDerivative(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], inputDerivative);
213 m_layers[i].model->weightedDerivatives(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative,inputDerivative);
216 m_layers[i].model->weightedParameterDerivative(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative);
218 noalias(subrange(gradient,paramEnd - paramDerivative.size(),paramEnd)) = paramDerivative;
219 paramEnd -= paramDerivative.size();
231 InternalState
const& s = state.
toState<InternalState>();
233 derivatives = coefficients;
234 for(std::size_t k = 0; k != m_layers.size(); ++k){
235 std::size_t i = m_layers.size() - k -1;
239 pInput = &s.intermediates[i-1];
241 swap(derivativeLast,derivatives);
242 m_layers[i].model->weightedInputDerivative(*pInput,s.intermediates[i], derivativeLast, *s.state[i], derivatives);
254 InternalState
const& s = state.
toState<InternalState>();
256 inputDerivative = coefficients;
258 std::size_t paramEnd = gradient.size();
259 for(std::size_t k = 0; k != m_layers.size(); ++k){
260 std::size_t i = m_layers.size() - k -1;
263 pInput = &s.intermediates[i-1];
265 swap(inputDerivativeLast,inputDerivative);
267 if(!m_layers[i].optimize || m_layers[i].model->numberOfParameters() == 0){
268 m_layers[i].model->weightedInputDerivative(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], inputDerivative);
271 m_layers[i].model->weightedDerivatives(*pInput,s.intermediates[i], inputDerivativeLast, *s.state[i], paramDerivative,inputDerivative);
272 noalias(subrange(gradient,paramEnd - paramDerivative.size(),paramEnd)) = paramDerivative;
273 paramEnd -= paramDerivative.size();