Classifier.h
Go to the documentation of this file.
//===========================================================================
/*!
 *
 *
 * \brief Model for conversion of real valued output to class labels
 *
 * \author T. Glasmachers, O.Krause
 * \date 2017
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_CLASSIFIER_H
#define SHARK_MODELS_CLASSIFIER_H

#include <shark/Models/AbstractModel.h>
namespace shark {

///
/// \brief Conversion of real-valued or vector valued outputs to class labels
///
/// \par
/// The Classifier is a model converting the
/// real-valued vector output of an underlying decision function to a
/// class label 0, ..., d-1 by means of an arg-max operation.
/// The class returns the argument of the maximal
/// input component as its output. This conversion is adjusted to
/// interpret the output of a linear model, a neural network or a support vector
/// machine for multi-category classification.
///
/// In the special case that d is 1, it is assumed that the model can be represented as
/// a 2-dimensional vector with both components having the same value but opposite sign.
/// In consequence, a positive output of the model is interpreted as class 1, a negative one as class 0.
///
/// The underlying decision function is an arbitrary model. It should
/// be default constructible and it can be accessed using decisionFunction().
/// The parameters of the Classifier are the ones of the decision function.
///
/// Optionally the model allows to set bias values which are added on the predicted
/// values of the decision function. Thus adding positive weights on a class makes it
/// more likely to be predicted. In the binary case with a single output, a positive weight
/// makes class one more likely and a negative weight class 0.
///
/// \ingroup models
65template<class Model>
67 typename Model::InputType,
68 unsigned int,
69 typename Model::ParameterVectorType
70>{
71private:
72 typedef typename Model::BatchOutputType ModelBatchOutputType;
73public:
74 typedef Model DecisionFunctionType;
75 typedef typename Model::InputType InputType;
76 typedef unsigned int OutputType;
79 typedef typename Model::ParameterVectorType ParameterVectorType;
80
83 : m_decisionFunction(decisionFunction){}
84
85 std::string name() const
86 { return "Classifier<"+m_decisionFunction.name()+">"; }
87
89 return m_decisionFunction.parameterVector();
90 }
91
92 void setParameterVector(ParameterVectorType const& newParameters){
93 m_decisionFunction.setParameterVector(newParameters);
94 }
95
96 std::size_t numberOfParameters() const{
97 return m_decisionFunction.numberOfParameters();
98 }
99
100 ///\brief Returns the expected shape of the input
102 return m_decisionFunction.inputShape();
103 }
104 ///\brief Returns the shape of the output
105 ///
106 /// For the classifier, Shape is a number representing the number of classes.
108 return m_decisionFunction.outputShape().flatten();
109 }
110
111 RealVector const& bias()const{
112 return m_bias;
113 }
114 RealVector& bias(){
115 return m_bias;
116 }
117
118 /// \brief Return the decision function
119 Model const& decisionFunction()const{
120 return m_decisionFunction;
121 }
122
123 /// \brief Return the decision function
125 return m_decisionFunction;
126 }
127
128 void eval(BatchInputType const& input, BatchOutputType& output)const{
129 SIZE_CHECK(m_bias.empty() || m_decisionFunction.outputShape().numElements() == m_bias.size());
130 ModelBatchOutputType modelResult;
131 m_decisionFunction.eval(input,modelResult);
132 std::size_t batchSize = modelResult.size1();
133 output.resize(batchSize);
134 if(modelResult.size2()== 1){
135 double bias = m_bias.empty()? 0.0 : m_bias(0);
136 for(std::size_t i = 0; i != batchSize; ++i){
137 output(i) = modelResult(i,0) + bias > 0.0;
138 }
139 }
140 else{
141 for(std::size_t i = 0; i != batchSize; ++i){
142 if(m_bias.empty())
143 output(i) = static_cast<unsigned int>(arg_max(row(modelResult,i)));
144 else
145 output(i) = static_cast<unsigned int>(arg_max(row(modelResult,i) + m_bias));
146 }
147 }
148 }
149 void eval(BatchInputType const& input, BatchOutputType& output, State& state)const{
150 eval(input,output);
151 }
152
153 void eval(InputType const & pattern, OutputType& output)const{
154 SIZE_CHECK(m_bias.empty() || m_decisionFunction.outputShape().numElements() == m_bias.size());
155 typename Model::OutputType modelResult;
156 m_decisionFunction.eval(pattern,modelResult);
157 if(m_bias.empty()){
158 if(modelResult.size() == 1){
159 double bias = m_bias.empty()? 0.0 : m_bias(0);
160 output = modelResult(0) + bias > 0.0;
161 }
162 else{
163 if(m_bias.empty())
164 output = static_cast<unsigned int>(arg_max(modelResult));
165 else
166 output = static_cast<unsigned int>(arg_max(modelResult + m_bias));
167 }
168 }
169 }
170
171 /// From ISerializable
172 void read(InArchive& archive){
173 archive >> m_decisionFunction;
174 archive >> m_bias;
175 }
176 /// From ISerializable
177 void write(OutArchive& archive) const{
178 archive << m_decisionFunction;
179 archive << m_bias;
180 }
181
182private:
183 Model m_decisionFunction;
184 RealVector m_bias;
185};

}
#endif