Classifier.h
Go to the documentation of this file.
//===========================================================================
/*!
 *
 *
 * \brief Model for conversion of real valued output to class labels
 *
 * \author T. Glasmachers, O.Krause
 * \date 2017
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_CLASSIFIER_H
#define SHARK_MODELS_CLASSIFIER_H

#include <shark/Models/AbstractModel.h>
namespace shark {

///
/// \brief Conversion of real-valued or vector valued outputs to class labels
///
/// \par
/// The Classifier is a model converting the
/// real-valued vector output of an underlying decision function to a
/// class label 0, ..., d-1 by means of an arg-max operation.
/// The class returns the argument of the maximal
/// input component as its output. This conversion is adjusted to
/// interpret the output of a linear model, a neural network or a support vector
/// machine for multi-category classification.
///
/// In the special case that d is 1, it is assumed that the model can be represented as
/// a 2-dimensional vector with both components having the same value but opposite sign.
/// In consequence, a positive output of the model is interpreted as class 1, a negative one as class 0.
///
/// The underlying decision function is an arbitrary model. It should
/// be default constructible and it can be accessed using decisionFunction().
/// The parameters of the Classifier are the ones of the decision function.
///
/// Optionally the model allows to set bias values which are added on the predicted
/// values of the decision function. Thus adding positive weights on a class makes it
/// more likely to be predicted. In the binary case with a single output, a positive weight
/// makes class one more likely and a negative weight class 0.
///
/// \ingroup models
65template<class Model>
67 typename Model::InputType,
68 unsigned int,
69 typename Model::ParameterVectorType
70>{
71private:
72 typedef typename Model::BatchOutputType ModelBatchOutputType;
73public:
74 typedef Model DecisionFunctionType;
75 typedef typename Model::InputType InputType;
76 typedef unsigned int OutputType;
79 typedef typename Model::ParameterVectorType ParameterVectorType;
80
83 : m_decisionFunction(decisionFunction){}
84
85 std::string name() const
86 { return "Classifier<"+m_decisionFunction.name()+">"; }
87
89 return m_decisionFunction.parameterVector();
90 }
91
92 void setParameterVector(ParameterVectorType const& newParameters){
93 m_decisionFunction.setParameterVector(newParameters);
94 }
95
96 std::size_t numberOfParameters() const{
97 return m_decisionFunction.numberOfParameters();
98 }
99
100 ///\brief Returns the expected shape of the input
102 return m_decisionFunction.inputShape();
103 }
104 ///\brief Returns the shape of the output
105 ///
106 /// For the classifier, Shape is a number representing the number of classes.
108 return m_decisionFunction.outputShape().flatten();
109 }
110
111 RealVector const& bias()const{
112 return m_bias;
113 }
114 RealVector& bias(){
115 return m_bias;
116 }
117
118 /// \brief Return the decision function
119 Model const& decisionFunction()const{
120 return m_decisionFunction;
121 }
122
123 /// \brief Return the decision function
125 return m_decisionFunction;
126 }
127
128 void eval(BatchInputType const& input, BatchOutputType& output)const{
129 SIZE_CHECK(m_bias.empty() || m_decisionFunction.outputShape().numElements() == m_bias.size());
130 ModelBatchOutputType modelResult;
131 m_decisionFunction.eval(input,modelResult);
132 std::size_t batchSize = modelResult.size1();
133 output.resize(batchSize);
134 if(modelResult.size2()== 1){
135 double bias = m_bias.empty()? 0.0 : m_bias(0);
136 for(std::size_t i = 0; i != batchSize; ++i){
137 output(i) = modelResult(i,0) + bias > 0.0;
138 }
139 }
140 else{
141 for(std::size_t i = 0; i != batchSize; ++i){
142 if(m_bias.empty())
143 output(i) = static_cast<unsigned int>(arg_max(row(modelResult,i)));
144 else
145 output(i) = static_cast<unsigned int>(arg_max(row(modelResult,i) + m_bias));
146 }
147 }
148 }
149 void eval(BatchInputType const& input, BatchOutputType& output, State& state)const{
150 eval(input,output);
151 }
152
153 void eval(InputType const & pattern, OutputType& output)const{
154 SIZE_CHECK(m_bias.empty() || m_decisionFunction.outputShape().numElements() == m_bias.size());
155 typename Model::OutputType modelResult;
156 m_decisionFunction.eval(pattern,modelResult);
157 if(m_bias.empty()){
158 if(modelResult.size() == 1){
159 double bias = m_bias.empty()? 0.0 : m_bias(0);
160 output = modelResult(0) + bias > 0.0;
161 }
162 else{
163 if(m_bias.empty())
164 output = static_cast<unsigned int>(arg_max(modelResult));
165 else
166 output = static_cast<unsigned int>(arg_max(modelResult + m_bias));
167 }
168 }
169 }
170
171 /// From ISerializable
172 void read(InArchive& archive){
173 archive >> m_decisionFunction;
174 archive >> m_bias;
175 }
176 /// From ISerializable
177 void write(OutArchive& archive) const{
178 archive << m_decisionFunction;
179 archive << m_bias;
180 }
181
182private:
183 Model m_decisionFunction;
184 RealVector m_bias;
185};

}
#endif