PolynomialKernel.h
//===========================================================================
/*!
 *
 *
 * \brief Polynomial kernel.
 *
 *
 *
 * \author T.Glasmachers
 * \date 2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_KERNELS_POLYNOMIAL_KERNEL_H
#define SHARK_MODELS_KERNELS_POLYNOMIAL_KERNEL_H

#include <shark/Models/Kernels/AbstractKernelFunction.h>

namespace shark {


/// \brief Polynomial kernel.
///
/// \par
/// The polynomial kernel is defined as
/// \f$ \left( \left\langle x_1, x_2 \right\rangle + b \right)^n \f$
/// with degree \f$ n \in \mathbb{N} \f$ and non-negative offset
/// \f$ b \geq 0 \f$. For n = 1 and b = 0 it matches the linear kernel
/// (standard inner product).
/// \ingroup kernels
template<class InputType = RealVector>
class PolynomialKernel : public AbstractKernelFunction<InputType>
{
private:
	typedef AbstractKernelFunction<InputType> base_type;

	struct InternalState: public State{
		RealMatrix base;//stores the offset inner products <x1_i, x2_j> + b
		RealMatrix exponentedProd;//pow(base, m_degree)

		void resize(std::size_t sizeX1, std::size_t sizeX2){
			base.resize(sizeX1, sizeX2);
			exponentedProd.resize(sizeX1, sizeX2);
		}
	};
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::ConstInputReference ConstInputReference;
	typedef typename base_type::ConstBatchInputReference ConstBatchInputReference;

	/// Constructor.
	///
	/// \param degree exponent of the polynomial
	/// \param offset constant added to the standard inner product
	/// \param degree_is_parameter should the degree be a regular model parameter? If yes, the kernel is not differentiable w.r.t. its parameters.
	/// \param unconstrained should the offset internally be represented as the exponential of the externally visible parameter?
	PolynomialKernel( unsigned int degree = 2, double offset = 0.0, bool degree_is_parameter = true, bool unconstrained = false )
	: m_degree( degree ),
	  m_offset( offset ),
	  m_degreeIsParam( degree_is_parameter ),
	  m_unconstrained( unconstrained ) {
		SHARK_RUNTIME_CHECK(degree > 0, "[PolynomialKernel::PolynomialKernel] degree must be positive");
		this->m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
		this->m_features |= base_type::SUPPORTS_VARIABLE_INPUT_SIZE;
		if ( !m_degreeIsParam )
			this->m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "PolynomialKernel"; }

	void setDegree( unsigned int deg ) {
		RANGE_CHECK( deg > 0 );
		SHARK_RUNTIME_CHECK( !m_degreeIsParam, "[PolynomialKernel::setDegree] Please use setParameterVector when the degree is a parameter.");
		m_degree = deg;
	}

	unsigned int degree() const {
		return m_degree;
	}

	RealVector parameterVector() const {
		if ( m_degreeIsParam ) {
			RealVector ret(2);
			ret(0) = m_degree;
			if ( m_unconstrained )
				ret(1) = std::log( m_offset );
			else
				ret(1) = m_offset;
			return ret;
		} else {
			RealVector ret(1);
			if ( m_unconstrained )
				ret(0) = std::log( m_offset );
			else
				ret(0) = m_offset;
			return ret;
		}
	}

	void setParameterVector(RealVector const& newParameters) {
		if ( m_degreeIsParam ) {
			SIZE_CHECK(newParameters.size() == 2);
			SHARK_ASSERT(std::abs((unsigned int)newParameters(0) - newParameters(0)) <= 2*std::numeric_limits<double>::epsilon());
			RANGE_CHECK(newParameters(0) >= 1.0);
			m_degree = (unsigned int)newParameters(0);
			if ( m_unconstrained ) {
				m_offset = std::exp(newParameters(1));
			} else {
				RANGE_CHECK(newParameters(1) >= 0.0);
				m_offset = newParameters(1);
			}
		} else {
			SIZE_CHECK(newParameters.size() == 1);
			if ( m_unconstrained ) {
				m_offset = std::exp(newParameters(0));
			} else {
				RANGE_CHECK(newParameters(0) >= 0.0);
				m_offset = newParameters(0);
			}
		}
	}

	std::size_t numberOfParameters() const {
		if ( m_degreeIsParam )
			return 2;
		else
			return 1;
	}
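	// Illustrative note: the external parameter vector is (degree, offset) when
	// the degree is a model parameter, and just (offset) otherwise; with
	// unconstrained = true the offset entry holds log(b) instead of b, e.g.
	//   PolynomialKernel<> k1(3, 0.5);               // parameterVector() == (3, 0.5)
	//   PolynomialKernel<> k2(3, 0.5, false, true);  // parameterVector() == (log 0.5)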

	///\brief creates the internal state of the kernel
	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new InternalState());
	}

	/////////////////////////EVALUATION//////////////////////////////

	/// \f$ k(x_1, x_2) = \left( \langle x_1, x_2 \rangle + b \right)^n \f$
	double eval(ConstInputReference x1, ConstInputReference x2) const{
		SIZE_CHECK(x1.size() == x2.size());
		double base = inner_prod(x1, x2) + m_offset;
		return std::pow(base, m_degree);
	}

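	/// \brief Batch evaluation: \f$ result_{ij} = \left( \langle x^1_i, x^2_j \rangle + b \right)^n \f$,
	/// computed for all row pairs of the two batches with a single matrix-matrix product.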
	void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result) const {
		SIZE_CHECK(batchX1.size2() == batchX2.size2());
		std::size_t sizeX1 = batchX1.size1();
		std::size_t sizeX2 = batchX2.size1();
		result.resize(sizeX1, sizeX2);

		//calculate the inner product
		noalias(result) = prod(batchX1, trans(batchX2));
		result += m_offset;
		//now do exponentiation
		if(m_degree != 1)
			noalias(result) = pow(result, m_degree);
	}

	void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const{
		SIZE_CHECK(batchX1.size2() == batchX2.size2());

		std::size_t sizeX1 = batchX1.size1();
		std::size_t sizeX2 = batchX2.size1();

		InternalState& s = state.toState<InternalState>();
		s.resize(sizeX1, sizeX2);
		result.resize(sizeX1, sizeX2);

		//calculate the inner product
		noalias(s.base) = prod(batchX1, trans(batchX2));
		s.base += m_offset;

		//now do exponentiation
		if(m_degree != 1)
			noalias(result) = pow(s.base, m_degree);
		else
			noalias(result) = s.base;
		noalias(s.exponentedProd) = result;
	}
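
	// Note: base and exponentedProd are cached in the state so that the derivative
	// functions below can reuse them instead of recomputing the expensive pow.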

	/////////////////////DERIVATIVES////////////////////////////////////

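	/// Weighted derivative of the kernel w.r.t. its offset parameter,
	/// \f$ \frac{\partial k(x_1, x_2)}{\partial b} = n \cdot \left( \langle x_1, x_2 \rangle + b \right)^{n-1} \f$,
	/// accumulated over all pairs of batch elements with the given coefficients.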
	void weightedParameterDerivative(
		ConstBatchInputReference batchX1,
		ConstBatchInputReference batchX2,
		RealMatrix const& coefficients,
		State const& state,
		RealVector& gradient
	) const{

		std::size_t sizeX1 = batchX1.size1();
		std::size_t sizeX2 = batchX2.size1();
		gradient.resize(1);
		InternalState const& s = state.toState<InternalState>();

		//internal checks
		SIZE_CHECK(batchX1.size2() == batchX2.size2());
		SIZE_CHECK(s.base.size1() == sizeX1);
		SIZE_CHECK(s.base.size2() == sizeX2);
		SIZE_CHECK(s.exponentedProd.size1() == sizeX1);
		SIZE_CHECK(s.exponentedProd.size2() == sizeX2);


		//m_degree == 1 is easy
		if(m_degree == 1){//result_ij/base_ij = 1
			gradient(0) = sum(coefficients);
			if ( m_unconstrained )
				gradient(0) *= m_offset;
			return;
		}

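		//general case: dk/db = n * (<x1_i, x2_j> + b)^(n-1). The state caches
		//exponentedProd = base^n, so base^(n-1) is the elementwise quotient
		//exponentedProd / base; safe_div returns 0 where base is 0. With the
		//unconstrained encoding b = exp(p), the chain rule adds a factor db/dp = b.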
		gradient(0) = m_degree * sum(safe_div(s.exponentedProd, s.base, 0.0) * coefficients);
		if ( m_unconstrained )
			gradient(0) *= m_offset;
	}

	/// \f$ k(x_1, x_2) = \left( \langle x_1, x_2 \rangle + b \right)^n \f$
	/// <br/>
	/// \f$ \frac{\partial k(x_1, x_2)}{\partial x_1} = \left[ n \cdot \left( \langle x_1, x_2 \rangle + b \right)^{n-1} \right] \cdot x_2 \f$
	void weightedInputDerivative(
		ConstBatchInputReference batchX1,
		ConstBatchInputReference batchX2,
		RealMatrix const& coefficientsX2,
		State const& state,
		BatchInputType& gradient
	) const{

		std::size_t sizeX1 = batchX1.size1();
		std::size_t sizeX2 = batchX2.size1();
		gradient.resize(sizeX1, batchX1.size2());

		InternalState const& s = state.toState<InternalState>();

		//internal checks
		SIZE_CHECK(batchX1.size2() == batchX2.size2());
		SIZE_CHECK(s.base.size1() == sizeX1);
		SIZE_CHECK(s.base.size2() == sizeX2);
		SIZE_CHECK(s.exponentedProd.size1() == sizeX1);
		SIZE_CHECK(s.exponentedProd.size2() == sizeX2);
		SIZE_CHECK(coefficientsX2.size1() == sizeX1);
		SIZE_CHECK(coefficientsX2.size2() == sizeX2);


		//again m_degree == 1 is easy: the i-th row of the gradient
		//is just sum_j c_ij * x2_j
		if(m_degree == 1){
			noalias(gradient) = prod(coefficientsX2, batchX2);
			return;
		}

		//first calculate weights(i,j) = coefficients(i,j) * exponentedProd(i,j) / base(i,j),
		//taking the usual division by 0 into account via safe_div
		RealMatrix weights = coefficientsX2 * safe_div(s.exponentedProd, s.base, 0.0);
		//the derivative for input i of batch x1 is
		//g_i = sum_j n * weights(i,j) * x2_j,
		//so summing over j is a matrix-matrix product
		noalias(gradient) = m_degree * prod(weights, batchX2);
	}

	void read(InArchive& ar){
		ar >> m_degree;
		ar >> m_offset;
		ar >> m_degreeIsParam;
		ar >> m_unconstrained;
	}

	void write(OutArchive& ar) const{
		ar << m_degree;
		ar << m_offset;
		ar << m_degreeIsParam;
		ar << m_unconstrained;
	}

protected:
	unsigned int m_degree; ///< exponent n
	double m_offset; ///< offset b
	bool m_degreeIsParam; ///< is the degree a model parameter?
	bool m_unconstrained; ///< is the offset internally represented as the exponential of the parameter?
};

typedef PolynomialKernel<> DensePolynomialKernel;
typedef PolynomialKernel<CompressedRealVector> CompressedPolynomialKernel;


}
#endif