BoxBasedShrinkingStrategy.h
/*!
 *
 *
 * \brief Shrinking strategy based on box constraints
 *
 *
 *
 * \author T. Glasmachers, O. Krause
 * \date 2017
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_ALGORITHMS_QP_BOXBASEDSHRINKINGSTRATEGY_H
#define SHARK_ALGORITHMS_QP_BOXBASEDSHRINKINGSTRATEGY_H

namespace shark{


/// \brief Takes a box constrained QP-type problem and implements shrinking on it
///
/// Given a QP-type problem, this class implements a strategy to efficiently shrink
/// and unshrink the problem. A variable with an active box constraint is
/// shrunk (removed) from the problem when there is currently no possible step
/// using that variable that leads to a gain. This test is problem dependent, as
/// it might involve additional constraints.
/// Therefore, every Problem must implement the method testShrinkVariable.
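///
/// \par Example
/// A minimal usage sketch. MyBoxProblem is a hypothetical problem type; any
/// QP-type problem providing testShrinkVariable, flipCoordinates, updateSMO
/// and the accessors used below can be wrapped:
/// \code
/// MyBoxProblem problem(Q, linearPart, lower, upper);  // hypothetical constructor
/// BoxBasedShrinkingStrategy<MyBoxProblem> shrinkingProblem(problem);
/// // a decomposition solver then works on shrinkingProblem and may call
/// // shrink(epsilon) after its updateSMO steps to drop bounded variables
/// \endcode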
template<class Problem>
struct BoxBasedShrinkingStrategy : public Problem{
public:
	typedef typename Problem::QpFloatType QpFloatType;
	typedef typename Problem::MatrixType MatrixType;
	typedef typename Problem::PreferedSelectionStrategy PreferedSelectionStrategy;

	template<class ProblemT>
	BoxBasedShrinkingStrategy(ProblemT& problem, bool shrink=true)
	: Problem(problem)
	, m_isUnshrinked(false)
	, m_shrink(shrink)
	, m_gradientEdge(problem.linear)
	{ }

	using Problem::alpha;
	using Problem::gradient;
	using Problem::linear;
	using Problem::active;
	using Problem::dimensions;
	using Problem::quadratic;
	using Problem::isLowerBound;
	using Problem::isUpperBound;
	using Problem::boxMin;
	using Problem::boxMax;
	using Problem::setInitialSolution;

	virtual void updateSMO(std::size_t i, std::size_t j){
		double aiOld = alpha(i);
		double ajOld = alpha(j);
		//call base class to do the step
		Problem::updateSMO(i,j);
		double ai = alpha(i);
		double aj = alpha(j);

		// update the gradient edge data structure to keep up with changes
		updateGradientEdge(i,aiOld,ai);
		updateGradientEdge(j,ajOld,aj);
	}

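	/// \brief Shrink the problem by removing variables whose box constraints are active.
	///
	/// Returns false if shrinking is disabled, true otherwise. When the remaining
	/// KKT violation on the active set comes close to the target accuracy, the
	/// problem is first unshrunk so that the shrinking test is applied to all variables.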
	bool shrink(double epsilon){
		if(!m_shrink) return false;

		double largestUp;
		double smallestDown;
		getMaxKKTViolations(largestUp,smallestDown,active());

		// check whether unshrinking is necessary at this accuracy level
		if (!m_isUnshrinked && (largestUp - smallestDown < 10.0 * epsilon))
		{
			unshrink();
			//recalculate maximum KKT violation for immediate re-shrinking
			getMaxKKTViolations(largestUp, smallestDown, dimensions());
		}
		//shrink
		auto& active = this->m_active;
		for (std::size_t a = active; a > 0; --a){
			std::size_t i = a-1;
			if(Problem::testShrinkVariable(i, largestUp, smallestDown)){
				Problem::flipCoordinates(i,active-1);
				std::swap( m_gradientEdge[i], m_gradientEdge[active-1]);
				--active;
			}
		}
		return true;
	}

	///\brief Unshrink the problem
	void unshrink(){
		if (active() == dimensions()) return;
		m_isUnshrinked = true;

		// Recompute the gradient of the whole problem.
		// We assume here that all shrunk variables are at the border of the problem.
		// The gradient of the active components is already correct, and
		// for the whole variable set we store the gradient contribution of the
		// variables which are at the borders of the box.
		// Thus we only have to recompute the part of the gradient which is
		// based on variables in the active set which are not at the border.
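		// In formulas:
		//   gradient(a)     = linear(a) - sum_i alpha(i) * Q(i,a)
		//   gradientEdge(a) = linear(a) - sum_{i at a bound} alpha(i) * Q(i,a)
		// hence, for a shrunk variable a (all shrunk variables are at a bound):
		//   gradient(a) = gradientEdge(a) - sum_{i active and free} alpha(i) * Q(i,a)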
		for (std::size_t a = active(); a < dimensions(); a++)
			this->m_gradient(a) = m_gradientEdge(a);

		for (std::size_t i = 0; i < active(); i++){
			//check whether alpha value is already stored in gradientEdge
			if (isUpperBound(i) || isLowerBound(i)) continue;

			QpFloatType* q = quadratic().row(i, 0, dimensions());
			for (std::size_t a = active(); a < dimensions(); a++)
				this->m_gradient(a) -= alpha(i) * q[a];
		}

		this->m_active = dimensions();
	}

	void setShrinking(bool shrinking){
		m_shrink = shrinking;
		if(!shrinking)
			unshrink();
	}

	/// \brief Define the initial solution for the iterative solver.
	///
	/// This method can be used to warm-start the solver. It requires a
	/// feasible solution (alpha) and the corresponding gradient of the
	/// dual objective function.
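	///
	/// In addition, gradientEdge must contain the gradient contribution of the
	/// variables sitting at their bounds (the quantity maintained internally in
	/// m_gradientEdge), given in the original, unpermuted variable order.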
	void setInitialSolution(RealVector const& alpha, RealVector const& gradient, RealVector const& gradientEdge){
		Problem::setInitialSolution(alpha,gradient);
		std::size_t n = dimensions();
		SIZE_CHECK(gradientEdge.size() == n);
		for (std::size_t i=0; i<n; i++){
			std::size_t j = this->permutation(i);
			m_gradientEdge(i) = gradientEdge(j);
		}
	}

	/// \brief Define the initial solution for the iterative solver.
	///
	/// This method can be used to warm-start the solver. It requires a
	/// feasible solution (alpha), for which it computes the gradient of
	/// the dual objective function. Note that this is a quadratic time
	/// operation in the number of non-zero coefficients.
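	///
	/// Concretely, the gradient is computed as gradient = linear - Q * alpha,
	/// while the edge gradient accumulates only the terms of variables whose
	/// alpha value lies exactly on the lower or upper box bound.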
	void setInitialSolution(RealVector const& alpha){
		std::size_t n = dimensions();
		SIZE_CHECK(alpha.size() == n);
		RealVector gradient = this->m_problem.linear;
		RealVector gradientEdge = this->m_problem.linear;
		blas::vector<QpFloatType> q(n);
		std::vector<std::size_t> inverse(n);
		for (std::size_t i=0; i<n; i++)
			inverse[this->permutation(i)] = i;
		for (std::size_t i=0; i<n; i++)
		{
			double a = alpha(i);
			if (a == 0.0) continue;
			this->m_problem.quadratic.row(i, 0, n, q.raw_storage().values);
			noalias(gradient) -= a * q;
			std::size_t j = inverse[i];
			if (a == boxMin(j) || a == boxMax(j)) gradientEdge -= a * q;
		}
		setInitialSolution(alpha, gradient, gradientEdge);
	}

	///\brief Remove the i-th example from the problem.
	void deactivateVariable(std::size_t i){
		SIZE_CHECK(i < dimensions());
		double alphaOld = alpha(i);
		//let the base class deactivate the variable (this may change its alpha value)
		Problem::deactivateVariable(i);
		//keep the edge gradient consistent with the changed alpha value
		updateGradientEdge(i, alphaOld, alpha(i));
	}

	/// \brief Scales all box constraints by a constant factor and adapts the solution by scaling it by the same factor.
	void scaleBoxConstraints(double factor, double variableScalingFactor){
		Problem::scaleBoxConstraints(factor,variableScalingFactor);
		if(factor != variableScalingFactor){
			for(std::size_t i = 0; i != dimensions(); ++i){
				m_gradientEdge(i) = linear(i);
			}
		}
		else{
			for(std::size_t i = 0; i != dimensions(); ++i){
				m_gradientEdge(i) -= linear(i);
				m_gradientEdge(i) *= factor;
				m_gradientEdge(i) += linear(i);
			}
		}
	}

	/// \brief Adapts the linear part of the problem and updates the internal data structures accordingly.
	virtual void setLinear(std::size_t i, double newValue){
		m_gradientEdge(i) -= linear(i);
		m_gradientEdge(i) += newValue;
		Problem::setLinear(i,newValue);
	}

	///\brief Swap indices (i,j)
	void flipCoordinates(std::size_t i,std::size_t j){
		Problem::flipCoordinates(i,j);
		std::swap( m_gradientEdge[i], m_gradientEdge[j]);
	}

private:
	///\brief Updates the edge-part of the gradient when an alpha value was changed
	///
	/// This function overwrites the base class method and is called whenever
	/// an alpha value is changed.
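	///
	/// It maintains the invariant
	///   m_gradientEdge(a) = linear(a) - sum_{i at a bound} alpha(i) * Q(i,a),
	/// i.e. the part of the gradient contributed by the bounded variables.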
	void updateGradientEdge(std::size_t i, double oldAlpha, double newAlpha){
		SIZE_CHECK(i < active());
		if(!m_shrink || oldAlpha==newAlpha) return;
		bool isInsideOld = oldAlpha > boxMin(i) && oldAlpha < boxMax(i);
		bool isInsideNew = newAlpha > boxMin(i) && newAlpha < boxMax(i);
		//check whether the variable is relevant at all, that is, the old and new alpha values are inside
		//the box, or the old alpha is 0 and the new one is inside
		if((oldAlpha == 0 || isInsideOld) && isInsideNew)
			return;

		//compute the change to the gradient
		double diff = 0;
		if(!isInsideOld)//the value was on a border, so remove its old influence on the gradient
			diff -= oldAlpha;
		if(!isInsideNew){//variable entered the boundary or changed from one boundary to another
			diff += newAlpha;
		}

		QpFloatType* q = quadratic().row(i, 0, dimensions());
		for(std::size_t a = 0; a != dimensions(); ++a){
			m_gradientEdge(a) -= diff*q[a];
		}
	}

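	///\brief Computes the largest gradient value over the variables not at the upper bound
	/// and the smallest gradient value over the variables not at the lower bound,
	/// among the first maxIndex variables.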
	void getMaxKKTViolations(double& largestUp, double& smallestDown, std::size_t maxIndex){
		largestUp = -1e100;
		smallestDown = 1e100;
		for (std::size_t a = 0; a < maxIndex; a++){
			if (!isLowerBound(a))
				smallestDown = std::min(smallestDown,gradient(a));
			if (!isUpperBound(a))
				largestUp = std::max(largestUp,gradient(a));
		}
	}

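	///\brief True as soon as the problem has been unshrunk once; prevents repeated unshrinking in shrink().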
	bool m_isUnshrinked;

	///\brief True if shrinking is to be used.
	bool m_shrink;

	///\brief Stores, for every variable, the gradient contribution of the variables whose alpha values are at the lower or upper box bound.
	RealVector m_gradientEdge;
};

}
#endif