NormalizeKernelUnitVariance.h
//===========================================================================
/*!
 *
 * \brief Determine the scaling factor of a ScaledKernel so that it has unit variance in feature space on a given dataset.
 *
 *
 * \author M. Tuma
 * \date 2012
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <https://shark-ml.github.io/Shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ALGORITHMS_TRAINERS_NORMALIZEKERNELUNITVARIANCE_H
#define SHARK_ALGORITHMS_TRAINERS_NORMALIZEKERNELUNITVARIANCE_H

#include <shark/Models/Kernels/ScaledKernel.h>
#include <shark/Algorithms/Trainers/AbstractTrainer.h>

namespace shark {


///
/// \brief Determine the scaling factor of a ScaledKernel so that it has unit variance in feature space on a given dataset.
///
/// \par
/// For example, in the multiple kernel learning setting it can be important that the sub-kernels
/// are normalized to unit variance in feature space. This class computes both the trace and the
/// mean of a kernel matrix, and uses both for the "Multiplicative Kernel Scaling" laid out
/// in "Kloft, Brefeld, Sonnenburg, Zien: l_p-Norm Multiple Kernel Learning. JMLR 12, 2011".
/// Given a ScaledKernel, which itself holds an arbitrary underlying kernel k, we compute the
/// variance of the data in feature space,
/// \f[ \frac{1}{N}\sum_{i=1}^N k(x_i,x_i) - \frac{1}{N^2} \sum_{i,j=1}^N k(x_i,x_j) , \f]
/// and set the kernel's scaling factor to the reciprocal of this quantity.
///
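/// \par
/// A minimal usage sketch (illustrative only: it assumes an already filled
/// UnlabeledData<RealVector> container named data, and uses Shark's dense kernel typedefs):
/// \code
/// DenseRbfKernel baseKernel(0.5);              // underlying kernel k
/// DenseScaledKernel scaledKernel(&baseKernel); // wrapper whose factor will be set
/// NormalizeKernelUnitVariance<> normalizer;
/// normalizer.train(scaledKernel, data);        // computes the variance, sets factor to its reciprocal
/// \endcode
///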
/// \ingroup unsupervised_trainer
template < class InputType = RealVector >
class NormalizeKernelUnitVariance : public AbstractUnsupervisedTrainer<ScaledKernel<InputType> >
{
public:
	/// Constructor
	NormalizeKernelUnitVariance()
	{ }

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "NormalizeKernelUnitVariance"; }

	/// Returns the trace of the kernel Gram matrix computed during training.
	double trace() const {
		return m_matrixTrace;
	}
	/// Returns the sum of all kernel Gram matrix entries computed during training (divide by N*N for the mean).
	double mean() const {
		return m_mean;
	}

	void train( ScaledKernel<InputType>& kernel, UnlabeledData<InputType> const& input )
	{
		SHARK_RUNTIME_CHECK(input.numberOfElements() >= 2, "Input needs to contain at least two points");
		AbstractKernelFunction< InputType > const& k = *kernel.base(); //get direct access to the kernel we want to use.

		// Next compute the trace and the mean of the kernel matrix. This is the heavy lifting:
		// calculate each entry of one triangular half of the kernel matrix exactly once.
		// [MT] This part was taken from the PrecomputedMatrix constructor in QuadraticProgram.h.
		// Blockwise version, with the order of computations optimized for better use of the
		// processor cache. This can save around 10% computation time for fast kernel functions.
		std::size_t N = input.numberOfElements();

		//O.K. tried to make it more efficient (and shorter)
		m_mean = 0.0;
		m_matrixTrace = 0.0;
		for(std::size_t i = 0; i != input.numberOfBatches(); ++i){
			typename Data<InputType>::const_batch_reference batch = input.batch(i);
			//off-diagonal entries; each block occurs twice in the symmetric matrix
			for(std::size_t j = 0; j < i; ++j){
				RealMatrix matrixBlock = k(batch, input.batch(j));
				m_mean += 2*sum(matrixBlock);
			}
			//diagonal block: contributes to both the matrix sum and the trace
			RealMatrix matrixBlock = k(batch, batch);
			m_mean += sum(matrixBlock);
			m_matrixTrace += blas::trace(matrixBlock);
		}
		// variance in feature space: (1/N) sum_i k(x_i,x_i) - (1/N^2) sum_{i,j} k(x_i,x_j)
		double tm = m_matrixTrace/N - m_mean/N/N;
		SHARK_ASSERT( tm > 0 );
		double scaling_factor = 1.0 / tm;
		kernel.setFactor( scaling_factor );
	}

protected:
	double m_mean;        //sum of all kernel matrix entries; stored for external queries, etc.
	double m_matrixTrace; //trace of the kernel matrix; stored for external queries, etc.
};


}
#endif