104 bool centering =
true
113 if(mep_kernel -> hasFirstParameterDerivative())
118 m_centering = centering;
120 setupY(dataset.
labels(), centering);
125 {
return "KernelTargetAlignment"; }
129 return mep_kernel -> parameterVector();
140 double eval(RealVector
const& input)
const{
143 return -evaluateKernelMatrix().error;
171 KernelMatrixResults results = evaluateKernelMatrix();
174 derivative.resize(parameters);
177 std::size_t startX = 0;
178 for(
int j = 0; j != i; ++j){
181 RealVector threadDerivative(parameters,0.0);
182 RealVector blockDerivative;
183 boost::shared_ptr<State> state = mep_kernel->
createState();
186 std::size_t startY = 0;
187 for(
int j = 0; j <= i; ++j){
188 mep_kernel->
eval(m_data.
batch(i).input,m_data.
batch(j).input,blockK,*state);
191 generateDerivativeWeightBlock(i,j,startX,startY,blockK,results),
195 noalias(threadDerivative) += blockDerivative;
199 noalias(derivative) += threadDerivative;
203 derivative /= m_elements;
204 return -results.error;
210 RealVector m_columnMeanY;
212 std::size_t m_numberOfClasses;
213 std::size_t m_elements;
216 struct KernelMatrixResults{
224 void setupY(Data<unsigned int>
const& labels,
bool centering){
227 std::vector<std::size_t> classCount =
classSizes(labels);
228 m_numberOfClasses = classCount.size();
229 RealVector classMean(m_numberOfClasses);
230 double dm1 = m_numberOfClasses-1.0;
232 for(std::size_t i = 0; i != m_numberOfClasses; ++i){
233 classMean(i) = classCount[i]-(m_elements-classCount[i])/dm1;
234 m_meanY += classCount[i] * classMean(i);
236 classMean /= m_elements;
237 m_meanY /=
sqr(
double(m_elements));
238 m_columnMeanY.resize(m_elements);
239 for(std::size_t i = 0; i != m_elements; ++i){
240 m_columnMeanY(i) = classMean(labels.element(i));
244 m_columnMeanY.clear();
248 void setupY(Data<RealVector>
const& labels,
bool centering){
249 RealVector meanLabel =
mean(labels);
250 m_columnMeanY.resize(m_elements);
251 for(std::size_t i = 0; i != m_elements; ++i){
252 m_columnMeanY(i) = inner_prod(labels.element(i),meanLabel);
254 m_meanY=inner_prod(meanLabel,meanLabel);
257 m_columnMeanY.clear();
260 void computeBlockY(UIntVector
const& labelsi,UIntVector
const& labelsj, RealMatrix& blockY)
const{
261 std::size_t blockSize1 = labelsi.size();
262 std::size_t blockSize2 = labelsj.size();
263 double dm1 = m_numberOfClasses-1.0;
264 for(std::size_t k = 0; k != blockSize1; ++k){
265 for(std::size_t l = 0; l != blockSize2; ++l){
266 if( labelsi(k) == labelsj(l))
269 blockY(k,l) = -1.0/dm1;
274 void computeBlockY(RealMatrix
const& labelsi,RealMatrix
const& labelsj, RealMatrix& blockY)
const{
275 noalias(blockY) = labelsi % trans(labelsj);
279 double updateYK(UIntVector
const& labelsi,UIntVector
const& labelsj, RealMatrix
const& block)
const{
280 std::size_t blockSize1 = labelsi.size();
281 std::size_t blockSize2 = labelsj.size();
284 double dm1 = m_numberOfClasses-1.0;
285 for(std::size_t k = 0; k != blockSize1; ++k){
286 for(std::size_t l = 0; l != blockSize2; ++l){
287 if(labelsi(k) == labelsj(l))
288 result += block(k,l);
290 result -= block(k,l)/dm1;
297 double updateYK(RealMatrix
const& labelsi,RealMatrix
const& labelsj, RealMatrix
const& block)
const{
298 RealMatrix Y(labelsi.size1(), labelsj.size1());
299 computeBlockY(labelsi,labelsj,Y);
300 return sum(Y * block);
305 RealMatrix generateDerivativeWeightBlock(
306 std::size_t i, std::size_t j,
307 std::size_t start1, std::size_t start2,
308 RealMatrix
const& blockK,
309 KernelMatrixResults
const& matrixStatistics
314 double KcKc = matrixStatistics.KcKc;
315 double YcKc = matrixStatistics.YcKc;
316 double meanK = matrixStatistics.meanK;
317 RealMatrix blockW(blockSize1,blockSize2);
320 computeBlockY(m_data.
batch(i).label,m_data.
batch(j).label,blockW);
327 blockW-=repeat(subrange(KcKc*m_columnMeanY - YcKc*matrixStatistics.k,start2,start2+blockSize2),blockSize1);
328 blockW-=trans(repeat(subrange(KcKc*m_columnMeanY - YcKc*matrixStatistics.k,start1,start1+blockSize1),blockSize2));
330 blockW+= KcKc*m_meanY-YcKc*meanK;
331 blockW /= KcKc*std::sqrt(KcKc);
347 KernelMatrixResults evaluateKernelMatrix()
const{
356 RealVector k(m_elements,0.0);
358 std::size_t startRow = 0;
359 for(
int j = 0; j != i; ++j){
365 RealVector threadk(m_elements,0.0);
366 std::size_t startColumn = 0;
367 for(
int j = 0; j <= i; ++j){
369 RealMatrix blockK = (*mep_kernel)(m_data.
batch(i).input,m_data.
batch(j).input);
371 threadKK += frobenius_prod(blockK,blockK);
372 subrange(threadk,startColumn,startColumn+columnSize)+=sum(as_columns(blockK));
373 threadYK += updateYK(m_data.
batch(i).label,m_data.
batch(j).label,blockK);
376 threadKK += 2.0 * frobenius_prod(blockK,blockK);
377 subrange(threadk,startColumn,startColumn+columnSize)+=sum(as_columns(blockK));
378 subrange(threadk,startRow,startRow+rowSize)+=sum(as_rows(blockK));
379 threadYK += 2.0 * updateYK(m_data.
batch(i).label,m_data.
batch(j).label,blockK);
381 startColumn+=columnSize;
386 noalias(k) +=threadk;
390 double n = (double)m_elements;
392 double meanK = sum(k)/n;
398 double YcKc = YK-2.0*n*inner_prod(k,m_columnMeanY)+n2*m_meanY*meanK;
399 double KcKc = KK - 2.0*n*inner_prod(k,k)+n2*
sqr(meanK);
401 KernelMatrixResults results;
405 results.meanK = meanK;
406 results.error = YcKc/std::sqrt(KcKc)/n;