CSvmMaxLikelihoodMS.cpp
// headers needed by this example (paths assume the standard Shark source layout)
#include <shark/Algorithms/Trainers/NormalizeComponentsUnitVariance.h>
#include <shark/Data/DataDistribution.h>
#include <shark/Models/Kernels/ArdKernel.h>
#include <shark/Algorithms/Trainers/CSvmTrainer.h>
#include <shark/ObjectiveFunctions/Loss/ZeroOneLoss.h>
#include <shark/ObjectiveFunctions/SvmLogisticInterpretation.h>
#include <shark/Algorithms/GradientDescent/Rprop.h>
#include <shark/Data/Statistics.h>
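//
// This tutorial example demonstrates gradient-based SVM model selection:
// the regularization parameter C and the ARD kernel weights are tuned
// jointly by minimizing the (differentiable) negative cross-validation
// log-likelihood computed by SvmLogisticInterpretation.
//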
using namespace std;
using namespace shark;


// define the basic dimensionality of the problem
unsigned int useful_dim = 5;
unsigned int noise_dim = 5;
unsigned int total_dim = useful_dim + noise_dim;

RealVector run_one_trial( bool verbose ) {

    // set up the classification problem from a DataDistribution
    PamiToy problem( useful_dim, noise_dim );

    // construct training and test sets from the problem distribution
    unsigned int train_size = 500;
    unsigned int test_size = 5000;
    ClassificationDataset train = problem.generateDataset( train_size );
    ClassificationDataset test = problem.generateDataset( test_size );

    // normalize data as usual
    Normalizer<> normalizer;
    NormalizeComponentsUnitVariance<> normalizationTrainer(false);
    normalizationTrainer.train( normalizer, train.inputs() );
    train = transformInputs( train, normalizer );
    test = transformInputs( test, normalizer );

    // set up the ArdKernel
    DenseARDKernel kernel( total_dim, 0.1 ); //for now with arbitrary value for gamma (gets properly initialized later)
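    // (The ARD kernel assigns one bandwidth gamma(i) per input dimension;
    // internally, parameter i is encoded as p(i) with gamma(i) = p(i)*p(i),
    // which is why gammas are recovered further below by squaring the
    // entries of kernel.parameterVector().)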

    // set up partitions for cross-validation
    unsigned int num_folds = 5;
    CVFolds<ClassificationDataset> cv_folds = createCVIID( train, num_folds );

    // set up the learning machine
    bool log_enc_c = true; //use log encoding for the regularization parameter C
    QpStoppingCondition stop(1e-12); //use a very conservative stopping criterion for the individual SVM runs
    SvmLogisticInterpretation<> mlms( cv_folds, &kernel, log_enc_c, &stop ); //the main class for this tutorial
    //SvmLogisticInterpretation<> mlms( cv_folds, &kernel, log_enc_c ); //also possible without stopping criterion
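    // (In essence, SvmLogisticInterpretation trains one SVM per fold, maps the
    // SVM outputs on the held-out data through a maximum-likelihood fitted
    // logistic link, and returns the negative cross-validation log-likelihood
    // (NCLL). Unlike the plain CV error rate, this criterion is differentiable
    // in the hyperparameters and can thus be optimized by gradient descent.)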

    // set up a starting point for the optimization process
    RealVector start( total_dim+1 );
    if ( log_enc_c ) start( total_dim ) = 0.0; else start( total_dim ) = 1.0; //start at C = 1.0
    for ( unsigned int k=0; k<total_dim; k++ )
        start(k) = 0.5 / total_dim;
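    // (All kernel weights start out equal, so the initial kernel treats every
    // input dimension alike; the optimizer is then free to shrink the weights
    // of the noise dimensions.)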

    // for illustration purposes, we also evaluate the model selection criterion once at the starting point
    double start_value = mlms.eval( start );

    if ( verbose ) {
        std::cout << "Value of model selection criterion at starting point: " << start_value << std::endl << std::endl;
        std::cout << " -------------------------------------------------------------------------------- " << std::endl;
        std::cout << " ----------- Beginning gradient-based optimization of MLMS criterion ------------ " << std::endl;
        std::cout << " -------------------------------------------------------------------------------- " << std::endl << std::endl;
    }

    // set up the optimizer
    Rprop<> rprop;
    double stepsize = 0.1;
    double stop_delta = 1e-3;
    mlms.init();
    rprop.init( mlms, start, stepsize );
    unsigned int its = 50;
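    // (Rprop maintains an individual step size per coordinate, adapted from
    // the signs of successive partial derivatives; maxDelta() below returns
    // the largest current step size, which serves as a simple convergence
    // measure.)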

    // start the optimization loop
    for (unsigned int i=0; i<its; i++) {
        rprop.step( mlms );
        if ( verbose )
            std::cout << "iteration " << i << ": current NCLL = " << rprop.solution().value << " at parameter: " << rprop.solution().point << std::endl;
        if ( rprop.maxDelta() < stop_delta ) {
            if ( verbose ) std::cout << " Rprop quit because of small progress " << std::endl;
            break;
        }
    }

    if ( verbose ) {
        std::cout << std::endl;
        std::cout << " -------------------------------------------------------------------------------- " << std::endl;
        std::cout << " ----------- Done with gradient-based optimization of MLMS criterion ------------ " << std::endl;
        std::cout << " -------------------------------------------------------------------------------- " << std::endl << std::endl;
    }
    if ( verbose ) std::cout << std::endl << std::endl << " EVALUATION of hyperparameters found:" << std::endl << std::endl << std::endl;

    double C_reg; //will hold the regularization parameter
    double test_error_v1, train_error_v1; //will hold the errors determined via method 1
    double test_error_v2, train_error_v2; //will hold the errors determined via method 2

    // BEGIN POSSIBILITY ONE OF HYPERPARAMETER COPY
    if ( verbose ) std::cout << std::endl << " Possibility 1: copy kernel parameters via eval() and C by hand..." << std::endl << std::endl;

    // copy final parameters, variant one
    double end_value = mlms.eval( rprop.solution().point ); //this also copies the most recent parameters from rprop into the kernel
    C_reg = ( log_enc_c ? exp( rprop.solution().point(total_dim) ) : rprop.solution().point(total_dim) ); //ATTENTION: mind the encoding
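    // (With log encoding, the optimizer searches over ln(C), which keeps C
    // positive during unconstrained optimization; hence the exp() above
    // decodes the true value of C.)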

    if ( verbose ) {
        std::cout << " Value of model selection criterion at final point: " << end_value << std::endl;
        std::cout << " Done optimizing the SVM hyperparameters. The final parameters (true/unencoded) are:" << std::endl << std::endl;
        std::cout << " C = " << C_reg << std::endl;
        for ( unsigned int i=0; i<total_dim; i++ )
            std::cout << " gamma(" << i << ") = " << kernel.parameterVector()(i)*kernel.parameterVector()(i) << std::endl;
        std::cout << std::endl << " (as also given by kernel.gammaVector() : " << kernel.gammaVector() << " ) " << std::endl;
    }

    // construct and train the final learner
    KernelClassifier<RealVector> svm_v1; //the final SVM classifier
    CSvmTrainer<RealVector> trainer_v1( &kernel, C_reg, true, log_enc_c ); //encoding does not really matter here because it does not affect the constructor
    if ( verbose ) {
        std::cout << std::endl << std::endl << " Used mlms.eval(...) to copy kernel.parameterVector() " << kernel.parameterVector() << std::endl;
        std::cout << " into trainer_v1.parameterVector() " << trainer_v1.parameterVector() << std::endl;
        std::cout << " , where C (the last parameter) was set manually to " << trainer_v1.C() << std::endl << std::endl << std::endl;
    }
    trainer_v1.train( svm_v1, train ); //the kernel has the right parameters, and we copied C, so we are good to go

    // evaluate the final trained classifier on training and test set
    ZeroOneLoss<unsigned int> loss_v1; //0-1 loss for classification
    Data<unsigned int> output_v1; //predicted class labels
    output_v1 = svm_v1( train.inputs() );
    train_error_v1 = loss_v1.eval( train.labels(), output_v1 );
    output_v1 = svm_v1( test.inputs() );
    test_error_v1 = loss_v1.eval( test.labels(), output_v1 );
    if ( verbose ) {
        std::cout << " training error via possibility 1: " << train_error_v1 << std::endl;
        std::cout << "     test error via possibility 1: " << test_error_v1 << std::endl << std::endl << std::endl;
    }
    // END POSSIBILITY ONE OF HYPERPARAMETER COPY

    // BEGIN POSSIBILITY TWO OF HYPERPARAMETER COPY
    if ( verbose ) std::cout << std::endl << " Possibility 2: copy best parameters via solution().point..." << std::endl << std::endl;

    KernelClassifier<RealVector> svm_v2; //the final SVM classifier
    CSvmTrainer<RealVector> trainer_v2( &kernel, 0.1, true, log_enc_c ); //ATTENTION: must be constructed with the same log-encoding preference
    trainer_v2.setParameterVector( rprop.solution().point ); //copy the best hyperparameters into the SVM trainer
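    // (This single call suffices because the trainer's parameterVector() is
    // the concatenation of the kernel parameters and the encoded
    // regularization parameter C, exactly the layout of the optimizer's
    // search point.)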

    if ( verbose ) {
        std::cout << " Copied rprop.solution().point = " << rprop.solution().point << std::endl;
        std::cout << " into trainer_v2.parameterVector(), now = " << trainer_v2.parameterVector() << std::endl << std::endl << std::endl;
    }

    trainer_v2.train( svm_v2, train );

    // evaluate the final trained classifier on training and test set
    ZeroOneLoss<unsigned int> loss_v2; //0-1 loss for classification
    Data<unsigned int> output_v2; //predicted class labels
    output_v2 = svm_v2( train.inputs() );
    train_error_v2 = loss_v2.eval( train.labels(), output_v2 );
    output_v2 = svm_v2( test.inputs() );
    test_error_v2 = loss_v2.eval( test.labels(), output_v2 );
    if ( verbose ) {
        std::cout << " training error via possibility 2: " << train_error_v2 << std::endl;
        std::cout << "     test error via possibility 2: " << test_error_v2 << std::endl << std::endl << std::endl;
        std::cout << std::endl << "That's all folks - we are done!" << std::endl;
    }
    // END POSSIBILITY TWO OF HYPERPARAMETER COPY

    // copy the best parameters, as well as the performance values, into the averaging vector:
    RealVector final_params(total_dim+3);
    final_params(total_dim) = C_reg;
    for ( unsigned int i=0; i<total_dim; i++ )
        final_params(i) = rprop.solution().point(i)*rprop.solution().point(i); //square to decode the encoded kernel weights into gammas
    final_params(total_dim+1) = train_error_v1;
    final_params(total_dim+2) = test_error_v1;
    return final_params;

}


int main() {

    // run one trial with output
    run_one_trial( true );
    std::cout << "\nNOW REPEAT WITH 100 TRIALS: we do the exact same thing multiple times in a row, and note the average kernel weights. Please wait." << std::endl << std::endl;

    // run several trials without output, and average the results
    unsigned int num_trials = 100;
    Data<RealVector> many_results( num_trials, RealVector(total_dim+3) ); //each row holds the resulting hyperparameters of one run
    for ( unsigned int i=0; i<num_trials; i++ ) {
        many_results.element(i) = run_one_trial(false);
        std::cout << "." << std::flush;
    }
    std::cout << "\n" << std::endl;

    RealVector overall_mean, overall_variance;
    meanvar( many_results, overall_mean, overall_variance );
    for ( unsigned int i=0; i<total_dim+1; i++ ) {
        std::cout << "avg-param(" << i << ") = " << overall_mean(i) << " +- " << overall_variance(i) << std::endl;
    }
    std::cout << std::endl << "avg-error-train = " << overall_mean(total_dim+1) << " +- " << overall_variance(total_dim+1) << std::endl;
    std::cout << "avg-error-test = " << overall_mean(total_dim+2) << " +- " << overall_variance(total_dim+2) << std::endl;

}
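
// A sketch of how this example is typically built against an installed Shark
// library (assumed setup; exact compiler and linker flags depend on your
// installation and Boost configuration):
//
//   g++ -O2 CSvmMaxLikelihoodMS.cpp -o CSvmMaxLikelihoodMS -lshark -lboost_serialization
//   ./CSvmMaxLikelihoodMS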