pktools  2.6.7
Processing Kernel for geospatial data
pksvmogr.cc
1 /**********************************************************************
2 pksvmogr.cc: classify vector dataset using Support Vector Machine
3 Copyright (C) 2008-2017 Pieter Kempeneers
4 
5 This file is part of pktools
6 
7 pktools is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11 
12 pktools is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with pktools. If not, see <http://www.gnu.org/licenses/>.
19 ***********************************************************************/
20 #include <stdlib.h>
21 #include <vector>
22 #include <map>
23 #include <algorithm>
24 #include "imageclasses/ImgReaderGdal.h"
25 #include "imageclasses/ImgWriterGdal.h"
26 #include "imageclasses/ImgReaderOgr.h"
27 #include "imageclasses/ImgWriterOgr.h"
28 #include "base/Optionpk.h"
29 #include "base/PosValue.h"
30 #include "algorithms/ConfusionMatrix.h"
31 #include "algorithms/svm.h"
32 
33 #ifdef HAVE_CONFIG_H
34 #include <config.h>
35 #endif
36 /******************************************************************************/
namespace svm{
  // SVM formulation types. The numeric values are passed straight into
  // libsvm's svm_parameter::svm_type, so they must match libsvm's codes.
  enum SVM_TYPE {
    C_SVC=0,       // C-support vector classification
    nu_SVC=1,      // nu-support vector classification
    one_class=2,   // one-class SVM (distribution estimation)
    epsilon_SVR=3, // epsilon-support vector regression
    nu_SVR=4       // nu-support vector regression
  };
  // Kernel function types, likewise matching libsvm's kernel_type codes.
  enum KERNEL_TYPE {
    linear=0,
    polynomial=1,
    radial=2,      // radial basis function (RBF)
    sigmoid=3
  };
}
118 
// Allocation helper in the style of the libsvm example code: reserves
// uninitialized storage for n elements of the given type via malloc().
// NOTE(review): callers must free() the result; the svm_problem/x_space
// buffers allocated with this below are intentionally kept alive while
// the trained svm_model is in use (see the libsvm note near the end of
// the training loop).
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))

using namespace std;
122 
123 int main(int argc, char *argv[])
124 {
125  vector<double> priors;
126 
127  //--------------------------- command line options ------------------------------------
128  Optionpk<string> input_opt("i", "input", "input image");
129  Optionpk<string> training_opt("t", "training", "Training vector file. A single vector file contains all training features (must be set as: b0, b1, b2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)");
130  Optionpk<string> tlayer_opt("tln", "tln", "Training layer name(s)");
131  Optionpk<string> label_opt("label", "label", "Attribute name for class label in training vector file.","label");
132  Optionpk<unsigned int> balance_opt("bal", "balance", "Balance the input data to this number of samples for each class", 0);
133  Optionpk<bool> random_opt("random", "random", "Randomize training data for balancing and bagging", true, 2);
134  Optionpk<int> minSize_opt("min", "min", "If number of training pixels is less then min, do not take this class into account (0: consider all classes)", 0);
135  Optionpk<unsigned short> band_opt("b", "band", "Band index (starting from 0, either use band option or use start to end)");
136  Optionpk<unsigned short> bstart_opt("sband", "startband", "Start band sequence number");
137  Optionpk<unsigned short> bend_opt("eband", "endband", "End band sequence number");
138  Optionpk<double> offset_opt("offset", "offset", "Offset value for each spectral band input features: refl[band]=(DN[band]-offset[band])/scale[band]", 0.0);
139  Optionpk<double> scale_opt("scale", "scale", "Scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale min and max in each band to -1.0 and 1.0)", 0.0);
140  Optionpk<double> priors_opt("prior", "prior", "Prior probabilities for each class (e.g., -p 0.3 -p 0.3 -p 0.2 ). Used for input only (ignored for cross validation)", 0.0);
141  Optionpk<string> priorimg_opt("pim", "priorimg", "Prior probability image (multi-band img with band for each class","",2);
142  Optionpk<unsigned short> cv_opt("cv", "cv", "N-fold cross validation mode",0);
143  Optionpk<string> cmformat_opt("cmf","cmf","Format for confusion matrix (ascii or latex)","ascii");
144  Optionpk<std::string> svm_type_opt("svmt", "svmtype", "Type of SVM (C_SVC, nu_SVC,one_class, epsilon_SVR, nu_SVR)","C_SVC");
145  Optionpk<std::string> kernel_type_opt("kt", "kerneltype", "Type of kernel function (linear,polynomial,radial,sigmoid) ","radial");
146  Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "Degree in kernel function",3);
147  Optionpk<float> gamma_opt("g", "gamma", "Gamma in kernel function",1.0);
148  Optionpk<float> coef0_opt("c0", "coef0", "Coef0 in kernel function",0);
149  Optionpk<float> ccost_opt("cc", "ccost", "The parameter C of C_SVC, epsilon_SVR, and nu_SVR",1000);
150  Optionpk<float> nu_opt("nu", "nu", "The parameter nu of nu_SVC, one_class SVM, and nu_SVR",0.5);
151  Optionpk<float> epsilon_loss_opt("eloss", "eloss", "The epsilon in loss function of epsilon_SVR",0.1);
152  Optionpk<int> cache_opt("cache", "cache", "Cache memory size in MB",100);
153  Optionpk<float> epsilon_tol_opt("etol", "etol", "The tolerance of termination criterion",0.001);
154  Optionpk<bool> shrinking_opt("shrink", "shrink", "Whether to use the shrinking heuristics",false);
155  Optionpk<bool> prob_est_opt("pe", "probest", "Whether to train a SVC or SVR model for probability estimates",true,2);
156  // Optionpk<bool> weight_opt("wi", "wi", "Set the parameter C of class i to weight*C, for C_SVC",true);
157  Optionpk<unsigned short> comb_opt("comb", "comb", "How to combine bootstrap aggregation classifiers (0: sum rule, 1: product rule, 2: max rule). Also used to aggregate classes with rc option.",0);
158  Optionpk<unsigned short> bag_opt("bag", "bag", "Number of bootstrap aggregations", 1);
159  Optionpk<int> bagSize_opt("bagsize", "bagsize", "Percentage of features used from available training features for each bootstrap aggregation (one size for all classes, or a different size for each class respectively", 100);
160  Optionpk<string> classBag_opt("cb", "classbag", "Output for each individual bootstrap aggregation");
161  Optionpk<string> mask_opt("m", "mask", "Only classify within specified mask (vector or raster).");
162  Optionpk<unsigned short> nodata_opt("nodata", "nodata", "Nodata value to put where vector dataset is masked as nodata", 0);
163  Optionpk<string> output_opt("o", "output", "Output vector dataset");
164  Optionpk<string> option_opt("co", "co", "Creation option for output file. Multiple options can be specified.");
165  Optionpk<string> colorTable_opt("ct", "ct", "Color table in ASCII format having 5 columns: id R G B ALFA (0: transparent, 255: solid)");
166  Optionpk<string> entropy_opt("entropy", "entropy", "Entropy image (measure for uncertainty of classifier output","",2);
167  Optionpk<string> active_opt("active", "active", "Ogr output for active training sample.","",2);
168  Optionpk<string> ogrformat_opt("f", "f", "Output ogr format for active training sample","SQLite");
169  Optionpk<unsigned int> nactive_opt("na", "nactive", "Number of active training points",1);
170  Optionpk<string> classname_opt("c", "class", "List of class names.");
171  Optionpk<short> classvalue_opt("r", "reclass", "List of class values (use same order as in class opt).");
172  Optionpk<short> verbose_opt("v", "verbose", "Verbose level",0,2);
173 
174  option_opt.setHide(1);
175  band_opt.setHide(1);
176  bstart_opt.setHide(1);
177  bend_opt.setHide(1);
178  balance_opt.setHide(1);
179  minSize_opt.setHide(1);
180  bag_opt.setHide(1);
181  bagSize_opt.setHide(1);
182  comb_opt.setHide(1);
183  classBag_opt.setHide(1);
184  priorimg_opt.setHide(1);
185  offset_opt.setHide(1);
186  scale_opt.setHide(1);
187  svm_type_opt.setHide(1);
188  kernel_type_opt.setHide(1);
189  kernel_degree_opt.setHide(1);
190  coef0_opt.setHide(1);
191  nu_opt.setHide(1);
192  epsilon_loss_opt.setHide(1);
193  cache_opt.setHide(1);
194  epsilon_tol_opt.setHide(1);
195  shrinking_opt.setHide(1);
196  prob_est_opt.setHide(1);
197  entropy_opt.setHide(1);
198  active_opt.setHide(1);
199  nactive_opt.setHide(1);
200  random_opt.setHide(1);
201 
202  verbose_opt.setHide(2);
203 
204  bool doProcess;//stop process when program was invoked with help option (-h --help)
205  try{
206  doProcess=training_opt.retrieveOption(argc,argv);
207  input_opt.retrieveOption(argc,argv);
208  output_opt.retrieveOption(argc,argv);
209  cv_opt.retrieveOption(argc,argv);
210  cmformat_opt.retrieveOption(argc,argv);
211  tlayer_opt.retrieveOption(argc,argv);
212  classname_opt.retrieveOption(argc,argv);
213  classvalue_opt.retrieveOption(argc,argv);
214  ogrformat_opt.retrieveOption(argc,argv);
215  option_opt.retrieveOption(argc,argv);
216  colorTable_opt.retrieveOption(argc,argv);
217  label_opt.retrieveOption(argc,argv);
218  priors_opt.retrieveOption(argc,argv);
219  gamma_opt.retrieveOption(argc,argv);
220  ccost_opt.retrieveOption(argc,argv);
221  mask_opt.retrieveOption(argc,argv);
222  nodata_opt.retrieveOption(argc,argv);
223  // Advanced options
224  band_opt.retrieveOption(argc,argv);
225  bstart_opt.retrieveOption(argc,argv);
226  bend_opt.retrieveOption(argc,argv);
227  balance_opt.retrieveOption(argc,argv);
228  minSize_opt.retrieveOption(argc,argv);
229  bag_opt.retrieveOption(argc,argv);
230  bagSize_opt.retrieveOption(argc,argv);
231  comb_opt.retrieveOption(argc,argv);
232  classBag_opt.retrieveOption(argc,argv);
233  priorimg_opt.retrieveOption(argc,argv);
234  offset_opt.retrieveOption(argc,argv);
235  scale_opt.retrieveOption(argc,argv);
236  svm_type_opt.retrieveOption(argc,argv);
237  kernel_type_opt.retrieveOption(argc,argv);
238  kernel_degree_opt.retrieveOption(argc,argv);
239  coef0_opt.retrieveOption(argc,argv);
240  nu_opt.retrieveOption(argc,argv);
241  epsilon_loss_opt.retrieveOption(argc,argv);
242  cache_opt.retrieveOption(argc,argv);
243  epsilon_tol_opt.retrieveOption(argc,argv);
244  shrinking_opt.retrieveOption(argc,argv);
245  prob_est_opt.retrieveOption(argc,argv);
246  entropy_opt.retrieveOption(argc,argv);
247  active_opt.retrieveOption(argc,argv);
248  nactive_opt.retrieveOption(argc,argv);
249  verbose_opt.retrieveOption(argc,argv);
250  random_opt.retrieveOption(argc,argv);
251  }
252  catch(string predefinedString){
253  std::cout << predefinedString << std::endl;
254  exit(0);
255  }
256  if(!doProcess){
257  cout << endl;
258  cout << "Usage: pksvmogr -t training [-i input -o output] [-cv value]" << endl;
259  cout << endl;
260  std::cout << "short option -h shows basic options only, use long option --help to show all options" << std::endl;
261  exit(0);//help was invoked, stop processing
262  }
263 
264  if(entropy_opt[0]=="")
265  entropy_opt.clear();
266  if(active_opt[0]=="")
267  active_opt.clear();
268  if(priorimg_opt[0]=="")
269  priorimg_opt.clear();
270 
271 
272  std::map<std::string, svm::SVM_TYPE> svmMap;
273 
274  svmMap["C_SVC"]=svm::C_SVC;
275  svmMap["nu_SVC"]=svm::nu_SVC;
276  svmMap["one_class"]=svm::one_class;
277  svmMap["epsilon_SVR"]=svm::epsilon_SVR;
278  svmMap["nu_SVR"]=svm::nu_SVR;
279 
280  std::map<std::string, svm::KERNEL_TYPE> kernelMap;
281 
282  kernelMap["linear"]=svm::linear;
283  kernelMap["polynomial"]=svm::polynomial;
284  kernelMap["radial"]=svm::radial;
285  kernelMap["sigmoid;"]=svm::sigmoid;
286 
287  assert(training_opt.size());
288 
289  if(verbose_opt[0]>=1){
290  if(input_opt.size())
291  std::cout << "input filename: " << input_opt[0] << std::endl;
292  if(mask_opt.size())
293  std::cout << "mask filename: " << mask_opt[0] << std::endl;
294  std::cout << "training vector file: " << std::endl;
295  for(int ifile=0;ifile<training_opt.size();++ifile)
296  std::cout << training_opt[ifile] << std::endl;
297  std::cout << "verbose: " << verbose_opt[0] << std::endl;
298  }
299  unsigned short nbag=(training_opt.size()>1)?training_opt.size():bag_opt[0];
300  if(verbose_opt[0]>=1)
301  std::cout << "number of bootstrap aggregations: " << nbag << std::endl;
302 
303  ImgReaderOgr extentReader;
304  OGRLayer *readLayer;
305 
306  double ulx=0;
307  double uly=0;
308  double lrx=0;
309  double lry=0;
310 
311  bool maskIsVector=false;
312  if(mask_opt.size()){
313  try{
314  extentReader.open(mask_opt[0]);
315  maskIsVector=true;
316  readLayer = extentReader.getDataSource()->GetLayer(0);
317  if(!(extentReader.getExtent(ulx,uly,lrx,lry))){
318  cerr << "Error: could not get extent from " << mask_opt[0] << endl;
319  exit(1);
320  }
321  }
322  catch(string errorString){
323  maskIsVector=false;
324  }
325  }
326 
327  ImgWriterOgr activeWriter;
328  if(active_opt.size()){
329  prob_est_opt[0]=true;
330  ImgReaderOgr trainingReader(training_opt[0]);
331  activeWriter.open(active_opt[0],ogrformat_opt[0]);
332  activeWriter.createLayer(active_opt[0],trainingReader.getProjection(),wkbPoint,NULL);
333  activeWriter.copyFields(trainingReader);
334  }
335  vector<PosValue> activePoints(nactive_opt[0]);
336  for(int iactive=0;iactive<activePoints.size();++iactive){
337  activePoints[iactive].value=1.0;
338  activePoints[iactive].posx=0.0;
339  activePoints[iactive].posy=0.0;
340  }
341 
342  unsigned int totalSamples=0;
343  unsigned int nactive=0;
344  vector<struct svm_model*> svm(nbag);
345  vector<struct svm_parameter> param(nbag);
346 
347  short nclass=0;
348  int nband=0;
349  int startBand=2;//first two bands represent X and Y pos
350 
351  //normalize priors from command line
352  if(priors_opt.size()>1){//priors from argument list
353  priors.resize(priors_opt.size());
354  double normPrior=0;
355  for(short iclass=0;iclass<priors_opt.size();++iclass){
356  priors[iclass]=priors_opt[iclass];
357  normPrior+=priors[iclass];
358  }
359  //normalize
360  for(short iclass=0;iclass<priors_opt.size();++iclass)
361  priors[iclass]/=normPrior;
362  }
363 
364  //convert start and end band options to vector of band indexes
365  try{
366  if(bstart_opt.size()){
367  if(bend_opt.size()!=bstart_opt.size()){
368  string errorstring="Error: options for start and end band indexes must be provided as pairs, missing end band";
369  throw(errorstring);
370  }
371  band_opt.clear();
372  for(int ipair=0;ipair<bstart_opt.size();++ipair){
373  if(bend_opt[ipair]<=bstart_opt[ipair]){
374  string errorstring="Error: index for end band must be smaller then start band";
375  throw(errorstring);
376  }
377  for(int iband=bstart_opt[ipair];iband<=bend_opt[ipair];++iband)
378  band_opt.push_back(iband);
379  }
380  }
381  }
382  catch(string error){
383  cerr << error << std::endl;
384  exit(1);
385  }
386  //sort bands
387  if(band_opt.size())
388  std::sort(band_opt.begin(),band_opt.end());
389 
390  map<string,short> classValueMap;
391  vector<std::string> nameVector;
392  if(classname_opt.size()){
393  assert(classname_opt.size()==classvalue_opt.size());
394  for(int iclass=0;iclass<classname_opt.size();++iclass)
395  classValueMap[classname_opt[iclass]]=classvalue_opt[iclass];
396  }
397 
398  //----------------------------------- Training -------------------------------
400  vector< vector<double> > offset(nbag);
401  vector< vector<double> > scale(nbag);
402  map<string,Vector2d<float> > trainingMap;
403  vector< Vector2d<float> > trainingPixels;//[class][sample][band]
404  vector<string> fields;
405 
406  vector<struct svm_problem> prob(nbag);
407  vector<struct svm_node *> x_space(nbag);
408 
409  for(int ibag=0;ibag<nbag;++ibag){
410  //organize training data
411  if(ibag<training_opt.size()){//if bag contains new training pixels
412  trainingMap.clear();
413  trainingPixels.clear();
414  if(verbose_opt[0]>=1)
415  std::cout << "reading imageVector file " << training_opt[0] << std::endl;
416  try{
417  ImgReaderOgr trainingReaderBag(training_opt[ibag]);
418  if(band_opt.size())
419  //todo: when tlayer_opt is provided, readDataImageOgr does not read any layer
420  totalSamples=trainingReaderBag.readDataImageOgr(trainingMap,fields,band_opt,label_opt[0],tlayer_opt,verbose_opt[0]);
421  else
422  totalSamples=trainingReaderBag.readDataImageOgr(trainingMap,fields,0,0,label_opt[0],tlayer_opt,verbose_opt[0]);
423  if(trainingMap.size()<2){
424  string errorstring="Error: could not read at least two classes from training file, did you provide class labels in training sample (see option label)?";
425  throw(errorstring);
426  }
427  trainingReaderBag.close();
428  }
429  catch(string error){
430  cerr << error << std::endl;
431  exit(1);
432  }
433  catch(std::exception& e){
434  std::cerr << "Error: ";
435  std::cerr << e.what() << std::endl;
436  std::cerr << CPLGetLastErrorMsg() << std::endl;
437  exit(1);
438  }
439  catch(...){
440  cerr << "error caught" << std::endl;
441  exit(1);
442  }
443 
444  //convert map to vector
445  // short iclass=0;
446  if(verbose_opt[0]>1)
447  std::cout << "training pixels: " << std::endl;
448  map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
449  while(mapit!=trainingMap.end()){
450  //delete small classes
451  if((mapit->second).size()<minSize_opt[0]){
452  trainingMap.erase(mapit);
453  continue;
454  }
455  trainingPixels.push_back(mapit->second);
456  if(verbose_opt[0]>1)
457  std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
458  ++mapit;
459  }
460  if(!ibag){
461  nclass=trainingPixels.size();
462  if(classname_opt.size())
463  assert(nclass==classname_opt.size());
464  nband=trainingPixels[0][0].size()-2;//X and Y//trainingPixels[0][0].size();
465  }
466  else{
467  assert(nclass==trainingPixels.size());
468  assert(nband==trainingPixels[0][0].size()-2);
469  }
470 
471  //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
472  //balance training data
473  if(balance_opt[0]>0){
474  while(balance_opt.size()<nclass)
475  balance_opt.push_back(balance_opt.back());
476  if(random_opt[0])
477  srand(time(NULL));
478  totalSamples=0;
479  for(short iclass=0;iclass<nclass;++iclass){
480  if(trainingPixels[iclass].size()>balance_opt[iclass]){
481  while(trainingPixels[iclass].size()>balance_opt[iclass]){
482  int index=rand()%trainingPixels[iclass].size();
483  trainingPixels[iclass].erase(trainingPixels[iclass].begin()+index);
484  }
485  }
486  else{
487  int oldsize=trainingPixels[iclass].size();
488  for(int isample=trainingPixels[iclass].size();isample<balance_opt[iclass];++isample){
489  int index = rand()%oldsize;
490  trainingPixels[iclass].push_back(trainingPixels[iclass][index]);
491  }
492  }
493  totalSamples+=trainingPixels[iclass].size();
494  }
495  }
496 
497  //set scale and offset
498  offset[ibag].resize(nband);
499  scale[ibag].resize(nband);
500  if(offset_opt.size()>1)
501  assert(offset_opt.size()==nband);
502  if(scale_opt.size()>1)
503  assert(scale_opt.size()==nband);
504  for(int iband=0;iband<nband;++iband){
505  if(verbose_opt[0]>=1)
506  std::cout << "scaling for band" << iband << std::endl;
507  offset[ibag][iband]=(offset_opt.size()==1)?offset_opt[0]:offset_opt[iband];
508  scale[ibag][iband]=(scale_opt.size()==1)?scale_opt[0]:scale_opt[iband];
509  //search for min and maximum
510  if(scale[ibag][iband]<=0){
511  float theMin=trainingPixels[0][0][iband+startBand];
512  float theMax=trainingPixels[0][0][iband+startBand];
513  for(short iclass=0;iclass<nclass;++iclass){
514  for(int isample=0;isample<trainingPixels[iclass].size();++isample){
515  if(theMin>trainingPixels[iclass][isample][iband+startBand])
516  theMin=trainingPixels[iclass][isample][iband+startBand];
517  if(theMax<trainingPixels[iclass][isample][iband+startBand])
518  theMax=trainingPixels[iclass][isample][iband+startBand];
519  }
520  }
521  offset[ibag][iband]=theMin+(theMax-theMin)/2.0;
522  scale[ibag][iband]=(theMax-theMin)/2.0;
523  if(verbose_opt[0]>=1){
524  std::cout << "Extreme image values for band " << iband << ": [" << theMin << "," << theMax << "]" << std::endl;
525  std::cout << "Using offset, scale: " << offset[ibag][iband] << ", " << scale[ibag][iband] << std::endl;
526  std::cout << "scaled values for band " << iband << ": [" << (theMin-offset[ibag][iband])/scale[ibag][iband] << "," << (theMax-offset[ibag][iband])/scale[ibag][iband] << "]" << std::endl;
527  }
528  }
529  }
530  }
531  else{//use same offset and scale
532  offset[ibag].resize(nband);
533  scale[ibag].resize(nband);
534  for(int iband=0;iband<nband;++iband){
535  offset[ibag][iband]=offset[0][iband];
536  scale[ibag][iband]=scale[0][iband];
537  }
538  }
539 
540  if(!ibag){
541  if(priors_opt.size()==1){//default: equal priors for each class
542  priors.resize(nclass);
543  for(short iclass=0;iclass<nclass;++iclass)
544  priors[iclass]=1.0/nclass;
545  }
546  assert(priors_opt.size()==1||priors_opt.size()==nclass);
547 
548  //set bagsize for each class if not done already via command line
549  while(bagSize_opt.size()<nclass)
550  bagSize_opt.push_back(bagSize_opt.back());
551 
552  if(verbose_opt[0]>=1){
553  std::cout << "number of bands: " << nband << std::endl;
554  std::cout << "number of classes: " << nclass << std::endl;
555  if(priorimg_opt.empty()){
556  std::cout << "priors:";
557  for(short iclass=0;iclass<nclass;++iclass)
558  std::cout << " " << priors[iclass];
559  std::cout << std::endl;
560  }
561  }
562  map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
563  bool doSort=true;
564  try{
565  while(mapit!=trainingMap.end()){
566  nameVector.push_back(mapit->first);
567  if(classValueMap.size()){
568  //check if name in training is covered by classname_opt (values can not be 0)
569  if(classValueMap[mapit->first]>0){
570  if(cm.getClassIndex(type2string<short>(classValueMap[mapit->first]))<0){
571  cm.pushBackClassName(type2string<short>(classValueMap[mapit->first]),doSort);
572  }
573  }
574  else{
575  std::cerr << "Error: names in classname option are not complete, please check names in training vector and make sure classvalue is > 0" << std::endl;
576  exit(1);
577  }
578  }
579  else
580  cm.pushBackClassName(mapit->first,doSort);
581  ++mapit;
582  }
583  }
584  catch(BadConversion conversionString){
585  std::cerr << "Error: did you provide class pairs names (-c) and integer values (-r) for each class in training vector?" << std::endl;
586  exit(1);
587  }
588  if(classname_opt.empty()){
589  //std::cerr << "Warning: no class name and value pair provided for all " << nclass << " classes, using string2type<int> instead!" << std::endl;
590  for(int iclass=0;iclass<nclass;++iclass){
591  if(verbose_opt[0])
592  std::cout << iclass << " " << cm.getClass(iclass) << " -> " << string2type<short>(cm.getClass(iclass)) << std::endl;
593  classValueMap[cm.getClass(iclass)]=string2type<short>(cm.getClass(iclass));
594  }
595  }
596 
597  // if(priors_opt.size()==nameVector.size()){
598  // std::cerr << "Warning: please check if priors are provided in correct order!!!" << std::endl;
599  // for(int iclass=0;iclass<nameVector.size();++iclass)
600  // std::cerr << nameVector[iclass] << " " << priors_opt[iclass] << std::endl;
601  // }
602  }//if(!ibag)
603 
604  //Calculate features of training set
605  vector< Vector2d<float> > trainingFeatures(nclass);
606  for(short iclass=0;iclass<nclass;++iclass){
607  int nctraining=0;
608  if(verbose_opt[0]>=1)
609  std::cout << "calculating features for class " << iclass << std::endl;
610  if(random_opt[0])
611  srand(time(NULL));
612  nctraining=(bagSize_opt[iclass]<100)? trainingPixels[iclass].size()/100.0*bagSize_opt[iclass] : trainingPixels[iclass].size();//bagSize_opt[iclass] given in % of training size
613  if(nctraining<=0)
614  nctraining=1;
615  assert(nctraining<=trainingPixels[iclass].size());
616  int index=0;
617  if(bagSize_opt[iclass]<100)
618  random_shuffle(trainingPixels[iclass].begin(),trainingPixels[iclass].end());
619  if(verbose_opt[0]>1)
620  std::cout << "nctraining (class " << iclass << "): " << nctraining << std::endl;
621  trainingFeatures[iclass].resize(nctraining);
622  for(int isample=0;isample<nctraining;++isample){
623  //scale pixel values according to scale and offset!!!
624  for(int iband=0;iband<nband;++iband){
625  float value=trainingPixels[iclass][isample][iband+startBand];
626  trainingFeatures[iclass][isample].push_back((value-offset[ibag][iband])/scale[ibag][iband]);
627  }
628  }
629  assert(trainingFeatures[iclass].size()==nctraining);
630  }
631 
632  unsigned int nFeatures=trainingFeatures[0][0].size();
633  if(verbose_opt[0]>=1)
634  std::cout << "number of features: " << nFeatures << std::endl;
635  unsigned int ntraining=0;
636  for(short iclass=0;iclass<nclass;++iclass)
637  ntraining+=trainingFeatures[iclass].size();
638  if(verbose_opt[0]>=1)
639  std::cout << "training size over all classes: " << ntraining << std::endl;
640 
641  prob[ibag].l=ntraining;
642  prob[ibag].y = Malloc(double,prob[ibag].l);
643  prob[ibag].x = Malloc(struct svm_node *,prob[ibag].l);
644  x_space[ibag] = Malloc(struct svm_node,(nFeatures+1)*ntraining);
645  unsigned long int spaceIndex=0;
646  int lIndex=0;
647  for(short iclass=0;iclass<nclass;++iclass){
648  for(int isample=0;isample<trainingFeatures[iclass].size();++isample){
649  prob[ibag].x[lIndex]=&(x_space[ibag][spaceIndex]);
650  for(int ifeature=0;ifeature<nFeatures;++ifeature){
651  x_space[ibag][spaceIndex].index=ifeature+1;
652  x_space[ibag][spaceIndex].value=trainingFeatures[iclass][isample][ifeature];
653  ++spaceIndex;
654  }
655  x_space[ibag][spaceIndex++].index=-1;
656  prob[ibag].y[lIndex]=iclass;
657  ++lIndex;
658  }
659  }
660  assert(lIndex==prob[ibag].l);
661 
662  //set SVM parameters through command line options
663  param[ibag].svm_type = svmMap[svm_type_opt[0]];
664  param[ibag].kernel_type = kernelMap[kernel_type_opt[0]];
665  param[ibag].degree = kernel_degree_opt[0];
666  param[ibag].gamma = (gamma_opt[0]>0)? gamma_opt[0] : 1.0/nFeatures;
667  param[ibag].coef0 = coef0_opt[0];
668  param[ibag].nu = nu_opt[0];
669  param[ibag].cache_size = cache_opt[0];
670  param[ibag].C = ccost_opt[0];
671  param[ibag].eps = epsilon_tol_opt[0];
672  param[ibag].p = epsilon_loss_opt[0];
673  param[ibag].shrinking = (shrinking_opt[0])? 1 : 0;
674  param[ibag].probability = (prob_est_opt[0])? 1 : 0;
675  param[ibag].nr_weight = 0;//not used: I use priors and balancing
676  param[ibag].weight_label = NULL;
677  param[ibag].weight = NULL;
678  param[ibag].verbose=(verbose_opt[0]>1)? true:false;
679 
680  if(verbose_opt[0]>1)
681  std::cout << "checking parameters" << std::endl;
682  svm_check_parameter(&prob[ibag],&param[ibag]);
683  if(verbose_opt[0])
684  std::cout << "parameters ok, training" << std::endl;
685  svm[ibag]=svm_train(&prob[ibag],&param[ibag]);
686  if(verbose_opt[0]>1)
687  std::cout << "SVM is now trained" << std::endl;
688  if(cv_opt[0]>1){
689  if(verbose_opt[0]>1)
690  std::cout << "Cross validating" << std::endl;
691  double *target = Malloc(double,prob[ibag].l);
692  svm_cross_validation(&prob[ibag],&param[ibag],cv_opt[0],target);
693  assert(param[ibag].svm_type != EPSILON_SVR&&param[ibag].svm_type != NU_SVR);//only for regression
694 
695  for(int i=0;i<prob[ibag].l;i++){
696  string refClassName=nameVector[prob[ibag].y[i]];
697  string className=nameVector[target[i]];
698  if(classValueMap.size())
699  cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0/nbag);
700  else
701  cm.incrementResult(cm.getClass(prob[ibag].y[i]),cm.getClass(target[i]),1.0/nbag);
702  }
703  free(target);
704  }
705  // *NOTE* Because svm_model contains pointers to svm_problem, you can
706  // not free the memory used by svm_problem if you are still using the
707  // svm_model produced by svm_train().
708  }//for ibag
709  if(cv_opt[0]>1){
710  assert(cm.nReference());
711  cm.setFormat(cmformat_opt[0]);
712  cm.reportSE95(false);
713  std::cout << cm << std::endl;
714  // cout << "class #samples userAcc prodAcc" << endl;
715  // double se95_ua=0;
716  // double se95_pa=0;
717  // double se95_oa=0;
718  // double dua=0;
719  // double dpa=0;
720  // double doa=0;
721  // for(short iclass=0;iclass<cm.nClasses();++iclass){
722  // dua=cm.ua(cm.getClass(iclass),&se95_ua);
723  // dpa=cm.pa(cm.getClass(iclass),&se95_pa);
724  // cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl;
725  // }
726  // std::cout << "Kappa: " << cm.kappa() << std::endl;
727  // doa=cm.oa(&se95_oa);
728  // std::cout << "Overall Accuracy: " << 100*doa << " (" << 100*se95_oa << ")" << std::endl;
729  }
730 
731  //--------------------------------- end of training -----------------------------------
732  if(input_opt.empty())
733  exit(0);
734 
735  const char* pszMessage;
736  void* pProgressArg=NULL;
737  GDALProgressFunc pfnProgress=GDALTermProgress;
738  float progress=0;
739  if(!verbose_opt[0])
740  pfnProgress(progress,pszMessage,pProgressArg);
741  //-------------------------------- open image file ------------------------------------
742  bool inputIsRaster=false;
743  ImgReaderOgr imgReaderOgr;
744  try{
745  imgReaderOgr.open(input_opt[0]);
746  imgReaderOgr.close();
747  }
748  catch(string errorString){
749  inputIsRaster=true;
750  }
751  if(!inputIsRaster){//classify vector file
752  cm.clearResults();
753  //notice that fields have already been set by readDataImageOgr (taking into account appropriate bands)
754  for(int ivalidation=0;ivalidation<input_opt.size();++ivalidation){
755  if(output_opt.size())
756  assert(output_opt.size()==input_opt.size());
757  if(verbose_opt[0])
758  std::cout << "opening img reader " << input_opt[ivalidation] << std::endl;
759  imgReaderOgr.open(input_opt[ivalidation]);
760  ImgWriterOgr imgWriterOgr;
761 
762  if(output_opt.size()){
763  if(verbose_opt[0])
764  std::cout << "opening img writer and copying fields from img reader" << output_opt[ivalidation] << std::endl;
765  imgWriterOgr.open(output_opt[ivalidation],imgReaderOgr);
766  }
767  if(verbose_opt[0])
768  cout << "number of layers in input ogr file: " << imgReaderOgr.getLayerCount() << endl;
769  for(int ilayer=0;ilayer<imgReaderOgr.getLayerCount();++ilayer){
770  if(verbose_opt[0])
771  cout << "processing input layer " << ilayer << endl;
772  if(output_opt.size()){
773  if(verbose_opt[0])
774  std::cout << "creating field class" << std::endl;
775  if(classValueMap.size())
776  imgWriterOgr.createField("class",OFTInteger,ilayer);
777  else
778  imgWriterOgr.createField("class",OFTString,ilayer);
779  }
780  unsigned int nFeatures=imgReaderOgr.getFeatureCount(ilayer);
781  unsigned int ifeature=0;
782  progress=0;
783  pfnProgress(progress,pszMessage,pProgressArg);
784  OGRFeature *poFeature;
785  while( (poFeature = imgReaderOgr.getLayer(ilayer)->GetNextFeature()) != NULL ){
786  if(verbose_opt[0]>1)
787  std::cout << "feature " << ifeature << std::endl;
788  if( poFeature == NULL ){
789  cout << "Warning: could not read feature " << ifeature << " in layer " << imgReaderOgr.getLayerName(ilayer) << endl;
790  continue;
791  }
792  OGRFeature *poDstFeature = NULL;
793  if(output_opt.size()){
794  poDstFeature=imgWriterOgr.createFeature(ilayer);
795  if( poDstFeature->SetFrom( poFeature, TRUE ) != OGRERR_NONE ){
796  CPLError( CE_Failure, CPLE_AppDefined,
797  "Unable to translate feature %d from layer %s.\n",
798  poFeature->GetFID(), imgWriterOgr.getLayerName(ilayer).c_str() );
799  OGRFeature::DestroyFeature( poFeature );
800  OGRFeature::DestroyFeature( poDstFeature );
801  }
802  }
803  vector<float> validationPixel;
804  vector<float> validationFeature;
805 
806  imgReaderOgr.readData(validationPixel,OFTReal,fields,poFeature,ilayer);
807  assert(validationPixel.size()==nband);
808  vector<float> probOut(nclass);//posterior prob for each class
809  for(short iclass=0;iclass<nclass;++iclass)
810  probOut[iclass]=0;
811  for(int ibag=0;ibag<nbag;++ibag){
812  for(int iband=0;iband<nband;++iband){
813  validationFeature.push_back((validationPixel[iband]-offset[ibag][iband])/scale[ibag][iband]);
814  if(verbose_opt[0]==2)
815  std::cout << " " << validationFeature.back();
816  }
817  if(verbose_opt[0]==2)
818  std::cout << std::endl;
819  vector<double> result(nclass);
820  struct svm_node *x;
821  x = (struct svm_node *) malloc((validationFeature.size()+1)*sizeof(struct svm_node));
822  for(int i=0;i<validationFeature.size();++i){
823  x[i].index=i+1;
824  x[i].value=validationFeature[i];
825  }
826 
827  x[validationFeature.size()].index=-1;//to end svm feature vector
828  double predict_label=0;
829  if(!prob_est_opt[0]){
830  predict_label = svm_predict(svm[ibag],x);
831  for(short iclass=0;iclass<nclass;++iclass){
832  if(iclass==static_cast<short>(predict_label))
833  result[iclass]=1;
834  else
835  result[iclass]=0;
836  }
837  }
838  else{
839  assert(svm_check_probability_model(svm[ibag]));
840  predict_label = svm_predict_probability(svm[ibag],x,&(result[0]));
841  }
842  if(verbose_opt[0]>1){
843  std::cout << "predict_label: " << predict_label << std::endl;
844  for(int iclass=0;iclass<result.size();++iclass)
845  std::cout << result[iclass] << " ";
846  std::cout << std::endl;
847  }
848 
849  //calculate posterior prob of bag
850  for(short iclass=0;iclass<nclass;++iclass){
851  switch(comb_opt[0]){
852  default:
853  case(0)://sum rule
854  probOut[iclass]+=result[iclass]*priors[iclass];//add probabilities for each bag
855  break;
856  case(1)://product rule
857  probOut[iclass]*=pow(static_cast<float>(priors[iclass]),static_cast<float>(1.0-nbag)/nbag)*result[iclass];//multiply probabilities for each bag
858  break;
859  case(2)://max rule
860  if(priors[iclass]*result[iclass]>probOut[iclass])
861  probOut[iclass]=priors[iclass]*result[iclass];
862  break;
863  }
864  }
865  free(x);
866  }//for ibag
867 
868  //search for max class prob
869  float maxBag=0;
870  float normBag=0;
871  string classOut="Unclassified";
872  for(short iclass=0;iclass<nclass;++iclass){
873  if(verbose_opt[0]>1)
874  std::cout << probOut[iclass] << " ";
875  if(probOut[iclass]>maxBag){
876  maxBag=probOut[iclass];
877  classOut=nameVector[iclass];
878  }
879  }
880  //look for class name
881  if(verbose_opt[0]>1){
882  if(classValueMap.size())
883  std::cout << "->" << classValueMap[classOut] << std::endl;
884  else
885  std::cout << "->" << classOut << std::endl;
886  }
887  if(output_opt.size()){
888  if(classValueMap.size())
889  poDstFeature->SetField("class",classValueMap[classOut]);
890  else
891  poDstFeature->SetField("class",classOut.c_str());
892  poDstFeature->SetFID( poFeature->GetFID() );
893  }
894  int labelIndex=poFeature->GetFieldIndex(label_opt[0].c_str());
895  if(labelIndex>=0){
896  string classRef=poFeature->GetFieldAsString(labelIndex);
897  if(classRef!="0"){
898  if(classValueMap.size())
899  cm.incrementResult(type2string<short>(classValueMap[classRef]),type2string<short>(classValueMap[classOut]),1);
900  else
901  cm.incrementResult(classRef,classOut,1);
902  }
903  }
904  CPLErrorReset();
905  if(output_opt.size()){
906  if(imgWriterOgr.createFeature(poDstFeature,ilayer) != OGRERR_NONE){
907  CPLError( CE_Failure, CPLE_AppDefined,
908  "Unable to translate feature %d from layer %s.\n",
909  poFeature->GetFID(), imgWriterOgr.getLayerName(ilayer).c_str() );
910  OGRFeature::DestroyFeature( poDstFeature );
911  OGRFeature::DestroyFeature( poDstFeature );
912  }
913  }
914  ++ifeature;
915  if(!verbose_opt[0]){
916  progress=static_cast<float>(ifeature+1.0)/nFeatures;
917  pfnProgress(progress,pszMessage,pProgressArg);
918  }
919  OGRFeature::DestroyFeature( poFeature );
920  OGRFeature::DestroyFeature( poDstFeature );
921  }//get next feature
922  }//next layer
923  imgReaderOgr.close();
924  if(output_opt.size())
925  imgWriterOgr.close();
926  }
927  if(cm.nReference()){
928  std::cout << cm << std::endl;
929  cout << "class #samples userAcc prodAcc" << endl;
930  double se95_ua=0;
931  double se95_pa=0;
932  double se95_oa=0;
933  double dua=0;
934  double dpa=0;
935  double doa=0;
936  for(short iclass=0;iclass<cm.nClasses();++iclass){
937  dua=cm.ua_pct(cm.getClass(iclass),&se95_ua);
938  dpa=cm.pa_pct(cm.getClass(iclass),&se95_pa);
939  cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl;
940  }
941  std::cout << "Kappa: " << cm.kappa() << std::endl;
942  doa=cm.oa(&se95_oa);
943  std::cout << "Overall Accuracy: " << 100*doa << " (" << 100*se95_oa << ")" << std::endl;
944  }
945  }
946  try{
947  if(active_opt.size())
948  activeWriter.close();
949  }
950  catch(string errorString){
951  std::cerr << "Error: errorString" << std::endl;
952  }
953 
954  for(int ibag=0;ibag<nbag;++ibag){
955  // svm_destroy_param[ibag](&param[ibag]);
956  svm_destroy_param(&param[ibag]);
957  free(prob[ibag].y);
958  free(prob[ibag].x);
959  free(x_space[ibag]);
960  svm_free_and_destroy_model(&(svm[ibag]));
961  }
962  return 0;
963 }
// Doxygen cross-reference residue from the HTML scrape (not source code):
// throw this class when syntax error in command line option
//   Definition: Optionpk.h:45
// STL namespace.
//   Definition: svm.h:12