pktools  2.6.7
Processing Kernel for geospatial data
pkextractimg.cc
1 /**********************************************************************
2 pkextractimg.cc: extract pixel values from raster image using a raster sample
3 Copyright (C) 2008-2016 Pieter Kempeneers
4 
5 This file is part of pktools
6 
7 pktools is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11 
12 pktools is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with pktools. If not, see <http://www.gnu.org/licenses/>.
19 ***********************************************************************/
20 #include <assert.h>
21 #include <math.h>
22 #include <stdlib.h>
23 #include <sstream>
24 #include <string>
25 #include <algorithm>
26 #include <ctime>
27 #include <vector>
28 #include "imageclasses/ImgReaderGdal.h"
29 #include "imageclasses/ImgWriterOgr.h"
30 #include "base/Optionpk.h"
31 #include "algorithms/StatFactory.h"
32 
33 #ifndef PI
34 #define PI 3.1415926535897932384626433832795
35 #endif
36 
37 /******************************************************************************/
88 using namespace std;
89 
90 int main(int argc, char *argv[])
91 {
92  Optionpk<string> image_opt("i", "input", "Raster input dataset containing band information");
93  Optionpk<string> sample_opt("s", "sample", "Raster dataset with features to be extracted from input data. Output will contain features with input band information included.");
94  Optionpk<string> output_opt("o", "output", "Output sample dataset");
95  Optionpk<int> class_opt("c", "class", "Class(es) to extract from input sample image. Leave empty to extract all valid data pixels from sample dataset");
96  Optionpk<float> threshold_opt("t", "threshold", "Probability threshold for selecting samples (randomly). Provide probability in percentage (>0) or absolute (<0). Use a single threshold per vector sample layer. If using raster land cover maps as a sample dataset, you can provide a threshold value for each class (e.g. -t 80 -t 60). Use value 100 to select all pixels for selected class(es)", 100);
97  Optionpk<string> ogrformat_opt("f", "f", "Output sample dataset format","SQLite");
98  Optionpk<string> ftype_opt("ft", "ftype", "Field type (only Real or Integer)", "Real");
99  Optionpk<string> ltype_opt("lt", "ltype", "Label type: In16 or String", "Integer");
100  Optionpk<int> band_opt("b", "band", "Band index(es) to extract (0 based). Leave empty to use all bands");
101  Optionpk<unsigned short> bstart_opt("sband", "startband", "Start band sequence number");
102  Optionpk<unsigned short> bend_opt("eband", "endband", "End band sequence number");
103  Optionpk<double> srcnodata_opt("srcnodata", "srcnodata", "Invalid value(s) for input image");
104  Optionpk<int> bndnodata_opt("bndnodata", "bndnodata", "Band in input image to check if pixel is valid (used for srcnodata)", 0);
105  Optionpk<string> fieldname_opt("bn", "bname", "For single band input data, this extra attribute name will correspond to the raster values. For multi-band input data, multiple attributes with this prefix will be added (e.g. b0, b1, b2, etc.)", "b");
106  Optionpk<string> label_opt("cn", "cname", "Name of the class label in the output vector dataset", "label");
107  Optionpk<short> down_opt("down", "down", "Down sampling factor", 1);
108  Optionpk<short> verbose_opt("v", "verbose", "Verbose mode if > 0", 0,2);
109 
110  bstart_opt.setHide(1);
111  bend_opt.setHide(1);
112  bndnodata_opt.setHide(1);
113  srcnodata_opt.setHide(1);
114  fieldname_opt.setHide(1);
115  label_opt.setHide(1);
116  down_opt.setHide(1);
117 
118  bool doProcess;//stop process when program was invoked with help option (-h --help)
119  try{
120  doProcess=image_opt.retrieveOption(argc,argv);
121  sample_opt.retrieveOption(argc,argv);
122  output_opt.retrieveOption(argc,argv);
123  class_opt.retrieveOption(argc,argv);
124  threshold_opt.retrieveOption(argc,argv);
125  ogrformat_opt.retrieveOption(argc,argv);
126  ftype_opt.retrieveOption(argc,argv);
127  ltype_opt.retrieveOption(argc,argv);
128  band_opt.retrieveOption(argc,argv);
129  bstart_opt.retrieveOption(argc,argv);
130  bend_opt.retrieveOption(argc,argv);
131  bndnodata_opt.retrieveOption(argc,argv);
132  srcnodata_opt.retrieveOption(argc,argv);
133  fieldname_opt.retrieveOption(argc,argv);
134  label_opt.retrieveOption(argc,argv);
135  down_opt.retrieveOption(argc,argv);
136  verbose_opt.retrieveOption(argc,argv);
137  }
138  catch(string predefinedString){
139  std::cout << predefinedString << std::endl;
140  exit(0);
141  }
142  if(!doProcess){
143  cout << endl;
144  cout << "Usage: pkextractimg -i input -s sample -o output" << endl;
145  cout << endl;
146  std::cout << "short option -h shows basic options only, use long option --help to show all options" << std::endl;
147  exit(0);//help was invoked, stop processing
148  }
149 
150  // if(srcnodata_opt.size()){
151  // while(srcnodata_opt.size()<bndnodata_opt.size())
152  // srcnodata_opt.push_back(srcnodata_opt[0]);
153  // }
154 
155  if(verbose_opt[0])
156  std::cout << class_opt << std::endl;
158  stat.setNoDataValues(srcnodata_opt);
159  Vector2d<unsigned int> posdata;
160  unsigned long int nsample=0;
161  unsigned long int ntotalvalid=0;
162  unsigned long int ntotalinvalid=0;
163 
164  map<int,unsigned long int> nvalid;
165  map<int,unsigned long int> ninvalid;
166  // vector<unsigned long int> nvalid(class_opt.size());
167  // vector<unsigned long int> ninvalid(class_opt.size());
168  // if(class_opt.empty()){
169  // nvalid.resize(256);
170  // ninvalid.resize(256);
171  // }
172  // for(int it=0;it<nvalid.size();++it){
173  // nvalid[it]=0;
174  // ninvalid[it]=0;
175  // }
176 
177  map <int,short> classmap;//class->index
178  for(int iclass=0;iclass<class_opt.size();++iclass){
179  nvalid[class_opt[iclass]]=0;
180  ninvalid[class_opt[iclass]]=0;
181  classmap[class_opt[iclass]]=iclass;
182  }
183 
184  ImgReaderGdal imgReader;
185  if(image_opt.empty()){
186  std::cerr << "No image dataset provided (use option -i). Use --help for help information";
187  exit(0);
188  }
189  if(output_opt.empty()){
190  std::cerr << "No output dataset provided (use option -o). Use --help for help information";
191  exit(0);
192  }
193  try{
194  imgReader.open(image_opt[0]);
195  }
196  catch(std::string errorstring){
197  std::cout << errorstring << std::endl;
198  exit(0);
199  }
200 
201  //convert start and end band options to vector of band indexes
202  try{
203  if(bstart_opt.size()){
204  if(bend_opt.size()!=bstart_opt.size()){
205  string errorstring="Error: options for start and end band indexes must be provided as pairs, missing end band";
206  throw(errorstring);
207  }
208  band_opt.clear();
209  for(int ipair=0;ipair<bstart_opt.size();++ipair){
210  if(bend_opt[ipair]<=bstart_opt[ipair]){
211  string errorstring="Error: index for end band must be smaller then start band";
212  throw(errorstring);
213  }
214  for(int iband=bstart_opt[ipair];iband<=bend_opt[ipair];++iband)
215  band_opt.push_back(iband);
216  }
217  }
218  }
219  catch(string error){
220  cerr << error << std::endl;
221  exit(1);
222  }
223 
224  int nband=(band_opt.size()) ? band_opt.size() : imgReader.nrOfBand();
225 
226  if(fieldname_opt.size()<nband){
227  std::string bandString=fieldname_opt[0];
228  fieldname_opt.clear();
229  fieldname_opt.resize(nband);
230  for(int iband=0;iband<nband;++iband){
231  int theBand=(band_opt.size()) ? band_opt[iband] : iband;
232  ostringstream fs;
233  fs << bandString << theBand;
234  fieldname_opt[iband]=fs.str();
235  }
236  }
237 
238  if(verbose_opt[0])
239  std::cout << fieldname_opt << std::endl;
240 
241  if(verbose_opt[0]>1)
242  std::cout << "Number of bands in input image: " << imgReader.nrOfBand() << std::endl;
243 
244  OGRFieldType fieldType;
245  OGRFieldType labelType;
246  int ogr_typecount=11;//hard coded for now!
247  if(verbose_opt[0]>1)
248  std::cout << "field and label types can be: ";
249  for(int iType = 0; iType < ogr_typecount; ++iType){
250  if(verbose_opt[0]>1)
251  std::cout << " " << OGRFieldDefn::GetFieldTypeName((OGRFieldType)iType);
252  if( OGRFieldDefn::GetFieldTypeName((OGRFieldType)iType) != NULL
253  && EQUAL(OGRFieldDefn::GetFieldTypeName((OGRFieldType)iType),
254  ftype_opt[0].c_str()))
255  fieldType=(OGRFieldType) iType;
256  if( OGRFieldDefn::GetFieldTypeName((OGRFieldType)iType) != NULL
257  && EQUAL(OGRFieldDefn::GetFieldTypeName((OGRFieldType)iType),
258  ltype_opt[0].c_str()))
259  labelType=(OGRFieldType) iType;
260  }
261  switch( fieldType ){
262  case OFTInteger:
263  case OFTReal:
264  case OFTRealList:
265  case OFTString:
266  if(verbose_opt[0]>1)
267  std::cout << std::endl << "field type is: " << OGRFieldDefn::GetFieldTypeName(fieldType) << std::endl;
268  break;
269  default:
270  cerr << "field type " << OGRFieldDefn::GetFieldTypeName(fieldType) << " not supported" << std::endl;
271  exit(0);
272  break;
273  }
274  switch( labelType ){
275  case OFTInteger:
276  case OFTReal:
277  case OFTRealList:
278  case OFTString:
279  if(verbose_opt[0]>1)
280  std::cout << std::endl << "label type is: " << OGRFieldDefn::GetFieldTypeName(labelType) << std::endl;
281  break;
282  default:
283  cerr << "label type " << OGRFieldDefn::GetFieldTypeName(labelType) << " not supported" << std::endl;
284  exit(0);
285  break;
286  }
287 
288  const char* pszMessage;
289  void* pProgressArg=NULL;
290  GDALProgressFunc pfnProgress=GDALTermProgress;
291  double progress=0;
292  srand(time(NULL));
293 
294  bool sampleIsRaster=true;
295 
296  ImgReaderGdal classReader;
297  ImgWriterOgr sampleWriterOgr;
298 
299  if(sample_opt.size()){
300  try{
301  classReader.open(sample_opt[0]);
302  }
303  catch(string errorString){
304  //todo: sampleIsRaster will not work from GDAL 2.0!!?? (unification of driver for raster and vector datasets)
305  sampleIsRaster=false;
306  }
307  }
308  else{
309  std::cerr << "No raster sample dataset provided (use option -s filename). Use --help for help information";
310  exit(1);
311  }
312 
313  if(sampleIsRaster){
314  if(class_opt.empty()){
315  ImgWriterOgr ogrWriter;
316  assert(sample_opt.size());
317  classReader.open(sample_opt[0]);
318  // vector<int> classBuffer(classReader.nrOfCol());
319  vector<double> classBuffer(classReader.nrOfCol());
320  Vector2d<double> imgBuffer(nband,imgReader.nrOfCol());//[band][col]
321  // vector<double> imgBuffer(nband);//[band]
322  vector<double> sample(2+nband);//x,y,band values
323  Vector2d<double> writeBuffer;
324  vector<int> writeBufferClass;
325  vector<int> selectedClass;
326  Vector2d<double> selectedBuffer;
327  double oldimgrow=-1;
328  int irow=0;
329  int icol=0;
330  if(verbose_opt[0]>1)
331  std::cout << "extracting sample from image..." << std::endl;
332  progress=0;
333  pfnProgress(progress,pszMessage,pProgressArg);
334  for(irow=0;irow<classReader.nrOfRow();++irow){
335  if(irow%down_opt[0])
336  continue;
337  classReader.readData(classBuffer,irow);
338  double x=0;//geo x coordinate
339  double y=0;//geo y coordinate
340  double iimg=0;//image x-coordinate in img image
341  double jimg=0;//image y-coordinate in img image
342 
343  //find col in img
344  classReader.image2geo(icol,irow,x,y);
345  imgReader.geo2image(x,y,iimg,jimg);
346  //nearest neighbour
347  if(static_cast<int>(jimg)<0||static_cast<int>(jimg)>=imgReader.nrOfRow())
348  continue;
349  for(int iband=0;iband<nband;++iband){
350  int theBand=(band_opt.size()) ? band_opt[iband] : iband;
351  imgReader.readData(imgBuffer[iband],static_cast<int>(jimg),theBand);
352  }
353  for(icol=0;icol<classReader.nrOfCol();++icol){
354  if(icol%down_opt[0])
355  continue;
356  int theClass=classBuffer[icol];
357  int processClass=0;
358  bool valid=false;
359  if(class_opt.empty()){
360  valid=true;//process every class
361  processClass=theClass;
362  }
363  else{
364  for(int iclass=0;iclass<class_opt.size();++iclass){
365  if(classBuffer[icol]==class_opt[iclass]){
366  processClass=iclass;
367  theClass=class_opt[iclass];
368  valid=true;//process this class
369  break;
370  }
371  }
372  }
373  classReader.image2geo(icol,irow,x,y);
374  sample[0]=x;
375  sample[1]=y;
376  if(verbose_opt[0]>1){
377  std::cout.precision(12);
378  std::cout << theClass << " " << x << " " << y << std::endl;
379  }
380  //find col in img
381  imgReader.geo2image(x,y,iimg,jimg);
382  //nearest neighbour
383  iimg=static_cast<int>(iimg);
384  if(static_cast<int>(iimg)<0||static_cast<int>(iimg)>=imgReader.nrOfCol())
385  continue;
386 
387  for(int iband=0;iband<nband&&valid;++iband){
388  int theBand=(band_opt.size()) ? band_opt[iband] : iband;
389  if(srcnodata_opt.size()&&theBand==bndnodata_opt[0]){
390  // vector<int>::const_iterator bndit=bndnodata_opt.begin();
391  for(int inodata=0;inodata<srcnodata_opt.size()&&valid;++inodata){
392  if(imgBuffer[iband][iimg]==srcnodata_opt[inodata])
393  valid=false;
394  }
395  }
396  }
397  // oldimgrow=jimg;
398 
399  if(valid){
400  for(int iband=0;iband<imgBuffer.size();++iband){
401  sample[iband+2]=imgBuffer[iband][iimg];
402  }
403  float theThreshold=(threshold_opt.size()>1)?threshold_opt[processClass]:threshold_opt[0];
404  if(theThreshold>0){//percentual value
405  double p=static_cast<double>(rand())/(RAND_MAX);
406  p*=100.0;
407  if(p>theThreshold)
408  continue;//do not select for now, go to next column
409  }
410  // else if(nvalid.size()>processClass){//absolute value
411  // if(nvalid[processClass]>=-theThreshold)
412  // continue;//do not select any more pixels for this class, go to next column to search for other classes
413  // }
414  writeBuffer.push_back(sample);
415  writeBufferClass.push_back(theClass);
416  ++ntotalvalid;
417  if(nvalid.count(theClass))
418  nvalid[theClass]+=1;
419  else
420  nvalid[theClass]=1;
421  }
422  else{
423  ++ntotalinvalid;
424  if(ninvalid.count(theClass))
425  ninvalid[theClass]+=1;
426  else
427  ninvalid[theClass]=1;
428  }
429  }
430  progress=static_cast<float>(irow+1.0)/classReader.nrOfRow();
431  pfnProgress(progress,pszMessage,pProgressArg);
432  }//irow
433  progress=100;
434  pfnProgress(progress,pszMessage,pProgressArg);
435  if(writeBuffer.size()>0){
436  assert(ntotalvalid==writeBuffer.size());
437  if(verbose_opt[0]>0)
438  std::cout << "creating image sample writer " << output_opt[0] << " with " << writeBuffer.size() << " samples (" << ntotalinvalid << " invalid)" << std::endl;
439  ogrWriter.open(output_opt[0],ogrformat_opt[0]);
440  char **papszOptions=NULL;
441  ostringstream slayer;
442  slayer << "training data";
443  std::string layername=slayer.str();
444  ogrWriter.createLayer(layername, imgReader.getProjection(), wkbPoint, papszOptions);
445  std::string fieldname="fid";//number of the point
446  ogrWriter.createField(fieldname,OFTInteger);
447  map<std::string,double> pointAttributes;
448  ogrWriter.createField(label_opt[0],labelType);
449  for(int iband=0;iband<nband;++iband){
450  int theBand=(band_opt.size()) ? band_opt[iband] : iband;
451  ogrWriter.createField(fieldname_opt[iband],fieldType);
452  }
453  progress=0;
454  pfnProgress(progress,pszMessage,pProgressArg);
455 
456  map<int,short> classDone;
457  Vector2d<double> writeBufferTmp;
458  vector<int> writeBufferClassTmp;
459 
460  if(threshold_opt[0]<0){//absolute threshold
461  map<int,unsigned long int>::iterator mapit;
462  map<int,unsigned long int> ncopied;
463  for(mapit=nvalid.begin();mapit!=nvalid.end();++mapit)
464  ncopied[mapit->first]=0;
465 
466  cout << "ncopied.size(): " << ncopied.size() << endl;
467  while(classDone.size()<nvalid.size()){
468  int index=rand()%writeBufferClass.size();
469  int theClass=writeBufferClass[index];
470  float theThreshold=threshold_opt[0];
471  if(threshold_opt.size()>1&&class_opt.size())
472  theThreshold=threshold_opt[classmap[theClass]];
473  theThreshold=-theThreshold;
474  if(ncopied[theClass]<theThreshold){
475  writeBufferClassTmp.push_back(*(writeBufferClass.begin()+index));
476  writeBufferTmp.push_back(*(writeBuffer.begin()+index));
477  writeBufferClass.erase(writeBufferClass.begin()+index);
478  writeBuffer.erase(writeBuffer.begin()+index);
479  ++(ncopied[theClass]);
480  }
481  else
482  classDone[theClass]=1;
483  if(ncopied[theClass]>=nvalid[theClass]){
484  classDone[theClass]=1;
485  }
486  }
487  writeBuffer=writeBufferTmp;
488  writeBufferClass=writeBufferClassTmp;
489 
490  // while(classDone.size()<nvalid.size()){
491  // int index=rand()%writeBufferClass.size();
492  // int theClass=writeBufferClass[index];
493  // float theThreshold=threshold_opt[0];
494  // if(threshold_opt.size()>1&&class_opt.size())
495  // theThreshold=threshold_opt[classmap[theClass]];
496  // theThreshold=-theThreshold;
497  // if(nvalid[theClass]>theThreshold){
498  // writeBufferClass.erase(writeBufferClass.begin()+index);
499  // writeBuffer.erase(writeBuffer.begin()+index);
500  // --(nvalid[theClass]);
501  // }
502  // else
503  // classDone[theClass]=1;
504  // }
505  }
506  for(int isample=0;isample<writeBuffer.size();++isample){
507  if(verbose_opt[0]>1)
508  std::cout << "writing sample " << isample << std::endl;
509  pointAttributes[label_opt[0]]=writeBufferClass[isample];
510  for(int iband=0;iband<writeBuffer[0].size()-2;++iband){
511  int theBand=(band_opt.size()) ? band_opt[iband] : iband;
512  pointAttributes[fieldname_opt[iband]]=writeBuffer[isample][iband+2];
513  }
514  if(verbose_opt[0]>1)
515  std::cout << "all bands written" << std::endl;
516  ogrWriter.addPoint(writeBuffer[isample][0],writeBuffer[isample][1],pointAttributes,fieldname,isample);
517  progress=static_cast<float>(isample+1.0)/writeBuffer.size();
518  pfnProgress(progress,pszMessage,pProgressArg);
519  }
520  ogrWriter.close();
521  }
522  else{
523  std::cout << "No data found for any class " << std::endl;
524  }
525  classReader.close();
526  nsample=writeBuffer.size();
527  if(verbose_opt[0])
528  std::cout << "total number of samples written: " << nsample << std::endl;
529  }
530  else{//class_opt.size()!=0
531  assert(class_opt[0]);
532  // if(class_opt[0]){
533  assert(threshold_opt.size()==1||threshold_opt.size()==class_opt.size());
534  ImgReaderGdal classReader;
535  ImgWriterOgr ogrWriter;
536  if(verbose_opt[0]>1){
537  std::cout << "reading position from sample dataset " << std::endl;
538  std::cout << "class thresholds: " << std::endl;
539  for(int iclass=0;iclass<class_opt.size();++iclass){
540  if(threshold_opt.size()>1)
541  std::cout << class_opt[iclass] << ": " << threshold_opt[iclass] << std::endl;
542  else
543  std::cout << class_opt[iclass] << ": " << threshold_opt[0] << std::endl;
544  }
545  }
546  classReader.open(sample_opt[0]);
547  vector<int> classBuffer(classReader.nrOfCol());
548  // vector<double> classBuffer(classReader.nrOfCol());
549  Vector2d<double> imgBuffer(nband,imgReader.nrOfCol());//[band][col]
550  // vector<double> imgBuffer(nband);//[band]
551  vector<double> sample(2+nband);//x,y,band values
552  Vector2d<double> writeBuffer;
553  vector<int> writeBufferClass;
554  vector<int> selectedClass;
555  Vector2d<double> selectedBuffer;
556  double oldimgrow=-1;
557  int irow=0;
558  int icol=0;
559  if(verbose_opt[0]>1)
560  std::cout << "extracting sample from image..." << std::endl;
561  progress=0;
562  pfnProgress(progress,pszMessage,pProgressArg);
563  for(irow=0;irow<classReader.nrOfRow();++irow){
564  if(irow%down_opt[0])
565  continue;
566  classReader.readData(classBuffer,irow);
567  double x=0;//geo x coordinate
568  double y=0;//geo y coordinate
569  double iimg=0;//image x-coordinate in img image
570  double jimg=0;//image y-coordinate in img image
571 
572  //find col in img
573  classReader.image2geo(icol,irow,x,y);
574  imgReader.geo2image(x,y,iimg,jimg);
575  //nearest neighbour
576  if(static_cast<int>(jimg)<0||static_cast<int>(jimg)>=imgReader.nrOfRow())
577  continue;
578  for(int iband=0;iband<nband;++iband){
579  int theBand=(band_opt.size()) ? band_opt[iband] : iband;
580  imgReader.readData(imgBuffer[iband],static_cast<int>(jimg),theBand);
581  }
582 
583  for(icol=0;icol<classReader.nrOfCol();++icol){
584  if(icol%down_opt[0])
585  continue;
586  int theClass=0;
587  // double theClass=0;
588  int processClass=-1;
589  if(class_opt.empty()){//process every class
590  if(classBuffer[icol]){
591  processClass=0;
592  theClass=classBuffer[icol];
593  }
594  }
595  else{
596  for(int iclass=0;iclass<class_opt.size();++iclass){
597  if(classBuffer[icol]==class_opt[iclass]){
598  processClass=iclass;
599  theClass=class_opt[iclass];
600  }
601  }
602  }
603  if(processClass>=0){
604  // if(classBuffer[icol]==class_opt[0]){
605  classReader.image2geo(icol,irow,x,y);
606  sample[0]=x;
607  sample[1]=y;
608  if(verbose_opt[0]>1){
609  std::cout.precision(12);
610  std::cout << theClass << " " << x << " " << y << std::endl;
611  }
612  //find col in img
613  imgReader.geo2image(x,y,iimg,jimg);
614  //nearest neighbour
615  iimg=static_cast<int>(iimg);
616  if(static_cast<int>(iimg)<0||static_cast<int>(iimg)>=imgReader.nrOfCol())
617  continue;
618  bool valid=true;
619 
620  for(int iband=0;iband<nband;++iband){
621  int theBand=(band_opt.size()) ? band_opt[iband] : iband;
622  if(srcnodata_opt.size()&&theBand==bndnodata_opt[0]){
623  // vector<int>::const_iterator bndit=bndnodata_opt.begin();
624  for(int inodata=0;inodata<srcnodata_opt.size()&&valid;++inodata){
625  if(imgBuffer[iband][iimg]==srcnodata_opt[inodata])
626  valid=false;
627  }
628  }
629  }
630  if(valid){
631  for(int iband=0;iband<imgBuffer.size();++iband){
632  sample[iband+2]=imgBuffer[iband][iimg];
633  }
634  float theThreshold=(threshold_opt.size()>1)?threshold_opt[processClass]:threshold_opt[0];
635  if(theThreshold>0){//percentual value
636  double p=static_cast<double>(rand())/(RAND_MAX);
637  p*=100.0;
638  if(p>theThreshold)
639  continue;//do not select for now, go to next column
640  }
641  // else if(nvalid.size()>processClass){//absolute value
642  // if(nvalid[processClass]>=-theThreshold)
643  // continue;//do not select any more pixels for this class, go to next column to search for other classes
644  // }
645  writeBuffer.push_back(sample);
646  writeBufferClass.push_back(theClass);
647  ++ntotalvalid;
648  if(nvalid.count(theClass))
649  nvalid[theClass]+=1;
650  else
651  nvalid[theClass]=1;
652  }
653  else{
654  ++ntotalinvalid;
655  if(ninvalid.count(theClass))
656  ninvalid[theClass]+=1;
657  else
658  ninvalid[theClass]=1;
659  }
660  }//processClass
661  }//icol
662  progress=static_cast<float>(irow+1.0)/classReader.nrOfRow();
663  pfnProgress(progress,pszMessage,pProgressArg);
664  }//irow
665  if(writeBuffer.size()>0){
666  assert(ntotalvalid==writeBuffer.size());
667  if(verbose_opt[0]>0)
668  std::cout << "creating image sample writer " << output_opt[0] << " with " << writeBuffer.size() << " samples (" << ntotalinvalid << " invalid)" << std::endl;
669  ogrWriter.open(output_opt[0],ogrformat_opt[0]);
670  char **papszOptions=NULL;
671  ostringstream slayer;
672  slayer << "training data";
673  std::string layername=slayer.str();
674  ogrWriter.createLayer(layername, imgReader.getProjection(), wkbPoint, papszOptions);
675  std::string fieldname="fid";//number of the point
676  ogrWriter.createField(fieldname,OFTInteger);
677  map<std::string,double> pointAttributes;
678  ogrWriter.createField(label_opt[0],labelType);
679  for(int iband=0;iband<nband;++iband){
680  int theBand=(band_opt.size()) ? band_opt[iband] : iband;
681  ogrWriter.createField(fieldname_opt[iband],fieldType);
682  }
683  pfnProgress(progress,pszMessage,pProgressArg);
684  progress=0;
685  pfnProgress(progress,pszMessage,pProgressArg);
686 
687  map<int,short> classDone;
688  Vector2d<double> writeBufferTmp;
689  vector<int> writeBufferClassTmp;
690 
691  if(threshold_opt[0]<0){//absolute threshold
692  map<int,unsigned long int>::iterator mapit;
693  map<int,unsigned long int> ncopied;
694  for(mapit=nvalid.begin();mapit!=nvalid.end();++mapit)
695  ncopied[mapit->first]=0;
696 
697  while(classDone.size()<nvalid.size()){
698  int index=rand()%writeBufferClass.size();
699  int theClass=writeBufferClass[index];
700  float theThreshold=threshold_opt[0];
701  if(threshold_opt.size()>1&&class_opt.size())
702  theThreshold=threshold_opt[classmap[theClass]];
703  theThreshold=-theThreshold;
704  if(ncopied[theClass]<theThreshold){
705  writeBufferClassTmp.push_back(*(writeBufferClass.begin()+index));
706  writeBufferTmp.push_back(*(writeBuffer.begin()+index));
707  writeBufferClass.erase(writeBufferClass.begin()+index);
708  writeBuffer.erase(writeBuffer.begin()+index);
709  ++(ncopied[theClass]);
710  }
711  else
712  classDone[theClass]=1;
713  if(ncopied[theClass]>=nvalid[theClass]){
714  classDone[theClass]=1;
715  }
716  }
717  writeBuffer=writeBufferTmp;
718  writeBufferClass=writeBufferClassTmp;
719  // while(classDone.size()<nvalid.size()){
720  // int index=rand()%writeBufferClass.size();
721  // int theClass=writeBufferClass[index];
722  // float theThreshold=threshold_opt[0];
723  // if(threshold_opt.size()>1&&class_opt.size())
724  // theThreshold=threshold_opt[classmap[theClass]];
725  // theThreshold=-theThreshold;
726  // if(nvalid[theClass]>theThreshold){
727  // writeBufferClass.erase(writeBufferClass.begin()+index);
728  // writeBuffer.erase(writeBuffer.begin()+index);
729  // --(nvalid[theClass]);
730  // }
731  // else
732  // classDone[theClass]=1;
733  // }
734  }
735 
736  for(int isample=0;isample<writeBuffer.size();++isample){
737  pointAttributes[label_opt[0]]=writeBufferClass[isample];
738  for(int iband=0;iband<writeBuffer[0].size()-2;++iband){
739  int theBand=(band_opt.size()) ? band_opt[iband] : iband;
740  pointAttributes[fieldname_opt[iband]]=writeBuffer[isample][iband+2];
741  }
742  ogrWriter.addPoint(writeBuffer[isample][0],writeBuffer[isample][1],pointAttributes,fieldname,isample);
743  progress=static_cast<float>(isample+1.0)/writeBuffer.size();
744  pfnProgress(progress,pszMessage,pProgressArg);
745  }
746  ogrWriter.close();
747  }
748  else{
749  std::cout << "No data found for any class " << std::endl;
750  }
751  classReader.close();
752  nsample=writeBuffer.size();
753  if(verbose_opt[0]){
754  std::cout << "total number of samples written: " << nsample << std::endl;
755  if(nvalid.size()==class_opt.size()){
756  for(int iclass=0;iclass<class_opt.size();++iclass)
757  std::cout << "class " << class_opt[iclass] << " has " << nvalid[iclass] << " samples" << std::endl;
758  }
759  }
760  }
761  }
762  else{//vector dataset
763  cerr << "Error: vector sample not supported, consider using pkextractogr" << endl;
764  }//else (vector)
765  progress=1.0;
766  pfnProgress(progress,pszMessage,pProgressArg);
767  imgReader.close();
768 }
769 
STL namespace.
bool geo2image(double x, double y, double &i, double &j) const
Convert georeferenced coordinates (x and y) to image coordinates (column and row) ...
void close(void)
Set the memory (in MB) to cache a number of rows in memory.
int nrOfCol(void) const
Get the number of columns of this dataset.
Definition: ImgRasterGdal.h:98
void readData(T &value, int col, int row, int band=0)
Read a single pixel cell value at a specific column and row for a specific band (all indices start co...
Definition: ImgReaderGdal.h:95
int nrOfRow(void) const
Get the number of rows of this dataset.
bool image2geo(double i, double j, double &x, double &y) const
Convert image coordinates (column and row) to georeferenced coordinates (x and y) ...
std::string getProjection(void) const
Get the projection string (deprecated, use getProjectionRef instead)
void open(const std::string &filename, const GDALAccess &readMode=GA_ReadOnly)
Open an image.
int nrOfBand(void) const
Get the number of bands of this dataset.