pktools  2.6.7
Processing Kernel for geospatial data
pkstatogr.cc
1 /**********************************************************************
2 pkstatogr.cc: program to calculate basic statistics from vector file
3 Copyright (C) 2008-2012 Pieter Kempeneers
4 
5 This file is part of pktools
6 
7 pktools is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11 
12 pktools is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with pktools. If not, see <http://www.gnu.org/licenses/>.
19 ***********************************************************************/
20 #include <math.h>
21 #include <string>
22 #include <fstream>
23 #include <assert.h>
24 #include "base/Optionpk.h"
25 #include "imageclasses/ImgReaderOgr.h"
26 #include "algorithms/StatFactory.h"
27 /******************************************************************************/
75 using namespace std;
76 
77 int main(int argc, char *argv[])
78 {
79  Optionpk<string> input_opt("i", "input", "Input OGR vector file", "");
80  Optionpk<string> layer_opt("ln", "lname", "Layer name(s) in sample (leave empty to select all)");
81  Optionpk<string> fieldname_opt("n", "fname", "Fields on which to calculate statistics", "");
82  Optionpk<double> nodata_opt("nodata","nodata","Set nodata value(s)");
83  Optionpk<double> src_min_opt("src_min","src_min","Set minimum value for histogram");
84  Optionpk<double> src_max_opt("src_max","src_max","Set maximum value for histogram");
85  Optionpk<bool> size_opt("s","size","Sample size (number of points)",false);
86  Optionpk<bool> minmax_opt("mm","minmax","Calculate minimum and maximum value",false);
87  Optionpk<bool> min_opt("min","min","Calculate minimum value",0);
88  Optionpk<bool> max_opt("max","max","Calculate maximum value",0);
89  Optionpk<bool> mean_opt("mean","mean","Calculate mean value",false);
90  Optionpk<bool> median_opt("median","median","Calculate median value",false);
91  Optionpk<bool> stdev_opt("stdev","stdev","Calculate standard deviation",false);
92  Optionpk<bool> histogram_opt("hist","hist","Calculate histogram",false);
93  Optionpk<unsigned int> nbin_opt("nbin", "nbin", "Number of bins");
94  Optionpk<bool> relative_opt("rel","relative","Use percentiles for histogram to calculate histogram",false);
95  Optionpk<bool> kde_opt("kde","kde","Use Kernel density estimation when producing histogram. The standard deviation is estimated based on Silverman's rule of thumb",false);
96  Optionpk<short> verbose_opt("v", "verbose", "Verbose level", 0,2);
97 
98  bool doProcess;//stop process when program was invoked with help option (-h --help)
99  try{
100  doProcess=input_opt.retrieveOption(argc,argv);
101  fieldname_opt.retrieveOption(argc,argv);
102  layer_opt.retrieveOption(argc,argv);
103  nodata_opt.retrieveOption(argc,argv);
104  src_min_opt.retrieveOption(argc,argv);
105  src_max_opt.retrieveOption(argc,argv);
106  size_opt.retrieveOption(argc,argv);
107  minmax_opt.retrieveOption(argc,argv);
108  min_opt.retrieveOption(argc,argv);
109  max_opt.retrieveOption(argc,argv);
110  mean_opt.retrieveOption(argc,argv);
111  median_opt.retrieveOption(argc,argv);
112  stdev_opt.retrieveOption(argc,argv);
113  histogram_opt.retrieveOption(argc,argv);
114  nbin_opt.retrieveOption(argc,argv);
115  relative_opt.retrieveOption(argc,argv);
116  kde_opt.retrieveOption(argc,argv);
117  verbose_opt.retrieveOption(argc,argv);
118  }
119  catch(string predefinedString){
120  cout << predefinedString << endl;
121  exit(0);
122  }
123  if(!doProcess){
124  cout << endl;
125  cout << "Usage: pkstatogr -i input [-n attribute]*" << endl;
126  cout << endl;
127  cout << "short option -h shows basic options only, use long option --help to show all options" << endl;
128  exit(0);//help was invoked, stop processing
129  }
130 
131  ImgReaderOgr imgReader;
132  try{
133  imgReader.open(input_opt[0]);
134  }
135  catch(string errorstring){
136  cerr << errorstring << endl;
137  }
138 
139  ImgReaderOgr inputReader(input_opt[0]);
140  vector<double> theData;
142  //todo: implement ALL
143 
144  stat.setNoDataValues(nodata_opt);
145 
146  //support multiple layers
147  int nlayerRead=inputReader.getDataSource()->GetLayerCount();
148  if(verbose_opt[0])
149  cout << "number of layers: " << nlayerRead << endl;
150 
151  for(int ilayer=0;ilayer<nlayerRead;++ilayer){
152  OGRLayer *readLayer=inputReader.getLayer(ilayer);
153  string currentLayername=readLayer->GetName();
154  if(layer_opt.size())
155  if(find(layer_opt.begin(),layer_opt.end(),currentLayername)==layer_opt.end())
156  continue;
157  if(verbose_opt[0])
158  cout << "processing layer " << currentLayername << endl;
159  if(layer_opt.size())
160  cout << " --lname " << currentLayername;
161 
162  for(int ifield=0;ifield<fieldname_opt.size();++ifield){
163  if(verbose_opt[0])
164  cout << "field: " << ifield << endl;
165  theData.clear();
166  inputReader.readData(theData,OFTReal,fieldname_opt[ifield],ilayer,verbose_opt[0]);
167  vector<double> binData;
168  double minValue=0;
169  double maxValue=0;
170  stat.minmax(theData,theData.begin(),theData.end(),minValue,maxValue);
171  if(src_min_opt.size())
172  minValue=src_min_opt[0];
173  if(src_max_opt.size())
174  maxValue=src_max_opt[0];
175  unsigned int nbin=(nbin_opt.size())? nbin_opt[0]:0;
176 
177  if(histogram_opt[0]){
178  double sigma=0;
179  if(kde_opt[0]){
180  // if(kde_opt[0]>0)
181  // sigma=kde_opt[0];
182  // else
183  sigma=1.06*sqrt(stat.var(theData))*pow(theData.size(),-0.2);
184  }
185  if(nbin<1)
186  nbin=(maxValue-minValue+1);
187  try{
188  stat.distribution(theData,theData.begin(),theData.end(),binData,nbin,minValue,maxValue,sigma);
189  }
190  catch(string theError){
191  cerr << "Warning: all identical values in data" << endl;
192  exit(1);
193  }
194  }
195  // int nbin=(nbin_opt[0]>1)? nbin_opt[0] : 2;
196  cout << " --fname " << fieldname_opt[ifield];
197  try{
198  double theMean=0;
199  double theVar=0;
200  stat.meanVar(theData,theMean,theVar);
201  if(mean_opt[0])
202  cout << " --mean " << theMean;
203  if(stdev_opt[0])
204  cout << " --stdev " << sqrt(theVar);
205  if(minmax_opt[0]||min_opt[0]||max_opt[0]){
206  if(minmax_opt[0])
207  cout << " --min " << minValue << " --max " << maxValue << " ";
208  else{
209  if(min_opt[0])
210  cout << " --min " << minValue << " ";
211  if(max_opt[0])
212  cout << " --max " << maxValue << " ";
213  }
214  }
215  if(median_opt[0])
216  cout << " -median " << stat.median(theData);
217  if(size_opt[0])
218  cout << " -size " << theData.size();
219  cout << endl;
220  if(histogram_opt[0]){
221  for(int ibin=0;ibin<nbin;++ibin){
222  double binValue=0;
223  if(nbin==maxValue-minValue+1)
224  binValue=minValue+ibin;
225  else
226  binValue=minValue+static_cast<double>(maxValue-minValue)*(ibin+0.5)/nbin;
227  cout << binValue << " ";
228  if(relative_opt[0]||kde_opt[0])
229  cout << 100.0*static_cast<double>(binData[ibin])/theData.size() << endl;
230  else
231  cout << binData[ibin] << endl;
232  }
233  }
234  }
235  catch(string theError){
236  if(mean_opt[0])
237  cout << " --mean " << theData.back();
238  if(stdev_opt[0])
239  cout << " --stdev " << "0";
240  if(min_opt[0])
241  cout << " -min " << theData.back();
242  if(max_opt[0])
243  cout << " -max " << theData.back();
244  if(median_opt[0])
245  cout << " -median " << theData.back();
246  if(size_opt[0])
247  cout << " -size " << theData.size();
248  cout << endl;
249  cerr << "Warning: all identical values in data" << endl;
250  }
251  }
252  }
253  imgReader.close();
254 }
255 
STL namespace.