pktools  2.6.7 Processing Kernel for geospatial data
StatFactory.h
1 /**********************************************************************
2 StatFactory.h: class for statistical operations on vectors
3 Copyright (C) 2008-2013 Pieter Kempeneers
4
5 This file is part of pktools
6
7 pktools is free software: you can redistribute it and/or modify
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 pktools is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with pktools. If not, see <http://www.gnu.org/licenses/>.
19 ***********************************************************************/
20 #ifndef _STATFACTORY_H_
21 #define _STATFACTORY_H_
22
23 #include <iostream>
24 #include <vector>
25 #include <map>
26 #include <math.h>
27 #include <assert.h>
28 #include <string>
29 #include <sstream>
30 #include <fstream>
31 #include <algorithm>
32 #include <gsl/gsl_fit.h>
33 #include <gsl/gsl_errno.h>
34 #include <gsl/gsl_spline.h>
35 #include <gsl/gsl_rng.h>
36 #include <gsl/gsl_randist.h>
37 #include <gsl/gsl_cdf.h>
38 #include <gsl/gsl_statistics.h>
39
40 namespace statfactory
41 {
42
44
45 public:
46  enum INTERPOLATION_TYPE {undefined=0,linear=1,polynomial=2,cspline=3,cspline_periodic=4,akima=5,akima_periodic=6};
47  //todo: expand with other distributions as in http://www.gnu.org/software/gsl/manual/gsl-ref_toc.html#TOC320
48  enum DISTRIBUTION_TYPE {uniform=1,gaussian=2};
49
50  StatFactory(void){};
51  virtual ~StatFactory(void){};
52  INTERPOLATION_TYPE getInterpolationType(const std::string interpolationType){
53  std::map<std::string, INTERPOLATION_TYPE> m_interpMap;
54  initMap(m_interpMap);
55  return m_interpMap[interpolationType];
56  };
57  DISTRIBUTION_TYPE getDistributionType(const std::string distributionType){
58  std::map<std::string, DISTRIBUTION_TYPE> m_distMap;
59  initDist(m_distMap);
60  return m_distMap[distributionType];
61  };
62  static void allocAcc(gsl_interp_accel *&acc){
63  acc = gsl_interp_accel_alloc ();
64  };
65
66  static void getSpline(const std::string type, int size, gsl_spline *& spline){
67  std::map<std::string, INTERPOLATION_TYPE> m_interpMap;
68  initMap(m_interpMap);
69  switch(m_interpMap[type]){
70  case(polynomial):
71  spline=gsl_spline_alloc(gsl_interp_polynomial,size);
72  break;
73  case(cspline):
74  spline=gsl_spline_alloc(gsl_interp_cspline,size);
75  break;
76  case(cspline_periodic):
77  spline=gsl_spline_alloc(gsl_interp_cspline_periodic,size);
78  break;
79  case(akima):
80  spline=gsl_spline_alloc(gsl_interp_akima,size);
81  break;
82  case(akima_periodic):
83  spline=gsl_spline_alloc(gsl_interp_akima_periodic,size);
84  break;
85  case(linear):
86  default:
87  spline=gsl_spline_alloc(gsl_interp_linear,size);
88  break;
89  }
90  assert(spline);
91  };
92  static int initSpline(gsl_spline *spline, const double *x, const double *y, int size){
93  return gsl_spline_init (spline, x, y, size);
94  };
95  static double evalSpline(gsl_spline *spline, double x, gsl_interp_accel *acc){
96  return gsl_spline_eval (spline, x, acc);
97  };
98
99  static gsl_rng* getRandomGenerator(unsigned long int theSeed){
100  const gsl_rng_type * T;
101  gsl_rng * r;
102
103  // select random number generator
104  r = gsl_rng_alloc (gsl_rng_mt19937);
105  gsl_rng_set(r,theSeed);
106  return r;
107  }
108  void getNodataValues(std::vector<double>& nodatav) const{nodatav=m_noDataValues;};
109  bool isNoData(double value) const{
110  if(m_noDataValues.empty())
111  return false;
112  else
113  return find(m_noDataValues.begin(),m_noDataValues.end(),value)!=m_noDataValues.end();
114  };
115  unsigned int pushNodDataValue(double noDataValue){
116  if(find(m_noDataValues.begin(),m_noDataValues.end(),noDataValue)==m_noDataValues.end())
117  m_noDataValues.push_back(noDataValue);
118  return m_noDataValues.size();
119  };
120  unsigned int setNoDataValues(std::vector<double> vnodata){
121  m_noDataValues=vnodata;
122  return m_noDataValues.size();
123  };
124  double getRandomValue(const gsl_rng* r, const std::string type, double a=0, double b=1) const{
125  std::map<std::string, DISTRIBUTION_TYPE> m_distMap;
126  initDist(m_distMap);
127  double randValue=0;
128  switch(m_distMap[type]){
129  case(uniform):
130  randValue = a+gsl_rng_uniform(r);
131  break;
132  case(gaussian):
133  default:
134  randValue = a+gsl_ran_gaussian(r, b);
135  break;
136  }
137  return randValue;
138  };
139
140
141  template<class T> T mymin(const typename std::vector<T>& v) const;
142  template<class T> T mymax(const typename std::vector<T>& v) const;
143  template<class T> T mymin(const typename std::vector<T>& v, T minConstraint) const;
144  template<class T> T mymax(const typename std::vector<T>& v, T maxConstraint) const;
145  template<class T> T absmin(const typename std::vector<T>& v) const;
146  template<class T> T absmax(const typename std::vector<T>& v) const;
147
148 // template<class T> typename std::vector<T>::const_iterator mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
149  template<class T> typename std::vector<T>::const_iterator mymin(const typename std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
150  template<class T> typename std::vector<T>::iterator mymin(const typename std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end) const;
151  template<class T> typename std::vector<T>::const_iterator mymin(const typename std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T minConstraint) const;
152  template<class T> typename std::vector<T>::iterator mymin(const typename std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end, T minConstraint) const;
153  template<class T> typename std::vector<T>::const_iterator mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
154  template<class T> typename std::vector<T>::iterator mymax(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end) const;
155  template<class T> typename std::vector<T>::const_iterator mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T maxConstraint) const;
156  template<class T> typename std::vector<T>::iterator mymax(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end, T maxConstraint) const;
157  template<class T> typename std::vector<T>::const_iterator absmin(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
158  template<class T> typename std::vector<T>::const_iterator absmax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
159
160  template<class T> void minmax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T& theMin, T& theMax) const;
161  template<class T> T sum(const std::vector<T>& v) const;
162  template<class T> double mean(const std::vector<T>& v) const;
163  template<class T> void eraseNoData(std::vector<T>& v) const;
164  template<class T> unsigned int nvalid(const std::vector<T>& v) const;
165  template<class T> T median(const std::vector<T>& v) const;
166  template<class T> double var(const std::vector<T>& v) const;
167  template<class T> double moment(const std::vector<T>& v, int n) const;
168  template<class T> double cmoment(const std::vector<T>& v, int n) const;
169  template<class T> double skewness(const std::vector<T>& v) const;
170  template<class T> double kurtosis(const std::vector<T>& v) const;
171  template<class T> void meanVar(const std::vector<T>& v, double& m1, double& v1) const;
172  template<class T1, class T2> void scale2byte(const std::vector<T1>& input, std::vector<T2>& output, unsigned char lbound=0, unsigned char ubound=255) const;
173  template<class T> void distribution(const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<double>& output, int nbin, T &minimum, T &maximum, double sigma=0, const std::string &filename="") const;
174  template<class T> void distribution(const std::vector<T>& input, std::vector<double>& output, int nbin, double sigma=0, const std::string &filename="") const{distribution(input,input.begin(),input.end(),output,nbin,0,0,sigma,filename);};
175  template<class T> void distribution2d(const std::vector<T>& inputX, const std::vector<T>& inputY, std::vector< std::vector<double> >& output, int nbin, T& minX, T& maxX, T& minY, T& maxY, double sigma=0, const std::string& filename="") const;
176  template<class T> void cumulative (const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<int>& output, int nbin, T &minimum, T &maximum) const;
177  template<class T> void percentiles (const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<T>& output, int nbin, T &minimum, T &maximum, const std::string &filename="") const;
178  template<class T> T percentile(const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, double percent, T minimum=0, T maximum=0) const;
179  template<class T> void signature(const std::vector<T>& input, double& k, double& alpha, double& beta, double e) const;
180  void signature(double m1, double m2, double& k, double& alpha, double& beta, double e) const;
181  template<class T> void normalize(const std::vector<T>& input, std::vector<double>& output) const;
182  template<class T> void normalize_pct(std::vector<T>& input) const;
183  template<class T> double rmse(const std::vector<T>& x, const std::vector<T>& y) const;
184  template<class T> double nrmse(const std::vector<T>& x, const std::vector<T>& y) const;
185  template<class T> double cvrmse(const std::vector<T>& x, const std::vector<T>& y) const;
186  template<class T> double correlation(const std::vector<T>& x, const std::vector<T>& y, int delay=0) const;
187  // template<class T> double gsl_correlation(const std::vector<T>& x, const std::vector<T>& y) const;
188  template<class T> double gsl_covariance(const std::vector<T>& x, const std::vector<T>& y) const;
189  template<class T> double cross_correlation(const std::vector<T>& x, const std::vector<T>& y, int maxdelay, std::vector<T>& z) const;
190  template<class T> double linear_regression(const std::vector<T>& x, const std::vector<T>& y, double &c0, double &c1) const;
191  template<class T> double linear_regression_err(const std::vector<T>& x, const std::vector<T>& y, double &c0, double &c1) const;
192  template<class T> void interpolateNoData(const std::vector<double>& wavelengthIn, const std::vector<T>& input, const std::string& type, std::vector<T>& output, bool verbose=false) const;
193  template<class T> void interpolateUp(const std::vector<double>& wavelengthIn, const std::vector<T>& input, const std::vector<double>& wavelengthOut, const std::string& type, std::vector<T>& output, bool verbose=false) const;
194  template<class T> void interpolateUp(const std::vector<double>& wavelengthIn, const std::vector< std::vector<T> >& input, const std::vector<double>& wavelengthOut, const std::string& type, std::vector< std::vector<T> >& output, bool verbose=false) const;
195  // template<class T> void interpolateUp(const std::vector< std::vector<T> >& input, std::vector< std::vector<T> >& output, double start, double end, double step, const gsl_interp_type* type);
196  // template<class T> void interpolateUp(const std::vector< std::vector<T> >& input, const std::vector<double>& wavelengthIn, std::vector< std::vector<T> >& output, std::vector<double>& wavelengthOut, double start, double end, double step, const gsl_interp_type* type);
197  template<class T> void interpolateUp(const std::vector<T>& input, std::vector<T>& output, int nbin) const;
198  template<class T> void nearUp(const std::vector<T>& input, std::vector<T>& output) const;
199  template<class T> void interpolateUp(double* input, int dim, std::vector<T>& output, int nbin);
200  template<class T> void interpolateDown(const std::vector<T>& input, std::vector<T>& output, int nbin) const;
201  template<class T> void interpolateDown(double* input, int dim, std::vector<T>& output, int nbin);
202
203 private:
204  static void initMap(std::map<std::string, INTERPOLATION_TYPE>& m_interpMap){
205  //initialize selMap
206  m_interpMap["linear"]=linear;
207  m_interpMap["polynomial"]=polynomial;
208  m_interpMap["cspline"]=cspline;
209  m_interpMap["cspline_periodic"]=cspline_periodic;
210  m_interpMap["akima"]=akima;
211  m_interpMap["akima_periodic"]=akima_periodic;
212  }
213  static void initDist(std::map<std::string, DISTRIBUTION_TYPE>& m_distMap){
214  //initialize distMap
215  m_distMap["gaussian"]=gaussian;
216  m_distMap["uniform"]=uniform;
217  }
218  std::vector<double> m_noDataValues;
219 };
220
221
222 template<class T> inline typename std::vector<T>::const_iterator StatFactory::mymin(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const
223 {
224  bool isValid=false;
225  typename std::vector<T>::const_iterator tmpIt;
226  for(typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
227  if(!isNoData(*it)){
228  if(isValid){
229  if(*tmpIt>*it)
230  tmpIt=it;
231  }
232  else{
233  tmpIt=it;
234  isValid=true;
235  }
236  }
237  }
238  if(isValid)
239  return tmpIt;
240  else if(m_noDataValues.size())
241  return m_noDataValues[0];
242  else{
243  std::string errorString="Error: no valid data found";
244  throw(errorString);
245  }
246 }
247
248 template<class T> inline typename std::vector<T>::iterator StatFactory::mymin(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end) const
249 {
250  bool isValid=false;
251  typename std::vector<T>::iterator tmpIt;
252  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
253  if(!isNoData(*it)){
254  if(isValid){
255  if(*tmpIt>*it)
256  tmpIt=it;
257  }
258  else{
259  tmpIt=it;
260  isValid=true;
261  }
262  }
263  }
264  if(isValid)
265  return tmpIt;
266  else if(m_noDataValues.size())
267  return m_noDataValues[0];
268  else{
269  std::string errorString="Error: no valid data found";
270  throw(errorString);
271  }
272 }
273
274 template<class T> inline typename std::vector<T>::const_iterator StatFactory::mymin(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T minConstraint) const
275 {
276  bool isValid=false;
277  typename std::vector<T>::const_iterator tmpIt;
278  T minValue=minConstraint;
279  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
280  if(isNoData(*it))
281  continue;
282  if(isValid){
283  if((minConstraint<=*it)&&(*it<minValue)){
284  tmpIt=it;
285  minValue=*it;
286  }
287  }
288  else{
289  if(*it<minValue)
290  continue;
291  tmpIt=it;
292  minValue=*it;
293  isValid=true;
294  }
295  }
296  if(isValid)
297  return tmpIt;
298  else if(m_noDataValues.size())
299  return m_noDataValues[0];
300  else{
301  std::string errorString="Error: no valid data found";
302  throw(errorString);
303  }
304 }
305
306 template<class T> inline typename std::vector<T>::iterator StatFactory::mymin(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end, T minConstraint) const
307 {
308  bool isValid=false;
309  typename std::vector<T>::iterator tmpIt;
310  T minValue=minConstraint;
311  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
312  if(isNoData(*it))
313  continue;
314  if(isValid){
315  if((minConstraint<=*it)&&(*it<minValue)){
316  tmpIt=it;
317  minValue=*it;
318  }
319  }
320  else{
321  if(*it<minConstraint)
322  continue;
323  tmpIt=it;
324  minValue=*it;
325  isValid=true;
326  }
327  }
328  if(isValid)
329  return tmpIt;
330  else if(m_noDataValues.size())
331  return m_noDataValues[0];
332  else{
333  std::string errorString="Error: no valid data found";
334  throw(errorString);
335  }
336 }
337
338 template<class T> inline typename std::vector<T>::const_iterator StatFactory::mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const
339 {
340  bool isValid=false;
341  typename std::vector<T>::const_iterator tmpIt;
342  for (typename std::vector<T>::iterator it = begin; it!=end; ++it){
343  if(isNoData(*it))
344  continue;
345  if(isValid){
346  if(*tmpIt<*it)
347  tmpIt=it;
348  }
349  else{
350  tmpIt=it;
351  isValid=true;
352  }
353  }
354  if(isValid)
355  return tmpIt;
356  else if(m_noDataValues.size())
357  return m_noDataValues[0];
358  else{
359  std::string errorString="Error: no valid data found";
360  throw(errorString);
361  }
362 }
363
364 template<class T> inline typename std::vector<T>::iterator StatFactory::mymax(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end) const
365 {
366  bool isValid=false;
367  typename std::vector<T>::iterator tmpIt;
368  for (typename std::vector<T>::iterator it = begin; it!=end; ++it){
369  if(isNoData(*it))
370  continue;
371  if(isValid){
372  if(*tmpIt<*it)
373  tmpIt=it;
374  }
375  else{
376  tmpIt=it;
377  isValid=true;
378  }
379  }
380  if(isValid)
381  return tmpIt;
382  else
383  return end;
384 }
385
386 template<class T> inline typename std::vector<T>::const_iterator StatFactory::mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T maxConstraint) const
387 {
388  bool isValid=false;
389  typename std::vector<T>::const_iterator tmpIt;
390  T maxValue=maxConstraint;
391  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
392  if(isNoData(*it))
393  continue;
394  if(isValid){
395  if((maxConstraint>=*it)&&(*it>maxValue)){
396  tmpIt=it;
397  maxValue=*it;
398  }
399  }
400  else{
401  if(*it>maxConstraint)
402  continue;
403  tmpIt=it;
404  maxValue=*it;
405  isValid=true;
406  }
407  }
408  if(isValid)
409  return tmpIt;
410  else
411  return end;
412 }
413
414 template<class T> inline typename std::vector<T>::iterator StatFactory::mymax(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end, T maxConstraint) const
415 {
416  bool isValid=false;
417  typename std::vector<T>::iterator tmpIt=v.end();
418  T maxValue=maxConstraint;
419  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
420  if(isNoData(*it))
421  continue;
422  if(isValid){
423  if((maxConstraint>=*it)&&(*it>maxValue)){
424  tmpIt=it;
425  maxValue=*it;
426  }
427  }
428  else{
429  if(*it>maxValue)
430  continue;
431  tmpIt=it;
432  maxValue=*it;
433  isValid=true;
434  }
435  }
436  if(isValid)
437  return tmpIt;
438  else
439  return end;
440 }
441
442 template<class T> inline T StatFactory::mymin(const std::vector<T>& v) const
443 {
444  bool isValid=false;
445  if(v.empty()){
446  std::string errorString="Error: vector is empty";
447  throw(errorString);
448  }
449  T minValue;
450  for (typename std::vector<T>::const_iterator it = v.begin(); it!=v.end(); ++it){
451  if(isNoData(*it))
452  continue;
453  if(isValid){
454  if(minValue>*it)
455  minValue=*it;
456  }
457  else{
458  minValue=*it;
459  isValid=true;
460  }
461  }
462  if(isValid)
463  return minValue;
464  else if(m_noDataValues.size())
465  return m_noDataValues[0];
466  else{
467  std::string errorString="Error: no valid data found";
468  throw(errorString);
469  }
470 }
471
472  template<class T> inline T StatFactory::mymin(const std::vector<T>& v, T minConstraint) const
473 {
474  bool isValid=false;
475  T minValue=minConstraint;
476  for (typename std::vector<T>::const_iterator it = v.begin(); it!=v.end(); ++it){
477  if(isNoData(*it))
478  continue;
479  if(isValid){
480  if((minConstraint<=*it)&&(*it<minValue))
481  minValue=*it;
482  }
483  else{
484  if(*it<minValue)
485  continue;
486  minValue=*it;
487  isValid=true;
488  }
489  }
490  if(isValid)
491  return minValue;
492  else if(m_noDataValues.size())
493  return m_noDataValues[0];
494  else{
495  std::string errorString="Error: no valid data found";
496  throw(errorString);
497  }
498 }
499
500 template<class T> inline T StatFactory::mymax(const std::vector<T>& v) const
501 {
502  bool isValid=false;
503  if(v.empty()){
504  std::string errorString="Error: vector is empty";
505  throw(errorString);
506  }
507  T maxValue;
508  for (typename std::vector<T>::const_iterator it = v.begin(); it!=v.end(); ++it){
509  if(isNoData(*it))
510  continue;
511  if(isValid){
512  if(maxValue<*it)
513  maxValue=*it;
514  }
515  else{
516  maxValue=*it;
517  isValid=true;
518  }
519  }
520  if(isValid)
521  return maxValue;
522  else if(m_noDataValues.size())
523  return m_noDataValues[0];
524  else{
525  std::string errorString="Error: no valid data found";
526  throw(errorString);
527  }
528 }
529
530 template<class T> inline T StatFactory::mymax(const std::vector<T>& v, T maxConstraint) const
531 {
532  bool isValid=false;
533  T maxValue=maxConstraint;
534  for (typename std::vector<T>::const_iterator it = v.begin(); it!=v.end(); ++it){
535  if(isNoData(*it))
536  continue;
537  if(isValid){
538  if((*it<=maxConstraint)&&(*it>maxValue))
539  maxValue=*it;
540  }
541  else{
542  if(*it>maxValue)
543  continue;
544  maxValue=*it;
545  isValid=true;
546  }
547  }
548  if(isValid)
549  return maxValue;
550  else if(m_noDataValues.size())
551  return m_noDataValues[0];
552  else{
553  std::string errorString="Error: no valid data found";
554  throw(errorString);
555  }
556 }
557
558 template<class T> inline typename std::vector<T>::const_iterator StatFactory::absmin(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const
559 {
560  bool isValid=false;
561  if(v.empty()){
562  std::string errorString="Error: vector is empty";
563  throw(errorString);
564  }
565  typename std::vector<T>::const_iterator tmpIt;
566  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
567  if(isNoData(*it))
568  continue;
569  if(isValid){
570  if(abs(*tmpIt)>abs(*it))
571  tmpIt=it;
572  }
573  else{
574  tmpIt=it;
575  isValid=true;
576  }
577  }
578  if(isValid)
579  return tmpIt;
580  else
581  return end;
582 }
583
584 template<class T> inline T StatFactory::absmin(const std::vector<T>& v) const
585 {
586  typename std::vector<T>::const_iterator tmpIt;
587  tmpIt=absmin(v, v.begin(), v.end());
588  if(tmpIt!=v.end())
589  return *tmpIt;
590  else if(m_noDataValues.size())
591  return m_noDataValues[0];
592  else{
593  std::string errorString="Error: no valid data found";
594  throw(errorString);
595  }
596 }
597
598  template<class T> inline typename std::vector<T>::const_iterator StatFactory::absmax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const
599 {
600  bool isValid=false;
601  typename std::vector<T>::const_iterator tmpIt;
602  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
603  if(isNoData(*it))
604  continue;
605  if(isValid){
606  if(abs(*tmpIt)<abs(*it))
607  tmpIt=it;
608  }
609  else{
610  tmpIt=it;
611  isValid=true;
612  }
613  }
614  if(isValid)
615  return tmpIt;
616  else
617  return end;
618 }
619
620 template<class T> inline T StatFactory::absmax(const std::vector<T>& v) const
621 {
622  typename std::vector<T>::const_iterator tmpIt;
623  tmpIt=absmax(v, v.begin(), v.end());
624  if(tmpIt!=v.end())
625  return *tmpIt;
626  else if(m_noDataValues.size())
627  return m_noDataValues[0];
628  else{
629  std::string errorString="Error: no valid data found";
630  throw(errorString);
631  }
632 }
633
634 template<class T> inline void StatFactory::minmax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T& theMin, T& theMax) const
635 {
636  bool isConstraint=(theMax>theMin);
637  double minConstraint=theMin;
638  double maxConstraint=theMax;
639  bool isValid=false;
640  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
641  if(isNoData(*it))
642  continue;
643  if(isValid){
644  if(isConstraint){
645  if(*it<minConstraint)
646  continue;
647  if(*it>maxConstraint)
648  continue;
649  }
650  if(*it<theMin)
651  theMin=*it;
652  if(*it>theMax)
653  theMax=*it;
654  }
655  else{
656  if(isConstraint){
657  if(*it<minConstraint)
658  continue;
659  if(*it>maxConstraint)
660  continue;
661  }
662  theMin=*it;
663  theMax=*it;
664  isValid=true;
665  }
666  }
667  if(!isValid){
668  if(m_noDataValues.size()){
669  theMin=m_noDataValues[0];
670  theMax=m_noDataValues[0];
671  }
672  else{
673  std::string errorString="Error: no valid data found";
674  throw(errorString);
675  }
676  }
677 }
678
679 template<class T> inline T StatFactory::sum(const std::vector<T>& v) const
680 {
681  bool isValid=false;
682  typename std::vector<T>::const_iterator it;
683  T tmpSum=0;
684  for (it = v.begin(); it!= v.end(); ++it){
685  if(isNoData(*it))
686  continue;
687  isValid=true;
688  tmpSum+=*it;
689  }
690  if(isValid)
691  return tmpSum;
692  else if(m_noDataValues.size())
693  return m_noDataValues[0];
694  else{
695  std::string errorString="Error: no valid data found";
696  throw(errorString);
697  }
698 }
699
700 template<class T> inline double StatFactory::mean(const std::vector<T>& v) const
701 {
702  typename std::vector<T>::const_iterator it;
703  T tmpSum=0;
704  unsigned int validSize=0;
705  for (it = v.begin(); it!= v.end(); ++it){
706  if(isNoData(*it))
707  continue;
708  ++validSize;
709  tmpSum+=*it;
710  }
711  if(validSize)
712  return static_cast<double>(tmpSum)/validSize;
713  else if(m_noDataValues.size())
714  return m_noDataValues[0];
715  else{
716  std::string errorString="Error: no valid data found";
717  throw(errorString);
718  }
719 }
720
721 template<class T> inline void StatFactory::eraseNoData(std::vector<T>& v) const
722 {
723  if(m_noDataValues.size()){
724  typename std::vector<T>::iterator it=v.begin();
725  while(it!=v.end()){
726  if(isNoData(*it))
727  v.erase(it);
728  else
729  ++it;
730  }
731  }
732 }
733
734  template<class T> unsigned int StatFactory::nvalid(const std::vector<T>& v) const{
735  std::vector<T> tmpV=v;
736  eraseNoData(tmpV);
737  return(tmpV.size());
738  }
739
740 template<class T> T StatFactory::median(const std::vector<T>& v) const
741 {
742  std::vector<T> tmpV=v;
743  eraseNoData(tmpV);
744  if(tmpV.size()){
745  sort(tmpV.begin(),tmpV.end());
746  if(tmpV.size()%2)
747  return tmpV[tmpV.size()/2];
748  else
749  return 0.5*(tmpV[tmpV.size()/2-1]+tmpV[tmpV.size()/2]);
750  }
751  else if(m_noDataValues.size())
752  return m_noDataValues[0];
753  else{
754  std::string errorString="Error: no valid data found";
755  throw(errorString);
756  }
757 }
758
759 template<class T> double StatFactory::var(const std::vector<T>& v) const
760 {
761  typename std::vector<T>::const_iterator it;
762  unsigned int validSize=0;
763  double m1=0;
764  double m2=0;
765  for (it = v.begin(); it!= v.end(); ++it){
766  if(isNoData(*it))
767  continue;
768  m1+=*it;
769  m2+=(*it)*(*it);
770  ++validSize;
771  }
772  if(validSize){
773  m2/=validSize;
774  m1/=validSize;
775  return m2-m1*m1;
776  }
777  else if(m_noDataValues.size())
778  return m_noDataValues[0];
779  else{
780  std::string errorString="Error: no valid data found";
781  throw(errorString);
782  }
783 }
784
785 template<class T> double StatFactory::moment(const std::vector<T>& v, int n) const
786 {
787  unsigned int validSize=0;
788  typename std::vector<T>::const_iterator it;
789  double m=0;
790 // double m1=mean(v);
791  for(it = v.begin(); it!= v.end(); ++it){
792  if(isNoData(*it))
793  continue;
794  m+=pow((*it),n);
795  ++validSize;
796  }
797  if(validSize)
798  return m/validSize;
799  else if(m_noDataValues.size())
800  return m_noDataValues[0];
801  else{
802  std::string errorString="Error: no valid data found";
803  throw(errorString);
804  }
805 }
806
807  //central moment
808 template<class T> double StatFactory::cmoment(const std::vector<T>& v, int n) const
809 {
810  unsigned int validSize=0;
811  typename std::vector<T>::const_iterator it;
812  double m=0;
813  double m1=mean(v);
814  for(it = v.begin(); it!= v.end(); ++it){
815  if(isNoData(*it))
816  continue;
817  m+=pow((*it-m1),n);
818  ++validSize;
819  }
820  if(validSize)
821  return m/validSize;
822  else if(m_noDataValues.size())
823  return m_noDataValues[0];
824  else{
825  std::string errorString="Error: no valid data found";
826  throw(errorString);
827  }
828 }
829
830 template<class T> double StatFactory::skewness(const std::vector<T>& v) const
831 {
832  //todo: what if nodata value?
833  return cmoment(v,3)/pow(var(v),1.5);
834 }
835
836 template<class T> double StatFactory::kurtosis(const std::vector<T>& v) const
837 {
838  //todo: what if nodata value?
839  double m2=cmoment(v,2);
840  double m4=cmoment(v,4);
841  return m4/m2/m2-3.0;
842 }
843
844 template<class T> void StatFactory::meanVar(const std::vector<T>& v, double& m1, double& v1) const
845 {
846  typename std::vector<T>::const_iterator it;
847  unsigned int validSize=0;
848  m1=0;
849  v1=0;
850  double m2=0;
851  for (it = v.begin(); it!= v.end(); ++it){
852  if(isNoData(*it))
853  continue;
854  m1+=*it;
855  m2+=(*it)*(*it);
856  ++validSize;
857  }
858  if(validSize){
859  m2/=validSize;
860  m1/=validSize;
861  v1=m2-m1*m1;
862  }
863  else if(m_noDataValues.size()){
864  m1=m_noDataValues[0];
865  v1=m_noDataValues[0];
866  }
867  else{
868  std::string errorString="Error: no valid data found";
869  throw(errorString);
870  }
871 }
872
873 template<class T1, class T2> void StatFactory::scale2byte(const std::vector<T1>& input, std::vector<T2>& output, unsigned char lbound, unsigned char ubound) const
874 {
875  output.resize(input.size());
876  T1 minimum=mymin(input);
877  T1 maximum=mymax(input);
878  if(minimum>=maximum){
879  std::string errorString="Error: no valid data found";
880  throw(errorString);
881  }
882  double scale=(ubound-lbound)/(maximum-minimum);
883  //todo: what if nodata value?
884  for (int i=0;i<input.size();++i){
885  output[i]=scale*(input[i]-(minimum))+lbound;
886  }
887 }
888
889 template<class T> void StatFactory::distribution(const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<double>& output, int nbin, T &minimum, T &maximum, double sigma, const std::string &filename) const
890 {
891  double minValue=0;
892  double maxValue=0;
893  minmax(input,begin,end,minimum,maximum);
894  if(minimum<maximum&&minimum>minValue)
895  minValue=minimum;
896  if(minimum<maximum&&maximum<maxValue)
897  maxValue=maximum;
898
899  //todo: check...
900  minimum=minValue;
901  maximum=maxValue;
902
903  if(maximum<=minimum){
904  std::ostringstream s;
905  s<<"Error: could not calculate distribution (min>=max)";
906  throw(s.str());
907  }
908  if(!nbin){
909  std::string errorString="Error: nbin not defined";
910  throw(errorString);
911  }
912  if(!input.size()){
913  std::string errorString="Error: no valid data found";
914  throw(errorString);
915  }
916  if(output.size()!=nbin){
917  output.resize(nbin);
918  for(int i=0;i<nbin;output[i++]=0);
919  }
920  bool isValid=false;
921  typename std::vector<T>::const_iterator it;
922  for(it=begin;it!=end;++it){
923  if(*it<minimum)
924  continue;
925  if(*it>maximum)
926  continue;
927  if(isNoData(*it))
928  continue;
929  isValid=true;
930  if(sigma>0){
931  // minimum-=2*sigma;
932  // maximum+=2*sigma;
933  //create kde for Gaussian basis function
934  //todo: speed up by calculating first and last bin with non-zero contriubtion...
935  //todo: calculate real surface below pdf by using gsl_cdf_gaussian_P(x-mean+binsize,sigma)-gsl_cdf_gaussian_P(x-mean,sigma)
936  for(int ibin=0;ibin<nbin;++ibin){
937  double icenter=minimum+static_cast<double>(maximum-minimum)*(ibin+0.5)/nbin;
938  double thePdf=gsl_ran_gaussian_pdf(*it-icenter, sigma);
939  output[ibin]+=thePdf;
940  }
941  }
942  else{
943  int theBin=0;
944  if(*it==maximum)
945  theBin=nbin-1;
946  else if(*it>minimum && *it<maximum)
947  theBin=static_cast<int>(static_cast<double>((nbin-1)*(*it)-minimum)/(maximum-minimum));
948  ++output[theBin];
949  // if(*it==maximum)
950  // ++output[nbin-1];
951  // else if(*it>=minimum && *it<maximum)
952  // ++output[static_cast<int>(static_cast<double>((*it)-minimum)/(maximum-minimum)*nbin)];
953  }
954  }
955  if(!isValid){
956  std::string errorString="Error: no valid data found";
957  throw(errorString);
958  }
959  else if(!filename.empty()){
960  std::ofstream outputfile;
961  outputfile.open(filename.c_str());
962  if(!outputfile){
963  std::ostringstream s;
964  s<<"Error opening distribution file , " << filename;
965  throw(s.str());
966  }
967  for(int ibin=0;ibin<nbin;++ibin)
968  outputfile << minimum+static_cast<double>(maximum-minimum)*(ibin+0.5)/nbin << " " << static_cast<double>(output[ibin])/input.size() << std::endl;
969  outputfile.close();
970  }
971 }
972
973 template<class T> void StatFactory::distribution2d(const std::vector<T>& inputX, const std::vector<T>& inputY, std::vector< std::vector<double> >& output, int nbin, T& minX, T& maxX, T& minY, T& maxY, double sigma, const std::string& filename) const
974 {
975  if(inputX.empty()){
976  std::ostringstream s;
977  s<<"Error: inputX is empty";
978  throw(s.str());
979  }
980  if(inputX.size()!=inputY.size()){
981  std::ostringstream s;
982  s<<"Error: inputX is empty";
983  throw(s.str());
984  }
985  unsigned long int npoint=inputX.size();
986  if(maxX<=minX)
987  minmax(inputX,inputX.begin(),inputX.end(),minX,maxX);
988  if(maxX<=minX){
989  std::ostringstream s;
990  s<<"Error: could not calculate distribution (minX>=maxX)";
991  throw(s.str());
992  }
993  if(maxY<=minY)
994  minmax(inputY,inputY.begin(),inputY.end(),minY,maxY);
995  if(maxY<=minY){
996  std::ostringstream s;
997  s<<"Error: could not calculate distribution (minY>=maxY)";
998  throw(s.str());
999  }
1000  if(nbin<=1){
1001  std::ostringstream s;
1002  s<<"Error: nbin must be larger than 1";
1003  throw(s.str());
1004  }
1005  output.resize(nbin);
1006  for(int i=0;i<nbin;++i){
1007  output[i].resize(nbin);
1008  for(int j=0;j<nbin;++j)
1009  output[i][j]=0;
1010  }
1011  int binX=0;
1012  int binY=0;
1013  for(unsigned long int ipoint=0;ipoint<npoint;++ipoint){
1014  if(inputX[ipoint]==maxX)
1015  binX=nbin-1;
1016  else
1017  binX=static_cast<int>(static_cast<double>(inputX[ipoint]-minX)/(maxX-minX)*nbin);
1018  if(inputY[ipoint]==maxY)
1019  binY=nbin-1;
1020  else
1021  binY=static_cast<int>(static_cast<double>(inputY[ipoint]-minY)/(maxY-minY)*nbin);
1022  if(binX<0){
1023  std::ostringstream s;
1024  s<<"Error: binX is smaller than 0";
1025  throw(s.str());
1026  }
1027  if(output.size()<=binX){
1028  std::ostringstream s;
1029  s<<"Error: output size must be larger than binX";
1030  throw(s.str());
1031  }
1032  if(binY<0){
1033  std::ostringstream s;
1034  s<<"Error: binY is smaller than 0";
1035  throw(s.str());
1036  }
1037  if(output.size()<=binY){
1038  std::ostringstream s;
1039  s<<"Error: output size must be larger than binY";
1040  throw(s.str());
1041  }
1042  if(sigma>0){
1043  // minX-=2*sigma;
1044  // maxX+=2*sigma;
1045  // minY-=2*sigma;
1046  // maxY+=2*sigma;
1047  //create kde for Gaussian basis function
1048  //todo: speed up by calculating first and last bin with non-zero contriubtion...
1049  for(int ibinX=0;ibinX<nbin;++ibinX){
1050  double centerX=minX+static_cast<double>(maxX-minX)*ibinX/nbin;
1051  double pdfX=gsl_ran_gaussian_pdf(inputX[ipoint]-centerX, sigma);
1052  for(int ibinY=0;ibinY<nbin;++ibinY){
1053  //calculate \integral_ibinX^(ibinX+1)
1054  double centerY=minY+static_cast<double>(maxY-minY)*ibinY/nbin;
1055  double pdfY=gsl_ran_gaussian_pdf(inputY[ipoint]-centerY, sigma);
1056  output[ibinX][binY]+=pdfX*pdfY;
1057  }
1058  }
1059  }
1060  else
1061  ++output[binX][binY];
1062  }
1063  if(!filename.empty()){
1064  std::ofstream outputfile;
1065  outputfile.open(filename.c_str());
1066  if(!outputfile){
1067  std::ostringstream s;
1068  s<<"Error opening distribution file , " << filename;
1069  throw(s.str());
1070  }
1071  for(int binX=0;binX<nbin;++binX){
1072  outputfile << std::endl;
1073  for(int binY=0;binY<nbin;++binY){
1074  double binValueX=0;
1075  if(nbin==maxX-minX+1)
1076  binValueX=minX+binX;
1077  else
1078  binValueX=minX+static_cast<double>(maxX-minX)*(binX+0.5)/nbin;
1079  double binValueY=0;
1080  if(nbin==maxY-minY+1)
1081  binValueY=minY+binY;
1082  else
1083  binValueY=minY+static_cast<double>(maxY-minY)*(binY+0.5)/nbin;
1084  double value=0;
1085  value=static_cast<double>(output[binX][binY])/npoint;
1086  outputfile << binValueX << " " << binValueY << " " << value << std::endl;
1087  /* double value=static_cast<double>(output[binX][binY])/npoint; */
1088  /* outputfile << (maxX-minX)*bin/(nbin-1)+minX << " " << (maxY-minY)*bin/(nbin-1)+minY << " " << value << std::endl; */
1089  }
1090  }
1091  outputfile.close();
1092  }
1093 }
1094
1095 //todo: what with nodata values?
1096 template<class T> void StatFactory::percentiles (const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<T>& output, int nbin, T &minimum, T &maximum, const std::string &filename) const
1097 {
1098  if(maximum<=minimum)
1099  minmax(input,begin,end,minimum,maximum);
1100  if(maximum<=minimum){
1101  std::ostringstream s;
1102  s<<"Error: maximum must be at least minimum";
1103  throw(s.str());
1104  }
1105  if(nbin<=1){
1106  std::ostringstream s;
1107  s<<"Error: nbin must be larger than 1";
1108  throw(s.str());
1109  }
1110  if(input.empty()){
1111  std::ostringstream s;
1112  s<<"Error: input is empty";
1113  throw(s.str());
1114  }
1115  output.resize(nbin);
1116  std::vector<T> inputSort;
1117  inputSort.assign(begin,end);
1118  typename std::vector<T>::iterator vit=inputSort.begin();
1119  while(vit!=inputSort.end()){
1120  if(*vit<minimum||*vit>maximum)
1121  inputSort.erase(vit);
1122  else
1123  ++vit;
1124  }
1125  eraseNoData(inputSort);
1126  std::sort(inputSort.begin(),inputSort.end());
1127  vit=inputSort.begin();
1128  std::vector<T> inputBin;
1129  for(int ibin=0;ibin<nbin;++ibin){
1130  inputBin.clear();
1131  while(inputBin.size()<inputSort.size()/nbin&&vit!=inputSort.end()){
1132  inputBin.push_back(*vit);
1133  ++vit;
1134  }
1135  if(inputBin.size()){
1136  output[ibin]=mymax(inputBin);
1137  }
1138  }
1139  if(!filename.empty()){
1140  std::ofstream outputfile;
1141  outputfile.open(filename.c_str());
1142  if(!outputfile){
1143  std::ostringstream s;
1144  s<<"error opening distribution file , " << filename;
1145  throw(s.str());
1146  }
1147  for(int ibin=0;ibin<nbin;++ibin)
1148  outputfile << ibin*100.0/nbin << " " << static_cast<double>(output[ibin])/input.size() << std::endl;
1149  outputfile.close();
1150  }
1151 }
1152
1153 //todo: what with nodata values?
1154 template<class T> T StatFactory::percentile(const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, double percent, T minimum, T maximum) const
1155 {
1156  if(input.empty()){
1157  std::ostringstream s;
1158  s<<"Error: input is empty";
1159  throw(s.str());
1160  }
1161  std::vector<double> inputSort;
1162  inputSort.assign(begin,end);
1163  typename std::vector<double>::iterator vit=inputSort.begin();
1164  while(vit!=inputSort.end()){
1165  if(maximum>minimum){
1166  if(*vit<minimum||*vit>maximum)
1167  inputSort.erase(vit);
1168  }
1169  else
1170  ++vit;
1171  }
1172  eraseNoData(inputSort);
1173  std::sort(inputSort.begin(),inputSort.end());
1174  return gsl_stats_quantile_from_sorted_data(&(inputSort[0]),1,inputSort.size(),percent/100.0);
1175 }
1176
1177 template<class T> void StatFactory::signature(const std::vector<T>& input, double&k, double& alpha, double& beta, double e) const
1178 {
1179  double m1=moment(input,1);
1180  double m2=moment(input,2);
1181  signature(m1,m2,k,alpha,beta,e);
1182 }
1183
1184 //todo: what with nodata values?
1185 template<class T> void StatFactory::normalize(const std::vector<T>& input, std::vector<double>& output) const{
1186  double total=sum(input);
1187  if(total){
1188  output.resize(input.size());
1189  for(int index=0;index<input.size();++index)
1190  output[index]=input[index]/total;
1191  }
1192  else
1193  output=input;
1194 }
1195
1196 //todo: what with nodata values?
1197 template<class T> void StatFactory::normalize_pct(std::vector<T>& input) const{
1198  double total=sum(input);
1199  if(total){
1200  typename std::vector<T>::iterator it;
1201  for(it=input.begin();it!=input.end();++it)
1202  *it=100.0*(*it)/total;
1203  }
1204 }
1205
1206 template<class T> double StatFactory::rmse(const std::vector<T>& x, const std::vector<T>& y) const{
1207  if(x.size()!=y.size()){
1208  std::ostringstream s;
1209  s<<"Error: x and y not equal in size";
1210  throw(s.str());
1211  }
1212  if(x.empty()){
1213  std::ostringstream s;
1214  s<<"Error: x is empty";
1215  throw(s.str());
1216  }
1217  double mse=0;
1218  for(int isample=0;isample<x.size();++isample){
1219  if(isNoData(x[isample])||isNoData(y[isample]))
1220  continue;
1221  double e=x[isample]-y[isample];
1222  mse+=e*e/x.size();
1223  }
1224  return sqrt(mse);
1225 }
1226
1227 //normalized root mean square error
1228 template<class T> double StatFactory::nrmse(const std::vector<T>& x, const std::vector<T>& y) const{
1229  if(x.size()!=y.size()){
1230  std::ostringstream s;
1231  s<<"Error: x and y not equal in size";
1232  throw(s.str());
1233  }
1234  if(x.empty()){
1235  std::ostringstream s;
1236  s<<"Error: x is empty";
1237  throw(s.str());
1238  }
1239  std::vector<T> tmpX=x;
1240  eraseNoData(tmpX);
1241  std::vector<T> tmpY=y;
1242  eraseNoData(tmpY);
1243  double maxY=mymax(y);
1244  double minY=mymin(y);
1245  double rangeY=maxY-minY;
1246  double mse=0;
1247  for(int isample=0;isample<x.size();++isample){
1248  double e=x[isample]-y[isample];
1249  mse+=e*e/x.size();
1250  }
1251  return sqrt(mse)/rangeY;
1252 }
1253
1254 // coefficient of variation root mean square error
1255 template<class T> double StatFactory::cvrmse(const std::vector<T>& x, const std::vector<T>& y) const{
1256  if(x.size()!=y.size()){
1257  std::ostringstream s;
1258  s<<"Error: x and y not equal in size";
1259  throw(s.str());
1260  }
1261  if(x.empty()){
1262  std::ostringstream s;
1263  s<<"Error: x is empty";
1264  throw(s.str());
1265  }
1266  std::vector<T> tmpX=x;
1267  eraseNoData(tmpX);
1268  std::vector<T> tmpY=y;
1269  eraseNoData(tmpY);
1270  double maxY=mymax(tmpY);
1271  double minY=mymin(tmpY);
1272  double rangeY=maxY-minY;
1273  double mse=0;
1274  for(int isample=0;isample<x.size();++isample){
1275  double e=x[isample]-y[isample];
1276  mse+=e*e/x.size();
1277  }
1278  return sqrt(mse)/mean(tmpY);
1279 }
1280
1281 // template<class T> double StatFactory::gsl_correlation(const std::vector<T>& x, const std::vector<T>& y) const{
1282 // return(gsl_stats_correlation(&(x[0]),1,&(y[0]),1,x.size()));
1283 // }
1284
1285  template<class T> double StatFactory::gsl_covariance(const std::vector<T>& x, const std::vector<T>& y) const{
1286  return(gsl_stats_covariance(&(x[0]),1,&(y[0]),1,x.size()));
1287  }
1288
1289
1290 template<class T> double StatFactory::correlation(const std::vector<T>& x, const std::vector<T>& y, int delay) const{
1291  double meanX=0;
1292  double meanY=0;
1293  double varX=0;
1294  double varY=0;
1295  double sXY=0;
1296  meanVar(x,meanX,varX);
1297  meanVar(y,meanY,varY);
1298  double denom = sqrt(varX*varY);
1299  bool isValid=false;
1300  if(denom){
1301  //Calculate the correlation series
1302  sXY = 0;
1303  for (int i=0;i<x.size();++i) {
1304  int j = i + delay;
1305  if (j < 0 || j >= y.size())
1306  continue;
1307  else if(isNoData(x[i])||isNoData(y[j]))
1308  continue;
1309  else{
1310  isValid=true;
1311  if(i<0){
1312  std::ostringstream s;
1313  s<<"Error: i must be positive";
1314  throw(s.str());
1315  }
1316  if(i>=x.size()){
1317  std::ostringstream s;
1318  s<<"Error: i must be smaller than x.size()";
1319  throw(s.str());
1320  }
1321  if(j<0){
1322  std::ostringstream s;
1323  s<<"Error: j must be positive";
1324  throw(s.str());
1325  }
1326  if(j>=y.size()){
1327  std::ostringstream s;
1328  s<<"Error: j must be smaller than y.size()";
1329  throw(s.str());
1330  }
1331  sXY += (x[i] - meanX) * (y[j] - meanY);
1332  }
1333  }
1334  if(isValid){
1335  double minSize=(x.size()<y.size())?x.size():y.size();
1336  return(sXY / denom / (minSize-1));
1337  }
1338  else if(m_noDataValues.size())
1339  return m_noDataValues[0];
1340  else{
1341  std::string errorString="Error: no valid data found";
1342  throw(errorString);
1343  }
1344  }
1345  else
1346  return 0;
1347 }
1348
1349 //todo: what if no valid data?
1350 template<class T> double StatFactory::cross_correlation(const std::vector<T>& x, const std::vector<T>& y, int maxdelay, std::vector<T>& z) const{
1351  z.clear();
1352  double sumCorrelation=0;
1353  for (int delay=-maxdelay;delay<maxdelay;delay++) {
1354  z.push_back(correlation(x,y,delay));
1355  sumCorrelation+=z.back();
1356  }
1357  return sumCorrelation;
1358 }
1359
1360 //todo: nodata?
1361 template<class T> double StatFactory::linear_regression(const std::vector<T>& x, const std::vector<T>& y, double &c0, double &c1) const{
1362  if(x.size()!=y.size()){
1363  std::ostringstream s;
1364  s<<"Error: x and y not equal in size";
1365  throw(s.str());
1366  }
1367  if(x.empty()){
1368  std::ostringstream s;
1369  s<<"Error: x is empty";
1370  throw(s.str());
1371  }
1372  double cov00;
1373  double cov01;
1374  double cov11;
1375  double sumsq;
1376  gsl_fit_linear(&(x[0]),1,&(y[0]),1,x.size(),&c0,&c1,&cov00,&cov01,&cov11,&sumsq);
1377  return (1-sumsq/var(y)/(y.size()-1));
1378 }
1379
1380 //todo: nodata?
1381 template<class T> double StatFactory::linear_regression_err(const std::vector<T>& x, const std::vector<T>& y, double &c0, double &c1) const{
1382  if(x.size()!=y.size()){
1383  std::ostringstream s;
1384  s<<"Error: x and y not equal in size";
1385  throw(s.str());
1386  }
1387  if(x.empty()){
1388  std::ostringstream s;
1389  s<<"Error: x is empty";
1390  throw(s.str());
1391  }
1392  double cov00;
1393  double cov01;
1394  double cov11;
1395  double sumsq;
1396  gsl_fit_linear(&(x[0]),1,&(y[0]),1,x.size(),&c0,&c1,&cov00,&cov01,&cov11,&sumsq);
1397  return sqrt((sumsq)/(y.size()));
1398 }
1399
1400 //alternatively: use GNU scientific library:
1401 // gsl_stats_correlation (const double data1[], const size_t stride1, const double data2[], const size_t stride2, const size_t n)
1402
1403 template<class T> void StatFactory::interpolateNoData(const std::vector<double>& wavelengthIn, const std::vector<T>& input, const std::string& type, std::vector<T>& output, bool verbose) const{
1404  if(wavelengthIn.empty()){
1405  std::ostringstream s;
1406  s<<"Error: wavelengthIn is empty";
1407  throw(s.str());
1408  }
1409  std::vector<double> wavelengthOut=wavelengthIn;
1410  std::vector<T> validIn=input;
1411  if(input.size()!=wavelengthIn.size()){
1412  std::ostringstream s;
1413  s<<"Error: x and y not equal in size";
1414  throw(s.str());
1415  }
1416  int nband=wavelengthIn.size();
1417  output.clear();
1418  //remove nodata from input and corresponding wavelengthIn
1419  if(m_noDataValues.size()){
1420  typename std::vector<T>::iterator itValue=validIn.begin();
1421  typename std::vector<T>::iterator itWavelength=wavelengthOut.begin();
1422  while(itValue!=validIn.end()&&itWavelength!=wavelengthOut.end()){
1423  if(isNoData(*itValue)){
1424  validIn.erase(itValue);
1425  wavelengthOut.erase(itWavelength);
1426  }
1427  else{
1428  ++itValue;
1429  ++itWavelength;
1430  }
1431  }
1432  if(validIn.size()>1){
1433  try{
1434  interpolateUp(wavelengthOut, validIn, wavelengthIn, type, output, verbose);
1435  }
1436  catch(...){
1437  output=input;
1438  }
1439  }
1440  else//we can not interpolate if no valid data
1441  output=input;
1442  }
1443  else//no nodata values to interpolate
1444  output=input;
1445 }
1446
1447 template<class T> void StatFactory::interpolateUp(const std::vector<double>& wavelengthIn, const std::vector<T>& input, const std::vector<double>& wavelengthOut, const std::string& type, std::vector<T>& output, bool verbose) const{
1448  if(wavelengthIn.empty()){
1449  std::ostringstream s;
1450  s<<"Error: wavelengthIn is empty";
1451  throw(s.str());
1452  }
1453  if(input.size()!=wavelengthIn.size()){
1454  std::ostringstream s;
1455  s<<"Error: input and wavelengthIn not equal in size";
1456  throw(s.str());
1457  }
1458  if(wavelengthOut.empty()){
1459  std::ostringstream s;
1460  s<<"Error: wavelengthOut is empty";
1461  throw(s.str());
1462  }
1463  int nband=wavelengthIn.size();
1464  output.clear();
1465  gsl_interp_accel *acc;
1466  allocAcc(acc);
1467  gsl_spline *spline;
1468  getSpline(type,nband,spline);
1469  assert(spline);
1470  assert(&(wavelengthIn[0]));
1471  assert(&(input[0]));
1472  int status=initSpline(spline,&(wavelengthIn[0]),&(input[0]),nband);
1473  if(status){
1474  std::string errorString="Could not initialize spline";
1475  throw(errorString);
1476  }
1477  for(int index=0;index<wavelengthOut.size();++index){
1478  if(wavelengthOut[index]<*wavelengthIn.begin()){
1479  output.push_back(*(input.begin()));
1480  continue;
1481  }
1482  else if(wavelengthOut[index]>wavelengthIn.back()){
1483  output.push_back(input.back());
1484  continue;
1485  }
1486  double dout=evalSpline(spline,wavelengthOut[index],acc);
1487  output.push_back(dout);
1488  }
1489  gsl_spline_free(spline);
1490  gsl_interp_accel_free(acc);
1491 }
1492
1493 // template<class T> void StatFactory::interpolateUp(const std::vector<double>& wavelengthIn, const std::vector< std::vector<T> >& input, const std::vector<double>& wavelengthOut, const std::string& type, std::vector< std::vector<T> >& output, bool verbose){
1494 // assert(wavelengthIn.size());
1495 // assert(wavelengthOut.size());
1496 // int nsample=input.size();
1497 // int nband=wavelengthIn.size();
1498 // output.clear();
1499 // output.resize(nsample);
1500 // gsl_interp_accel *acc;
1501 // allocAcc(acc);
1502 // gsl_spline *spline;
1503 // getSpline(type,nband,spline);
1504 // for(int isample=0;isample<nsample;++isample){
1505 // assert(input[isample].size()==wavelengthIn.size());
1506 // initSpline(spline,&(wavelengthIn[0]),&(input[isample][0]),nband);
1507 // for(int index=0;index<wavelengthOut.size();++index){
1508 // if(type=="linear"){
1509 // if(wavelengthOut[index]<wavelengthIn.back())
1510 // output[isample].push_back(*(input.begin()));
1511 // else if(wavelengthOut[index]>wavelengthIn.back())
1512 // output[isample].push_back(input.back());
1513 // }
1514 // else{
1515 // double dout=evalSpline(spline,wavelengthOut[index],acc);
1516 // output[isample].push_back(dout);
1517 // }
1518 // }
1519 // }
1520 // gsl_spline_free(spline);
1521 // gsl_interp_accel_free(acc);
1522 // }
1523
1524 //todo: nodata?
1525 template<class T> void StatFactory::interpolateUp(const std::vector<T>& input, std::vector<T>& output, int nbin) const
1526 {
1527  if(input.empty()){
1528  std::ostringstream s;
1529  s<<"Error: input is empty";
1530  throw(s.str());
1531  }
1532  if(!nbin){
1533  std::ostringstream s;
1534  s<<"Error: nbin must be larger than 0";
1535  throw(s.str());
1536  }
1537  output.clear();
1538  int dim=input.size();
1539  for(int i=0;i<dim;++i){
1540  double deltaX=0;
1541  double left=input[i];
1542  if(i<dim-1){
1543  double right=(i<dim-1)? input[i+1]:input[i];
1544  deltaX=(right-left)/static_cast<double>(nbin);
1545  for(int x=0;x<nbin;++x){
1546  output.push_back(left+x*deltaX);
1547  }
1548  }
1549  else
1550  output.push_back(input.back());
1551  }
1552 }
1553
1554 //todo: nodata?
1555 template<class T> void StatFactory::nearUp(const std::vector<T>& input, std::vector<T>& output) const
1556 {
1557  if(input.empty()){
1558  std::ostringstream s;
1559  s<<"Error: input is empty";
1560  throw(s.str());
1561  }
1562  if(output.size()<input.size()){
1563  std::ostringstream s;
1564  s<<"Error: output size is smaller than input size";
1565  throw(s.str());
1566  }
1567  int dimInput=input.size();
1568  int dimOutput=output.size();
1569
1570  for(int iin=0;iin<dimInput;++iin){
1571  for(int iout=0;iout<dimOutput/dimInput;++iout){
1572  int indexOutput=iin*dimOutput/dimInput+iout;
1573  if(indexOutput>=output.size()){
1574  std::ostringstream s;
1575  s<<"Error: indexOutput must be smaller than output.size()";
1576  throw(s.str());
1577  }
1578  output[indexOutput]=input[iin];
1579  }
1580  }
1581 }
1582
1583 //todo: nodata?
1584 template<class T> void StatFactory::interpolateUp(double* input, int dim, std::vector<T>& output, int nbin)
1585 {
1586  if(!nbin){
1587  std::ostringstream s;
1588  s<<"Error: nbin must be larger than 0";
1589  throw(s.str());
1590  }
1591  output.clear();
1592  for(int i=0;i<dim;++i){
1593  double deltaX=0;
1594  double left=input[i];
1595  if(i<dim-1){
1596  double right=(i<dim-1)? input[i+1]:input[i];
1597  deltaX=(right-left)/static_cast<double>(nbin);
1598  for(int x=0;x<nbin;++x){
1599  output.push_back(left+x*deltaX);
1600  }
1601  }
1602  else
1603  output.push_back(input[dim-1]);
1604  }
1605 }
1606
1607 //todo: nodata?
1608 template<class T> void StatFactory::interpolateDown(const std::vector<T>& input, std::vector<T>& output, int nbin) const
1609 {
1610  if(input.empty()){
1611  std::ostringstream s;
1612  s<<"Error: input is empty";
1613  throw(s.str());
1614  }
1615  if(!nbin){
1616  std::ostringstream s;
1617  s<<"Error: nbin must be larger than 0";
1618  throw(s.str());
1619  }
1620  output.clear();
1621  int dim=input.size();
1622  int x=0;
1623  output.push_back(input[0]);
1624  for(int i=1;i<dim;++i){
1625  if(i%nbin)
1626  continue;
1627  else{
1628  x=(i-1)/nbin+1;
1629  output.push_back(input[i]);
1630  }
1631  }
1632 }
1633
1634 //todo: nodata?
1635 template<class T> void StatFactory::interpolateDown(double* input, int dim, std::vector<T>& output, int nbin)
1636 {
1637  if(!nbin){
1638  std::ostringstream s;
1639  s<<"Error: nbin must be larger than 0";
1640  throw(s.str());
1641  }
1642  output.clear();
1643  int x=0;
1644  output.push_back(input[0]);
1645  for(int i=1;i<dim;++i){
1646  if(i%nbin)
1647  continue;
1648  else{
1649  x=(i-1)/nbin+1;
1650  output.push_back(input[i]);
1651  }
1652  }
1653 }
1654 }
1655
1656 #endif /* _STATFACTORY_H_ */
1657
1658 // void Histogram::signature(double m1, double m2, double& k, double& alpha, double& beta, double e)
1659 // {
1660 // double y=m1*m1/m2;
1661 // beta=F_1(y,0.1,10.0,e);
1662 // double fb=F(beta);
1663 // double g=exp(lgamma(1.0/beta));
1664 // alpha=m1*g/exp(lgamma(2.0/beta));
1665 // k=beta/(2*alpha*g);
1666 // // std::cout << "y, alpha, beta: " << y << ", " << alpha << ", " << beta << std::endl;
1667 // }
1668
1669 // double Histogram::F(double x)
1670 // {
1671 // double g2=exp(lgamma(2.0/x));
1672 // return(g2*g2/exp(lgamma(3.0/x))/exp(lgamma(1.0/x)));
1673 // }
1674
1675 // //x1 is under estimate, x2 is over estimate, e is error
1676 // double Histogram::F_1(double y, double x1, double x2, double e)
1677 // {
1678 // double f1=F(x1);
1679 // double f2=F(x2);
1680 // assert(f1!=f2);
1681 // double x=x1+(x2-x1)*(y-f1)/(f2-f1);
1682 // double f=F(x);
1683 // while(f-y>=e||y-f>=e){
1684 // if(f<y)
1685 // x1=x;
1686 // else
1687 // x2=x;
1688 // if(x1==x2)
1689 // return x1;
1690 // assert(f1!=f2);
1691 // x=x1+(x2-x1)*(y-f1)/(f2-f1);
1692 // f=F(x);
1693 // }
1694 // return x;
1695 // }