Processing Kernel for remote sensing data
StatFactory.h
1 /**********************************************************************
2 StatFactory.h: class for statistical operations on vectors
3 Copyright (C) 2008-2013 Pieter Kempeneers
4 
5 This file is part of pktools
6 
7 pktools is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11 
12 pktools is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with pktools. If not, see <http://www.gnu.org/licenses/>.
19 ***********************************************************************/
20 #ifndef _STATFACTORY_H_
21 #define _STATFACTORY_H_
22 
23 #include <iostream>
24 #include <vector>
25 #include <map>
26 #include <math.h>
27 #include <assert.h>
28 #include <string>
29 #include <sstream>
30 #include <fstream>
31 #include <algorithm>
32 #include <gsl/gsl_fit.h>
33 #include <gsl/gsl_errno.h>
34 #include <gsl/gsl_spline.h>
35 #include <gsl/gsl_rng.h>
36 #include <gsl/gsl_randist.h>
37 #include <gsl/gsl_cdf.h>
38 
39 namespace statfactory
40 {
41 
43 
44 public:
45  enum INTERPOLATION_TYPE {undefined=0,linear=1,polynomial=2,cspline=3,cspline_periodic=4,akima=5,akima_periodic=6};
46  //todo: expand with other distributions as in http://www.gnu.org/software/gsl/manual/gsl-ref_toc.html#TOC320
47  enum DISTRIBUTION_TYPE {uniform=1,gaussian=2};
48 
49  StatFactory(void){};
50  virtual ~StatFactory(void){};
51  INTERPOLATION_TYPE getInterpolationType(const std::string interpolationType){
52  std::map<std::string, INTERPOLATION_TYPE> m_interpMap;
53  initMap(m_interpMap);
54  return m_interpMap[interpolationType];
55  };
56  DISTRIBUTION_TYPE getDistributionType(const std::string distributionType){
57  std::map<std::string, DISTRIBUTION_TYPE> m_distMap;
58  initDist(m_distMap);
59  return m_distMap[distributionType];
60  };
61  static void allocAcc(gsl_interp_accel *&acc){
62  acc = gsl_interp_accel_alloc ();
63  };
64 
65  static void getSpline(const std::string type, int size, gsl_spline *& spline){
66  std::map<std::string, INTERPOLATION_TYPE> m_interpMap;
67  initMap(m_interpMap);
68  switch(m_interpMap[type]){
69  case(polynomial):
70  spline=gsl_spline_alloc(gsl_interp_polynomial,size);
71  break;
72  case(cspline):
73  spline=gsl_spline_alloc(gsl_interp_cspline,size);
74  break;
75  case(cspline_periodic):
76  spline=gsl_spline_alloc(gsl_interp_cspline_periodic,size);
77  break;
78  case(akima):
79  spline=gsl_spline_alloc(gsl_interp_akima,size);
80  break;
81  case(akima_periodic):
82  spline=gsl_spline_alloc(gsl_interp_akima_periodic,size);
83  break;
84  case(linear):
85  default:
86  spline=gsl_spline_alloc(gsl_interp_linear,size);
87  break;
88  }
89  assert(spline);
90  };
91  static void initSpline(gsl_spline *spline, const double *x, const double *y, int size){
92  gsl_spline_init (spline, x, y, size);
93  };
94  static double evalSpline(gsl_spline *spline, double x, gsl_interp_accel *acc){
95  return gsl_spline_eval (spline, x, acc);
96  };
97 
98  static gsl_rng* getRandomGenerator(unsigned long int theSeed){
99  const gsl_rng_type * T;
100  gsl_rng * r;
101 
102  // select random number generator
103  r = gsl_rng_alloc (gsl_rng_mt19937);
104  gsl_rng_set(r,theSeed);
105  return r;
106  }
107  void getNodataValues(std::vector<double>& nodatav) const{nodatav=m_noDataValues;};
108  bool isNoData(double value) const{
109  if(m_noDataValues.empty())
110  return false;
111  else
112  return find(m_noDataValues.begin(),m_noDataValues.end(),value)!=m_noDataValues.end();
113  };
114  unsigned int pushNodDataValue(double noDataValue){
115  if(find(m_noDataValues.begin(),m_noDataValues.end(),noDataValue)==m_noDataValues.end())
116  m_noDataValues.push_back(noDataValue);
117  return m_noDataValues.size();
118  };
119  unsigned int setNoDataValues(std::vector<double> vnodata){
120  m_noDataValues=vnodata;
121  return m_noDataValues.size();
122  };
123  double getRandomValue(const gsl_rng* r, const std::string type, double a=0, double b=1) const{
124  std::map<std::string, DISTRIBUTION_TYPE> m_distMap;
125  initDist(m_distMap);
126  double randValue=0;
127  switch(m_distMap[type]){
128  case(uniform):
129  randValue = a+gsl_rng_uniform(r);
130  break;
131  case(gaussian):
132  default:
133  randValue = a+gsl_ran_gaussian(r, b);
134  break;
135  }
136  return randValue;
137  };
138 
139 
140  template<class T> T mymin(const typename std::vector<T>& v) const;
141  template<class T> T mymax(const typename std::vector<T>& v) const;
142  template<class T> T mymin(const typename std::vector<T>& v, T minConstraint) const;
143  template<class T> T mymax(const typename std::vector<T>& v, T maxConstraint) const;
144 // template<class T> typename std::vector<T>::const_iterator mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
145  template<class T> typename std::vector<T>::const_iterator mymin(const typename std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
146  template<class T> typename std::vector<T>::iterator mymin(const typename std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end) const;
147  template<class T> typename std::vector<T>::const_iterator mymin(const typename std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T minConstraint) const;
148  template<class T> typename std::vector<T>::iterator mymin(const typename std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end, T minConstraint) const;
149  template<class T> typename std::vector<T>::const_iterator mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
150  template<class T> typename std::vector<T>::iterator mymax(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end) const;
151  template<class T> typename std::vector<T>::const_iterator mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T maxConstraint) const;
152  template<class T> typename std::vector<T>::iterator mymax(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end, T maxConstraint) const;
153  template<class T> typename std::vector<T>::const_iterator absmin(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
154  template<class T> typename std::vector<T>::const_iterator absmax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
155 
156  template<class T> void minmax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T& theMin, T& theMax) const;
157  template<class T> T sum(const std::vector<T>& v) const;
158  template<class T> double mean(const std::vector<T>& v) const;
159  template<class T> void eraseNoData(std::vector<T>& v) const;
160  template<class T> T median(const std::vector<T>& v) const;
161  template<class T> double var(const std::vector<T>& v) const;
162  template<class T> double moment(const std::vector<T>& v, int n) const;
163  template<class T> double cmoment(const std::vector<T>& v, int n) const;
164  template<class T> double skewness(const std::vector<T>& v) const;
165  template<class T> double kurtosis(const std::vector<T>& v) const;
166  template<class T> void meanVar(const std::vector<T>& v, double& m1, double& v1) const;
167  template<class T1, class T2> void scale2byte(const std::vector<T1>& input, std::vector<T2>& output, unsigned char lbound=0, unsigned char ubound=255) const;
168  template<class T> void distribution(const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<double>& output, int nbin, T &minimum, T &maximum, double sigma=0, const std::string &filename="") const;
169  template<class T> void distribution(const std::vector<T>& input, std::vector<double>& output, int nbin, double sigma=0, const std::string &filename="") const{distribution(input,input.begin(),input.end(),output,nbin,0,0,sigma,filename);};
170  template<class T> void distribution2d(const std::vector<T>& inputX, const std::vector<T>& inputY, std::vector< std::vector<double> >& output, int nbin, T& minX, T& maxX, T& minY, T& maxY, double sigma=0, const std::string& filename="") const;
171  template<class T> void cumulative (const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<int>& output, int nbin, T &minimum, T &maximum) const;
172  template<class T> void percentiles (const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<T>& output, int nbin, T &minimum, T &maximum, const std::string &filename="") const;
173  template<class T> void signature(const std::vector<T>& input, double& k, double& alpha, double& beta, double e) const;
174  void signature(double m1, double m2, double& k, double& alpha, double& beta, double e) const;
175  template<class T> void normalize(const std::vector<T>& input, std::vector<double>& output) const;
176  template<class T> void normalize_pct(std::vector<T>& input) const;
177  template<class T> double rmse(const std::vector<T>& x, const std::vector<T>& y) const;
178  template<class T> double correlation(const std::vector<T>& x, const std::vector<T>& y, int delay=0) const;
179  template<class T> double cross_correlation(const std::vector<T>& x, const std::vector<T>& y, int maxdelay, std::vector<T>& z) const;
180  template<class T> double linear_regression(const std::vector<T>& x, const std::vector<T>& y, double &c0, double &c1) const;
181  template<class T> double linear_regression_err(const std::vector<T>& x, const std::vector<T>& y, double &c0, double &c1) const;
182  template<class T> void interpolateUp(const std::vector<double>& wavelengthIn, const std::vector<T>& input, const std::vector<double>& wavelengthOut, const std::string& type, std::vector<T>& output, bool verbose=false) const;
183  template<class T> void interpolateUp(const std::vector<double>& wavelengthIn, const std::vector< std::vector<T> >& input, const std::vector<double>& wavelengthOut, const std::string& type, std::vector< std::vector<T> >& output, bool verbose=false) const;
184  // template<class T> void interpolateUp(const std::vector< std::vector<T> >& input, std::vector< std::vector<T> >& output, double start, double end, double step, const gsl_interp_type* type);
185  // template<class T> void interpolateUp(const std::vector< std::vector<T> >& input, const std::vector<double>& wavelengthIn, std::vector< std::vector<T> >& output, std::vector<double>& wavelengthOut, double start, double end, double step, const gsl_interp_type* type);
186  template<class T> void interpolateUp(const std::vector<T>& input, std::vector<T>& output, int nbin) const;
187  template<class T> void nearUp(const std::vector<T>& input, std::vector<T>& output) const;
188  template<class T> void interpolateUp(double* input, int dim, std::vector<T>& output, int nbin);
189  template<class T> void interpolateDown(const std::vector<T>& input, std::vector<T>& output, int nbin) const;
190  template<class T> void interpolateDown(double* input, int dim, std::vector<T>& output, int nbin);
191 
192 private:
193  static void initMap(std::map<std::string, INTERPOLATION_TYPE>& m_interpMap){
194  //initialize selMap
195  m_interpMap["linear"]=linear;
196  m_interpMap["polynomial"]=polynomial;
197  m_interpMap["cspline"]=cspline;
198  m_interpMap["cspline_periodic"]=cspline_periodic;
199  m_interpMap["akima"]=akima;
200  m_interpMap["akima_periodic"]=akima_periodic;
201  }
202  static void initDist(std::map<std::string, DISTRIBUTION_TYPE>& m_distMap){
203  //initialize distMap
204  m_distMap["gaussian"]=gaussian;
205  m_distMap["uniform"]=uniform;
206  }
207  std::vector<double> m_noDataValues;
208 };
209 
210 
211 template<class T> inline typename std::vector<T>::const_iterator StatFactory::mymin(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const
212 {
213  typename std::vector<T>::const_iterator tmpIt=begin;
214  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
215  if(!isNoData(*it))
216  if(*tmpIt>*it)
217  tmpIt=it;
218  }
219  return tmpIt;
220 }
221 
222 template<class T> inline typename std::vector<T>::iterator StatFactory::mymin(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end) const
223 {
224  typename std::vector<T>::iterator tmpIt=begin;
225  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
226  if(!isNoData(*it))
227  if(*tmpIt>*it)
228  tmpIt=it;
229  }
230  return tmpIt;
231 }
232 
233 template<class T> inline typename std::vector<T>::const_iterator StatFactory::mymin(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T minConstraint) const
234 {
235  typename std::vector<T>::const_iterator tmpIt=v.end();
236  T minValue=minConstraint;
237  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
238  if(isNoData(*it))
239  continue;
240  if((minConstraint<=*it)&&(*it<=minValue)){
241  tmpIt=it;
242  minValue=*it;
243  }
244  }
245  return tmpIt;
246 }
247 
248 template<class T> inline typename std::vector<T>::iterator StatFactory::mymin(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end, T minConstraint) const
249 {
250  typename std::vector<T>::iterator tmpIt=v.end();
251  T minValue=minConstraint;
252  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
253  if(isNoData(*it))
254  continue;
255  if((minConstraint<=*it)&&(*it<=minValue)){
256  tmpIt=it;
257  minValue=*it;
258  }
259  }
260  return tmpIt;
261 }
262 
263 template<class T> inline typename std::vector<T>::const_iterator StatFactory::mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const
264 {
265  typename std::vector<T>::const_iterator tmpIt=begin;
266  for (typename std::vector<T>::iterator it = begin; it!=end; ++it){
267  if(isNoData(*it))
268  continue;
269  if(*tmpIt<*it)
270  tmpIt=it;
271  }
272  return tmpIt;
273 }
274 
275 template<class T> inline typename std::vector<T>::iterator StatFactory::mymax(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end) const
276 {
277  typename std::vector<T>::iterator tmpIt=begin;
278  for (typename std::vector<T>::iterator it = begin; it!=end; ++it){
279  if(isNoData(*it))
280  continue;
281  if(*tmpIt<*it)
282  tmpIt=it;
283  }
284  return tmpIt;
285 }
286 
287 template<class T> inline typename std::vector<T>::const_iterator StatFactory::mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T maxConstraint) const
288 {
289  typename std::vector<T>::const_iterator tmpIt=v.end();
290  T maxValue=maxConstraint;
291  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
292  if(isNoData(*it))
293  continue;
294  if((maxValue<=*it)&&(*it<=maxConstraint)){
295  tmpIt=it;
296  maxValue=*it;
297  }
298  }
299  return tmpIt;
300 }
301 
302 template<class T> inline typename std::vector<T>::iterator StatFactory::mymax(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end, T maxConstraint) const
303 {
304  typename std::vector<T>::iterator tmpIt=v.end();
305  T maxValue=maxConstraint;
306  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
307  if(isNoData(*it))
308  continue;
309  if((maxValue<=*it)&&(*it<=maxConstraint)){
310  tmpIt=it;
311  maxValue=*it;
312  }
313  }
314  return tmpIt;
315 }
316 
317 
318 
319 
320 template<class T> inline T StatFactory::mymin(const std::vector<T>& v) const
321 {
322  T minValue=*(v.begin());
323  for (typename std::vector<T>::const_iterator it = v.begin(); it!=v.end(); ++it){
324  if(isNoData(*it))
325  continue;
326  if(minValue>*it)
327  minValue=*it;
328  }
329  return minValue;
330 }
331 
332  template<class T> inline T StatFactory::mymin(const std::vector<T>& v, T minConstraint) const
333 {
334  T minValue=minConstraint;
335  for (typename std::vector<T>::const_iterator it = v.begin(); it!=v.end(); ++it){
336  if((minConstraint<=*it)&&(*it<=minValue))
337  minValue=*it;
338  }
339  return minValue;
340 }
341 
342 template<class T> inline T StatFactory::mymax(const std::vector<T>& v) const
343 {
344  T maxValue=*(v.begin());
345  for (typename std::vector<T>::const_iterator it = v.begin(); it!=v.end(); ++it){
346  if(isNoData(*it))
347  continue;
348  if(maxValue<*it)
349  maxValue=*it;
350  }
351  return maxValue;
352 }
353 
354 template<class T> inline T StatFactory::mymax(const std::vector<T>& v, T maxConstraint) const
355 {
356  T maxValue=maxConstraint;
357  for (typename std::vector<T>::const_iterator it = v.begin(); it!=v.end(); ++it){
358  if(isNoData(*it))
359  continue;
360  if((maxValue<=*it)&&(*it<=maxConstraint))
361  maxValue=*it;
362  }
363  return maxValue;
364 }
365 
366 template<class T> inline typename std::vector<T>::const_iterator StatFactory::absmax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const
367 {
368  typename std::vector<T>::const_iterator tmpIt=begin;
369  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
370  if(isNoData(*it))
371  continue;
372  if(abs(*tmpIt)<abs(*it))
373  tmpIt=it;
374  }
375  return tmpIt;
376 }
377 
378 template<class T> inline typename std::vector<T>::const_iterator StatFactory::absmin(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const
379 {
380  typename std::vector<T>::const_iterator tmpIt=begin;
381  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
382  if(isNoData(*it))
383  continue;
384  if(abs(*tmpIt)>abs(*it))
385  tmpIt=it;
386  }
387 }
388 
389 template<class T> inline void StatFactory::minmax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T& theMin, T& theMax) const
390 {
391  theMin=*begin;
392  theMax=*begin;
393  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
394  if(isNoData(*it))
395  continue;
396  if(theMin>*it)
397  theMin=*it;
398  if(theMax<*it)
399  theMax=*it;
400  }
401 }
402 
403 template<class T> inline T StatFactory::sum(const std::vector<T>& v) const
404 {
405  typename std::vector<T>::const_iterator it;
406  T tmpSum=0;
407  for (it = v.begin(); it!= v.end(); ++it){
408  if(isNoData(*it))
409  continue;
410  tmpSum+=*it;
411  }
412  return tmpSum;
413 }
414 
415 template<class T> inline double StatFactory::mean(const std::vector<T>& v) const
416 {
417  assert(v.size());
418  typename std::vector<T>::const_iterator it;
419  T tmpSum=0;
420  unsigned int validSize=0;
421  for (it = v.begin(); it!= v.end(); ++it){
422  if(isNoData(*it))
423  continue;
424  ++validSize;
425  tmpSum+=*it;
426  }
427  if(validSize)
428  return static_cast<double>(tmpSum)/validSize;
429  else
430  return 0;
431 }
432 
433 template<class T> inline void StatFactory::eraseNoData(std::vector<T>& v) const
434 {
435  typename std::vector<T>::iterator it=v.begin();
436  while(it!=v.end()){
437  if(isNoData(*it))
438  v.erase(it);
439  else
440  ++it;
441  }
442 }
443 
444 template<class T> T StatFactory::median(const std::vector<T>& v) const
445 {
446  std::vector<T> tmpV=v;
447  eraseNoData(tmpV);
448  sort(tmpV.begin(),tmpV.end());
449  if(tmpV.size()%2)
450  return tmpV[tmpV.size()/2];
451  else
452  return 0.5*(tmpV[tmpV.size()/2-1]+tmpV[tmpV.size()/2]);
453 }
454 
455 template<class T> double StatFactory::var(const std::vector<T>& v) const
456 {
457  typename std::vector<T>::const_iterator it;
458  unsigned int validSize=0;
459  double m1=0;
460  double m2=0;
461  for (it = v.begin(); it!= v.end(); ++it){
462  if(isNoData(*it))
463  continue;
464  m1+=*it;
465  m2+=(*it)*(*it);
466  ++validSize;
467  }
468  if(validSize){
469  m2/=validSize;
470  m1/=validSize;
471  return m2-m1*m1;
472  }
473  else
474  return 0;
475  /* double v1=0; */
476  /* double m1=mean(v); */
477  /* double n=v.size(); */
478  /* assert(n>1); */
479  /* for (it = v.begin(); it!= v.end(); ++it) */
480  /* v1+=(*it-m1)*(*it-m1); */
481  /* v1/=(n-1); */
482  /* assert(v1>=0); */
483  /* return v1; */
484 }
485 
486 template<class T> double StatFactory::moment(const std::vector<T>& v, int n) const
487 {
488  assert(v.size());
489  unsigned int validSize=0;
490  typename std::vector<T>::const_iterator it;
491  double m=0;
492 // double m1=mean(v);
493  for(it = v.begin(); it!= v.end(); ++it){
494  if(isNoData(*it))
495  continue;
496  m+=pow((*it),n);
497  ++validSize;
498  }
499  if(validSize)
500  return m/validSize;
501  else
502  return 0;
503  /* return m/v.size(); */
504 }
505 
506  //central moment
507 template<class T> double StatFactory::cmoment(const std::vector<T>& v, int n) const
508 {
509  assert(v.size());
510  unsigned int validSize=0;
511  typename std::vector<T>::const_iterator it;
512  double m=0;
513  double m1=mean(v);
514  for(it = v.begin(); it!= v.end(); ++it){
515  if(isNoData(*it))
516  continue;
517  m+=pow((*it-m1),n);
518  ++validSize;
519  }
520  return m/validSize;
521  return m/v.size();
522 }
523 
524 template<class T> double StatFactory::skewness(const std::vector<T>& v) const
525 {
526  return cmoment(v,3)/pow(var(v),1.5);
527 }
528 
529 template<class T> double StatFactory::kurtosis(const std::vector<T>& v) const
530 {
531  double m2=cmoment(v,2);
532  double m4=cmoment(v,4);
533  return m4/m2/m2-3.0;
534 }
535 
536 template<class T> void StatFactory::meanVar(const std::vector<T>& v, double& m1, double& v1) const
537 {
538  typename std::vector<T>::const_iterator it;
539  unsigned int validSize=0;
540  m1=0;
541  v1=0;
542  double m2=0;
543  for (it = v.begin(); it!= v.end(); ++it){
544  if(isNoData(*it))
545  continue;
546  m1+=*it;
547  m2+=(*it)*(*it);
548  ++validSize;
549  }
550  if(validSize){
551  m2/=validSize;
552  m1/=validSize;
553  v1=m2-m1*m1;
554  }
555  /* typename std::vector<T>::const_iterator it; */
556  /* v1=0; */
557  /* m1=mean(v); */
558  /* double n=v.size(); */
559  /* assert(n>1); */
560  /* for (it = v.begin(); it!= v.end(); ++it) */
561  /* v1+=(*(it)-m1)*(*(it)-m1); */
562  /* v1/=(n-1); */
563  /* assert(v1>=0); */
564 }
565 
566 template<class T1, class T2> void StatFactory::scale2byte(const std::vector<T1>& input, std::vector<T2>& output, unsigned char lbound, unsigned char ubound) const
567 {
568  output.resize(input.size());
569  T1 minimum=mymin(input);
570  T1 maximum=mymax(input);
571  assert(maximum>minimum);
572  double scale=(ubound-lbound)/(maximum-minimum);
573  for (int i=0;i<input.size();++i)
574  output[i]=scale*(input[i]-(minimum))+lbound;
575 }
576 
577 template<class T> void StatFactory::distribution(const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<double>& output, int nbin, T &minimum, T &maximum, double sigma, const std::string &filename) const
578 {
579  double minValue=0;
580  double maxValue=0;
581  minmax(input,begin,end,minValue,maxValue);
582  if(minimum<maximum&&minimum>minValue)
583  minValue=minimum;
584  if(minimum<maximum&&maximum<maxValue)
585  maxValue=maximum;
586 
587  //todo: check...
588  minimum=minValue;
589  maximum=maxValue;
590 
591  if(maximum<=minimum){
592  std::ostringstream s;
593  s<<"Error: could not calculate distribution (min>=max)";
594  throw(s.str());
595  }
596  assert(nbin);
597  assert(input.size());
598  if(output.size()!=nbin){
599  output.resize(nbin);
600  for(int i=0;i<nbin;output[i++]=0);
601  }
602  typename std::vector<T>::const_iterator it;
603  for(it=begin;it!=end;++it){
604  if(*it<minimum)
605  continue;
606  if(*it>maximum)
607  continue;
608  if(isNoData(*it))
609  continue;
610  if(sigma>0){
611  // minimum-=2*sigma;
612  // maximum+=2*sigma;
613  //create kde for Gaussian basis function
614  //todo: speed up by calculating first and last bin with non-zero contriubtion...
615  //todo: calculate real surface below pdf by using gsl_cdf_gaussian_P(x-mean+binsize,sigma)-gsl_cdf_gaussian_P(x-mean,sigma)
616  for(int ibin=0;ibin<nbin;++ibin){
617  double icenter=minimum+static_cast<double>(maximum-minimum)*(ibin+0.5)/nbin;
618  double thePdf=gsl_ran_gaussian_pdf(*it-icenter, sigma);
619  output[ibin]+=thePdf;
620  }
621  }
622  else{
623  int theBin=0;
624  if(*it==maximum)
625  theBin=nbin-1;
626  else if(*it>minimum && *it<maximum)
627  theBin=static_cast<int>(static_cast<double>((nbin-1)*(*it)-minimum)/(maximum-minimum));
628  ++output[theBin];
629  // if(*it==maximum)
630  // ++output[nbin-1];
631  // else if(*it>=minimum && *it<maximum)
632  // ++output[static_cast<int>(static_cast<double>((*it)-minimum)/(maximum-minimum)*nbin)];
633  }
634  }
635  if(!filename.empty()){
636  std::ofstream outputfile;
637  outputfile.open(filename.c_str());
638  if(!outputfile){
639  std::ostringstream s;
640  s<<"Error opening distribution file , " << filename;
641  throw(s.str());
642  }
643  for(int ibin=0;ibin<nbin;++ibin)
644  outputfile << minimum+static_cast<double>(maximum-minimum)*(ibin+0.5)/nbin << " " << static_cast<double>(output[ibin])/input.size() << std::endl;
645  outputfile.close();
646  }
647 }
648 
649 template<class T> void StatFactory::distribution2d(const std::vector<T>& inputX, const std::vector<T>& inputY, std::vector< std::vector<double> >& output, int nbin, T& minX, T& maxX, T& minY, T& maxY, double sigma, const std::string& filename) const
650 {
651  assert(inputX.size());
652  assert(inputX.size()==inputY.size());
653  unsigned long int npoint=inputX.size();
654  if(maxX<=minX)
655  minmax(inputX,inputX.begin(),inputX.end(),minX,maxX);
656  if(maxX<=minX){
657  std::ostringstream s;
658  s<<"Error: could not calculate distribution (minX>=maxX)";
659  throw(s.str());
660  }
661  if(maxY<=minY)
662  minmax(inputY,inputY.begin(),inputY.end(),minY,maxY);
663  if(maxY<=minY){
664  std::ostringstream s;
665  s<<"Error: could not calculate distribution (minY>=maxY)";
666  throw(s.str());
667  }
668  assert(nbin>1);
669  output.resize(nbin);
670  for(int i=0;i<nbin;++i){
671  output[i].resize(nbin);
672  for(int j=0;j<nbin;++j)
673  output[i][j]=0;
674  }
675  int binX=0;
676  int binY=0;
677  for(unsigned long int ipoint=0;ipoint<npoint;++ipoint){
678  if(inputX[ipoint]==maxX)
679  binX=nbin-1;
680  else
681  binX=static_cast<int>(static_cast<double>(inputX[ipoint]-minX)/(maxX-minX)*nbin);
682  if(inputY[ipoint]==maxY)
683  binY=nbin-1;
684  else
685  binY=static_cast<int>(static_cast<double>(inputY[ipoint]-minY)/(maxY-minY)*nbin);
686  assert(binX>=0);
687  assert(binX<output.size());
688  assert(binY>=0);
689  assert(binY<output[binX].size());
690  if(sigma>0){
691  // minX-=2*sigma;
692  // maxX+=2*sigma;
693  // minY-=2*sigma;
694  // maxY+=2*sigma;
695  //create kde for Gaussian basis function
696  //todo: speed up by calculating first and last bin with non-zero contriubtion...
697  for(int ibinX=0;ibinX<nbin;++ibinX){
698  double centerX=minX+static_cast<double>(maxX-minX)*ibinX/nbin;
699  double pdfX=gsl_ran_gaussian_pdf(inputX[ipoint]-centerX, sigma);
700  for(int ibinY=0;ibinY<nbin;++ibinY){
701  //calculate \integral_ibinX^(ibinX+1)
702  double centerY=minY+static_cast<double>(maxY-minY)*ibinY/nbin;
703  double pdfY=gsl_ran_gaussian_pdf(inputY[ipoint]-centerY, sigma);
704  output[ibinX][binY]+=pdfX*pdfY;
705  }
706  }
707  }
708  else
709  ++output[binX][binY];
710  }
711  if(!filename.empty()){
712  std::ofstream outputfile;
713  outputfile.open(filename.c_str());
714  if(!outputfile){
715  std::ostringstream s;
716  s<<"Error opening distribution file , " << filename;
717  throw(s.str());
718  }
719  for(int bin=0;bin<nbin;++bin){
720  for(int bin=0;bin<nbin;++bin){
721  double value=static_cast<double>(output[binX][binY])/npoint;
722  outputfile << (maxX-minX)*bin/(nbin-1)+minX << " " << (maxY-minY)*bin/(nbin-1)+minY << " " << value << std::endl;
723  }
724  }
725  outputfile.close();
726  }
727 }
728 
729 template<class T> void StatFactory::percentiles (const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<T>& output, int nbin, T &minimum, T &maximum, const std::string &filename) const
730 {
731  if(maximum<=minimum)
732  minmax(input,begin,end,minimum,maximum);
733  // if(!minimum)
734  // minimum=*(min(input,begin,end));
735  // if(!maximum)
736  // maximum=*(max(input,begin,end));
737  assert(maximum>minimum);
738  assert(nbin>1);
739  assert(input.size());
740  output.resize(nbin);
741  std::vector<T> inputSort;
742  inputSort.assign(begin,end);
743  typename std::vector<T>::iterator vit=inputSort.begin();
744  while(vit!=inputSort.end()){
745  if(*vit<minimum||*vit>maximum)
746  inputSort.erase(vit);
747  else
748  ++vit;
749  }
750  std::sort(inputSort.begin(),inputSort.end());
751  vit=inputSort.begin();
752  std::vector<T> inputBin;
753  for(int ibin=0;ibin<nbin;++ibin){
754  inputBin.clear();
755  while(inputBin.size()<inputSort.size()/nbin&&vit!=inputSort.end()){
756  inputBin.push_back(*vit);
757  ++vit;
758  }
759  if(inputBin.size()){
760  /* output[ibin]=median(inputBin); */
761  output[ibin]=mymax(inputBin);
762  }
763  }
764  if(!filename.empty()){
765  std::ofstream outputfile;
766  outputfile.open(filename.c_str());
767  if(!outputfile){
768  std::ostringstream s;
769  s<<"error opening distribution file , " << filename;
770  throw(s.str());
771  }
772  for(int ibin=0;ibin<nbin;++ibin)
773  outputfile << ibin*100.0/nbin << " " << static_cast<double>(output[ibin])/input.size() << std::endl;
774  outputfile.close();
775  }
776 }
777 
778 // template<class T> void StatFactory::cumulative (const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<int>& output, int nbin, T &minimum, T &maximum)
779 // {
780 // assert(nbin>1);
781 // assert(input.size());
782 // distribution(input,output,nbin,minimum,maximum);
783 // for(std::vector<int>::iterator it=begin+1;it!=end;++it)
784 // *it+=*(it-1);
785 // if(!filename.empty()){
786 // std::ofstream outputfile;
787 // outputfile.open(filename.c_str());
788 // if(!outputfile){
789 // std::ostringstream s;
790 // s<<"error opening cumulative file , " << filename;
791 // throw(s.str());
792 // }
793 // for(int bin=0;bin<nbin;++bin)
794 // outputfile << (maximum-minimum)*bin/(nbin-1)+minimum << " " << static_cast<double>(output[bin])/input.size() << std::endl;
795 // outputfile.close();
796 // }
797 // }
798 
799 template<class T> void StatFactory::signature(const std::vector<T>& input, double&k, double& alpha, double& beta, double e) const
800 {
801  double m1=moment(input,1);
802  double m2=moment(input,2);
803  signature(m1,m2,k,alpha,beta,e);
804 }
805 
806 template<class T> void StatFactory::normalize(const std::vector<T>& input, std::vector<double>& output) const{
807  double total=sum(input);
808  if(total){
809  output.resize(input.size());
810  for(int index=0;index<input.size();++index)
811  output[index]=input[index]/total;
812  }
813  else
814  output=input;
815 }
816 
817 template<class T> void StatFactory::normalize_pct(std::vector<T>& input) const{
818  double total=sum(input);
819  if(total){
820  typename std::vector<T>::iterator it;
821  for(it=input.begin();it!=input.end();++it)
822  *it=100.0*(*it)/total;
823  }
824 }
825 
826 template<class T> double StatFactory::rmse(const std::vector<T>& x, const std::vector<T>& y) const{
827  assert(x.size()==y.size());
828  assert(x.size());
829  double mse=0;
830  for(int isample=0;isample<x.size();++isample){
831  double e=x[isample]-y[isample];
832  mse+=e*e/x.size();
833  }
834  return sqrt(mse);
835 }
836 
837 template<class T> double StatFactory::correlation(const std::vector<T>& x, const std::vector<T>& y, int delay) const{
838  double meanX=0;
839  double meanY=0;
840  double varX=0;
841  double varY=0;
842  double sXY=0;
843  meanVar(x,meanX,varX);
844  meanVar(y,meanY,varY);
845  double denom = sqrt(varX*varY);
846  if(denom){
847  //Calculate the correlation series
848  sXY = 0;
849  for (int i=0;i<x.size();++i) {
850  int j = i + delay;
851  if (j < 0 || j >= y.size())
852  continue;
853  else{
854  assert(i>=0&&i<x.size());
855  assert(j>=0&&j<y.size());
856  sXY += (x[i] - meanX) * (y[j] - meanY);
857  }
858  }
859  double minSize=(x.size()<y.size())?x.size():y.size();
860  return(sXY / denom / (minSize-1));
861  }
862  else
863  return 0;
864 }
865 
866 template<class T> double StatFactory::cross_correlation(const std::vector<T>& x, const std::vector<T>& y, int maxdelay, std::vector<T>& z) const{
867  z.clear();
868  double sumCorrelation=0;
869  for (int delay=-maxdelay;delay<maxdelay;delay++) {
870  z.push_back(correlation(x,y,delay));
871  sumCorrelation+=z.back();
872  }
873  return sumCorrelation;
874 }
875 
876 template<class T> double StatFactory::linear_regression(const std::vector<T>& x, const std::vector<T>& y, double &c0, double &c1) const{
877  assert(x.size()==y.size());
878  assert(x.size());
879  double cov00;
880  double cov01;
881  double cov11;
882  double sumsq;
883  gsl_fit_linear(&(x[0]),1,&(y[0]),1,x.size(),&c0,&c1,&cov00,&cov01,&cov11,&sumsq);
884  return (1-sumsq/var(y)/(y.size()-1));
885 }
886 
887 template<class T> double StatFactory::linear_regression_err(const std::vector<T>& x, const std::vector<T>& y, double &c0, double &c1) const{
888  assert(x.size()==y.size());
889  assert(x.size());
890  double cov00;
891  double cov01;
892  double cov11;
893  double sumsq;
894  gsl_fit_linear(&(x[0]),1,&(y[0]),1,x.size(),&c0,&c1,&cov00,&cov01,&cov11,&sumsq);
895  return sqrt((sumsq)/(y.size()));
896 }
897 
898 //alternatively: use GNU scientific library:
899 // gsl_stats_correlation (const double data1[], const size_t stride1, const double data2[], const size_t stride2, const size_t n)
900 
901 template<class T> void StatFactory::interpolateUp(const std::vector<double>& wavelengthIn, const std::vector<T>& input, const std::vector<double>& wavelengthOut, const std::string& type, std::vector<T>& output, bool verbose) const{
902  assert(wavelengthIn.size());
903  assert(input.size()==wavelengthIn.size());
904  assert(wavelengthOut.size());
905  int nband=wavelengthIn.size();
906  output.clear();
907  gsl_interp_accel *acc;
908  allocAcc(acc);
909  gsl_spline *spline;
910  getSpline(type,nband,spline);
911  assert(spline);
912  assert(&(wavelengthIn[0]));
913  assert(&(input[0]));
914  initSpline(spline,&(wavelengthIn[0]),&(input[0]),nband);
915  for(int index=0;index<wavelengthOut.size();++index){
916  if(wavelengthOut[index]<*wavelengthIn.begin()){
917  output.push_back(*(input.begin()));
918  continue;
919  }
920  else if(wavelengthOut[index]>wavelengthIn.back()){
921  output.push_back(input.back());
922  continue;
923  }
924  // if(type=="linear"){
925  // if(wavelengthOut[index]<wavelengthIn.back()){
926  // output.push_back(*(input.begin()));
927  // continue;
928  // }
929  // else if(wavelengthOut[index]>wavelengthIn.back()){
930  // output.push_back(input.back());
931  // continue;
932  // }
933  // }
934  double dout=evalSpline(spline,wavelengthOut[index],acc);
935  output.push_back(dout);
936  }
937  gsl_spline_free(spline);
938  gsl_interp_accel_free(acc);
939 }
940 
941 // template<class T> void StatFactory::interpolateUp(const std::vector<double>& wavelengthIn, const std::vector< std::vector<T> >& input, const std::vector<double>& wavelengthOut, const std::string& type, std::vector< std::vector<T> >& output, bool verbose){
942 // assert(wavelengthIn.size());
943 // assert(wavelengthOut.size());
944 // int nsample=input.size();
945 // int nband=wavelengthIn.size();
946 // output.clear();
947 // output.resize(nsample);
948 // gsl_interp_accel *acc;
949 // allocAcc(acc);
950 // gsl_spline *spline;
951 // getSpline(type,nband,spline);
952 // for(int isample=0;isample<nsample;++isample){
953 // assert(input[isample].size()==wavelengthIn.size());
954 // initSpline(spline,&(wavelengthIn[0]),&(input[isample][0]),nband);
955 // for(int index=0;index<wavelengthOut.size();++index){
956 // if(type=="linear"){
957 // if(wavelengthOut[index]<wavelengthIn.back())
958 // output[isample].push_back(*(input.begin()));
959 // else if(wavelengthOut[index]>wavelengthIn.back())
960 // output[isample].push_back(input.back());
961 // }
962 // else{
963 // double dout=evalSpline(spline,wavelengthOut[index],acc);
964 // output[isample].push_back(dout);
965 // }
966 // }
967 // }
968 // gsl_spline_free(spline);
969 // gsl_interp_accel_free(acc);
970 // }
971 
972 template<class T> void StatFactory::interpolateUp(const std::vector<T>& input, std::vector<T>& output, int nbin) const
973 {
974  assert(input.size());
975  assert(nbin);
976  output.clear();
977  int dim=input.size();
978  for(int i=0;i<dim;++i){
979  double deltaX=0;
980  double left=input[i];
981  if(i<dim-1){
982  double right=(i<dim-1)? input[i+1]:input[i];
983  deltaX=(right-left)/static_cast<double>(nbin);
984  for(int x=0;x<nbin;++x){
985  output.push_back(left+x*deltaX);
986  }
987  }
988  else
989  output.push_back(input.back());
990  }
991 }
992 
993 template<class T> void StatFactory::nearUp(const std::vector<T>& input, std::vector<T>& output) const
994 {
995  assert(input.size());
996  assert(output.size()>=input.size());
997  int dimInput=input.size();
998  int dimOutput=output.size();
999 
1000  for(int iin=0;iin<dimInput;++iin){
1001  for(int iout=0;iout<dimOutput/dimInput;++iout){
1002  int indexOutput=iin*dimOutput/dimInput+iout;
1003  assert(indexOutput<output.size());
1004  output[indexOutput]=input[iin];
1005  }
1006  }
1007 }
1008 
1009 template<class T> void StatFactory::interpolateUp(double* input, int dim, std::vector<T>& output, int nbin)
1010 {
1011  assert(nbin);
1012  output.clear();
1013  for(int i=0;i<dim;++i){
1014  double deltaX=0;
1015  double left=input[i];
1016  if(i<dim-1){
1017  double right=(i<dim-1)? input[i+1]:input[i];
1018  deltaX=(right-left)/static_cast<double>(nbin);
1019  for(int x=0;x<nbin;++x){
1020  output.push_back(left+x*deltaX);
1021  }
1022  }
1023  else
1024  output.push_back(input[dim-1]);
1025  }
1026 }
1027 
1028 template<class T> void StatFactory::interpolateDown(const std::vector<T>& input, std::vector<T>& output, int nbin) const
1029 {
1030  assert(input.size());
1031  assert(nbin);
1032  output.clear();
1033  int dim=input.size();
1034  int x=0;
1035  output.push_back(input[0]);
1036  for(int i=1;i<dim;++i){
1037  if(i%nbin)
1038  continue;
1039  else{
1040  x=(i-1)/nbin+1;
1041  output.push_back(input[i]);
1042  }
1043  }
1044 }
1045 
1046 template<class T> void StatFactory::interpolateDown(double* input, int dim, std::vector<T>& output, int nbin)
1047 {
1048  assert(nbin);
1049  output.clear();
1050  int x=0;
1051  output.push_back(input[0]);
1052  for(int i=1;i<dim;++i){
1053  if(i%nbin)
1054  continue;
1055  else{
1056  x=(i-1)/nbin+1;
1057  output.push_back(input[i]);
1058  }
1059  }
1060 }
1061 }
1062 
1063 #endif /* _STATFACTORY_H_ */
1064 
1065 // void Histogram::signature(double m1, double m2, double& k, double& alpha, double& beta, double e)
1066 // {
1067 // double y=m1*m1/m2;
1068 // beta=F_1(y,0.1,10.0,e);
1069 // double fb=F(beta);
1070 // double g=exp(lgamma(1.0/beta));
1071 // alpha=m1*g/exp(lgamma(2.0/beta));
1072 // k=beta/(2*alpha*g);
1073 // // std::cout << "y, alpha, beta: " << y << ", " << alpha << ", " << beta << std::endl;
1074 // }
1075 
1076 // double Histogram::F(double x)
1077 // {
1078 // double g2=exp(lgamma(2.0/x));
1079 // return(g2*g2/exp(lgamma(3.0/x))/exp(lgamma(1.0/x)));
1080 // }
1081 
1082 // //x1 is under estimate, x2 is over estimate, e is error
1083 // double Histogram::F_1(double y, double x1, double x2, double e)
1084 // {
1085 // double f1=F(x1);
1086 // double f2=F(x2);
1087 // assert(f1!=f2);
1088 // double x=x1+(x2-x1)*(y-f1)/(f2-f1);
1089 // double f=F(x);
1090 // while(f-y>=e||y-f>=e){
1091 // if(f<y)
1092 // x1=x;
1093 // else
1094 // x2=x;
1095 // if(x1==x2)
1096 // return x1;
1097 // assert(f1!=f2);
1098 // x=x1+(x2-x1)*(y-f1)/(f2-f1);
1099 // f=F(x);
1100 // }
1101 // return x;
1102 // }