20 #ifndef _FEATURESELECTOR_H_ 21 #define _FEATURESELECTOR_H_ 29 #include "ConfusionMatrix.h" 30 #include "base/IndexValue.h" 31 #include "base/Vector2d.h" 32 #include "gsl/gsl_combination.h" 33 #include "CostFactory.h" 40 template<
class T>
double forward(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
int maxFeatures=0,
short verbose=0);
41 template<
class T>
double backward(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
int minFeatures,
short verbose=0);
42 template<
class T>
double floating(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
int maxFeatures=0,
double epsilon=0.001,
short verbose=0);
43 template<
class T>
double bruteForce(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
int maxFeatures=0,
short verbose=0);
46 template<
class T>
double addFeature(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
short verbose=0);
47 template<
class T>
double removeFeature(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
int& r,
short verbose=0);
48 template<
class T>
double forwardUnivariate(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
int maxFeatures=0,
short verbose=0);
52 template<
class T>
double FeatureSelector::forwardUnivariate(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
int maxFeatures,
short verbose){
53 int maxLevels=v[0][0].size();
55 maxFeatures=maxLevels;
59 std::vector<IndexValue> cost(maxLevels);
60 std::list<int> tmpset=subset;
61 std::vector< Vector2d<T> > tmp(v.size());
62 for(
int ilevel=0;ilevel<maxLevels;++ilevel){
63 if(find(tmpset.begin(),tmpset.end(),ilevel)==tmpset.end()){
64 tmpset.push_back(ilevel);
65 for(
int iclass=0;iclass<v.size();++iclass){
67 v[iclass].selectCols(tmpset,tmp[iclass]);
72 pv.value=theCostFactory.getCost(tmp);
86 while((subset.size()<maxFeatures)&&(ilevel<maxLevels)){
87 if(cost[ilevel].value>0)
88 subset.push_back(cost[ilevel].position);
90 std::cout <<
"feature " << subset.back() <<
" has cost: " << cost[ilevel].value << std::endl;
95 for(
int iclass=0;iclass<v.size();++iclass){
97 v[iclass].selectCols(subset,tmp[iclass]);
100 maxCost=theCostFactory.getCost(tmp);
111 template<
class T>
double FeatureSelector::forward(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
int maxFeatures,
short verbose){
114 int maxLevels=v[0][0].size();
116 maxFeatures=maxLevels;
117 while(subset.size()<maxFeatures){
118 maxCost=addFeature(v,theCostFactory,subset,verbose);
120 for(std::list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit)
121 std::cout << *lit <<
" ";
122 std::cout << std::endl;
130 template<
class T>
double FeatureSelector::backward(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
int minFeatures,
short verbose){
135 for(
int iFeature=0;iFeature<v[0][0].size();++iFeature)
136 subset.push_back(iFeature);
138 if(subset.size()==minFeatures)
139 maxCost=theCostFactory.getCost(v);
140 while(subset.size()>minFeatures){
141 maxCost=removeFeature(v,theCostFactory,subset,removedFeature,verbose);
143 for(std::list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit)
144 std::cout << *lit <<
" ";
145 std::cout << std::endl;
153 template<
class T>
double FeatureSelector::floating(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
int maxFeatures,
double epsilon,
short verbose){
155 int maxLevels=v[0][0].size();
157 maxFeatures=maxLevels;
161 while(cost.size()<subset.size())
163 cost.push_back(addFeature(v,theCostFactory,subset,verbose));
166 std::cout <<
"added " << subset.back() <<
", " << subset.size() <<
"/" << maxFeatures <<
" features selected with cost: " << cost.back() << std::endl;
168 for(std::list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit)
169 std::cout << *lit <<
" ";
170 std::cout << std::endl;
172 while(k<maxFeatures){
173 cost.push_back(addFeature(v,theCostFactory,subset,verbose));
176 std::cout <<
"added " << subset.back() <<
", " << subset.size() <<
"/" << maxFeatures <<
" features selected with cost: " << cost.back() << std::endl;
178 for(std::list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit)
179 std::cout << *lit <<
" ";
180 std::cout <<
" (cost: " << cost.back() <<
")" << std::endl;
185 double cost_r=removeFeature(v,theCostFactory,subset,x_r,verbose);
186 if(cost_r>cost[k-1]+epsilon){
191 std::cout <<
"removed " << x_r <<
", " << subset.size() <<
"/" << maxFeatures <<
" features remain with cost: " << cost_r << std::endl;
193 for(std::list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit)
194 std::cout << *lit <<
" ";
195 std::cout <<
" (cost: " << cost.back() <<
")" << std::endl;
200 subset.push_back(x_r);
204 std::cout <<
"could not remove any feature" << std::endl;
212 template<
class T>
double FeatureSelector::bruteForce(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
int maxFeatures,
short verbose){
213 int maxLevels=v[0][0].size();
215 maxFeatures=maxLevels;
221 c=gsl_combination_calloc(v[0][0].size(),maxFeatures);
223 std::list<int> tmpset;
224 std::vector< Vector2d<T> > tmpv(v.size());
225 std::list<int> catchset;
229 if(subset.size()>=maxLevels)
231 gsl_combination_next(c);
233 for(
int ifeature=0;ifeature<maxFeatures;++ifeature)
234 tmpset.push_back(c->data[ifeature]);
235 for(
int iclass=0;iclass<v.size();++iclass)
236 v[iclass].selectCols(tmpset,tmpv[iclass]);
238 cost=theCostFactory.getCost(tmpv);
243 std::cout <<
"Could not get cost from set: " << std::endl;
244 gsl_combination_fprintf(stdout,c,
" %u");
255 }
while(gsl_combination_next(c)==GSL_SUCCESS);
256 gsl_combination_free(c);
261 for(std::list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
262 std::list<int>::iterator lit2=lit;
263 assert(find(++lit2,subset.end(),*lit)==subset.end());
268 template<
class T>
double FeatureSelector::addFeature(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
short verbose){
270 std::list<int> tmpset=subset;
271 std::vector< Vector2d<T> > tmp(v.size());
272 std::list<int> catchset;
276 int maxLevels=v[0][0].size();
277 if(subset.size()>=maxLevels)
279 for(
int ilevel=0;ilevel<maxLevels;++ilevel){
280 if(find(tmpset.begin(),tmpset.end(),ilevel)!=tmpset.end())
282 tmpset.push_back(ilevel);
283 for(
int iclass=0;iclass<v.size();++iclass){
285 v[iclass].selectCols(tmpset,tmp[iclass]);
288 cost=theCostFactory.getCost(tmp);
293 std::cout <<
"Could not add feature " << tmpset.back() << std::endl;
306 for(std::list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
307 std::list<int>::iterator lit2=lit;
308 assert(find(++lit2,subset.end(),*lit)==subset.end());
313 template<
class T>
double FeatureSelector::removeFeature(std::vector<
Vector2d<T> >& v,
CostFactory& theCostFactory, std::list<int>& subset,
int& r,
short verbose){
315 std::list<int> tmpset=subset;
316 std::vector< Vector2d<T> > tmp(v.size());
317 int nFeatures=subset.size();
318 std::list<int> catchset;
323 int maxLevels=v[0][0].size();
324 if(subset.size()>maxLevels||subset.empty()){
327 std::list<int>::const_iterator it;
328 for(
int i=0;i<nFeatures;++i){
331 for(
int iclass=0;iclass<v.size();++iclass){
333 v[iclass].selectCols(tmpset,tmp[iclass]);
336 cost=theCostFactory.getCost(tmp);
341 std::cout <<
"Could not remove feature " << last << std::endl;
342 tmpset.push_front(last);
350 tmpset.push_front(last);
356 for(std::list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
357 std::list<int>::iterator lit2=lit;
358 assert(find(++lit2,subset.end(),*lit)==subset.end());