pktools  2.6.7
Processing Kernel for geospatial data
FileReaderAscii.h
1 /**********************************************************************
2 FileReaderAscii.h: class to read (column based) ASCII files
3 Copyright (C) 2008-2013 Pieter Kempeneers
4 
5 This file is part of pktools
6 
7 pktools is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11 
12 pktools is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with pktools. If not, see <http://www.gnu.org/licenses/>.
19 ***********************************************************************/
20 #ifndef _IMGREADERASCII_H_
21 #define _IMGREADERASCII_H_
22 
#include <cassert>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "base/Optionpk.h"
#include <armadillo>
28 
//--------------------------------------------------------------------------
/**
 * Reader for column based ASCII files.
 *
 * Fields on a line are either separated by a single printable character
 * (csv style) or by whitespace, depending on the field separator (see
 * setFieldSeparator and readData). Reading can be restricted to a range of
 * rows, text following a comment character is ignored and values can be
 * filtered on a value range (m_min, m_max).
 *
 * The constructors, destructor, open, close, nrOfCol and nrOfRow are only
 * declared here; their definitions are not part of this header.
 */
class FileReaderAscii
{
public:
  FileReaderAscii(void);
  FileReaderAscii(const std::string& filename);
  FileReaderAscii(const std::string& filename, const char& fieldseparator);
  ~FileReaderAscii(void);
  /// Clear the state flags of the input stream and seek back to the beginning of the file
  void reset(){m_ifstream.clear();m_ifstream.seekg(0,std::ios::beg);}
  void open(const std::string& filename);
  void close(void);
  /// Set the field separator; readData splits on it if it is a printable non-blank character ('!'..'~') and falls back to whitespace delimited fields otherwise
  void setFieldSeparator(const char& fieldseparator){m_fs=fieldseparator;}
  /// Set the first row (counted from 0) that is read by readData
  void setMinRow(int minRow){m_minRow=minRow;}
  /// Set the last row (counted from 0) that is read by readData; only applied if larger than the minimum row
  void setMaxRow(int maxRow){m_maxRow=maxRow;}
  /// Set the comment character; readData ignores this character and everything following it on a line
  void setComment(char comment){m_comment=comment;}
  // NOTE(review): presumably return the number of columns/rows in the file -- confirm in the implementation file
  unsigned int nrOfCol(bool checkCols=false, bool verbose=false);
  unsigned int nrOfRow(bool checkCols=false, bool verbose=false);
  /// Read the selected columns into a vector of vectors (defined below in this header)
  template<class T> unsigned int readData(std::vector<std::vector<T> > &dataVector, const std::vector<int> &cols, double scale=1.0, double offset=0.0, bool transpose=false, bool verbose=false);
  /// Read a single column into a vector (defined below in this header)
  template<class T> unsigned int readData(std::vector<T> &dataVector, int col, double scale=1.0, double offset=0, bool verbose=false);

protected:
  std::string m_filename;   ///< name of the file, printed by readData in verbose mode
  std::ifstream m_ifstream; ///< input stream the data are read from
  char m_fs;                ///< field separator
  char m_comment;           ///< comment character
  double m_min;             ///< lower bound of the value filter in readData
  double m_max;             ///< upper bound of the value filter in readData (no filtering if m_max<=m_min)
  int m_minRow;             ///< first row (counted from 0) read by readData
  int m_maxRow;             ///< last row read by readData (no upper limit if m_maxRow<=m_minRow)
};
59 
60 template<class T> unsigned int FileReaderAscii::readData(std::vector<T> &dataVector, int col, double scale, double offset, bool verbose){
61  reset();
62  dataVector.clear();
63  int nrow=0;
64  bool withinRange=true;
65  if(m_fs>' '&&m_fs<='~'){//field separator is a regular character (minimum ASCII code is space, maximum ASCII code is tilde)
66  if(verbose)
67  std::cout << "reading csv file " << m_filename << std::endl;
68  std::string csvRecord;
69  while(getline(m_ifstream,csvRecord)){//read a line
70  withinRange=true;
71  if(nrow<m_minRow)
72  withinRange=false;
73  if(m_maxRow>m_minRow)
74  if(nrow>m_maxRow)
75  withinRange=false;
76  if(withinRange){
77  std::istringstream csvstream(csvRecord);
78  std::string item;
79  int ncol=0;
80  bool isComment=false;
81  while(getline(csvstream,item,m_fs)){//read a column
82  if(verbose)
83  std::cout << item << " ";
84  size_t pos=item.find(m_comment);
85  if(pos!=std::string::npos){
86  isComment=true;
87  if(pos>0)
88  item=item.substr(0,pos-1);
89  else
90  break;
91  if(verbose)
92  std::cout << "comment found, string is " << item << std::endl;
93  }
94  if(ncol==col){
95  T value=scale*string2type<T>(item)+offset;
96  if((value>=m_min&&value<=m_max)||m_max<=m_min)
97  dataVector.push_back(value);
98  }
99  ++ncol;
100  if(isComment)
101  break;
102  }
103  if(verbose)
104  std::cout << std::endl;
105  if(dataVector.size()&&ncol<=col){
106  std::ostringstream ess;
107  ess << "Error: different number of cols found in line " << nrow << " (" << ncol << ")" << std::endl;
108  throw(ess.str());
109  }
110  }
111  ++nrow;
112  }
113  assert(dataVector.size());
114  }
115  else{//space or tab delimited fields
116  if(verbose)
117  std::cout << "space or tab delimited fields" << std::endl;
118  std::string spaceRecord;
119  while(!getline(m_ifstream, spaceRecord).eof()){
120  withinRange=true;
121  if(nrow<m_minRow)
122  withinRange=false;
123  if(m_maxRow>m_minRow)
124  if(nrow>m_maxRow)
125  withinRange=false;
126  if(withinRange){
127  if(verbose>1)
128  std::cout << spaceRecord << std::endl;
129  std::istringstream lineStream(spaceRecord);
130  std::string item;
131  int ncol=0;
132  bool isComment=false;
133  while(lineStream >> item){
134  if(verbose)
135  std::cout << item << " ";
136  // std::istringstream itemStream(item);
137  size_t pos=item.find(m_comment);
138  if(pos!=std::string::npos){
139  isComment=true;
140  if(pos>0)
141  item=item.substr(0,pos-1);
142  else
143  break;
144  if(verbose)
145  std::cout << "comment found, string is " << item << std::endl;
146  }
147  T value=scale*string2type<T>(item)+offset;
148  // T value=string2type<T>(item);
149  if(ncol==col){
150  if((value>=m_min&&value<=m_max)||m_max<=m_min)
151  dataVector.push_back(value);
152  }
153  ++ncol;
154  if(isComment)
155  break;
156  }
157  if(verbose>1)
158  std::cout << std::endl;
159  if(verbose)
160  std::cout << "number of columns: " << ncol << std::endl;
161  if(dataVector.size()&&ncol<=col){
162  std::ostringstream ess;
163  ess << "Error: different number of cols found in line " << nrow << " (" << ncol << ")" << std::endl;
164  throw(ess.str());
165  }
166  }
167  ++nrow;
168  }
169  }
170  return dataVector.size();
171 }
172 
173 template<class T> unsigned int FileReaderAscii::readData(std::vector<std::vector<T> > &dataVector, const std::vector<int> &cols, double scale, double offset, bool transpose, bool verbose){
174  reset();
175  dataVector.clear();
176  if(!transpose)
177  dataVector.resize(cols.size());
178  int nrow=0;
179  bool withinRange=true;
180  if(m_fs>' '&&m_fs<='~'){//field separator is a regular character (minimum ASCII code is space, maximum ASCII code is tilde)
181  if(verbose)
182  std::cout << "reading csv file " << m_filename << std::endl;
183  std::string csvRecord;
184  while(getline(m_ifstream,csvRecord)){//read a line
185  std::vector<T> sampleVector;
186  withinRange=true;
187  if(nrow<m_minRow)
188  withinRange=false;
189  if(m_maxRow>m_minRow)
190  if(nrow>m_maxRow)
191  withinRange=false;
192  if(withinRange){
193  std::istringstream csvstream(csvRecord);
194  std::string item;
195  int ncol=0;
196  bool isComment=false;
197  while(getline(csvstream,item,m_fs)){//read a column
198  if(verbose)
199  std::cout << item << " ";
200  size_t pos=item.find(m_comment);
201  if(pos!=std::string::npos){
202  isComment=true;
203  if(pos>0)
204  item=item.substr(0,pos-1);
205  else
206  break;
207  if(verbose)
208  std::cout << "comment found, string is " << item << std::endl;
209  }
210  for(int icol=0;icol<cols.size();++icol){
211  if(ncol==cols[icol]){
212  T value=scale*string2type<T>(item)+offset;
213  // T value=string2type<T>(item);
214  if((value>=m_min&&value<=m_max)||m_max<=m_min){
215  if(transpose)
216  sampleVector.push_back(value);
217  else
218  dataVector[icol].push_back(value);
219  }
220  }
221  }
222  ++ncol;
223  if(isComment)
224  break;
225  }
226  if(verbose)
227  std::cout << std::endl;
228  // if(dataVector.back().size())
229  // assert(ncol>=cols[0]);
230  }
231  if(sampleVector.size()&&transpose)
232  dataVector.push_back(sampleVector);
233  ++nrow;
234  }
235  assert(dataVector.size());
236  }
237  else{//space or tab delimited fields
238  if(verbose)
239  std::cout << "space or tab delimited fields" << std::endl;
240  std::string spaceRecord;
241  while(!getline(m_ifstream, spaceRecord).eof()){
242  std::vector<T> sampleVector;
243  withinRange=true;
244  if(nrow<m_minRow)
245  withinRange=false;
246  if(m_maxRow>m_minRow)
247  if(nrow>m_maxRow)
248  withinRange=false;
249  if(withinRange){
250  if(verbose>1)
251  std::cout << spaceRecord << std::endl;
252  std::istringstream lineStream(spaceRecord);
253  std::string item;
254  int ncol=0;
255  bool isComment=false;
256  while(lineStream >> item){
257  if(verbose)
258  std::cout << item << " ";
259  // std::istringstream itemStream(item);
260  size_t pos=item.find(m_comment);
261  if(pos!=std::string::npos){
262  isComment=true;
263  if(pos>0)
264  item=item.substr(0,pos-1);
265  else
266  break;
267  if(verbose)
268  std::cout << "comment found, string is " << item << std::endl;
269  }
270  T value=scale*string2type<T>(item)+offset;
271  // T value=string2type<T>(item);
272  for(int icol=0;icol<cols.size();++icol){
273  if(ncol==cols[icol]){
274  if((value>=m_min&&value<=m_max)||m_max<=m_min){
275  if(transpose)
276  sampleVector.push_back(value);
277  else
278  dataVector[icol].push_back(value);
279  }
280  }
281  }
282  ++ncol;
283  if(isComment)
284  break;
285  }
286  if(verbose>1)
287  std::cout << std::endl;
288  if(verbose)
289  std::cout << "number of columns: " << ncol << std::endl;
290  // if(dataVector.back().size())
291  // assert(ncol>=cols[0]);
292  }
293  if(sampleVector.size()&&transpose)
294  dataVector.push_back(sampleVector);
295  ++nrow;
296  }
297  }
298  return dataVector.size();
299 }
300 
301 #endif // _IMGREADERASCII_H_